diff --git a/.github/ISSUE_TEMPLATE/rustdoc.md b/.github/ISSUE_TEMPLATE/rustdoc.md new file mode 100644 index 000000000000..130d5f67102a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/rustdoc.md @@ -0,0 +1,54 @@ +--- +name: Problem with rustdoc +about: Report an issue with how docs get generated. +labels: C-bug, T-rustdoc +--- + + +# Code + + +```rust + +``` + +# Reproduction Steps + + +# Expected Outcome + + +# Actual Output + +```console + +``` + + +# Version + + +# Additional Details + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 841bc39bf1e6..674b6ccf34f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,10 +11,6 @@ name: CI on: push: branches: - # CI on master only serves for caching citool builds for the `calculate_matrix` job. - # In order to use GHA cache on PR CI (and auto/try) jobs, we need to write to it - # from the default branch. - - master - auto - try - try-perf @@ -57,13 +53,6 @@ jobs: steps: - name: Checkout the source code uses: actions/checkout@v4 - # Cache citool to make its build faster, as it's in the critical path. - # The rust-cache doesn't bleed into the main `job`, so it should not affect any other - # Rust compilation. - - name: Cache citool - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 - with: - workspaces: src/ci/citool - name: Test citool # Only test citool on the auto branch, to reduce latency of the calculate matrix job # on PR/try builds. 
diff --git a/.gitmodules b/.gitmodules index fbf2f59b38da..3426b1bc8dbd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -18,10 +18,6 @@ path = src/doc/rust-by-example url = https://github.com/rust-lang/rust-by-example.git shallow = true -[submodule "library/stdarch"] - path = library/stdarch - url = https://github.com/rust-lang/stdarch.git - shallow = true [submodule "src/doc/edition-guide"] path = src/doc/edition-guide url = https://github.com/rust-lang/edition-guide.git diff --git a/.mailmap b/.mailmap index b9fb7be0403c..2a53cbf9effc 100644 --- a/.mailmap +++ b/.mailmap @@ -162,8 +162,10 @@ David Carlier David Klein David Manescu David Ross -David Wood +David Wood +David Wood David Wood +David Wood Deadbeef Deadbeef dependabot[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> @@ -698,3 +700,4 @@ Zach Pomerantz Zack Corr Zack Slayton Zbigniew Siciarz Zbigniew Siciarz +y21 <30553356+y21@users.noreply.github.com> diff --git a/Cargo.lock b/Cargo.lock index 1a619096d34c..e1cf17e2c01c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -537,7 +537,7 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "clippy" -version = "0.1.89" +version = "0.1.90" dependencies = [ "anstream", "askama", @@ -547,6 +547,7 @@ dependencies = [ "clippy_lints_internal", "clippy_utils", "color-print", + "declare_clippy_lint", "filetime", "futures", "if_chain", @@ -569,7 +570,7 @@ dependencies = [ [[package]] name = "clippy_config" -version = "0.1.89" +version = "0.1.90" dependencies = [ "clippy_utils", "itertools", @@ -592,12 +593,13 @@ dependencies = [ [[package]] name = "clippy_lints" -version = "0.1.89" +version = "0.1.90" dependencies = [ "arrayvec", "cargo_metadata 0.18.1", "clippy_config", "clippy_utils", + "declare_clippy_lint", "itertools", "quine-mc_cluskey", "regex-syntax 0.8.5", @@ -622,7 +624,7 @@ dependencies = [ [[package]] name = "clippy_utils" -version = "0.1.89" +version = "0.1.90" dependencies = [ "arrayvec", "itertools", 
@@ -931,6 +933,10 @@ dependencies = [ "winapi", ] +[[package]] +name = "declare_clippy_lint" +version = "0.1.90" + [[package]] name = "derive-where" version = "1.4.0" @@ -2983,6 +2989,15 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core 0.9.3", +] + [[package]] name = "rand_xoshiro" version = "0.7.0" @@ -3166,9 +3181,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustc-literal-escaper" -version = "0.0.2" +version = "0.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" +checksum = "ab03008eb631b703dd16978282ae36c73282e7922fe101a4bd072a40ecea7b8b" [[package]] name = "rustc-main" @@ -3182,16 +3197,6 @@ dependencies = [ "tikv-jemalloc-sys", ] -[[package]] -name = "rustc-rayon-core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f42932dcd3bcbe484b38a3ccf79b7906fac41c02d408b5b1bac26da3416efdb" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "rustc-semver" version = "1.1.0" @@ -3283,6 +3288,7 @@ dependencies = [ "rustc_abi", "rustc_ast", "rustc_ast_pretty", + "rustc_attr_data_structures", "rustc_attr_parsing", "rustc_data_structures", "rustc_errors", @@ -3317,6 +3323,7 @@ dependencies = [ "rustc_parse", "rustc_session", "rustc_span", + "rustc_target", "thin-vec", ] @@ -3558,7 +3565,6 @@ dependencies = [ "parking_lot", "portable-atomic", "rustc-hash 2.1.1", - "rustc-rayon-core", "rustc-stable-hash", "rustc_arena", "rustc_graphviz", @@ -3566,6 +3572,7 @@ dependencies = [ "rustc_index", "rustc_macros", "rustc_serialize", + "rustc_thread_pool", "smallvec", "stacker", "tempfile", @@ -3656,7 +3663,6 @@ dependencies = [ 
"rustc_macros", "rustc_serialize", "rustc_span", - "smallvec", "tracing", "unic-langid", ] @@ -3724,6 +3730,7 @@ dependencies = [ name = "rustc_feature" version = "0.0.0" dependencies = [ + "rustc_attr_data_structures", "rustc_data_structures", "rustc_span", "serde", @@ -3913,7 +3920,6 @@ dependencies = [ name = "rustc_interface" version = "0.0.0" dependencies = [ - "rustc-rayon-core", "rustc_abi", "rustc_ast", "rustc_ast_lowering", @@ -3952,6 +3958,7 @@ dependencies = [ "rustc_span", "rustc_symbol_mangling", "rustc_target", + "rustc_thread_pool", "rustc_trait_selection", "rustc_traits", "rustc_ty_utils", @@ -4079,7 +4086,6 @@ dependencies = [ "either", "gsgdt", "polonius-engine", - "rustc-rayon-core", "rustc_abi", "rustc_apfloat", "rustc_arena", @@ -4103,6 +4109,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", + "rustc_thread_pool", "rustc_type_ir", "smallvec", "thin-vec", @@ -4118,6 +4125,7 @@ dependencies = [ "rustc_apfloat", "rustc_arena", "rustc_ast", + "rustc_attr_data_structures", "rustc_data_structures", "rustc_errors", "rustc_fluent_macro", @@ -4349,7 +4357,6 @@ version = "0.0.0" dependencies = [ "hashbrown", "parking_lot", - "rustc-rayon-core", "rustc_abi", "rustc_ast", "rustc_attr_data_structures", @@ -4364,6 +4371,7 @@ dependencies = [ "rustc_serialize", "rustc_session", "rustc_span", + "rustc_thread_pool", "smallvec", "tracing", ] @@ -4447,7 +4455,6 @@ dependencies = [ "rustc_serialize", "rustc_span", "rustc_target", - "smallvec", "termize", "tracing", "windows", @@ -4525,6 +4532,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "rustc_thread_pool" +version = "0.0.0" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", + "libc", + "rand 0.9.1", + "rand_xorshift", + "scoped-tls", +] + [[package]] name = "rustc_tools_util" version = "0.4.2" @@ -5787,9 +5806,9 @@ dependencies = [ [[package]] name = "wasi-preview1-component-adapter-provider" -version = "31.0.0" +version = "34.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fabda09a0d89ffd1615b297b4a5d4b4d99df9598aeb24685837e63019e927b" +checksum = "aafa1e6af9a954a4bcf6ef420c33355d0ce84ddc6afbcba7bb6f05126f9120ae" [[package]] name = "wasm-bindgen" @@ -5851,9 +5870,9 @@ dependencies = [ [[package]] name = "wasm-component-ld" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a60a07a994a3538b57d8c5f8caba19f4793fb4c7156276e5e90e90acbb829e20" +checksum = "b015ec93764aa5517bc8b839efa9941b90be8ce680b1134f8224644ba1e48e3f" dependencies = [ "anyhow", "clap", @@ -5861,7 +5880,7 @@ dependencies = [ "libc", "tempfile", "wasi-preview1-component-adapter-provider", - "wasmparser 0.229.0", + "wasmparser 0.234.0", "wat", "windows-sys 0.59.0", "winsplit", @@ -5888,12 +5907,12 @@ dependencies = [ [[package]] name = "wasm-encoder" -version = "0.229.0" +version = "0.234.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ba1d491ecacb085a2552025c10a675a6fddcbd03b1fc9b36c536010ce265d2" +checksum = "170a0157eef517a179f2d20ed7c68df9c3f7f6c1c047782d488bf5a464174684" dependencies = [ "leb128fmt", - "wasmparser 0.229.0", + "wasmparser 0.234.0", ] [[package]] @@ -5908,14 +5927,14 @@ dependencies = [ [[package]] name = "wasm-metadata" -version = "0.229.0" +version = "0.234.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78fdb7d29a79191ab363dc90c1ddd3a1e880ffd5348d92d48482393a9e6c5f4d" +checksum = "a42fe3f5cbfb56fc65311ef827930d06189160038e81db62188f66b4bf468e3a" dependencies = [ "anyhow", "indexmap", - "wasm-encoder 0.229.0", - "wasmparser 0.229.0", + "wasm-encoder 0.234.0", + "wasmparser 0.234.0", ] [[package]] @@ -5930,9 +5949,9 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.229.0" +version = "0.234.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc3b1f053f5d41aa55640a1fa9b6d1b8a9e4418d118ce308d20e24ff3575a8c" 
+checksum = "be22e5a8f600afce671dd53c8d2dd26b4b7aa810fd18ae27dfc49737f3e02fc5" dependencies = [ "bitflags", "hashbrown", @@ -5941,15 +5960,6 @@ dependencies = [ "serde", ] -[[package]] -name = "wasmparser" -version = "0.234.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be22e5a8f600afce671dd53c8d2dd26b4b7aa810fd18ae27dfc49737f3e02fc5" -dependencies = [ - "bitflags", -] - [[package]] name = "wasmparser" version = "0.235.0" @@ -6402,9 +6412,9 @@ dependencies = [ [[package]] name = "wit-component" -version = "0.229.0" +version = "0.234.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f550067740e223bfe6c4878998e81cdbe2529dd9a793dc49248dd6613394e8b" +checksum = "5a8888169acf4c6c4db535beb405b570eedac13215d6821ca9bd03190f7f8b8c" dependencies = [ "anyhow", "bitflags", @@ -6413,17 +6423,17 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "wasm-encoder 0.229.0", + "wasm-encoder 0.234.0", "wasm-metadata", - "wasmparser 0.229.0", + "wasmparser 0.234.0", "wit-parser", ] [[package]] name = "wit-parser" -version = "0.229.0" +version = "0.234.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459c6ba62bf511d6b5f2a845a2a736822e38059c1cfa0b644b467bbbfae4efa6" +checksum = "465492df47d8dcc015a3b7f241aed8ea03688fee7c5e04162285c5b1a3539c8b" dependencies = [ "anyhow", "id-arena", @@ -6434,7 +6444,7 @@ dependencies = [ "serde_derive", "serde_json", "unicode-xid", - "wasmparser 0.229.0", + "wasmparser 0.234.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c4d2a06f4cb1..6d3425f4115a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ exclude = [ "obj", ] -[profile.release.package.rustc-rayon-core] +[profile.release.package.rustc_thread_pool] # The rustc fork of Rayon has deadlock detection code which intermittently # causes overflows in the CI (see https://github.com/rust-lang/rust/issues/90227) # so we turn overflow checks off for now. 
@@ -89,3 +89,8 @@ codegen-units = 1 # FIXME: LTO cannot be enabled for binaries in a workspace # # lto = true + +# If you want to use a crate with local modifications, you can set a path or git dependency here. +# For git dependencies, also add your source to ALLOWED_SOURCES in src/tools/tidy/src/extdeps.rs. +#[patch.crates-io] + diff --git a/RELEASES.md b/RELEASES.md index 3c72cb1de0a3..1ae221774dc9 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,3 +1,115 @@ +Version 1.88.0 (2025-06-26) +========================== + + + +Language +-------- +- [Stabilize `#![feature(let_chains)]` in the 2024 edition.](https://github.com/rust-lang/rust/pull/132833) + This feature allows `&&`-chaining `let` statements inside `if` and `while`, allowing intermixture with boolean expressions. The patterns inside the `let` sub-expressions can be irrefutable or refutable. +- [Stabilize `#![feature(naked_functions)]`.](https://github.com/rust-lang/rust/pull/134213) + Naked functions allow writing functions with no compiler-generated epilogue and prologue, allowing full control over the generated assembly for a particular function. +- [Stabilize `#![feature(cfg_boolean_literals)]`.](https://github.com/rust-lang/rust/pull/138632) + This allows using boolean literals as `cfg` predicates, e.g. `#[cfg(true)]` and `#[cfg(false)]`. +- [Fully de-stabilize the `#[bench]` attribute](https://github.com/rust-lang/rust/pull/134273). Usage of `#[bench]` without `#![feature(custom_test_frameworks)]` already triggered a deny-by-default future-incompatibility lint since Rust 1.77, but will now become a hard error. +- [Add warn-by-default `dangerous_implicit_autorefs` lint against implicit autoref of raw pointer dereference.](https://github.com/rust-lang/rust/pull/123239) + The lint [will be bumped to deny-by-default](https://github.com/rust-lang/rust/pull/141661) in the next version of Rust. 
+- [Add `invalid_null_arguments` lint to prevent invalid usage of null pointers.](https://github.com/rust-lang/rust/pull/119220) + This lint is uplifted from `clippy::invalid_null_ptr_usage`. +- [Change trait impl candidate preference for builtin impls and trivial where-clauses.](https://github.com/rust-lang/rust/pull/138176) +- [Check types of generic const parameter defaults](https://github.com/rust-lang/rust/pull/139646) + + + +Compiler +-------- +- [Stabilize `-Cdwarf-version` for selecting the version of DWARF debug information to generate.](https://github.com/rust-lang/rust/pull/136926) + + + + +Platform Support +---------------- +- [Demote `i686-pc-windows-gnu` to Tier 2.](https://blog.rust-lang.org/2025/05/26/demoting-i686-pc-windows-gnu/) + + +Refer to Rust's [platform support page][platform-support-doc] +for more information on Rust's tiered platform support. + +[platform-support-doc]: https://doc.rust-lang.org/rustc/platform-support.html + + + +Libraries +--------- +- [Remove backticks from `#[should_panic]` test failure message.](https://github.com/rust-lang/rust/pull/136160) +- [Guarantee that `[T; N]::from_fn` is generated in order of increasing indices.](https://github.com/rust-lang/rust/pull/139099), for those passing it a stateful closure. 
+- [The libtest flag `--nocapture` is deprecated in favor of the more consistent `--no-capture` flag.](https://github.com/rust-lang/rust/pull/139224) +- [Guarantee that `{float}::NAN` is a quiet NaN.](https://github.com/rust-lang/rust/pull/139483) + + + + +Stabilized APIs +--------------- + +- [`Cell::update`](https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.update) +- [`impl Default for *const T`](https://doc.rust-lang.org/stable/std/primitive.pointer.html#impl-Default-for-*const+T) +- [`impl Default for *mut T`](https://doc.rust-lang.org/stable/std/primitive.pointer.html#impl-Default-for-*mut+T) +- [`HashMap::extract_if`](https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html#method.extract_if) +- [`HashSet::extract_if`](https://doc.rust-lang.org/stable/std/collections/struct.HashSet.html#method.extract_if) +- [`hint::select_unpredictable`](https://doc.rust-lang.org/stable/std/hint/fn.select_unpredictable.html) +- [`proc_macro::Span::line`](https://doc.rust-lang.org/stable/proc_macro/struct.Span.html#method.line) +- [`proc_macro::Span::column`](https://doc.rust-lang.org/stable/proc_macro/struct.Span.html#method.column) +- [`proc_macro::Span::start`](https://doc.rust-lang.org/stable/proc_macro/struct.Span.html#method.start) +- [`proc_macro::Span::end`](https://doc.rust-lang.org/stable/proc_macro/struct.Span.html#method.end) +- [`proc_macro::Span::file`](https://doc.rust-lang.org/stable/proc_macro/struct.Span.html#method.file) +- [`proc_macro::Span::local_file`](https://doc.rust-lang.org/stable/proc_macro/struct.Span.html#method.local_file) +- [`<[T]>::as_chunks`](https://doc.rust-lang.org/stable/std/primitive.slice.html#method.as_chunks) +- [`<[T]>::as_chunks_mut`](https://doc.rust-lang.org/stable/std/primitive.slice.html#method.as_chunks_mut) +- [`<[T]>::as_chunks_unchecked`](https://doc.rust-lang.org/stable/std/primitive.slice.html#method.as_chunks_unchecked) +- 
[`<[T]>::as_chunks_unchecked_mut`](https://doc.rust-lang.org/stable/std/primitive.slice.html#method.as_chunks_unchecked_mut) +- [`<[T]>::as_rchunks`](https://doc.rust-lang.org/stable/std/primitive.slice.html#method.as_rchunks) +- [`<[T]>::as_rchunks_mut`](https://doc.rust-lang.org/stable/std/primitive.slice.html#method.as_rchunks_mut) +- [`mod ffi::c_str`](https://doc.rust-lang.org/stable/std/ffi/c_str/index.html) + +These previously stable APIs are now stable in const contexts: + +- [`NonNull::replace`](https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.replace) +- [`<*mut T>::replace`](https://doc.rust-lang.org/stable/std/primitive.pointer.html#method.replace) +- [`std::ptr::swap_nonoverlapping`](https://doc.rust-lang.org/stable/std/ptr/fn.swap_nonoverlapping.html) +- [`Cell::replace`](https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.replace) +- [`Cell::get`](https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.get) +- [`Cell::get_mut`](https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.get_mut) +- [`Cell::from_mut`](https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.from_mut) +- [`Cell::as_slice_of_cells`](https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.as_slice_of_cells) + + + + +Cargo +----- +- [Stabilize automatic garbage collection.](https://github.com/rust-lang/cargo/pull/14287/) +- [use `zlib-rs` for gzip compression in rust code](https://github.com/rust-lang/cargo/pull/15417/) + + + +Rustdoc +----- +- [Doctests can be ignored based on target names using `ignore-*` attributes.](https://github.com/rust-lang/rust/pull/137096) +- [Stabilize the `--test-runtool` and `--test-runtool-arg` CLI options to specify a program (like qemu) and its arguments to run a doctest.](https://github.com/rust-lang/rust/pull/137096) + + + +Compatibility Notes +------------------- +- [Finish changing the internal representation of pasted 
tokens](https://github.com/rust-lang/rust/pull/124141). Certain invalid declarative macros that were previously accepted in obscure circumstances are now correctly rejected by the compiler. Use of a `tt` fragment specifier can often fix these macros. +- [Fully de-stabilize the `#[bench]` attribute](https://github.com/rust-lang/rust/pull/134273). Usage of `#[bench]` without `#![feature(custom_test_frameworks)]` already triggered a deny-by-default future-incompatibility lint since Rust 1.77, but will now become a hard error. +- [Fix borrow checking some always-true patterns.](https://github.com/rust-lang/rust/pull/139042) + The borrow checker was overly permissive in some cases, allowing programs that shouldn't have compiled. +- [Update the minimum external LLVM to 19.](https://github.com/rust-lang/rust/pull/139275) +- [Make it a hard error to use a vector type with a non-Rust ABI without enabling the required target feature.](https://github.com/rust-lang/rust/pull/139309) + Version 1.87.0 (2025-05-15) ========================== diff --git a/bootstrap.example.toml b/bootstrap.example.toml index 19cf360b0fb8..cc1ea796a028 100644 --- a/bootstrap.example.toml +++ b/bootstrap.example.toml @@ -8,6 +8,14 @@ # `bootstrap.toml` in the current directory of a build for build configuration, but # a custom configuration file can also be specified with `--config` to the build # system. +# +# Note that the following are equivalent, for more details see . +# +# build.verbose = 1 +# +# [build] +# verbose = 1 + # ============================================================================= # Global Settings # ============================================================================= # Tweaking how LLVM is compiled # ============================================================================= -[llvm] # Whether to use Rust CI built LLVM instead of locally building it. 
# @@ -62,50 +69,50 @@ # # Note that many of the LLVM options are not currently supported for # downloading. Currently only the "assertions" option can be toggled. -#download-ci-llvm = true +#llvm.download-ci-llvm = true # Indicates whether the LLVM build is a Release or Debug build -#optimize = true +#llvm.optimize = true # Indicates whether LLVM should be built with ThinLTO. Note that this will # only succeed if you use clang, lld, llvm-ar, and llvm-ranlib in your C/C++ # toolchain (see the `cc`, `cxx`, `linker`, `ar`, and `ranlib` options below). # More info at: https://clang.llvm.org/docs/ThinLTO.html#clang-bootstrap -#thin-lto = false +#llvm.thin-lto = false # Indicates whether an LLVM Release build should include debug info -#release-debuginfo = false +#llvm.release-debuginfo = false # Indicates whether the LLVM assertions are enabled or not # NOTE: When assertions are disabled, bugs in the integration between rustc and LLVM can lead to # unsoundness (segfaults, etc.) in the rustc process itself, not just in the generated code. -#assertions = false +#llvm.assertions = false # Indicates whether the LLVM testsuite is enabled in the build or not. Does # not execute the tests as part of the build as part of x.py build et al, # just makes it possible to do `ninja check-llvm` in the staged LLVM build # directory when doing LLVM development as part of Rust development. -#tests = false +#llvm.tests = false # Indicates whether the LLVM plugin is enabled or not -#plugins = false +#llvm.plugins = false # Whether to build Enzyme as AutoDiff backend. -#enzyme = false +#llvm.enzyme = false # Whether to build LLVM with support for it's gpu offload runtime. -#offload = false +#llvm.offload = false # When true, link libstdc++ statically into the rustc_llvm. # This is useful if you don't want to use the dynamic version of that # library provided by LLVM. -#static-libstdcpp = false +#llvm.static-libstdcpp = false # Enable LLVM to use zstd for compression. 
-#libzstd = false +#llvm.libzstd = false # Whether to use Ninja to build LLVM. This runs much faster than make. -#ninja = true +#llvm.ninja = true # LLVM targets to build support for. # Note: this is NOT related to Rust compilation targets. However, as Rust is @@ -113,13 +120,13 @@ # the resulting rustc being unable to compile for the disabled architectures. # # To add support for new targets, see https://rustc-dev-guide.rust-lang.org/building/new-target.html. -#targets = "AArch64;AMDGPU;ARM;BPF;Hexagon;LoongArch;MSP430;Mips;NVPTX;PowerPC;RISCV;Sparc;SystemZ;WebAssembly;X86" +#llvm.targets = "AArch64;AMDGPU;ARM;BPF;Hexagon;LoongArch;MSP430;Mips;NVPTX;PowerPC;RISCV;Sparc;SystemZ;WebAssembly;X86" # LLVM experimental targets to build support for. These targets are specified in # the same format as above, but since these targets are experimental, they are # not built by default and the experimental Rust compilation targets that depend # on them will not work unless the user opts in to building them. -#experimental-targets = "AVR;M68k;CSKY" +#llvm.experimental-targets = "AVR;M68k;CSKY" # Cap the number of parallel linker invocations when compiling LLVM. # This can be useful when building LLVM with debug info, which significantly @@ -127,86 +134,84 @@ # each linker process. # If set to 0, linker invocations are treated like any other job and # controlled by bootstrap's -j parameter. -#link-jobs = 0 +#llvm.link-jobs = 0 # Whether to build LLVM as a dynamically linked library (as opposed to statically linked). # Under the hood, this passes `--shared` to llvm-config. # NOTE: To avoid performing LTO multiple times, we suggest setting this to `true` when `thin-lto` is enabled. -#link-shared = llvm.thin-lto +#llvm.link-shared = llvm.thin-lto # When building llvm, this configures what is being appended to the version. # To use LLVM version as is, provide an empty string. 
-#version-suffix = if rust.channel == "dev" { "-rust-dev" } else { "-rust-$version-$channel" } +#llvm.version-suffix = if rust.channel == "dev" { "-rust-dev" } else { "-rust-$version-$channel" } # On MSVC you can compile LLVM with clang-cl, but the test suite doesn't pass # with clang-cl, so this is special in that it only compiles LLVM with clang-cl. # Note that this takes a /path/to/clang-cl, not a boolean. -#clang-cl = cc +#llvm.clang-cl = cc # Pass extra compiler and linker flags to the LLVM CMake build. -#cflags = "" -#cxxflags = "" -#ldflags = "" +#llvm.cflags = "" +#llvm.cxxflags = "" +#llvm.ldflags = "" # Use libc++ when building LLVM instead of libstdc++. This is the default on # platforms already use libc++ as the default C++ library, but this option # allows you to use libc++ even on platforms when it's not. You need to ensure # that your host compiler ships with libc++. -#use-libcxx = false +#llvm.use-libcxx = false # The value specified here will be passed as `-DLLVM_USE_LINKER` to CMake. -#use-linker = (path) +#llvm.use-linker = (path) # Whether or not to specify `-DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=YES` -#allow-old-toolchain = false +#llvm.allow-old-toolchain = false # Whether to include the Polly optimizer. -#polly = false +#llvm.polly = false # Whether to build the clang compiler. -#clang = false +#llvm.clang = false # Whether to enable llvm compilation warnings. -#enable-warnings = false +#llvm.enable-warnings = false # Custom CMake defines to set when building LLVM. -#build-config = {} +#llvm.build-config = {} # ============================================================================= # Tweaking how GCC is compiled # ============================================================================= -[gcc] # Download GCC from CI instead of building it locally. # Note that this will attempt to download GCC even if there are local # modifications to the `src/gcc` submodule. 
# Currently, this is only supported for the `x86_64-unknown-linux-gnu` target. -#download-ci-gcc = false +#gcc.download-ci-gcc = false # ============================================================================= # General build configuration options # ============================================================================= -[build] # The default stage to use for the `check` subcommand -#check-stage = 0 +#build.check-stage = 0 # The default stage to use for the `doc` subcommand -#doc-stage = 0 +#build.doc-stage = 0 # The default stage to use for the `build` subcommand -#build-stage = 1 +#build.build-stage = 1 # The default stage to use for the `test` subcommand -#test-stage = 1 +#build.test-stage = 1 # The default stage to use for the `dist` subcommand -#dist-stage = 2 +#build.dist-stage = 2 # The default stage to use for the `install` subcommand -#install-stage = 2 +#build.install-stage = 2 # The default stage to use for the `bench` subcommand -#bench-stage = 2 +#build.bench-stage = 2 # A descriptive string to be appended to version output (e.g., `rustc --version`), # which is also used in places like debuginfo `DW_AT_producer`. This may be useful for @@ -217,7 +222,7 @@ # upstream Rust you need to set this to "". However, note that if you set this to "" but # are not actually compatible -- for example if you've backported patches that change # behavior -- this may lead to miscompilations or other bugs. -#description = "" +#build.description = "" # Build triple for the pre-compiled snapshot compiler. If `rustc` is set, this must match its host # triple (see `rustc --version --verbose`; cross-compiling the rust build system itself is NOT @@ -229,14 +234,14 @@ # Otherwise, `x.py` will try to infer it from the output of `uname`. # If `uname` is not found in PATH, we assume this is `x86_64-pc-windows-msvc`. # This may be changed in the future. 
-#build = "x86_64-unknown-linux-gnu" (as an example) +#build.build = "x86_64-unknown-linux-gnu" (as an example) # Which triples to produce a compiler toolchain for. Each of these triples will be bootstrapped from # the build triple themselves. In other words, this is the list of triples for which to build a # compiler that can RUN on that triple. # # Defaults to just the `build` triple. -#host = [build.build] (list of triples) +#build.host = [build.build] (list of triples) # Which triples to build libraries (core/alloc/std/test/proc_macro) for. Each of these triples will # be bootstrapped from the build triple themselves. In other words, this is the list of triples for @@ -245,32 +250,32 @@ # Defaults to `host`. If you set this explicitly, you likely want to add all # host triples to this list as well in order for those host toolchains to be # able to compile programs for their native target. -#target = build.host (list of triples) +#build.target = build.host (list of triples) # Use this directory to store build artifacts. Paths are relative to the current directory, not to # the root of the repository. -#build-dir = "build" +#build.build-dir = "build" # Instead of downloading the src/stage0 version of Cargo specified, use # this Cargo binary instead to build all Rust code # If you set this, you likely want to set `rustc` as well. -#cargo = "/path/to/cargo" +#build.cargo = "/path/to/cargo" # Instead of downloading the src/stage0 version of the compiler # specified, use this rustc binary instead as the stage0 snapshot compiler. # If you set this, you likely want to set `cargo` as well. -#rustc = "/path/to/rustc" +#build.rustc = "/path/to/rustc" # Instead of downloading the src/stage0 version of rustfmt specified, # use this rustfmt binary instead as the stage0 snapshot rustfmt. 
-#rustfmt = "/path/to/rustfmt" +#build.rustfmt = "/path/to/rustfmt" # Instead of downloading the src/stage0 version of cargo-clippy specified, # use this cargo-clippy binary instead as the stage0 snapshot cargo-clippy. # # Note that this option should be used with the same toolchain as the `rustc` option above. # Otherwise, clippy is likely to fail due to a toolchain conflict. -#cargo-clippy = "/path/to/cargo-clippy" +#build.cargo-clippy = "/path/to/cargo-clippy" # Whether to build documentation by default. If false, rustdoc and # friends will still be compiled but they will not be used to generate any @@ -278,47 +283,47 @@ # # You can still build documentation when this is disabled by explicitly passing paths, # e.g. `x doc library`. -#docs = true +#build.docs = true # Flag to specify whether CSS, JavaScript, and HTML are minified when # docs are generated. JSON is always minified, because it's enormous, # and generated in already-minified form from the beginning. -#docs-minification = true +#build.docs-minification = true # Flag to specify whether private items should be included in the library docs. -#library-docs-private-items = false +#build.library-docs-private-items = false # Indicate whether to build compiler documentation by default. # You can still build documentation when this is disabled by explicitly passing a path: `x doc compiler`. -#compiler-docs = false +#build.compiler-docs = false # Indicate whether git submodules are managed and updated automatically. -#submodules = true +#build.submodules = true # The path to (or name of) the GDB executable to use. This is only used for # executing the debuginfo test suite. -#gdb = "gdb" +#build.gdb = "gdb" # The path to (or name of) the LLDB executable to use. This is only used for # executing the debuginfo test suite. -#lldb = "lldb" +#build.lldb = "lldb" # The node.js executable to use. Note that this is only used for the emscripten # target when running tests, otherwise this can be omitted. 
-#nodejs = "node" +#build.nodejs = "node" # The npm executable to use. Note that this is used for rustdoc-gui tests, # otherwise this can be omitted. # # Under Windows this should be `npm.cmd` or path to it (verified on nodejs v18.06), or # error will be emitted. -#npm = "npm" +#build.npm = "npm" # Python interpreter to use for various tasks throughout the build, notably # rustdoc tests, the lldb python interpreter, and some dist bits and pieces. # # Defaults to the Python interpreter used to execute x.py. -#python = "python" +#build.python = "python" # The path to the REUSE executable to use. Note that REUSE is not required in # most cases, as our tooling relies on a cached (and shrunk) copy of the @@ -328,17 +333,17 @@ # repository to change, and the cached copy has to be regenerated. # # Defaults to the "reuse" command in the system path. -#reuse = "reuse" +#build.reuse = "reuse" # Force Cargo to check that Cargo.lock describes the precise dependency # set that all the Cargo.toml files create, instead of updating it. -#locked-deps = false +#build.locked-deps = false # Indicate whether the vendored sources are used for Rust dependencies or not. # # Vendoring requires additional setup. We recommend using the pre-generated source tarballs if you # want to use vendoring. See https://forge.rust-lang.org/infra/other-installation-methods.html#source-code. -#vendor = if "is a tarball source" && "vendor" dir exists && ".cargo/config.toml" file exists { true } else { false } +#build.vendor = if "is a tarball source" && "vendor" dir exists && ".cargo/config.toml" file exists { true } else { false } # Typically the build system will build the Rust compiler twice. The second # compiler, however, will simply use its own libraries to link against. If you @@ -346,11 +351,11 @@ # then you can set this option to true. # # This is only useful for verifying that rustc generates reproducible builds. 
-#full-bootstrap = false +#build.full-bootstrap = false # Set the bootstrap/download cache path. It is useful when building rust # repeatedly in a CI environment. -#bootstrap-cache-path = /path/to/shared/cache +#build.bootstrap-cache-path = /path/to/shared/cache # Enable a build of the extended Rust tool set which is not only the compiler # but also tools such as Cargo. This will also produce "combined installers" @@ -359,7 +364,7 @@ # which tools should be built if `extended = true`. # # This is disabled by default. -#extended = false +#build.extended = false # Set of tools to be included in the installation. # @@ -368,7 +373,7 @@ # If `extended = true`, they are all included. # # If any enabled tool fails to build, the installation fails. -#tools = [ +#build.tools = [ # "cargo", # "clippy", # "rustdoc", @@ -388,17 +393,17 @@ # # The default value for the `features` array is `[]`. However, please note that other flags in # `bootstrap.toml` might influence the features enabled for some tools. -#tool.TOOL_NAME.features = [FEATURE1, FEATURE2] +#build.tool.TOOL_NAME.features = [FEATURE1, FEATURE2] # Verbosity level: 0 == not verbose, 1 == verbose, 2 == very verbose, 3 == print environment variables on each rustc invocation -#verbose = 0 +#build.verbose = 0 # Build the sanitizer runtimes -#sanitizers = false +#build.sanitizers = false # Build the profiler runtime (required when compiling with options that depend # on this runtime, such as `-C profile-generate` or `-C instrument-coverage`). -#profiler = false +#build.profiler = false # Use the optimized LLVM C intrinsics for `compiler_builtins`, rather than Rust intrinsics. # Requires the LLVM submodule to be managed by bootstrap (i.e. not external) so that `compiler-rt` @@ -406,102 +411,100 @@ # # Setting this to `false` generates slower code, but removes the requirement for a C toolchain in # order to run `x check`. 
-#optimized-compiler-builtins = if rust.channel == "dev" { false } else { true } +#build.optimized-compiler-builtins = if rust.channel == "dev" { false } else { true } # Indicates whether the native libraries linked into Cargo will be statically # linked or not. -#cargo-native-static = false +#build.cargo-native-static = false # Run the build with low priority, by setting the process group's "nice" value # to +10 on Unix platforms, and by using a "low priority" job object on Windows. -#low-priority = false +#build.low-priority = false # Arguments passed to the `./configure` script, used during distcheck. You # probably won't fill this in but rather it's filled in by the `./configure` # script. Useful for debugging. -#configure-args = [] +#build.configure-args = [] # Indicates that a local rebuild is occurring instead of a full bootstrap, # essentially skipping stage0 as the local compiler is recompiling itself again. # Useful for modifying only the stage2 compiler without having to pass `--keep-stage 0` each time. -#local-rebuild = false +#build.local-rebuild = false # Print out how long each bootstrap step took (mostly intended for CI and # tracking over time) -#print-step-timings = false +#build.print-step-timings = false # Print out resource usage data for each bootstrap step, as defined by the Unix # struct rusage. (Note that this setting is completely unstable: the data it # captures, what platforms it supports, the format of its associated output, and # this setting's very existence, are all subject to change.) -#print-step-rusage = false +#build.print-step-rusage = false # Always patch binaries for usage with Nix toolchains. If `true` then binaries # will be patched unconditionally. If `false` or unset, binaries will be patched # only if the current distribution is NixOS. This option is useful when using # a Nix toolchain on non-NixOS distributions. 
-#patch-binaries-for-nix = false +#build.patch-binaries-for-nix = false # Collect information and statistics about the current build, and write it to # disk. Enabling this has no impact on the resulting build output. The # schema of the file generated by the build metrics feature is unstable, and # this is not intended to be used during local development. -#metrics = false +#build.metrics = false # Specify the location of the Android NDK. Used when targeting Android. -#android-ndk = "/path/to/android-ndk-r26d" +#build.android-ndk = "/path/to/android-ndk-r26d" # Number of parallel jobs to be used for building and testing. If set to `0` or # omitted, it will be automatically determined. This is the `-j`/`--jobs` flag # passed to cargo invocations. -#jobs = 0 +#build.jobs = 0 # What custom diff tool to use for displaying compiletest tests. -#compiletest-diff-tool = +#build.compiletest-diff-tool = # Whether to use the precompiled stage0 libtest with compiletest. -#compiletest-use-stage0-libtest = true +#build.compiletest-use-stage0-libtest = true # Indicates whether ccache is used when building certain artifacts (e.g. LLVM). # Set to `true` to use the first `ccache` in PATH, or set an absolute path to use # a specific version. -#ccache = false +#build.ccache = false # List of paths to exclude from the build and test processes. # For example, exclude = ["tests/ui", "src/tools/tidy"]. -#exclude = [] +#build.exclude = [] # ============================================================================= # General install configuration options # ============================================================================= -[install] # Where to install the generated toolchain. Must be an absolute path. -#prefix = "/usr/local" +#install.prefix = "/usr/local" # Where to install system configuration files. 
# If this is a relative path, it will get installed in `prefix` above -#sysconfdir = "/etc" +#install.sysconfdir = "/etc" # Where to install documentation in `prefix` above -#docdir = "share/doc/rust" +#install.docdir = "share/doc/rust" # Where to install binaries in `prefix` above -#bindir = "bin" +#install.bindir = "bin" # Where to install libraries in `prefix` above -#libdir = "lib" +#install.libdir = "lib" # Where to install man pages in `prefix` above -#mandir = "share/man" +#install.mandir = "share/man" # Where to install data in `prefix` above -#datadir = "share" +#install.datadir = "share" # ============================================================================= # Options for compiling Rust code itself # ============================================================================= -[rust] # Whether or not to optimize when compiling the compiler and standard library, # and what level of optimization to use. @@ -517,7 +520,7 @@ # 3 - All optimizations. # "s" - Optimize for binary size. # "z" - Optimize for binary size, but also turn off loop vectorization. -#optimize = true +#rust.optimize = true # Indicates that the build should be configured for debugging Rust. A # `debug`-enabled compiler and standard library will be somewhat @@ -540,7 +543,7 @@ # "maximally debuggable" environment (notably libstd) takes # hours to build. # -#debug = false +#rust.debug = false # Whether to download the stage 1 and 2 compilers from CI. This is useful if you # are working on tools, doc-comments, or library (you will be able to build the @@ -553,37 +556,37 @@ # # Set this to `true` to always download or `false` to always use the in-tree # compiler. -#download-rustc = false +#rust.download-rustc = false # Number of codegen units to use for each compiler invocation. A value of 0 # means "the number of cores on this machine", and 1+ is passed through to the # compiler. 
# # Uses the rustc defaults: https://doc.rust-lang.org/rustc/codegen-options/index.html#codegen-units -#codegen-units = if incremental { 256 } else { 16 } +#rust.codegen-units = if incremental { 256 } else { 16 } # Sets the number of codegen units to build the standard library with, # regardless of what the codegen-unit setting for the rest of the compiler is. # NOTE: building with anything other than 1 is known to occasionally have bugs. -#codegen-units-std = codegen-units +#rust.codegen-units-std = codegen-units # Whether or not debug assertions are enabled for the compiler and standard library. # These can help find bugs at the cost of a small runtime slowdown. # # Defaults to rust.debug value -#debug-assertions = rust.debug (boolean) +#rust.debug-assertions = rust.debug (boolean) # Whether or not debug assertions are enabled for the standard library. # Overrides the `debug-assertions` option, if defined. # # Defaults to rust.debug-assertions value -#debug-assertions-std = rust.debug-assertions (boolean) +#rust.debug-assertions-std = rust.debug-assertions (boolean) # Whether or not debug assertions are enabled for the tools built by bootstrap. # Overrides the `debug-assertions` option, if defined. # # Defaults to rust.debug-assertions value -#debug-assertions-tools = rust.debug-assertions (boolean) +#rust.debug-assertions-tools = rust.debug-assertions (boolean) # Whether or not to leave debug! and trace! calls in the rust binary. # @@ -591,22 +594,22 @@ # # If you see a message from `tracing` saying "some trace filter directives would enable traces that # are disabled statically" because `max_level_info` is enabled, set this value to `true`. 
-#debug-logging = rust.debug-assertions (boolean) +#rust.debug-logging = rust.debug-assertions (boolean) # Whether or not to build rustc, tools and the libraries with randomized type layout -#randomize-layout = false +#rust.randomize-layout = false # Whether or not overflow checks are enabled for the compiler and standard # library. # # Defaults to rust.debug value -#overflow-checks = rust.debug (boolean) +#rust.overflow-checks = rust.debug (boolean) # Whether or not overflow checks are enabled for the standard library. # Overrides the `overflow-checks` option, if defined. # # Defaults to rust.overflow-checks value -#overflow-checks-std = rust.overflow-checks (boolean) +#rust.overflow-checks-std = rust.overflow-checks (boolean) # Debuginfo level for most of Rust code, corresponds to the `-C debuginfo=N` option of `rustc`. # See https://doc.rust-lang.org/rustc/codegen-options/index.html#debuginfo for available options. @@ -617,20 +620,20 @@ # # Note that debuginfo-level = 2 generates several gigabytes of debuginfo # and will slow down the linking process significantly. -#debuginfo-level = if rust.debug { 1 } else { 0 } +#rust.debuginfo-level = if rust.debug { 1 } else { 0 } # Debuginfo level for the compiler. -#debuginfo-level-rustc = rust.debuginfo-level +#rust.debuginfo-level-rustc = rust.debuginfo-level # Debuginfo level for the standard library. -#debuginfo-level-std = rust.debuginfo-level +#rust.debuginfo-level-std = rust.debuginfo-level # Debuginfo level for the tools. -#debuginfo-level-tools = rust.debuginfo-level +#rust.debuginfo-level-tools = rust.debuginfo-level # Debuginfo level for the test suites run with compiletest. # FIXME(#61117): Some tests fail when this option is enabled. -#debuginfo-level-tests = 0 +#rust.debuginfo-level-tests = 0 # Should rustc and the standard library be built with split debuginfo? Default # is platform dependent. 
@@ -640,13 +643,13 @@ # The value specified here is only used when targeting the `build.build` triple, # and is overridden by `target..split-debuginfo` if specified. # -#split-debuginfo = see target..split-debuginfo +#rust.split-debuginfo = see target..split-debuginfo # Whether or not `panic!`s generate backtraces (RUST_BACKTRACE) -#backtrace = true +#rust.backtrace = true # Whether to always use incremental compilation when building rustc -#incremental = false +#rust.incremental = false # The default linker that will be hard-coded into the generated # compiler for targets that don't specify a default linker explicitly @@ -656,7 +659,7 @@ # setting. # # See https://doc.rust-lang.org/rustc/codegen-options/index.html#linker for more information. -#default-linker = (path) +#rust.default-linker = (path) # The "channel" for the Rust build to produce. The stable/beta channels only # allow using stable features, whereas the nightly and dev channels allow using @@ -665,7 +668,7 @@ # You can set the channel to "auto-detect" to load the channel name from `src/ci/channel`. # # If using tarball sources, default value is "auto-detect", otherwise, it's "dev". -#channel = if "is a tarball source" { "auto-detect" } else { "dev" } +#rust.channel = if "is a tarball source" { "auto-detect" } else { "dev" } # The root location of the musl installation directory. The library directory # will also need to contain libunwind.a for an unwinding implementation. Note @@ -673,65 +676,65 @@ # linked binaries. # # Defaults to /usr on musl hosts. Has no default otherwise. -#musl-root = (path) +#rust.musl-root = (path) # By default the `rustc` executable is built with `-Wl,-rpath` flags on Unix # platforms to ensure that the compiler is usable by default from the build # directory (as it links to a number of dynamic libraries). This may not be # desired in distributions, for example. -#rpath = true +#rust.rpath = true # Indicates whether symbols should be stripped using `-Cstrip=symbols`. 
-#strip = false +#rust.strip = false # Forces frame pointers to be used with `-Cforce-frame-pointers`. # This can be helpful for profiling at a small performance cost. -#frame-pointers = false +#rust.frame-pointers = false # Indicates whether stack protectors should be used # via the unstable option `-Zstack-protector`. # # Valid options are : `none`(default),`basic`,`strong`, or `all`. # `strong` and `basic` options may be buggy and are not recommended, see rust-lang/rust#114903. -#stack-protector = "none" +#rust.stack-protector = "none" # Prints each test name as it is executed, to help debug issues in the test harness itself. -#verbose-tests = if is_verbose { true } else { false } +#rust.verbose-tests = if is_verbose { true } else { false } # Flag indicating whether tests are compiled with optimizations (the -O flag). -#optimize-tests = true +#rust.optimize-tests = true # Flag indicating whether codegen tests will be run or not. If you get an error # saying that the FileCheck executable is missing, you may want to disable this. # Also see the target's llvm-filecheck option. -#codegen-tests = true +#rust.codegen-tests = true # Flag indicating whether git info will be retrieved from .git automatically. # Having the git information can cause a lot of rebuilds during development. -#omit-git-hash = if rust.channel == "dev" { true } else { false } +#rust.omit-git-hash = if rust.channel == "dev" { true } else { false } # Whether to create a source tarball by default when running `x dist`. # # You can still build a source tarball when this is disabled by explicitly passing `x dist rustc-src`. -#dist-src = true +#rust.dist-src = true # After building or testing an optional component (e.g. the nomicon or reference), append the # result (broken, compiling, testing) into this JSON file. -#save-toolstates = (path) +#rust.save-toolstates = (path) # This is an array of the codegen backends that will be compiled for the rustc # that's being compiled. 
The default is to only build the LLVM codegen backend, # and currently the only standard options supported are `"llvm"`, `"cranelift"` # and `"gcc"`. The first backend in this list will be used as default by rustc # when no explicit backend is specified. -#codegen-backends = ["llvm"] +#rust.codegen-backends = ["llvm"] # Indicates whether LLD will be compiled and made available in the sysroot for rustc to execute, and # whether to set it as rustc's default linker on `x86_64-unknown-linux-gnu`. This will also only be # when *not* building an external LLVM (so only when using `download-ci-llvm` or building LLVM from # the in-tree source): setting `llvm-config` in the `[target.x86_64-unknown-linux-gnu]` section will # make this default to false. -#lld = false in all cases, except on `x86_64-unknown-linux-gnu` as described above, where it is true +#rust.lld = false in all cases, except on `x86_64-unknown-linux-gnu` as described above, where it is true # Indicates whether LLD will be used to link Rust crates during bootstrap on # supported platforms. @@ -742,56 +745,56 @@ # On MSVC, LLD will not be used if we're cross linking. # # Explicitly setting the linker for a target will override this option when targeting MSVC. -#use-lld = false +#rust.use-lld = false # Indicates whether some LLVM tools, like llvm-objdump, will be made available in the # sysroot. -#llvm-tools = true +#rust.llvm-tools = true # Indicates whether the `self-contained` llvm-bitcode-linker, will be made available # in the sysroot. It is required for running nvptx tests. -#llvm-bitcode-linker = false +#rust.llvm-bitcode-linker = false # Whether to deny warnings in crates -#deny-warnings = true +#rust.deny-warnings = true # Print backtrace on internal compiler errors during bootstrap -#backtrace-on-ice = false +#rust.backtrace-on-ice = false # Whether to verify generated LLVM IR -#verify-llvm-ir = false +#rust.verify-llvm-ir = false # Compile the compiler with a non-default ThinLTO import limit. 
This import # limit controls the maximum size of functions imported by ThinLTO. Decreasing # will make code compile faster at the expense of lower runtime performance. -#thin-lto-import-instr-limit = if incremental { 10 } else { LLVM default (currently 100) } +#rust.thin-lto-import-instr-limit = if incremental { 10 } else { LLVM default (currently 100) } # Map debuginfo paths to `/rust/$sha/...`. # Useful for reproducible builds. Generally only set for releases -#remap-debuginfo = false +#rust.remap-debuginfo = false # Link the compiler and LLVM against `jemalloc` instead of the default libc allocator. # This option is only tested on Linux and OSX. It can also be configured per-target in the # [target.] section. -#jemalloc = false +#rust.jemalloc = false # Run tests in various test suites with the "nll compare mode" in addition to # running the tests in normal mode. Largely only used on CI and during local # development of NLL -#test-compare-mode = false +#rust.test-compare-mode = false # Global default for llvm-libunwind for all targets. See the target-specific # documentation for llvm-libunwind below. Note that the target-specific # option will override this if set. -#llvm-libunwind = 'no' +#rust.llvm-libunwind = 'no' # Enable Windows Control Flow Guard checks in the standard library. # This only applies from stage 1 onwards, and only for Windows targets. -#control-flow-guard = false +#rust.control-flow-guard = false # Enable Windows EHCont Guard checks in the standard library. # This only applies from stage 1 onwards, and only for Windows targets. -#ehcont-guard = false +#rust.ehcont-guard = false # Enable symbol-mangling-version v0. This can be helpful when profiling rustc, # as generics will be preserved in symbols (rather than erased into opaque T). @@ -799,16 +802,16 @@ # compiler and its tools and the legacy scheme will be used when compiling the # standard library. # If an explicit setting is given, it will be used for all parts of the codebase. 
-#new-symbol-mangling = true|false (see comment) +#rust.new-symbol-mangling = true|false (see comment) # Select LTO mode that will be used for compiling rustc. By default, thin local LTO # (LTO within a single crate) is used (like for any Rust crate). You can also select # "thin" or "fat" to apply Thin/Fat LTO to the `rustc_driver` dylib, or "off" to disable # LTO entirely. -#lto = "thin-local" +#rust.lto = "thin-local" # Build compiler with the optimization enabled and -Zvalidate-mir, currently only for `std` -#validate-mir-opts = 3 +#rust.validate-mir-opts = 3 # Configure `std` features used during bootstrap. # @@ -822,7 +825,57 @@ # # Since libstd also builds libcore and liballoc as dependencies and all their features are mirrored # as libstd features, this option can also be used to configure features such as optimize_for_size. -#std-features = ["panic_unwind"] +#rust.std-features = ["panic_unwind"] + +# ============================================================================= +# Distribution options +# +# These options are related to distribution, mostly for the Rust project itself. +# You probably won't need to concern yourself with any of these options +# ============================================================================= + +# This is the folder of artifacts that the build system will sign. All files in +# this directory will be signed with the default gpg key using the system `gpg` +# binary. The `asc` and `sha256` files will all be output into the standard dist +# output folder (currently `build/dist`) +# +# This folder should be populated ahead of time before the build system is +# invoked. +#dist.sign-folder = (path) + +# The remote address that all artifacts will eventually be uploaded to. The +# build system generates manifests which will point to these urls, and for the +# manifests to be correct they'll have to have the right URLs encoded. 
+# +# Note that this address should not contain a trailing slash as file names will +# be appended to it. +#dist.upload-addr = (URL) + +# Whether to build a plain source tarball to upload +# We disable that on Windows not to override the one already uploaded on S3 +# as the one built on Windows will contain backslashes in paths causing problems +# on linux +#dist.src-tarball = true + +# List of compression formats to use when generating dist tarballs. The list of +# formats is provided to rust-installer, which must support all of them. +# +# This list must be non-empty. +#dist.compression-formats = ["gz", "xz"] + +# How much time should be spent compressing the tarballs. The better the +# compression profile, the longer compression will take. +# +# Available options: fast, balanced, best +#dist.compression-profile = "fast" + +# Copy the linker, DLLs, and various libraries from MinGW into the Rust toolchain. +# Only applies when the host or target is pc-windows-gnu. +#dist.include-mingw-linker = true + +# Whether to vendor dependencies for the dist tarball. +#dist.vendor = if "is a tarball source" || "is a git repository" { true } else { false } + # ============================================================================= # Options for specific targets @@ -973,53 +1026,3 @@ # Link the compiler and LLVM against `jemalloc` instead of the default libc allocator. # This overrides the global `rust.jemalloc` option. See that option for more info. #jemalloc = rust.jemalloc (bool) - -# ============================================================================= -# Distribution options -# -# These options are related to distribution, mostly for the Rust project itself. -# You probably won't need to concern yourself with any of these options -# ============================================================================= -[dist] - -# This is the folder of artifacts that the build system will sign. 
All files in -# this directory will be signed with the default gpg key using the system `gpg` -# binary. The `asc` and `sha256` files will all be output into the standard dist -# output folder (currently `build/dist`) -# -# This folder should be populated ahead of time before the build system is -# invoked. -#sign-folder = (path) - -# The remote address that all artifacts will eventually be uploaded to. The -# build system generates manifests which will point to these urls, and for the -# manifests to be correct they'll have to have the right URLs encoded. -# -# Note that this address should not contain a trailing slash as file names will -# be appended to it. -#upload-addr = (URL) - -# Whether to build a plain source tarball to upload -# We disable that on Windows not to override the one already uploaded on S3 -# as the one built on Windows will contain backslashes in paths causing problems -# on linux -#src-tarball = true - -# List of compression formats to use when generating dist tarballs. The list of -# formats is provided to rust-installer, which must support all of them. -# -# This list must be non-empty. -#compression-formats = ["gz", "xz"] - -# How much time should be spent compressing the tarballs. The better the -# compression profile, the longer compression will take. -# -# Available options: fast, balanced, best -#compression-profile = "fast" - -# Copy the linker, DLLs, and various libraries from MinGW into the Rust toolchain. -# Only applies when the host or target is pc-windows-gnu. -#include-mingw-linker = true - -# Whether to vendor dependencies for the dist tarball. 
-#vendor = if "is a tarball source" || "is a git repository" { true } else { false } diff --git a/compiler/rustc_abi/src/canon_abi.rs b/compiler/rustc_abi/src/canon_abi.rs index 7c020be67617..13f9a04b286f 100644 --- a/compiler/rustc_abi/src/canon_abi.rs +++ b/compiler/rustc_abi/src/canon_abi.rs @@ -63,8 +63,8 @@ impl fmt::Display for CanonAbi { CanonAbi::Custom => ExternAbi::Custom, CanonAbi::Arm(arm_call) => match arm_call { ArmCall::Aapcs => ExternAbi::Aapcs { unwind: false }, - ArmCall::CCmseNonSecureCall => ExternAbi::CCmseNonSecureCall, - ArmCall::CCmseNonSecureEntry => ExternAbi::CCmseNonSecureEntry, + ArmCall::CCmseNonSecureCall => ExternAbi::CmseNonSecureCall, + ArmCall::CCmseNonSecureEntry => ExternAbi::CmseNonSecureEntry, }, CanonAbi::GpuKernel => ExternAbi::GpuKernel, CanonAbi::Interrupt(interrupt_kind) => match interrupt_kind { diff --git a/compiler/rustc_abi/src/extern_abi.rs b/compiler/rustc_abi/src/extern_abi.rs index 7457ae1f0334..29a3678abf3f 100644 --- a/compiler/rustc_abi/src/extern_abi.rs +++ b/compiler/rustc_abi/src/extern_abi.rs @@ -36,6 +36,10 @@ pub enum ExternAbi { /// Stronger than just `#[cold]` because `fn` pointers might be incompatible. RustCold, + /// An always-invalid ABI that's used to test "this ABI is not supported by this platform" + /// in a platform-agnostic way. + RustInvalid, + /// Unstable impl detail that directly uses Rust types to describe the ABI to LLVM. /// Even normally-compatible Rust types can become ABI-incompatible with this ABI! Unadjusted, @@ -55,9 +59,9 @@ pub enum ExternAbi { unwind: bool, }, /// extremely constrained barely-C ABI for TrustZone - CCmseNonSecureCall, + CmseNonSecureCall, /// extremely constrained barely-C ABI for TrustZone - CCmseNonSecureEntry, + CmseNonSecureEntry, /* gpu */ /// An entry-point function called by the GPU's host @@ -136,8 +140,6 @@ macro_rules! abi_impls { abi_impls! 
{ ExternAbi = { C { unwind: false } =><= "C", - CCmseNonSecureCall =><= "C-cmse-nonsecure-call", - CCmseNonSecureEntry =><= "C-cmse-nonsecure-entry", C { unwind: true } =><= "C-unwind", Rust =><= "Rust", Aapcs { unwind: false } =><= "aapcs", @@ -146,6 +148,8 @@ abi_impls! { AvrNonBlockingInterrupt =><= "avr-non-blocking-interrupt", Cdecl { unwind: false } =><= "cdecl", Cdecl { unwind: true } =><= "cdecl-unwind", + CmseNonSecureCall =><= "cmse-nonsecure-call", + CmseNonSecureEntry =><= "cmse-nonsecure-entry", Custom =><= "custom", EfiApi =><= "efiapi", Fastcall { unwind: false } =><= "fastcall", @@ -157,6 +161,7 @@ abi_impls! { RiscvInterruptS =><= "riscv-interrupt-s", RustCall =><= "rust-call", RustCold =><= "rust-cold", + RustInvalid =><= "rust-invalid", Stdcall { unwind: false } =><= "stdcall", Stdcall { unwind: true } =><= "stdcall-unwind", System { unwind: false } =><= "system", diff --git a/compiler/rustc_abi/src/layout.rs b/compiler/rustc_abi/src/layout.rs index 58a7fcae9f64..80b44e432eeb 100644 --- a/compiler/rustc_abi/src/layout.rs +++ b/compiler/rustc_abi/src/layout.rs @@ -432,7 +432,7 @@ impl LayoutCalculator { align = align.min(AbiAlign::new(pack)); } // The unadjusted ABI alignment does not include repr(align), but does include repr(pack). - // See documentation on `LayoutS::unadjusted_abi_align`. + // See documentation on `LayoutData::unadjusted_abi_align`. let unadjusted_abi_align = align.abi; if let Some(repr_align) = repr.align { align = align.max(AbiAlign::new(repr_align)); @@ -602,10 +602,10 @@ impl LayoutCalculator { dont_niche_optimize_enum: bool, ) -> LayoutCalculatorResult { // Until we've decided whether to use the tagged or - // niche filling LayoutS, we don't want to intern the + // niche filling LayoutData, we don't want to intern the // variant layouts, so we can't store them in the - // overall LayoutS. Store the overall LayoutS - // and the variant LayoutSs here until then. + // overall LayoutData. 
Store the overall LayoutData + // and the variant LayoutDatas here until then. struct TmpLayout { layout: LayoutData, variants: IndexVec>, @@ -1214,7 +1214,7 @@ impl LayoutCalculator { match kind { StructKind::AlwaysSized | StructKind::MaybeUnsized => { - // Currently `LayoutS` only exposes a single niche so sorting is usually + // Currently `LayoutData` only exposes a single niche so sorting is usually // sufficient to get one niche into the preferred position. If it ever // supported multiple niches then a more advanced pick-and-pack approach could // provide better results. But even for the single-niche cache it's not @@ -1333,7 +1333,7 @@ impl LayoutCalculator { } // The unadjusted ABI alignment does not include repr(align), but does include repr(pack). - // See documentation on `LayoutS::unadjusted_abi_align`. + // See documentation on `LayoutData::unadjusted_abi_align`. let unadjusted_abi_align = align.abi; if let Some(repr_align) = repr.align { align = align.max(AbiAlign::new(repr_align)); diff --git a/compiler/rustc_abi/src/layout/ty.rs b/compiler/rustc_abi/src/layout/ty.rs index bb880a58e527..18f0750aaa1f 100644 --- a/compiler/rustc_abi/src/layout/ty.rs +++ b/compiler/rustc_abi/src/layout/ty.rs @@ -71,7 +71,7 @@ pub struct Layout<'a>(pub Interned<'a, LayoutData>); impl<'a> fmt::Debug for Layout<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // See comment on `::fmt` above. + // See comment on `::fmt` above. self.0.0.fmt(f) } } diff --git a/compiler/rustc_abi/src/lib.rs b/compiler/rustc_abi/src/lib.rs index 4268e68b2e42..6d729b6919a7 100644 --- a/compiler/rustc_abi/src/lib.rs +++ b/compiler/rustc_abi/src/lib.rs @@ -1785,7 +1785,7 @@ where { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // This is how `Layout` used to print before it become - // `Interned`. We print it like this to avoid having to update + // `Interned`. We print it like this to avoid having to update // expected output in a lot of tests. 
let LayoutData { size, diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml index b2d3b90fc449..5de2e69072fa 100644 --- a/compiler/rustc_ast/Cargo.toml +++ b/compiler/rustc_ast/Cargo.toml @@ -7,7 +7,7 @@ edition = "2024" # tidy-alphabetical-start bitflags = "2.4.1" memchr = "2.7.4" -rustc-literal-escaper = "0.0.2" +rustc-literal-escaper = "0.0.4" rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_index = { path = "../rustc_index" } diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 11afd359e5ad..b2d8881e3f6f 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -323,7 +323,7 @@ impl ParenthesizedArgs { pub use crate::node_id::{CRATE_NODE_ID, DUMMY_NODE_ID, NodeId}; -/// Modifiers on a trait bound like `~const`, `?` and `!`. +/// Modifiers on a trait bound like `[const]`, `?` and `!`. #[derive(Copy, Clone, PartialEq, Eq, Encodable, Decodable, Debug)] pub struct TraitBoundModifiers { pub constness: BoundConstness, @@ -904,6 +904,10 @@ pub enum BorrowKind { /// The resulting type is either `*const T` or `*mut T` /// where `T = typeof($expr)`. Raw, + /// A pinned borrow, `&pin const $expr` or `&pin mut $expr`. + /// The resulting type is either `Pin<&'a T>` or `Pin<&'a mut T>` + /// where `T = typeof($expr)` and `'a` is some lifetime. 
+ Pin, } #[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] @@ -3111,7 +3115,7 @@ pub enum BoundConstness { Never, /// `Type: const Trait` Always(Span), - /// `Type: ~const Trait` + /// `Type: [const] Trait` Maybe(Span), } @@ -3120,7 +3124,7 @@ impl BoundConstness { match self { Self::Never => "", Self::Always(_) => "const", - Self::Maybe(_) => "~const", + Self::Maybe(_) => "[const]", } } } @@ -4060,9 +4064,9 @@ mod size_asserts { static_assert_size!(MetaItemLit, 40); static_assert_size!(Param, 40); static_assert_size!(Pat, 72); + static_assert_size!(PatKind, 48); static_assert_size!(Path, 24); static_assert_size!(PathSegment, 24); - static_assert_size!(PatKind, 48); static_assert_size!(Stmt, 32); static_assert_size!(StmtKind, 16); static_assert_size!(Ty, 64); diff --git a/compiler/rustc_ast/src/ast_traits.rs b/compiler/rustc_ast/src/ast_traits.rs index 797ab297319b..9d91f41d6c79 100644 --- a/compiler/rustc_ast/src/ast_traits.rs +++ b/compiler/rustc_ast/src/ast_traits.rs @@ -321,6 +321,13 @@ impl AstNodeWrapper { } } +// FIXME: remove after `stmt_expr_attributes` is stabilized. +impl From, Tag>> for AstNodeWrapper { + fn from(value: AstNodeWrapper, Tag>) -> Self { + AstNodeWrapper { wrapped: *value.wrapped, tag: value.tag } + } +} + impl HasNodeId for AstNodeWrapper { fn node_id(&self) -> NodeId { self.wrapped.node_id() diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 621e3042b62e..44865c493b3b 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -206,12 +206,24 @@ impl AttributeExt for Attribute { } } - fn style(&self) -> AttrStyle { - self.style + fn doc_resolution_scope(&self) -> Option { + match &self.kind { + AttrKind::DocComment(..) 
=> Some(self.style), + AttrKind::Normal(normal) + if normal.item.path == sym::doc && normal.item.value_str().is_some() => + { + Some(self.style) + } + _ => None, + } } } impl Attribute { + pub fn style(&self) -> AttrStyle { + self.style + } + pub fn may_have_doc_links(&self) -> bool { self.doc_str().is_some_and(|s| comments::may_have_doc_links(s.as_str())) } @@ -806,7 +818,14 @@ pub trait AttributeExt: Debug { /// * `#[doc(...)]` returns `None`. fn doc_str_and_comment_kind(&self) -> Option<(Symbol, CommentKind)>; - fn style(&self) -> AttrStyle; + /// Returns outer or inner if this is a doc attribute or a sugared doc + /// comment, otherwise None. + /// + /// This is used in the case of doc comments on modules, to decide whether + /// to resolve intra-doc links against the symbols in scope within the + /// commented module (for inner doc) vs within its parent module (for outer + /// doc). + fn doc_resolution_scope(&self) -> Option; } // FIXME(fn_delegation): use function delegation instead of manually forwarding @@ -881,8 +900,4 @@ impl Attribute { pub fn doc_str_and_comment_kind(&self) -> Option<(Symbol, CommentKind)> { AttributeExt::doc_str_and_comment_kind(self) } - - pub fn style(&self) -> AttrStyle { - AttributeExt::style(self) - } } diff --git a/compiler/rustc_ast/src/format.rs b/compiler/rustc_ast/src/format.rs index b611ddea1d9f..28d260419c51 100644 --- a/compiler/rustc_ast/src/format.rs +++ b/compiler/rustc_ast/src/format.rs @@ -50,6 +50,14 @@ pub struct FormatArgs { /// /// Generally only useful for lints that care about the raw bytes the user wrote. pub uncooked_fmt_str: (LitKind, Symbol), + /// Was the format literal written in the source? + /// - `format!("boo")` => true, + /// - `format!(concat!("b", "o", "o"))` => false, + /// - `format!(include_str!("boo.txt"))` => false, + /// + /// If it wasn't written in the source then we have to be careful with spans pointing into it + /// and suggestions about rewriting it. 
+ pub is_source_literal: bool, } /// A piece of a format template string. diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 07fbe8045fc2..3eae19f4daa1 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -13,7 +13,7 @@ use std::panic; use rustc_data_structures::flat_map_in_place::FlatMapInPlace; use rustc_span::source_map::Spanned; use rustc_span::{Ident, Span}; -use smallvec::{Array, SmallVec, smallvec}; +use smallvec::{SmallVec, smallvec}; use thin_vec::ThinVec; use crate::ast::*; @@ -21,17 +21,6 @@ use crate::ptr::P; use crate::tokenstream::*; use crate::visit::{AssocCtxt, BoundKind, FnCtxt, VisitorResult, try_visit, visit_opt, walk_list}; -pub trait ExpectOne { - fn expect_one(self, err: &'static str) -> A::Item; -} - -impl ExpectOne for SmallVec { - fn expect_one(self, err: &'static str) -> A::Item { - assert!(self.len() == 1, "{}", err); - self.into_iter().next().unwrap() - } -} - mod sealed { use rustc_ast_ir::visit::VisitorResult; @@ -47,323 +36,6 @@ mod sealed { use sealed::MutVisitorResult; -pub trait MutVisitor: Sized + MutVisitorResult { - // Methods in this trait have one of three forms: - // - // fn visit_t(&mut self, t: &mut T); // common - // fn flat_map_t(&mut self, t: T) -> SmallVec<[T; 1]>; // rare - // fn filter_map_t(&mut self, t: T) -> Option; // rarest - // - // When writing these methods, it is better to use destructuring like this: - // - // fn visit_abc(&mut self, ABC { a, b, c: _ }: &mut ABC) { - // visit_a(a); - // visit_b(b); - // } - // - // than to use field access like this: - // - // fn visit_abc(&mut self, abc: &mut ABC) { - // visit_a(&mut abc.a); - // visit_b(&mut abc.b); - // // ignore abc.c - // } - // - // As well as being more concise, the former is explicit about which fields - // are skipped. Furthermore, if a new field is added, the destructuring - // version will cause a compile error, which is good. 
In comparison, the - // field access version will continue working and it would be easy to - // forget to add handling for it. - - fn visit_crate(&mut self, c: &mut Crate) { - walk_crate(self, c) - } - - fn visit_meta_list_item(&mut self, list_item: &mut MetaItemInner) { - walk_meta_list_item(self, list_item); - } - - fn visit_meta_item(&mut self, meta_item: &mut MetaItem) { - walk_meta_item(self, meta_item); - } - - fn visit_use_tree(&mut self, use_tree: &mut UseTree) { - walk_use_tree(self, use_tree); - } - - fn visit_foreign_item(&mut self, ni: &mut ForeignItem) { - walk_item(self, ni); - } - - fn flat_map_foreign_item(&mut self, ni: P) -> SmallVec<[P; 1]> { - walk_flat_map_foreign_item(self, ni) - } - - fn visit_item(&mut self, i: &mut Item) { - walk_item(self, i); - } - - fn flat_map_item(&mut self, i: P) -> SmallVec<[P; 1]> { - walk_flat_map_item(self, i) - } - - fn visit_fn_header(&mut self, header: &mut FnHeader) { - walk_fn_header(self, header); - } - - fn visit_field_def(&mut self, fd: &mut FieldDef) { - walk_field_def(self, fd); - } - - fn flat_map_field_def(&mut self, fd: FieldDef) -> SmallVec<[FieldDef; 1]> { - walk_flat_map_field_def(self, fd) - } - - fn visit_assoc_item(&mut self, i: &mut AssocItem, ctxt: AssocCtxt) { - walk_assoc_item(self, i, ctxt) - } - - fn flat_map_assoc_item( - &mut self, - i: P, - ctxt: AssocCtxt, - ) -> SmallVec<[P; 1]> { - walk_flat_map_assoc_item(self, i, ctxt) - } - - fn visit_contract(&mut self, c: &mut FnContract) { - walk_contract(self, c); - } - - fn visit_fn_decl(&mut self, d: &mut FnDecl) { - walk_fn_decl(self, d); - } - - /// `Span` and `NodeId` are mutated at the caller site. 
- fn visit_fn(&mut self, fk: FnKind<'_>, _: Span, _: NodeId) { - walk_fn(self, fk) - } - - fn visit_coroutine_kind(&mut self, a: &mut CoroutineKind) { - walk_coroutine_kind(self, a); - } - - fn visit_closure_binder(&mut self, b: &mut ClosureBinder) { - walk_closure_binder(self, b); - } - - fn visit_block(&mut self, b: &mut Block) { - walk_block(self, b); - } - - fn flat_map_stmt(&mut self, s: Stmt) -> SmallVec<[Stmt; 1]> { - walk_flat_map_stmt(self, s) - } - - fn visit_arm(&mut self, arm: &mut Arm) { - walk_arm(self, arm); - } - - fn flat_map_arm(&mut self, arm: Arm) -> SmallVec<[Arm; 1]> { - walk_flat_map_arm(self, arm) - } - - fn visit_pat(&mut self, p: &mut Pat) { - walk_pat(self, p); - } - - fn visit_anon_const(&mut self, c: &mut AnonConst) { - walk_anon_const(self, c); - } - - fn visit_expr(&mut self, e: &mut Expr) { - walk_expr(self, e); - } - - /// This method is a hack to workaround unstable of `stmt_expr_attributes`. - /// It can be removed once that feature is stabilized. - fn visit_method_receiver_expr(&mut self, ex: &mut P) { - self.visit_expr(ex) - } - - fn filter_map_expr(&mut self, e: P) -> Option> { - walk_filter_map_expr(self, e) - } - - fn visit_generic_arg(&mut self, arg: &mut GenericArg) { - walk_generic_arg(self, arg); - } - - fn visit_ty(&mut self, t: &mut Ty) { - walk_ty(self, t); - } - - fn visit_ty_pat(&mut self, t: &mut TyPat) { - walk_ty_pat(self, t); - } - - fn visit_lifetime(&mut self, l: &mut Lifetime) { - walk_lifetime(self, l); - } - - fn visit_assoc_item_constraint(&mut self, c: &mut AssocItemConstraint) { - walk_assoc_item_constraint(self, c); - } - - fn visit_foreign_mod(&mut self, nm: &mut ForeignMod) { - walk_foreign_mod(self, nm); - } - - fn visit_variant(&mut self, v: &mut Variant) { - walk_variant(self, v); - } - - fn flat_map_variant(&mut self, v: Variant) -> SmallVec<[Variant; 1]> { - walk_flat_map_variant(self, v) - } - - fn visit_ident(&mut self, i: &mut Ident) { - self.visit_span(&mut i.span); - } - - fn visit_path(&mut 
self, p: &mut Path) { - walk_path(self, p); - } - - fn visit_path_segment(&mut self, p: &mut PathSegment) { - walk_path_segment(self, p) - } - - fn visit_qself(&mut self, qs: &mut Option>) { - walk_qself(self, qs); - } - - fn visit_generic_args(&mut self, p: &mut GenericArgs) { - walk_generic_args(self, p); - } - - fn visit_local(&mut self, l: &mut Local) { - walk_local(self, l); - } - - fn visit_mac_call(&mut self, mac: &mut MacCall) { - walk_mac(self, mac); - } - - fn visit_macro_def(&mut self, def: &mut MacroDef) { - walk_macro_def(self, def); - } - - fn visit_label(&mut self, label: &mut Label) { - walk_label(self, label); - } - - fn visit_attribute(&mut self, at: &mut Attribute) { - walk_attribute(self, at); - } - - fn visit_param(&mut self, param: &mut Param) { - walk_param(self, param); - } - - fn flat_map_param(&mut self, param: Param) -> SmallVec<[Param; 1]> { - walk_flat_map_param(self, param) - } - - fn visit_generics(&mut self, generics: &mut Generics) { - walk_generics(self, generics); - } - - fn visit_trait_ref(&mut self, tr: &mut TraitRef) { - walk_trait_ref(self, tr); - } - - fn visit_poly_trait_ref(&mut self, p: &mut PolyTraitRef) { - walk_poly_trait_ref(self, p); - } - - fn visit_variant_data(&mut self, vdata: &mut VariantData) { - walk_variant_data(self, vdata); - } - - fn visit_generic_param(&mut self, param: &mut GenericParam) { - walk_generic_param(self, param) - } - - fn flat_map_generic_param(&mut self, param: GenericParam) -> SmallVec<[GenericParam; 1]> { - walk_flat_map_generic_param(self, param) - } - - fn visit_param_bound(&mut self, tpb: &mut GenericBound, _ctxt: BoundKind) { - walk_param_bound(self, tpb); - } - - fn visit_precise_capturing_arg(&mut self, arg: &mut PreciseCapturingArg) { - walk_precise_capturing_arg(self, arg); - } - - fn visit_expr_field(&mut self, f: &mut ExprField) { - walk_expr_field(self, f); - } - - fn flat_map_expr_field(&mut self, f: ExprField) -> SmallVec<[ExprField; 1]> { - walk_flat_map_expr_field(self, f) - 
} - - fn flat_map_where_predicate( - &mut self, - where_predicate: WherePredicate, - ) -> SmallVec<[WherePredicate; 1]> { - walk_flat_map_where_predicate(self, where_predicate) - } - - fn visit_where_predicate_kind(&mut self, kind: &mut WherePredicateKind) { - walk_where_predicate_kind(self, kind) - } - - fn visit_vis(&mut self, vis: &mut Visibility) { - walk_vis(self, vis); - } - - fn visit_id(&mut self, _id: &mut NodeId) { - // Do nothing. - } - - // Span visiting is no longer used, but we keep it for now, - // in case it's needed for something like #127241. - fn visit_span(&mut self, _sp: &mut Span) { - // Do nothing. - } - - fn visit_pat_field(&mut self, fp: &mut PatField) { - walk_pat_field(self, fp) - } - - fn flat_map_pat_field(&mut self, fp: PatField) -> SmallVec<[PatField; 1]> { - walk_flat_map_pat_field(self, fp) - } - - fn visit_inline_asm(&mut self, asm: &mut InlineAsm) { - walk_inline_asm(self, asm) - } - - fn visit_inline_asm_sym(&mut self, sym: &mut InlineAsmSym) { - walk_inline_asm_sym(self, sym) - } - - fn visit_format_args(&mut self, fmt: &mut FormatArgs) { - walk_format_args(self, fmt) - } - - fn visit_capture_by(&mut self, capture_by: &mut CaptureBy) { - walk_capture_by(self, capture_by) - } - - fn visit_fn_ret_ty(&mut self, fn_ret_ty: &mut FnRetTy) { - walk_fn_ret_ty(self, fn_ret_ty) - } -} - super::common_visitor_and_walkers!((mut) MutVisitor); macro_rules! generate_flat_map_visitor_fns { @@ -398,22 +70,6 @@ generate_flat_map_visitor_fns! 
{ visit_arms, Arm, flat_map_arm; } -#[inline] -fn visit_thin_vec(elems: &mut ThinVec, mut visit_elem: F) -where - F: FnMut(&mut T), -{ - for elem in elems { - visit_elem(elem); - } -} - -fn visit_attrs(vis: &mut T, attrs: &mut AttrVec) { - for attr in attrs.iter_mut() { - vis.visit_attribute(attr); - } -} - pub fn walk_flat_map_pat_field( vis: &mut T, mut fp: PatField, @@ -431,47 +87,26 @@ fn visit_nested_use_tree( vis.visit_use_tree(nested_tree); } -pub fn walk_flat_map_arm(vis: &mut T, mut arm: Arm) -> SmallVec<[Arm; 1]> { - vis.visit_arm(&mut arm); - smallvec![arm] +macro_rules! generate_walk_flat_map_fns { + ($($fn_name:ident($Ty:ty$(,$extra_name:ident: $ExtraTy:ty)*) => $visit_fn_name:ident;)+) => {$( + pub fn $fn_name(vis: &mut V, mut value: $Ty$(,$extra_name: $ExtraTy)*) -> SmallVec<[$Ty; 1]> { + vis.$visit_fn_name(&mut value$(,$extra_name)*); + smallvec![value] + } + )+}; } -pub fn walk_flat_map_variant( - vis: &mut T, - mut variant: Variant, -) -> SmallVec<[Variant; 1]> { - vis.visit_variant(&mut variant); - smallvec![variant] -} - -fn walk_meta_list_item(vis: &mut T, li: &mut MetaItemInner) { - match li { - MetaItemInner::MetaItem(mi) => vis.visit_meta_item(mi), - MetaItemInner::Lit(_lit) => {} - } -} - -fn walk_meta_item(vis: &mut T, mi: &mut MetaItem) { - let MetaItem { unsafety: _, path: _, kind, span } = mi; - match kind { - MetaItemKind::Word => {} - MetaItemKind::List(mis) => visit_thin_vec(mis, |mi| vis.visit_meta_list_item(mi)), - MetaItemKind::NameValue(_s) => {} - } - vis.visit_span(span); -} - -pub fn walk_flat_map_param(vis: &mut T, mut param: Param) -> SmallVec<[Param; 1]> { - vis.visit_param(&mut param); - smallvec![param] -} - -pub fn walk_flat_map_generic_param( - vis: &mut T, - mut param: GenericParam, -) -> SmallVec<[GenericParam; 1]> { - vis.visit_generic_param(&mut param); - smallvec![param] +generate_walk_flat_map_fns! 
{ + walk_flat_map_arm(Arm) => visit_arm; + walk_flat_map_variant(Variant) => visit_variant; + walk_flat_map_param(Param) => visit_param; + walk_flat_map_generic_param(GenericParam) => visit_generic_param; + walk_flat_map_where_predicate(WherePredicate) => visit_where_predicate; + walk_flat_map_field_def(FieldDef) => visit_field_def; + walk_flat_map_expr_field(ExprField) => visit_expr_field; + walk_flat_map_item(P) => visit_item; + walk_flat_map_foreign_item(P) => visit_foreign_item; + walk_flat_map_assoc_item(P, ctxt: AssocCtxt) => visit_assoc_item; } fn walk_ty_alias_where_clauses(vis: &mut T, tawcs: &mut TyAliasWhereClauses) { @@ -482,63 +117,6 @@ fn walk_ty_alias_where_clauses(vis: &mut T, tawcs: &mut TyAliasWh vis.visit_span(span_after); } -pub fn walk_flat_map_where_predicate( - vis: &mut T, - mut pred: WherePredicate, -) -> SmallVec<[WherePredicate; 1]> { - walk_where_predicate(vis, &mut pred); - smallvec![pred] -} - -pub fn walk_flat_map_field_def( - vis: &mut T, - mut fd: FieldDef, -) -> SmallVec<[FieldDef; 1]> { - vis.visit_field_def(&mut fd); - smallvec![fd] -} - -pub fn walk_flat_map_expr_field( - vis: &mut T, - mut f: ExprField, -) -> SmallVec<[ExprField; 1]> { - vis.visit_expr_field(&mut f); - smallvec![f] -} - -pub fn walk_item_kind( - kind: &mut K, - span: Span, - id: NodeId, - visibility: &mut Visibility, - ctxt: K::Ctxt, - vis: &mut impl MutVisitor, -) { - kind.walk(span, id, visibility, ctxt, vis) -} - -pub fn walk_flat_map_item(vis: &mut impl MutVisitor, mut item: P) -> SmallVec<[P; 1]> { - vis.visit_item(&mut item); - smallvec![item] -} - -pub fn walk_flat_map_foreign_item( - vis: &mut impl MutVisitor, - mut item: P, -) -> SmallVec<[P; 1]> { - vis.visit_foreign_item(&mut item); - smallvec![item] -} - -pub fn walk_flat_map_assoc_item( - vis: &mut impl MutVisitor, - mut item: P, - ctxt: AssocCtxt, -) -> SmallVec<[P; 1]> { - vis.visit_assoc_item(&mut item, ctxt); - smallvec![item] -} - pub fn walk_filter_map_expr(vis: &mut T, mut e: P) -> Option> { 
vis.visit_expr(&mut e); Some(e) @@ -576,35 +154,11 @@ fn walk_flat_map_stmt_kind(vis: &mut T, kind: StmtKind) -> SmallV StmtKind::Empty => smallvec![StmtKind::Empty], StmtKind::MacCall(mut mac) => { let MacCallStmt { mac: mac_, style: _, attrs, tokens: _ } = mac.deref_mut(); - visit_attrs(vis, attrs); + for attr in attrs { + vis.visit_attribute(attr); + } vis.visit_mac_call(mac_); smallvec![StmtKind::MacCall(mac)] } } } - -fn walk_capture_by(vis: &mut T, capture_by: &mut CaptureBy) { - match capture_by { - CaptureBy::Ref => {} - CaptureBy::Value { move_kw } => { - vis.visit_span(move_kw); - } - CaptureBy::Use { use_kw } => { - vis.visit_span(use_kw); - } - } -} - -#[derive(Debug)] -pub enum FnKind<'a> { - /// E.g., `fn foo()`, `fn foo(&self)`, or `extern "Abi" fn foo()`. - Fn(FnCtxt, &'a mut Visibility, &'a mut Fn), - - /// E.g., `|x, y| body`. - Closure( - &'a mut ClosureBinder, - &'a mut Option, - &'a mut P, - &'a mut P, - ), -} diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 9b4535dcfbce..fc816f2cb792 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -893,7 +893,7 @@ impl Token { || self.is_qpath_start() || matches!(self.is_metavar_seq(), Some(MetaVarKind::Path)) || self.is_path_segment_keyword() - || self.is_ident() && !self.is_reserved_ident() + || self.is_non_reserved_ident() } /// Returns `true` if the token is a given keyword, `kw`. @@ -937,6 +937,10 @@ impl Token { self.is_non_raw_ident_where(Ident::is_reserved) } + pub fn is_non_reserved_ident(&self) -> bool { + self.ident().is_some_and(|(id, raw)| raw == IdentIsRaw::Yes || !Ident::is_reserved(id)) + } + /// Returns `true` if the token is the identifier `true` or `false`. 
pub fn is_bool_lit(&self) -> bool { self.is_non_raw_ident_where(|id| id.name.is_bool_lit()) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 3c231be20dce..c60185cdde00 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -634,10 +634,8 @@ impl TokenStream { ( TokenTree::Token(token_left, Spacing::Alone), TokenTree::Token(token_right, _), - ) if ((token_left.is_ident() && !token_left.is_reserved_ident()) - || token_left.is_lit()) - && ((token_right.is_ident() && !token_right.is_reserved_ident()) - || token_right.is_lit()) => + ) if (token_left.is_non_reserved_ident() || token_left.is_lit()) + && (token_right.is_non_reserved_ident() || token_right.is_lit()) => { token_left.span } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index b8526cf9d952..ad9e5d1468b0 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,7 +3,7 @@ use std::{ascii, fmt, str}; use rustc_literal_escaper::{ - MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, + MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str, }; use rustc_span::{Span, Symbol, kw, sym}; use tracing::debug; @@ -87,11 +87,10 @@ impl LitKind { // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be hot in // programs with many long strings containing escapes. 
- unescape_unicode( + unescape_str( s, - Mode::Str, - &mut #[inline(always)] - |_, c| match c { + #[inline(always)] + |_, res| match res { Ok(c) => buf.push(c), Err(err) => { assert!(!err.is_fatal(), "failed to unescape string literal") @@ -111,8 +110,8 @@ impl LitKind { token::ByteStr => { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c { - Ok(c) => buf.push(byte_from_char(c)), + unescape_byte_str(s, |_, res| match res { + Ok(b) => buf.push(b), Err(err) => { assert!(!err.is_fatal(), "failed to unescape string literal") } @@ -128,7 +127,7 @@ impl LitKind { token::CStr => { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - unescape_mixed(s, Mode::CStr, &mut |_span, c| match c { + unescape_c_str(s, |_span, c| match c { Ok(MixedUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } diff --git a/compiler/rustc_ast/src/visit.rs b/compiler/rustc_ast/src/visit.rs index bd2ab34bfc19..d0c2b2bf68b0 100644 --- a/compiler/rustc_ast/src/visit.rs +++ b/compiler/rustc_ast/src/visit.rs @@ -65,45 +65,6 @@ impl BoundKind { } } -#[derive(Copy, Clone, Debug)] -pub enum FnKind<'a> { - /// E.g., `fn foo()`, `fn foo(&self)`, or `extern "Abi" fn foo()`. - Fn(FnCtxt, &'a Visibility, &'a Fn), - - /// E.g., `|x, y| body`. - Closure(&'a ClosureBinder, &'a Option, &'a FnDecl, &'a Expr), -} - -impl<'a> FnKind<'a> { - pub fn header(&self) -> Option<&'a FnHeader> { - match *self { - FnKind::Fn(_, _, Fn { sig, .. }) => Some(&sig.header), - FnKind::Closure(..) => None, - } - } - - pub fn ident(&self) -> Option<&Ident> { - match self { - FnKind::Fn(_, _, Fn { ident, .. }) => Some(ident), - _ => None, - } - } - - pub fn decl(&self) -> &'a FnDecl { - match self { - FnKind::Fn(_, _, Fn { sig, .. }) => &sig.decl, - FnKind::Closure(_, _, decl, _) => decl, - } - } - - pub fn ctxt(&self) -> Option { - match self { - FnKind::Fn(ctxt, ..) => Some(*ctxt), - FnKind::Closure(..) 
=> None, - } - } -} - #[derive(Copy, Clone, Debug)] pub enum LifetimeCtxt { /// Appears in a reference type. @@ -114,206 +75,405 @@ pub enum LifetimeCtxt { GenericArg, } -/// Each method of the `Visitor` trait is a hook to be potentially -/// overridden. Each method's default implementation recursively visits -/// the substructure of the input via the corresponding `walk` method; -/// e.g., the `visit_item` method by default calls `visit::walk_item`. -/// -/// If you want to ensure that your code handles every variant -/// explicitly, you need to override each method. (And you also need -/// to monitor future changes to `Visitor` in case a new method with a -/// new default implementation gets introduced.) -/// -/// Every `walk_*` method uses deconstruction to access fields of structs and -/// enums. This will result in a compile error if a field is added, which makes -/// it more likely the appropriate visit call will be added for it. -pub trait Visitor<'ast>: Sized { - /// The result type of the `visit_*` methods. Can be either `()`, - /// or `ControlFlow`. 
- type Result: VisitorResult = (); - - fn visit_ident(&mut self, _ident: &'ast Ident) -> Self::Result { - Self::Result::output() - } - fn visit_foreign_mod(&mut self, nm: &'ast ForeignMod) -> Self::Result { - walk_foreign_mod(self, nm) - } - fn visit_foreign_item(&mut self, i: &'ast ForeignItem) -> Self::Result { - walk_item(self, i) - } - fn visit_item(&mut self, i: &'ast Item) -> Self::Result { - walk_item(self, i) - } - fn visit_local(&mut self, l: &'ast Local) -> Self::Result { - walk_local(self, l) - } - fn visit_block(&mut self, b: &'ast Block) -> Self::Result { - walk_block(self, b) - } - fn visit_stmt(&mut self, s: &'ast Stmt) -> Self::Result { - walk_stmt(self, s) - } - fn visit_param(&mut self, param: &'ast Param) -> Self::Result { - walk_param(self, param) - } - fn visit_arm(&mut self, a: &'ast Arm) -> Self::Result { - walk_arm(self, a) - } - fn visit_pat(&mut self, p: &'ast Pat) -> Self::Result { - walk_pat(self, p) - } - fn visit_anon_const(&mut self, c: &'ast AnonConst) -> Self::Result { - walk_anon_const(self, c) - } - fn visit_expr(&mut self, ex: &'ast Expr) -> Self::Result { - walk_expr(self, ex) - } - /// This method is a hack to workaround unstable of `stmt_expr_attributes`. - /// It can be removed once that feature is stabilized. 
- fn visit_method_receiver_expr(&mut self, ex: &'ast Expr) -> Self::Result { - self.visit_expr(ex) - } - fn visit_ty(&mut self, t: &'ast Ty) -> Self::Result { - walk_ty(self, t) - } - fn visit_ty_pat(&mut self, t: &'ast TyPat) -> Self::Result { - walk_ty_pat(self, t) - } - fn visit_generic_param(&mut self, param: &'ast GenericParam) -> Self::Result { - walk_generic_param(self, param) - } - fn visit_generics(&mut self, g: &'ast Generics) -> Self::Result { - walk_generics(self, g) - } - fn visit_closure_binder(&mut self, b: &'ast ClosureBinder) -> Self::Result { - walk_closure_binder(self, b) - } - fn visit_contract(&mut self, c: &'ast FnContract) -> Self::Result { - walk_contract(self, c) - } - fn visit_where_predicate(&mut self, p: &'ast WherePredicate) -> Self::Result { - walk_where_predicate(self, p) - } - fn visit_where_predicate_kind(&mut self, k: &'ast WherePredicateKind) -> Self::Result { - walk_where_predicate_kind(self, k) - } - fn visit_fn(&mut self, fk: FnKind<'ast>, _: Span, _: NodeId) -> Self::Result { - walk_fn(self, fk) - } - fn visit_assoc_item(&mut self, i: &'ast AssocItem, ctxt: AssocCtxt) -> Self::Result { - walk_assoc_item(self, i, ctxt) - } - fn visit_trait_ref(&mut self, t: &'ast TraitRef) -> Self::Result { - walk_trait_ref(self, t) - } - fn visit_param_bound(&mut self, bounds: &'ast GenericBound, _ctxt: BoundKind) -> Self::Result { - walk_param_bound(self, bounds) - } - fn visit_precise_capturing_arg(&mut self, arg: &'ast PreciseCapturingArg) -> Self::Result { - walk_precise_capturing_arg(self, arg) - } - fn visit_poly_trait_ref(&mut self, t: &'ast PolyTraitRef) -> Self::Result { - walk_poly_trait_ref(self, t) - } - fn visit_variant_data(&mut self, s: &'ast VariantData) -> Self::Result { - walk_variant_data(self, s) - } - fn visit_field_def(&mut self, s: &'ast FieldDef) -> Self::Result { - walk_field_def(self, s) - } - fn visit_variant(&mut self, v: &'ast Variant) -> Self::Result { - walk_variant(self, v) - } - fn visit_variant_discr(&mut 
self, discr: &'ast AnonConst) -> Self::Result { - self.visit_anon_const(discr) - } - fn visit_label(&mut self, label: &'ast Label) -> Self::Result { - walk_label(self, label) - } - fn visit_lifetime(&mut self, lifetime: &'ast Lifetime, _: LifetimeCtxt) -> Self::Result { - walk_lifetime(self, lifetime) - } - fn visit_mac_call(&mut self, mac: &'ast MacCall) -> Self::Result { - walk_mac(self, mac) - } - fn visit_id(&mut self, _id: NodeId) -> Self::Result { - Self::Result::output() - } - fn visit_macro_def(&mut self, macro_def: &'ast MacroDef) -> Self::Result { - walk_macro_def(self, macro_def) - } - fn visit_path(&mut self, path: &'ast Path) -> Self::Result { - walk_path(self, path) - } - fn visit_use_tree(&mut self, use_tree: &'ast UseTree) -> Self::Result { - walk_use_tree(self, use_tree) - } - fn visit_nested_use_tree(&mut self, use_tree: &'ast UseTree, id: NodeId) -> Self::Result { - try_visit!(self.visit_id(id)); - self.visit_use_tree(use_tree) - } - fn visit_path_segment(&mut self, path_segment: &'ast PathSegment) -> Self::Result { - walk_path_segment(self, path_segment) - } - fn visit_generic_args(&mut self, generic_args: &'ast GenericArgs) -> Self::Result { - walk_generic_args(self, generic_args) - } - fn visit_generic_arg(&mut self, generic_arg: &'ast GenericArg) -> Self::Result { - walk_generic_arg(self, generic_arg) - } - fn visit_assoc_item_constraint( - &mut self, - constraint: &'ast AssocItemConstraint, - ) -> Self::Result { - walk_assoc_item_constraint(self, constraint) - } - fn visit_attribute(&mut self, attr: &'ast Attribute) -> Self::Result { - walk_attribute(self, attr) - } - fn visit_vis(&mut self, vis: &'ast Visibility) -> Self::Result { - walk_vis(self, vis) - } - fn visit_fn_ret_ty(&mut self, ret_ty: &'ast FnRetTy) -> Self::Result { - walk_fn_ret_ty(self, ret_ty) - } - fn visit_fn_header(&mut self, header: &'ast FnHeader) -> Self::Result { - walk_fn_header(self, header) - } - fn visit_expr_field(&mut self, f: &'ast ExprField) -> Self::Result { - 
walk_expr_field(self, f) - } - fn visit_pat_field(&mut self, fp: &'ast PatField) -> Self::Result { - walk_pat_field(self, fp) - } - fn visit_crate(&mut self, krate: &'ast Crate) -> Self::Result { - walk_crate(self, krate) - } - fn visit_inline_asm(&mut self, asm: &'ast InlineAsm) -> Self::Result { - walk_inline_asm(self, asm) - } - fn visit_format_args(&mut self, fmt: &'ast FormatArgs) -> Self::Result { - walk_format_args(self, fmt) - } - fn visit_inline_asm_sym(&mut self, sym: &'ast InlineAsmSym) -> Self::Result { - walk_inline_asm_sym(self, sym) - } - fn visit_capture_by(&mut self, _capture_by: &'ast CaptureBy) -> Self::Result { - Self::Result::output() - } - fn visit_coroutine_kind(&mut self, coroutine_kind: &'ast CoroutineKind) -> Self::Result { - walk_coroutine_kind(self, coroutine_kind) - } - fn visit_fn_decl(&mut self, fn_decl: &'ast FnDecl) -> Self::Result { - walk_fn_decl(self, fn_decl) - } - fn visit_qself(&mut self, qs: &'ast Option>) -> Self::Result { - walk_qself(self, qs) - } -} - #[macro_export] macro_rules! common_visitor_and_walkers { ($(($mut: ident))? $Visitor:ident$(<$lt:lifetime>)?) => { + $(${ignore($lt)} + #[derive(Copy, Clone)] + )? + #[derive(Debug)] + pub enum FnKind<'a> { + /// E.g., `fn foo()`, `fn foo(&self)`, or `extern "Abi" fn foo()`. + Fn(FnCtxt, &'a $($mut)? Visibility, &'a $($mut)? Fn), + + /// E.g., `|x, y| body`. + Closure(&'a $($mut)? ClosureBinder, &'a $($mut)? Option, &'a $($mut)? P, &'a $($mut)? P), + } + + impl<'a> FnKind<'a> { + pub fn header(&'a $($mut)? self) -> Option<&'a $($mut)? FnHeader> { + match *self { + FnKind::Fn(_, _, Fn { sig, .. }) => Some(&$($mut)? sig.header), + FnKind::Closure(..) => None, + } + } + + pub fn ident(&'a $($mut)? self) -> Option<&'a $($mut)? Ident> { + match self { + FnKind::Fn(_, _, Fn { ident, .. }) => Some(ident), + _ => None, + } + } + + pub fn decl(&'a $($mut)? self) -> &'a $($mut)? FnDecl { + match self { + FnKind::Fn(_, _, Fn { sig, .. }) => &$($mut)? 
sig.decl, + FnKind::Closure(_, _, decl, _) => decl, + } + } + + pub fn ctxt(&self) -> Option { + match self { + FnKind::Fn(ctxt, ..) => Some(*ctxt), + FnKind::Closure(..) => None, + } + } + } + + /// Each method of this trait is a hook to be potentially + /// overridden. Each method's default implementation recursively visits + /// the substructure of the input via the corresponding `walk` method; + #[doc = concat!(" e.g., the `visit_item` method by default calls `visit"$(, "_", stringify!($mut))?, "::walk_item`.")] + /// + /// If you want to ensure that your code handles every variant + /// explicitly, you need to override each method. (And you also need + /// to monitor future changes to this trait in case a new method with a + /// new default implementation gets introduced.) + /// + /// Every `walk_*` method uses deconstruction to access fields of structs and + /// enums. This will result in a compile error if a field is added, which makes + /// it more likely the appropriate visit call will be added for it. + pub trait $Visitor<$($lt)?> : Sized $(${ignore($mut)} + MutVisitorResult)? { + $( + ${ignore($lt)} + /// The result type of the `visit_*` methods. Can be either `()`, + /// or `ControlFlow`. + type Result: VisitorResult = (); + )? 
+ + // Methods in this trait have one of three forms, with the last two forms + // only occuring on `MutVisitor`: + // + // fn visit_t(&mut self, t: &mut T); // common + // fn flat_map_t(&mut self, t: T) -> SmallVec<[T; 1]>; // rare + // fn filter_map_t(&mut self, t: T) -> Option; // rarest + // + // When writing these methods, it is better to use destructuring like this: + // + // fn visit_abc(&mut self, ABC { a, b, c: _ }: &mut ABC) { + // visit_a(a); + // visit_b(b); + // } + // + // than to use field access like this: + // + // fn visit_abc(&mut self, abc: &mut ABC) { + // visit_a(&mut abc.a); + // visit_b(&mut abc.b); + // // ignore abc.c + // } + // + // As well as being more concise, the former is explicit about which fields + // are skipped. Furthermore, if a new field is added, the destructuring + // version will cause a compile error, which is good. In comparison, the + // field access version will continue working and it would be easy to + // forget to add handling for it. + fn visit_ident(&mut self, Ident { name: _, span }: &$($lt)? $($mut)? Ident) -> Self::Result { + visit_span(self, span) + } + + fn visit_foreign_mod(&mut self, nm: &$($lt)? $($mut)? ForeignMod) -> Self::Result { + walk_foreign_mod(self, nm) + } + + fn visit_foreign_item(&mut self, i: &$($lt)? $($mut)? ForeignItem) -> Self::Result { + walk_item(self, i) + } + + fn visit_item(&mut self, i: &$($lt)? $($mut)? Item) -> Self::Result { + walk_item(self, i) + } + + fn visit_local(&mut self, l: &$($lt)? $($mut)? Local) -> Self::Result { + walk_local(self, l) + } + + fn visit_block(&mut self, b: &$($lt)? $($mut)? Block) -> Self::Result { + walk_block(self, b) + } + + fn visit_param(&mut self, param: &$($lt)? $($mut)? Param) -> Self::Result { + walk_param(self, param) + } + + fn visit_arm(&mut self, a: &$($lt)? $($mut)? Arm) -> Self::Result { + walk_arm(self, a) + } + + fn visit_pat(&mut self, p: &$($lt)? $($mut)? 
Pat) -> Self::Result { + walk_pat(self, p) + } + + fn visit_anon_const(&mut self, c: &$($lt)? $($mut)? AnonConst) -> Self::Result { + walk_anon_const(self, c) + } + + fn visit_expr(&mut self, ex: &$($lt)? $($mut)? Expr) -> Self::Result { + walk_expr(self, ex) + } + + /// This method is a hack to workaround unstable of `stmt_expr_attributes`. + /// It can be removed once that feature is stabilized. + fn visit_method_receiver_expr(&mut self, ex: &$($lt)? $($mut)? Expr) -> Self::Result { + self.visit_expr(ex) + } + + fn visit_ty(&mut self, t: &$($lt)? $($mut)? Ty) -> Self::Result { + walk_ty(self, t) + } + + fn visit_ty_pat(&mut self, t: &$($lt)? $($mut)? TyPat) -> Self::Result { + walk_ty_pat(self, t) + } + + fn visit_generic_param(&mut self, param: &$($lt)? $($mut)? GenericParam) -> Self::Result { + walk_generic_param(self, param) + } + + fn visit_generics(&mut self, g: &$($lt)? $($mut)? Generics) -> Self::Result { + walk_generics(self, g) + } + fn visit_closure_binder(&mut self, b: &$($lt)? $($mut)? ClosureBinder) -> Self::Result { + walk_closure_binder(self, b) + } + fn visit_contract(&mut self, c: &$($lt)? $($mut)? FnContract) -> Self::Result { + walk_contract(self, c) + } + + fn visit_where_predicate(&mut self, p: &$($lt)? $($mut)? WherePredicate) -> Self::Result { + walk_where_predicate(self, p) + } + + fn visit_where_predicate_kind(&mut self, k: &$($lt)? $($mut)? WherePredicateKind) -> Self::Result { + walk_where_predicate_kind(self, k) + } + + // for `MutVisitor`: `Span` and `NodeId` are mutated at the caller site. + fn visit_fn( + &mut self, + fk: FnKind<$($lt)? $(${ignore($mut)} '_)?>, + _: Span, + _: NodeId + ) -> Self::Result { + walk_fn(self, fk) + } + + fn visit_assoc_item(&mut self, i: &$($lt)? $($mut)? AssocItem, ctxt: AssocCtxt) -> Self::Result { + walk_assoc_item(self, i, ctxt) + } + + fn visit_trait_ref(&mut self, t: &$($lt)? $($mut)? TraitRef) -> Self::Result { + walk_trait_ref(self, t) + } + + fn visit_param_bound(&mut self, bounds: &$($lt)? 
$($mut)? GenericBound, _ctxt: BoundKind) -> Self::Result { + walk_param_bound(self, bounds) + } + + fn visit_precise_capturing_arg(&mut self, arg: &$($lt)? $($mut)? PreciseCapturingArg) -> Self::Result { + walk_precise_capturing_arg(self, arg) + } + + fn visit_poly_trait_ref(&mut self, t: &$($lt)? $($mut)? PolyTraitRef) -> Self::Result { + walk_poly_trait_ref(self, t) + } + + fn visit_variant_data(&mut self, s: &$($lt)? $($mut)? VariantData) -> Self::Result { + walk_variant_data(self, s) + } + + fn visit_field_def(&mut self, s: &$($lt)? $($mut)? FieldDef) -> Self::Result { + walk_field_def(self, s) + } + + fn visit_variant(&mut self, v: &$($lt)? $($mut)? Variant) -> Self::Result { + walk_variant(self, v) + } + + fn visit_label(&mut self, label: &$($lt)? $($mut)? Label) -> Self::Result { + walk_label(self, label) + } + + fn visit_lifetime(&mut self, lifetime: &$($lt)? $($mut)? Lifetime, $(${ignore($lt)} _: LifetimeCtxt )?) -> Self::Result { + walk_lifetime(self, lifetime) + } + + fn visit_mac_call(&mut self, mac: &$($lt)? $($mut)? MacCall) -> Self::Result { + walk_mac(self, mac) + } + + fn visit_id(&mut self, _id: $(&$mut)? NodeId) -> Self::Result { + Self::Result::output() + } + + fn visit_macro_def(&mut self, macro_def: &$($lt)? $($mut)? MacroDef) -> Self::Result { + walk_macro_def(self, macro_def) + } + + fn visit_path(&mut self, path: &$($lt)? $($mut)? Path) -> Self::Result { + walk_path(self, path) + } + + fn visit_use_tree(&mut self, use_tree: &$($lt)? $($mut)? UseTree) -> Self::Result { + walk_use_tree(self, use_tree) + } + + fn visit_path_segment(&mut self, path_segment: &$($lt)? $($mut)? PathSegment) -> Self::Result { + walk_path_segment(self, path_segment) + } + + fn visit_generic_args(&mut self, generic_args: &$($lt)? $($mut)? GenericArgs) -> Self::Result { + walk_generic_args(self, generic_args) + } + + fn visit_generic_arg(&mut self, generic_arg: &$($lt)? $($mut)? 
GenericArg) -> Self::Result { + walk_generic_arg(self, generic_arg) + } + + fn visit_assoc_item_constraint( + &mut self, + constraint: &$($lt)? $($mut)? AssocItemConstraint, + ) -> Self::Result { + walk_assoc_item_constraint(self, constraint) + } + + fn visit_attribute(&mut self, attr: &$($lt)? $($mut)? Attribute) -> Self::Result { + walk_attribute(self, attr) + } + + fn visit_vis(&mut self, vis: &$($lt)? $($mut)? Visibility) -> Self::Result { + walk_vis(self, vis) + } + + fn visit_fn_ret_ty(&mut self, ret_ty: &$($lt)? $($mut)? FnRetTy) -> Self::Result { + walk_fn_ret_ty(self, ret_ty) + } + + fn visit_fn_header(&mut self, header: &$($lt)? $($mut)? FnHeader) -> Self::Result { + walk_fn_header(self, header) + } + + fn visit_expr_field(&mut self, f: &$($lt)? $($mut)? ExprField) -> Self::Result { + walk_expr_field(self, f) + } + + fn visit_pat_field(&mut self, fp: &$($lt)? $($mut)? PatField) -> Self::Result { + walk_pat_field(self, fp) + } + + fn visit_crate(&mut self, krate: &$($lt)? $($mut)? Crate) -> Self::Result { + walk_crate(self, krate) + } + + fn visit_inline_asm(&mut self, asm: &$($lt)? $($mut)? InlineAsm) -> Self::Result { + walk_inline_asm(self, asm) + } + + fn visit_format_args(&mut self, fmt: &$($lt)? $($mut)? FormatArgs) -> Self::Result { + walk_format_args(self, fmt) + } + + fn visit_inline_asm_sym(&mut self, sym: &$($lt)? $($mut)? InlineAsmSym) -> Self::Result { + walk_inline_asm_sym(self, sym) + } + + fn visit_capture_by(&mut self, capture_by: &$($lt)? $($mut)? CaptureBy) -> Self::Result { + walk_capture_by(self, capture_by) + } + + fn visit_coroutine_kind(&mut self, coroutine_kind: &$($lt)? $($mut)? CoroutineKind) -> Self::Result { + walk_coroutine_kind(self, coroutine_kind) + } + + fn visit_fn_decl(&mut self, fn_decl: &$($lt)? $($mut)? FnDecl) -> Self::Result { + walk_fn_decl(self, fn_decl) + } + + fn visit_qself(&mut self, qs: &$($lt)? $($mut)? 
Option>) -> Self::Result { + walk_qself(self, qs) + } + + // (non-mut) `Visitor`-only methods + $( + fn visit_stmt(&mut self, s: &$lt Stmt) -> Self::Result { + walk_stmt(self, s) + } + + fn visit_nested_use_tree(&mut self, use_tree: &$lt UseTree, id: NodeId) -> Self::Result { + try_visit!(self.visit_id(id)); + self.visit_use_tree(use_tree) + } + )? + + // `MutVisitor`-only methods + $( + fn flat_map_foreign_item(&mut self, ni: P) -> SmallVec<[P; 1]> { + walk_flat_map_foreign_item(self, ni) + } + + fn flat_map_item(&mut self, i: P) -> SmallVec<[P; 1]> { + walk_flat_map_item(self, i) + } + + fn flat_map_field_def(&mut self, fd: FieldDef) -> SmallVec<[FieldDef; 1]> { + walk_flat_map_field_def(self, fd) + } + + fn flat_map_assoc_item( + &mut self, + i: P, + ctxt: AssocCtxt, + ) -> SmallVec<[P; 1]> { + walk_flat_map_assoc_item(self, i, ctxt) + } + + fn flat_map_stmt(&mut self, s: Stmt) -> SmallVec<[Stmt; 1]> { + walk_flat_map_stmt(self, s) + } + + fn flat_map_arm(&mut self, arm: Arm) -> SmallVec<[Arm; 1]> { + walk_flat_map_arm(self, arm) + } + + fn filter_map_expr(&mut self, e: P) -> Option> { + walk_filter_map_expr(self, e) + } + + fn flat_map_variant(&mut self, v: Variant) -> SmallVec<[Variant; 1]> { + walk_flat_map_variant(self, v) + } + + fn flat_map_param(&mut self, param: Param) -> SmallVec<[Param; 1]> { + walk_flat_map_param(self, param) + } + + fn flat_map_generic_param(&mut self, param: GenericParam) -> SmallVec<[GenericParam; 1]> { + walk_flat_map_generic_param(self, param) + } + + fn flat_map_expr_field(&mut self, f: ExprField) -> SmallVec<[ExprField; 1]> { + walk_flat_map_expr_field(self, f) + } + + fn flat_map_where_predicate( + &mut self, + where_predicate: WherePredicate, + ) -> SmallVec<[WherePredicate; 1]> { + walk_flat_map_where_predicate(self, where_predicate) + } + + // Span visiting is no longer used, but we keep it for now, + // in case it's needed for something like #127241. + fn visit_span(&mut self, _sp: &$mut Span) { + // Do nothing. 
+ } + + fn flat_map_pat_field(&mut self, fp: PatField) -> SmallVec<[PatField; 1]> { + walk_flat_map_pat_field(self, fp) + } + )? + } + pub trait WalkItemKind { type Ctxt; fn walk<$($lt,)? V: $Visitor$(<$lt>)?>( @@ -409,6 +569,24 @@ macro_rules! common_visitor_and_walkers { V::Result::output() } + $(${ignore($lt)} + #[inline] + )? + fn walk_capture_by<$($lt,)? V: $Visitor$(<$lt>)?>( + vis: &mut V, + capture_by: &$($lt)? $($mut)? CaptureBy + ) -> V::Result { + match capture_by { + CaptureBy::Ref => { V::Result::output() } + CaptureBy::Value { move_kw } => { + visit_span(vis, move_kw) + } + CaptureBy::Use { use_kw } => { + visit_span(vis, use_kw) + } + } + } + fn visit_bounds<$($lt,)? V: $Visitor$(<$lt>)?>(visitor: &mut V, bounds: &$($lt)? $($mut)? GenericBounds, ctxt: BoundKind) -> V::Result { walk_list!(visitor, visit_param_bound, bounds, ctxt); V::Result::output() @@ -989,8 +1167,7 @@ macro_rules! common_visitor_and_walkers { try_visit!(vis.visit_vis(visibility)); try_visit!(vis.visit_ident(ident)); try_visit!(vis.visit_variant_data(data)); - $(${ignore($lt)} visit_opt!(vis, visit_variant_discr, disr_expr); )? - $(${ignore($mut)} visit_opt!(vis, visit_anon_const, disr_expr); )? + visit_opt!(vis, visit_anon_const, disr_expr); visit_span(vis, span) } @@ -1389,7 +1566,7 @@ macro_rules! common_visitor_and_walkers { // FIXME: visit the template exhaustively. pub fn walk_format_args<$($lt,)? V: $Visitor$(<$lt>)?>(vis: &mut V, fmt: &$($lt)? $($mut)? FormatArgs) -> V::Result { - let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt; + let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _, is_source_literal: _ } = fmt; let args = $(${ignore($mut)} arguments.all_args_mut())? $(${ignore($lt)} arguments.all_args())? 
; for FormatArgument { kind, expr } in args { match kind { diff --git a/compiler/rustc_ast_lowering/Cargo.toml b/compiler/rustc_ast_lowering/Cargo.toml index 6ac258155fe9..dc571f5c3671 100644 --- a/compiler/rustc_ast_lowering/Cargo.toml +++ b/compiler/rustc_ast_lowering/Cargo.toml @@ -11,6 +11,7 @@ doctest = false rustc_abi = { path = "../rustc_abi" } rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } +rustc_attr_data_structures = { path = "../rustc_attr_data_structures" } rustc_attr_parsing = { path = "../rustc_attr_parsing" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index f297bf9f4cfe..c2140514e311 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use rustc_ast::ptr::P as AstP; use rustc_ast::*; use rustc_ast_pretty::pprust::expr_to_string; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::HirId; @@ -831,7 +832,7 @@ impl<'hir> LoweringContext<'_, 'hir> { ) { if self.tcx.features().async_fn_track_caller() && let Some(attrs) = self.attrs.get(&outer_hir_id.local_id) - && attrs.into_iter().any(|attr| attr.has_name(sym::track_caller)) + && find_attr!(*attrs, AttributeKind::TrackCaller(_)) { let unstable_span = self.mark_span_with_reason( DesugaringKind::Async, diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index 12f0af754868..943cde90dd20 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -4,7 +4,7 @@ use rustc_ast::*; use rustc_data_structures::fx::FxIndexMap; use rustc_hir as hir; use rustc_session::config::FmtDebug; -use rustc_span::{Ident, Span, Symbol, sym}; +use 
rustc_span::{DesugaringKind, Ident, Span, Symbol, sym}; use super::LoweringContext; @@ -14,6 +14,13 @@ impl<'hir> LoweringContext<'_, 'hir> { // format_args!() had any arguments _before_ flattening/inlining. let allow_const = fmt.arguments.all_args().is_empty(); let mut fmt = Cow::Borrowed(fmt); + + let sp = self.mark_span_with_reason( + DesugaringKind::FormatLiteral { source: fmt.is_source_literal }, + sp, + sp.ctxt().outer_expn_data().allow_internal_unstable, + ); + if self.tcx.sess.opts.unstable_opts.flatten_format_args { fmt = flatten_format_args(fmt); fmt = self.inline_literals(fmt); diff --git a/compiler/rustc_ast_lowering/src/item.rs b/compiler/rustc_ast_lowering/src/item.rs index ef27d0ef69b1..8acb51057730 100644 --- a/compiler/rustc_ast_lowering/src/item.rs +++ b/compiler/rustc_ast_lowering/src/item.rs @@ -2,7 +2,7 @@ use rustc_abi::ExternAbi; use rustc_ast::ptr::P; use rustc_ast::visit::AssocCtxt; use rustc_ast::*; -use rustc_errors::ErrorGuaranteed; +use rustc_errors::{E0570, ErrorGuaranteed, struct_span_code_err}; use rustc_hir::def::{DefKind, PerNS, Res}; use rustc_hir::def_id::{CRATE_DEF_ID, LocalDefId}; use rustc_hir::{self as hir, HirId, LifetimeSource, PredicateOrigin}; @@ -1644,9 +1644,29 @@ impl<'hir> LoweringContext<'_, 'hir> { self.error_on_invalid_abi(abi_str); ExternAbi::Rust }); - let sess = self.tcx.sess; - let features = self.tcx.features(); - gate_unstable_abi(sess, features, span, extern_abi); + let tcx = self.tcx; + + // we can't do codegen for unsupported ABIs, so error now so we won't get farther + if !tcx.sess.target.is_abi_supported(extern_abi) { + let mut err = struct_span_code_err!( + tcx.dcx(), + span, + E0570, + "{extern_abi} is not a supported ABI for the current target", + ); + + if let ExternAbi::Stdcall { unwind } = extern_abi { + let c_abi = ExternAbi::C { unwind }; + let system_abi = ExternAbi::System { unwind }; + err.help(format!("if you need `extern {extern_abi}` on win32 and `extern {c_abi}` everywhere else, \ + use 
`extern {system_abi}`" + )); + } + err.emit(); + } + // Show required feature gate even if we already errored, as the user is likely to build the code + // for the actually intended target next and then they will need the feature gate. + gate_unstable_abi(tcx.sess, tcx.features(), span, extern_abi); extern_abi } diff --git a/compiler/rustc_ast_lowering/src/stability.rs b/compiler/rustc_ast_lowering/src/stability.rs index b8fa2dd3dd62..6752218fa0d4 100644 --- a/compiler/rustc_ast_lowering/src/stability.rs +++ b/compiler/rustc_ast_lowering/src/stability.rs @@ -96,6 +96,9 @@ pub fn extern_abi_stability(abi: ExternAbi) -> Result<(), UnstableAbi> { ExternAbi::RustCold => { Err(UnstableAbi { abi, feature: sym::rust_cold_cc, explain: GateReason::Experimental }) } + ExternAbi::RustInvalid => { + Err(UnstableAbi { abi, feature: sym::rustc_attrs, explain: GateReason::ImplDetail }) + } ExternAbi::GpuKernel => Err(UnstableAbi { abi, feature: sym::abi_gpu_kernel, @@ -124,12 +127,12 @@ pub fn extern_abi_stability(abi: ExternAbi) -> Result<(), UnstableAbi> { feature: sym::abi_riscv_interrupt, explain: GateReason::Experimental, }), - ExternAbi::CCmseNonSecureCall => Err(UnstableAbi { + ExternAbi::CmseNonSecureCall => Err(UnstableAbi { abi, - feature: sym::abi_c_cmse_nonsecure_call, + feature: sym::abi_cmse_nonsecure_call, explain: GateReason::Experimental, }), - ExternAbi::CCmseNonSecureEntry => Err(UnstableAbi { + ExternAbi::CmseNonSecureEntry => Err(UnstableAbi { abi, feature: sym::cmse_nonsecure_entry, explain: GateReason::Experimental, diff --git a/compiler/rustc_ast_passes/Cargo.toml b/compiler/rustc_ast_passes/Cargo.toml index c738cb2aa2fd..1940628b44a5 100644 --- a/compiler/rustc_ast_passes/Cargo.toml +++ b/compiler/rustc_ast_passes/Cargo.toml @@ -18,5 +18,6 @@ rustc_macros = { path = "../rustc_macros" } rustc_parse = { path = "../rustc_parse" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } +rustc_target = { path = "../rustc_target" } 
thin-vec = "0.2.12" # tidy-alphabetical-end diff --git a/compiler/rustc_ast_passes/messages.ftl b/compiler/rustc_ast_passes/messages.ftl index 9a267501230f..4290f7b7edeb 100644 --- a/compiler/rustc_ast_passes/messages.ftl +++ b/compiler/rustc_ast_passes/messages.ftl @@ -1,20 +1,25 @@ -ast_passes_abi_custom_coroutine = - functions with the `"custom"` ABI cannot be `{$coroutine_kind_str}` +ast_passes_abi_cannot_be_coroutine = + functions with the {$abi} ABI cannot be `{$coroutine_kind_str}` .suggestion = remove the `{$coroutine_kind_str}` keyword from this definiton -ast_passes_abi_custom_invalid_signature = - invalid signature for `extern "custom"` function - .note = functions with the `"custom"` ABI cannot have any parameters or return type - .suggestion = remove the parameters and return type - ast_passes_abi_custom_safe_foreign_function = - foreign functions with the `"custom"` ABI cannot be safe + foreign functions with the "custom" ABI cannot be safe .suggestion = remove the `safe` keyword from this definition ast_passes_abi_custom_safe_function = - functions with the `"custom"` ABI must be unsafe + functions with the "custom" ABI must be unsafe .suggestion = add the `unsafe` keyword to this definition +ast_passes_abi_must_not_have_parameters_or_return_type= + invalid signature for `extern {$abi}` function + .note = functions with the {$abi} ABI cannot have any parameters or return type + .suggestion = remove the parameters and return type + +ast_passes_abi_must_not_have_return_type= + invalid signature for `extern {$abi}` function + .note = functions with the "custom" ABI cannot have a return type + .help = remove the return type + ast_passes_assoc_const_without_body = associated constant in `impl` without body .suggestion = provide a definition for the constant @@ -232,17 +237,17 @@ ast_passes_static_without_body = free static item without body .suggestion = provide a definition for the static -ast_passes_tilde_const_disallowed = `~const` is not allowed here 
- .closure = closures cannot have `~const` trait bounds - .function = this function is not `const`, so it cannot have `~const` trait bounds - .trait = this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds - .trait_impl = this impl is not `const`, so it cannot have `~const` trait bounds - .impl = inherent impls cannot have `~const` trait bounds - .trait_assoc_ty = associated types in non-`#[const_trait]` traits cannot have `~const` trait bounds - .trait_impl_assoc_ty = associated types in non-const impls cannot have `~const` trait bounds - .inherent_assoc_ty = inherent associated types cannot have `~const` trait bounds - .object = trait objects cannot have `~const` trait bounds - .item = this item cannot have `~const` trait bounds +ast_passes_tilde_const_disallowed = `[const]` is not allowed here + .closure = closures cannot have `[const]` trait bounds + .function = this function is not `const`, so it cannot have `[const]` trait bounds + .trait = this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds + .trait_impl = this impl is not `const`, so it cannot have `[const]` trait bounds + .impl = inherent impls cannot have `[const]` trait bounds + .trait_assoc_ty = associated types in non-`#[const_trait]` traits cannot have `[const]` trait bounds + .trait_impl_assoc_ty = associated types in non-const impls cannot have `[const]` trait bounds + .inherent_assoc_ty = inherent associated types cannot have `[const]` trait bounds + .object = trait objects cannot have `[const]` trait bounds + .item = this item cannot have `[const]` trait bounds ast_passes_trait_fn_const = functions in {$in_impl -> diff --git a/compiler/rustc_ast_passes/src/ast_validation.rs b/compiler/rustc_ast_passes/src/ast_validation.rs index b69a91e2f5f8..da2482512037 100644 --- a/compiler/rustc_ast_passes/src/ast_validation.rs +++ b/compiler/rustc_ast_passes/src/ast_validation.rs @@ -21,7 +21,7 @@ use std::ops::{Deref, DerefMut}; use std::str::FromStr; use 
itertools::{Either, Itertools}; -use rustc_abi::ExternAbi; +use rustc_abi::{CanonAbi, ExternAbi, InterruptKind}; use rustc_ast::ptr::P; use rustc_ast::visit::{AssocCtxt, BoundKind, FnCtxt, FnKind, Visitor, walk_list}; use rustc_ast::*; @@ -37,6 +37,7 @@ use rustc_session::lint::builtin::{ }; use rustc_session::lint::{BuiltinLintDiag, LintBuffer}; use rustc_span::{Ident, Span, kw, sym}; +use rustc_target::spec::{AbiMap, AbiMapping}; use thin_vec::thin_vec; use crate::errors::{self, TildeConstReason}; @@ -365,31 +366,77 @@ impl<'a> AstValidator<'a> { } } - /// An `extern "custom"` function must be unsafe, and must not have any parameters or return - /// type. - fn check_custom_abi(&self, ctxt: FnCtxt, ident: &Ident, sig: &FnSig) { + /// Check that the signature of this function does not violate the constraints of its ABI. + fn check_extern_fn_signature(&self, abi: ExternAbi, ctxt: FnCtxt, ident: &Ident, sig: &FnSig) { + match AbiMap::from_target(&self.sess.target).canonize_abi(abi, false) { + AbiMapping::Direct(canon_abi) | AbiMapping::Deprecated(canon_abi) => { + match canon_abi { + CanonAbi::C + | CanonAbi::Rust + | CanonAbi::RustCold + | CanonAbi::Arm(_) + | CanonAbi::GpuKernel + | CanonAbi::X86(_) => { /* nothing to check */ } + + CanonAbi::Custom => { + // An `extern "custom"` function must be unsafe. + self.reject_safe_fn(abi, ctxt, sig); + + // An `extern "custom"` function cannot be `async` and/or `gen`. + self.reject_coroutine(abi, sig); + + // An `extern "custom"` function must have type `fn()`. + self.reject_params_or_return(abi, ident, sig); + } + + CanonAbi::Interrupt(interrupt_kind) => { + // An interrupt handler cannot be `async` and/or `gen`. + self.reject_coroutine(abi, sig); + + if let InterruptKind::X86 = interrupt_kind { + // "x86-interrupt" is special because it does have arguments. + // FIXME(workingjubilee): properly lint on acceptable input types. 
+ if let FnRetTy::Ty(ref ret_ty) = sig.decl.output { + self.dcx().emit_err(errors::AbiMustNotHaveReturnType { + span: ret_ty.span, + abi, + }); + } + } else { + // An `extern "interrupt"` function must have type `fn()`. + self.reject_params_or_return(abi, ident, sig); + } + } + } + } + AbiMapping::Invalid => { /* ignore */ } + } + } + + fn reject_safe_fn(&self, abi: ExternAbi, ctxt: FnCtxt, sig: &FnSig) { let dcx = self.dcx(); - // An `extern "custom"` function must be unsafe. match sig.header.safety { Safety::Unsafe(_) => { /* all good */ } Safety::Safe(safe_span) => { - let safe_span = - self.sess.psess.source_map().span_until_non_whitespace(safe_span.to(sig.span)); + let source_map = self.sess.psess.source_map(); + let safe_span = source_map.span_until_non_whitespace(safe_span.to(sig.span)); dcx.emit_err(errors::AbiCustomSafeForeignFunction { span: sig.span, safe_span }); } Safety::Default => match ctxt { FnCtxt::Foreign => { /* all good */ } FnCtxt::Free | FnCtxt::Assoc(_) => { - self.dcx().emit_err(errors::AbiCustomSafeFunction { + dcx.emit_err(errors::AbiCustomSafeFunction { span: sig.span, + abi, unsafe_span: sig.span.shrink_to_lo(), }); } }, } + } - // An `extern "custom"` function cannot be `async` and/or `gen`. + fn reject_coroutine(&self, abi: ExternAbi, sig: &FnSig) { if let Some(coroutine_kind) = sig.header.coroutine_kind { let coroutine_kind_span = self .sess @@ -397,14 +444,16 @@ impl<'a> AstValidator<'a> { .source_map() .span_until_non_whitespace(coroutine_kind.span().to(sig.span)); - self.dcx().emit_err(errors::AbiCustomCoroutine { + self.dcx().emit_err(errors::AbiCannotBeCoroutine { span: sig.span, + abi, coroutine_kind_span, coroutine_kind_str: coroutine_kind.as_str(), }); } + } - // An `extern "custom"` function must not have any parameters or return type. 
+ fn reject_params_or_return(&self, abi: ExternAbi, ident: &Ident, sig: &FnSig) { let mut spans: Vec<_> = sig.decl.inputs.iter().map(|p| p.span).collect(); if let FnRetTy::Ty(ref ret_ty) = sig.decl.output { spans.push(ret_ty.span); @@ -415,11 +464,12 @@ impl<'a> AstValidator<'a> { let suggestion_span = header_span.shrink_to_hi().to(sig.decl.output.span()); let padding = if header_span.is_empty() { "" } else { " " }; - self.dcx().emit_err(errors::AbiCustomInvalidSignature { + self.dcx().emit_err(errors::AbiMustNotHaveParametersOrReturnType { spans, symbol: ident.name, suggestion_span, padding, + abi, }); } } @@ -1199,9 +1249,12 @@ impl<'a> Visitor<'a> for AstValidator<'a> { self.check_foreign_fn_bodyless(*ident, body.as_deref()); self.check_foreign_fn_headerless(sig.header); self.check_foreign_item_ascii_only(*ident); - if self.extern_mod_abi == Some(ExternAbi::Custom) { - self.check_custom_abi(FnCtxt::Foreign, ident, sig); - } + self.check_extern_fn_signature( + self.extern_mod_abi.unwrap_or(ExternAbi::FALLBACK), + FnCtxt::Foreign, + ident, + sig, + ); } ForeignItemKind::TyAlias(box TyAlias { defaultness, @@ -1411,9 +1464,9 @@ impl<'a> Visitor<'a> for AstValidator<'a> { if let FnKind::Fn(ctxt, _, fun) = fk && let Extern::Explicit(str_lit, _) = fun.sig.header.ext - && let Ok(ExternAbi::Custom) = ExternAbi::from_str(str_lit.symbol.as_str()) + && let Ok(abi) = ExternAbi::from_str(str_lit.symbol.as_str()) { - self.check_custom_abi(ctxt, &fun.ident, &fun.sig); + self.check_extern_fn_signature(abi, ctxt, &fun.ident, &fun.sig); } self.check_c_variadic_type(fk); diff --git a/compiler/rustc_ast_passes/src/errors.rs b/compiler/rustc_ast_passes/src/errors.rs index c437e62f4d37..d387a4a310ea 100644 --- a/compiler/rustc_ast_passes/src/errors.rs +++ b/compiler/rustc_ast_passes/src/errors.rs @@ -1,5 +1,6 @@ //! Errors emitted by ast_passes. 
+use rustc_abi::ExternAbi; use rustc_ast::ParamKindOrd; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, EmissionGuarantee, Subdiagnostic}; @@ -845,6 +846,7 @@ pub(crate) struct AbiCustomSafeForeignFunction { pub(crate) struct AbiCustomSafeFunction { #[primary_span] pub span: Span, + pub abi: ExternAbi, #[suggestion( ast_passes_suggestion, @@ -856,10 +858,11 @@ pub(crate) struct AbiCustomSafeFunction { } #[derive(Diagnostic)] -#[diag(ast_passes_abi_custom_coroutine)] -pub(crate) struct AbiCustomCoroutine { +#[diag(ast_passes_abi_cannot_be_coroutine)] +pub(crate) struct AbiCannotBeCoroutine { #[primary_span] pub span: Span, + pub abi: ExternAbi, #[suggestion( ast_passes_suggestion, @@ -872,11 +875,12 @@ pub(crate) struct AbiCustomCoroutine { } #[derive(Diagnostic)] -#[diag(ast_passes_abi_custom_invalid_signature)] +#[diag(ast_passes_abi_must_not_have_parameters_or_return_type)] #[note] -pub(crate) struct AbiCustomInvalidSignature { +pub(crate) struct AbiMustNotHaveParametersOrReturnType { #[primary_span] pub spans: Vec, + pub abi: ExternAbi, #[suggestion( ast_passes_suggestion, @@ -888,3 +892,13 @@ pub(crate) struct AbiCustomInvalidSignature { pub symbol: Symbol, pub padding: &'static str, } + +#[derive(Diagnostic)] +#[diag(ast_passes_abi_must_not_have_return_type)] +#[note] +pub(crate) struct AbiMustNotHaveReturnType { + #[primary_span] + #[help] + pub span: Span, + pub abi: ExternAbi, +} diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index ee49246a4bbf..7651e8365a29 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -357,6 +357,10 @@ impl<'a> State<'a> { self.word_nbsp("raw"); self.print_mutability(mutability, true); } + ast::BorrowKind::Pin => { + self.word_nbsp("pin"); + self.print_mutability(mutability, true); + } } self.print_expr_cond_paren( expr, @@ -386,18 +390,44 @@ impl<'a> State<'a> { let ib = 
self.ibox(INDENT_UNIT); - // The Match subexpression in `match x {} - 1` must be parenthesized if - // it is the leftmost subexpression in a statement: - // - // (match x {}) - 1; - // - // But not otherwise: - // - // let _ = match x {} - 1; - // - // Same applies to a small set of other expression kinds which eagerly - // terminate a statement which opens with them. - let needs_par = fixup.would_cause_statement_boundary(expr); + let needs_par = { + // The Match subexpression in `match x {} - 1` must be parenthesized + // if it is the leftmost subexpression in a statement: + // + // (match x {}) - 1; + // + // But not otherwise: + // + // let _ = match x {} - 1; + // + // Same applies to a small set of other expression kinds which + // eagerly terminate a statement which opens with them. + fixup.would_cause_statement_boundary(expr) + } || { + // If a binary operation ends up with an attribute, such as + // resulting from the following macro expansion, then parentheses + // are required so that the attribute encompasses the right + // subexpression and not just the left one. + // + // #![feature(stmt_expr_attributes)] + // + // macro_rules! add_attr { + // ($e:expr) => { #[attr] $e }; + // } + // + // let _ = add_attr!(1 + 1); + // + // We must pretty-print `#[attr] (1 + 1)` not `#[attr] 1 + 1`. + !attrs.is_empty() + && matches!( + expr.kind, + ast::ExprKind::Binary(..) + | ast::ExprKind::Cast(..) + | ast::ExprKind::Assign(..) + | ast::ExprKind::AssignOp(..) + | ast::ExprKind::Range(..) 
+ ) + }; if needs_par { self.popen(); fixup = FixupContext::default(); diff --git a/compiler/rustc_attr_data_structures/src/attributes.rs b/compiler/rustc_attr_data_structures/src/attributes.rs index cdc01dc6c91a..60a4f2893069 100644 --- a/compiler/rustc_attr_data_structures/src/attributes.rs +++ b/compiler/rustc_attr_data_structures/src/attributes.rs @@ -38,7 +38,8 @@ pub enum InstructionSetAttr { ArmT32, } -#[derive(Clone, Encodable, Decodable, Debug, PartialEq, Eq, HashStable_Generic, Default)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Default, PrintAttribute)] +#[derive(Encodable, Decodable, HashStable_Generic)] pub enum OptimizeAttr { /// No `#[optimize(..)]` attribute #[default] @@ -130,6 +131,17 @@ impl Deprecation { } } +/// There are three valid forms of the attribute: +/// `#[used]`, which is semantically equivalent to `#[used(linker)]` except that the latter is currently unstable. +/// `#[used(compiler)]` +/// `#[used(linker)]` +#[derive(Encodable, Decodable, Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[derive(HashStable_Generic, PrintAttribute)] +pub enum UsedBy { + Compiler, + Linker, +} + /// Represents parsed *built-in* inert attributes. /// /// ## Overview @@ -201,6 +213,9 @@ pub enum AttributeKind { span: Span, }, + /// Represents `#[cold]`. + Cold(Span), + /// Represents `#[rustc_confusables]`. Confusables { symbols: ThinVec, @@ -208,6 +223,9 @@ pub enum AttributeKind { first_span: Span, }, + /// Represents `#[const_continue]`. + ConstContinue(Span), + /// Represents `#[rustc_const_stable]` and `#[rustc_const_unstable]`. ConstStability { stability: PartialConstStability, @@ -224,20 +242,62 @@ pub enum AttributeKind { /// Represents [`#[doc]`](https://doc.rust-lang.org/stable/rustdoc/write-documentation/the-doc-attribute.html). DocComment { style: AttrStyle, kind: CommentKind, span: Span, comment: Symbol }, + /// Represents [`#[export_name]`](https://doc.rust-lang.org/reference/abi.html#the-export_name-attribute). 
+ ExportName { + /// The name to export this item with. + /// It may not contain \0 bytes as it will be converted to a null-terminated string. + name: Symbol, + span: Span, + }, + /// Represents `#[inline]` and `#[rustc_force_inline]`. Inline(InlineAttr, Span), + /// Represents `#[loop_match]`. + LoopMatch(Span), + /// Represents `#[rustc_macro_transparency]`. MacroTransparency(Transparency), + /// Represents [`#[may_dangle]`](https://std-dev-guide.rust-lang.org/tricky/may-dangle.html). + MayDangle(Span), + + /// Represents `#[must_use]`. + MustUse { + span: Span, + /// must_use can optionally have a reason: `#[must_use = "reason this must be used"]` + reason: Option, + }, + + /// Represents `#[naked]` + Naked(Span), + + /// Represents `#[no_mangle]` + NoMangle(Span), + + /// Represents `#[optimize(size|speed)]` + Optimize(OptimizeAttr, Span), + + /// Represents `#[rustc_pub_transparent]` (used by the `repr_transparent_external_private_fields` lint). + PubTransparent(Span), + /// Represents [`#[repr]`](https://doc.rust-lang.org/stable/reference/type-layout.html#representations). Repr(ThinVec<(ReprAttr, Span)>), + /// Represents `#[rustc_skip_during_method_dispatch]`. + SkipDuringMethodDispatch { array: bool, boxed_slice: bool, span: Span }, + /// Represents `#[stable]`, `#[unstable]` and `#[rustc_allowed_through_unstable_modules]`. Stability { stability: Stability, /// Span of the attribute. 
span: Span, }, + + /// Represents `#[track_caller]` + TrackCaller(Span), + + /// Represents `#[used]` + Used { used_by: UsedBy, span: Span }, // tidy-alphabetical-end } diff --git a/compiler/rustc_attr_data_structures/src/encode_cross_crate.rs b/compiler/rustc_attr_data_structures/src/encode_cross_crate.rs new file mode 100644 index 000000000000..64bcf1fe6cce --- /dev/null +++ b/compiler/rustc_attr_data_structures/src/encode_cross_crate.rs @@ -0,0 +1,44 @@ +use crate::AttributeKind; + +#[derive(PartialEq)] +pub enum EncodeCrossCrate { + Yes, + No, +} + +impl AttributeKind { + pub fn encode_cross_crate(&self) -> EncodeCrossCrate { + use AttributeKind::*; + use EncodeCrossCrate::*; + + match self { + Align { .. } => No, + AllowConstFnUnstable(..) => No, + AllowInternalUnstable(..) => Yes, + AsPtr(..) => Yes, + BodyStability { .. } => No, + Confusables { .. } => Yes, + ConstStability { .. } => Yes, + ConstStabilityIndirect => No, + Deprecation { .. } => Yes, + DocComment { .. } => Yes, + ExportName { .. } => Yes, + Inline(..) => No, + MacroTransparency(..) => Yes, + Repr(..) => No, + Stability { .. } => Yes, + Cold(..) => No, + ConstContinue(..) => No, + LoopMatch(..) => No, + MayDangle(..) => No, + MustUse { .. } => Yes, + Naked(..) => No, + NoMangle(..) => No, + Optimize(..) => No, + PubTransparent(..) => Yes, + SkipDuringMethodDispatch { .. } => No, + TrackCaller(..) => Yes, + Used { .. 
} => No, + } + } +} diff --git a/compiler/rustc_attr_data_structures/src/lib.rs b/compiler/rustc_attr_data_structures/src/lib.rs index f8355be09adf..86c73f0d9a08 100644 --- a/compiler/rustc_attr_data_structures/src/lib.rs +++ b/compiler/rustc_attr_data_structures/src/lib.rs @@ -9,6 +9,7 @@ // tidy-alphabetical-end mod attributes; +mod encode_cross_crate; mod stability; mod version; @@ -17,6 +18,7 @@ pub mod lints; use std::num::NonZero; pub use attributes::*; +pub use encode_cross_crate::EncodeCrossCrate; use rustc_abi::Align; use rustc_ast::token::CommentKind; use rustc_ast::{AttrStyle, IntTy, UintTy}; diff --git a/compiler/rustc_attr_parsing/messages.ftl b/compiler/rustc_attr_parsing/messages.ftl index 0891afc003e1..39652335f555 100644 --- a/compiler/rustc_attr_parsing/messages.ftl +++ b/compiler/rustc_attr_parsing/messages.ftl @@ -89,9 +89,16 @@ attr_parsing_missing_since = attr_parsing_multiple_stability_levels = multiple stability levels +attr_parsing_naked_functions_incompatible_attribute = + attribute incompatible with `#[unsafe(naked)]` + .label = the `{$attr}` attribute is incompatible with `#[unsafe(naked)]` + .naked_attribute = function marked with `#[unsafe(naked)]` here + attr_parsing_non_ident_feature = 'feature' is not an identifier +attr_parsing_null_on_export = `export_name` may not contain null characters + attr_parsing_repr_ident = meta item in `repr` must be an identifier diff --git a/compiler/rustc_attr_parsing/src/attributes/codegen_attrs.rs b/compiler/rustc_attr_parsing/src/attributes/codegen_attrs.rs new file mode 100644 index 000000000000..7c412d4fa892 --- /dev/null +++ b/compiler/rustc_attr_parsing/src/attributes/codegen_attrs.rs @@ -0,0 +1,311 @@ +use rustc_attr_data_structures::{AttributeKind, OptimizeAttr, UsedBy}; +use rustc_feature::{AttributeTemplate, template}; +use rustc_session::parse::feature_err; +use rustc_span::{Span, Symbol, sym}; + +use super::{AcceptMapping, AttributeOrder, AttributeParser, OnDuplicate, 
SingleAttributeParser}; +use crate::context::{AcceptContext, FinalizeContext, Stage}; +use crate::parser::ArgParser; +use crate::session_diagnostics::{NakedFunctionIncompatibleAttribute, NullOnExport}; + +pub(crate) struct OptimizeParser; + +impl SingleAttributeParser for OptimizeParser { + const PATH: &[Symbol] = &[sym::optimize]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepLast; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::WarnButFutureError; + const TEMPLATE: AttributeTemplate = template!(List: "size|speed|none"); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + let Some(list) = args.list() else { + cx.expected_list(cx.attr_span); + return None; + }; + + let Some(single) = list.single() else { + cx.expected_single_argument(list.span); + return None; + }; + + let res = match single.meta_item().and_then(|i| i.path().word().map(|i| i.name)) { + Some(sym::size) => OptimizeAttr::Size, + Some(sym::speed) => OptimizeAttr::Speed, + Some(sym::none) => OptimizeAttr::DoNotOptimize, + _ => { + cx.expected_specific_argument(single.span(), vec!["size", "speed", "none"]); + OptimizeAttr::Default + } + }; + + Some(AttributeKind::Optimize(res, cx.attr_span)) + } +} + +pub(crate) struct ColdParser; + +impl SingleAttributeParser for ColdParser { + const PATH: &[Symbol] = &[sym::cold]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepLast; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Warn; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + return None; + } + + Some(AttributeKind::Cold(cx.attr_span)) + } +} + +pub(crate) struct ExportNameParser; + +impl SingleAttributeParser for ExportNameParser { + const PATH: &[rustc_span::Symbol] = &[sym::export_name]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepFirst; + const ON_DUPLICATE: 
OnDuplicate = OnDuplicate::WarnButFutureError; + const TEMPLATE: AttributeTemplate = template!(NameValueStr: "name"); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + let Some(nv) = args.name_value() else { + cx.expected_name_value(cx.attr_span, None); + return None; + }; + let Some(name) = nv.value_as_str() else { + cx.expected_string_literal(nv.value_span, Some(nv.value_as_lit())); + return None; + }; + if name.as_str().contains('\0') { + // `#[export_name = ...]` will be converted to a null-terminated string, + // so it may not contain any null characters. + cx.emit_err(NullOnExport { span: cx.attr_span }); + return None; + } + Some(AttributeKind::ExportName { name, span: cx.attr_span }) + } +} + +#[derive(Default)] +pub(crate) struct NakedParser { + span: Option, +} + +impl AttributeParser for NakedParser { + const ATTRIBUTES: AcceptMapping = + &[(&[sym::naked], template!(Word), |this, cx, args| { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + return; + } + + if let Some(earlier) = this.span { + let span = cx.attr_span; + cx.warn_unused_duplicate(earlier, span); + } else { + this.span = Some(cx.attr_span); + } + })]; + + fn finalize(self, cx: &FinalizeContext<'_, '_, S>) -> Option { + // FIXME(jdonszelmann): upgrade this list to *parsed* attributes + // once all of these have parsed forms. That'd make the check much nicer... + // + // many attributes don't make sense in combination with #[naked]. + // Notable attributes that are incompatible with `#[naked]` are: + // + // * `#[inline]` + // * `#[track_caller]` + // * `#[test]`, `#[ignore]`, `#[should_panic]` + // + // NOTE: when making changes to this list, check that `error_codes/E0736.md` remains + // accurate. 
+ const ALLOW_LIST: &[rustc_span::Symbol] = &[ + // conditional compilation + sym::cfg_trace, + sym::cfg_attr_trace, + // testing (allowed here so better errors can be generated in `rustc_builtin_macros::test`) + sym::test, + sym::ignore, + sym::should_panic, + sym::bench, + // diagnostics + sym::allow, + sym::warn, + sym::deny, + sym::forbid, + sym::deprecated, + sym::must_use, + // abi, linking and FFI + sym::cold, + sym::export_name, + sym::link_section, + sym::linkage, + sym::no_mangle, + sym::instruction_set, + sym::repr, + sym::rustc_std_internal_symbol, + sym::align, + // obviously compatible with self + sym::naked, + // documentation + sym::doc, + ]; + + let span = self.span?; + + // only if we found a naked attribute do we do the somewhat expensive check + 'outer: for other_attr in cx.all_attrs { + for allowed_attr in ALLOW_LIST { + if other_attr.segments().next().is_some_and(|i| cx.tools.contains(&i.name)) { + // effectively skips the error message being emitted below + // if it's a tool attribute + continue 'outer; + } + if other_attr.word_is(*allowed_attr) { + // effectively skips the error message being emitted below + // if its an allowed attribute + continue 'outer; + } + + if other_attr.word_is(sym::target_feature) { + if !cx.features().naked_functions_target_feature() { + feature_err( + &cx.sess(), + sym::naked_functions_target_feature, + other_attr.span(), + "`#[target_feature(/* ... 
*/)]` is currently unstable on `#[naked]` functions", + ).emit(); + } + + continue 'outer; + } + } + + cx.emit_err(NakedFunctionIncompatibleAttribute { + span: other_attr.span(), + naked_span: span, + attr: other_attr.get_attribute_path().to_string(), + }); + } + + Some(AttributeKind::Naked(span)) + } +} + +pub(crate) struct TrackCallerParser; + +impl SingleAttributeParser for TrackCallerParser { + const PATH: &[Symbol] = &[sym::track_caller]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepLast; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Warn; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + return None; + } + + Some(AttributeKind::TrackCaller(cx.attr_span)) + } +} + +pub(crate) struct NoMangleParser; + +impl SingleAttributeParser for NoMangleParser { + const PATH: &[rustc_span::Symbol] = &[sym::no_mangle]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepLast; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Warn; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + return None; + } + + Some(AttributeKind::NoMangle(cx.attr_span)) + } +} + +#[derive(Default)] +pub(crate) struct UsedParser { + first_compiler: Option, + first_linker: Option, +} + +// A custom `AttributeParser` is used rather than a Simple attribute parser because +// - Specifying two `#[used]` attributes is a warning (but will be an error in the future) +// - But specifying two conflicting attributes: `#[used(compiler)]` and `#[used(linker)]` is already an error today +// We can change this to a Simple parser once the warning becomes an error +impl AttributeParser for UsedParser { + const ATTRIBUTES: AcceptMapping = &[( + &[sym::used], + template!(Word, List: 
"compiler|linker"), + |group: &mut Self, cx, args| { + let used_by = match args { + ArgParser::NoArgs => UsedBy::Linker, + ArgParser::List(list) => { + let Some(l) = list.single() else { + cx.expected_single_argument(list.span); + return; + }; + + match l.meta_item().and_then(|i| i.path().word_sym()) { + Some(sym::compiler) => { + if !cx.features().used_with_arg() { + feature_err( + &cx.sess(), + sym::used_with_arg, + cx.attr_span, + "`#[used(compiler)]` is currently unstable", + ) + .emit(); + } + UsedBy::Compiler + } + Some(sym::linker) => { + if !cx.features().used_with_arg() { + feature_err( + &cx.sess(), + sym::used_with_arg, + cx.attr_span, + "`#[used(linker)]` is currently unstable", + ) + .emit(); + } + UsedBy::Linker + } + _ => { + cx.expected_specific_argument(l.span(), vec!["compiler", "linker"]); + return; + } + } + } + ArgParser::NameValue(_) => return, + }; + + let target = match used_by { + UsedBy::Compiler => &mut group.first_compiler, + UsedBy::Linker => &mut group.first_linker, + }; + + let attr_span = cx.attr_span; + if let Some(prev) = *target { + cx.warn_unused_duplicate(prev, attr_span); + } else { + *target = Some(attr_span); + } + }, + )]; + + fn finalize(self, _cx: &FinalizeContext<'_, '_, S>) -> Option { + // Ratcheting behaviour, if both `linker` and `compiler` are specified, use `linker` + Some(match (self.first_compiler, self.first_linker) { + (_, Some(span)) => AttributeKind::Used { used_by: UsedBy::Linker, span }, + (Some(span), _) => AttributeKind::Used { used_by: UsedBy::Compiler, span }, + (None, None) => return None, + }) + } +} diff --git a/compiler/rustc_attr_parsing/src/attributes/inline.rs b/compiler/rustc_attr_parsing/src/attributes/inline.rs index 25efc3ae49b9..11844f4cd950 100644 --- a/compiler/rustc_attr_parsing/src/attributes/inline.rs +++ b/compiler/rustc_attr_parsing/src/attributes/inline.rs @@ -45,10 +45,8 @@ impl SingleAttributeParser for InlineParser { ArgParser::NameValue(_) => { let suggestions = 
>::TEMPLATE.suggestions(false, "inline"); - cx.emit_lint( - AttributeLintKind::IllFormedAttributeInput { suggestions }, - cx.attr_span, - ); + let span = cx.attr_span; + cx.emit_lint(AttributeLintKind::IllFormedAttributeInput { suggestions }, span); return None; } } diff --git a/compiler/rustc_attr_parsing/src/attributes/lint_helpers.rs b/compiler/rustc_attr_parsing/src/attributes/lint_helpers.rs index d4c846de56eb..1c8fc5079dad 100644 --- a/compiler/rustc_attr_parsing/src/attributes/lint_helpers.rs +++ b/compiler/rustc_attr_parsing/src/attributes/lint_helpers.rs @@ -14,8 +14,25 @@ impl SingleAttributeParser for AsPtrParser { const ON_DUPLICATE: OnDuplicate = OnDuplicate::Error; const TEMPLATE: AttributeTemplate = template!(Word); - fn convert(cx: &mut AcceptContext<'_, '_, S>, _args: &ArgParser<'_>) -> Option { - // FIXME: check that there's no args (this is currently checked elsewhere) + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + } Some(AttributeKind::AsPtr(cx.attr_span)) } } + +pub(crate) struct PubTransparentParser; +impl SingleAttributeParser for PubTransparentParser { + const PATH: &[Symbol] = &[sym::rustc_pub_transparent]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepFirst; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Error; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + } + Some(AttributeKind::PubTransparent(cx.attr_span)) + } +} diff --git a/compiler/rustc_attr_parsing/src/attributes/loop_match.rs b/compiler/rustc_attr_parsing/src/attributes/loop_match.rs new file mode 100644 index 000000000000..f6c7ac5e3a39 --- /dev/null +++ b/compiler/rustc_attr_parsing/src/attributes/loop_match.rs @@ -0,0 +1,31 @@ +use rustc_attr_data_structures::AttributeKind; +use 
rustc_feature::{AttributeTemplate, template}; +use rustc_span::{Symbol, sym}; + +use crate::attributes::{AttributeOrder, OnDuplicate, SingleAttributeParser}; +use crate::context::{AcceptContext, Stage}; +use crate::parser::ArgParser; + +pub(crate) struct LoopMatchParser; +impl SingleAttributeParser for LoopMatchParser { + const PATH: &[Symbol] = &[sym::loop_match]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepFirst; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Warn; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, _args: &ArgParser<'_>) -> Option { + Some(AttributeKind::LoopMatch(cx.attr_span)) + } +} + +pub(crate) struct ConstContinueParser; +impl SingleAttributeParser for ConstContinueParser { + const PATH: &[Symbol] = &[sym::const_continue]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepFirst; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Warn; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, _args: &ArgParser<'_>) -> Option { + Some(AttributeKind::ConstContinue(cx.attr_span)) + } +} diff --git a/compiler/rustc_attr_parsing/src/attributes/mod.rs b/compiler/rustc_attr_parsing/src/attributes/mod.rs index fa2a6087506c..d407669cb410 100644 --- a/compiler/rustc_attr_parsing/src/attributes/mod.rs +++ b/compiler/rustc_attr_parsing/src/attributes/mod.rs @@ -17,7 +17,6 @@ use std::marker::PhantomData; use rustc_attr_data_structures::AttributeKind; -use rustc_attr_data_structures::lints::AttributeLintKind; use rustc_feature::AttributeTemplate; use rustc_span::{Span, Symbol}; use thin_vec::ThinVec; @@ -28,12 +27,17 @@ use crate::session_diagnostics::UnusedMultiple; pub(crate) mod allow_unstable; pub(crate) mod cfg; +pub(crate) mod codegen_attrs; pub(crate) mod confusables; pub(crate) mod deprecation; pub(crate) mod inline; pub(crate) mod lint_helpers; +pub(crate) mod loop_match; +pub(crate) mod must_use; pub(crate) mod 
repr; +pub(crate) mod semantics; pub(crate) mod stability; +pub(crate) mod traits; pub(crate) mod transparency; pub(crate) mod util; @@ -86,8 +90,19 @@ pub(crate) trait AttributeParser: Default + 'static { /// [`SingleAttributeParser`] can only convert attributes one-to-one, and cannot combine multiple /// attributes together like is necessary for `#[stable()]` and `#[unstable()]` for example. pub(crate) trait SingleAttributeParser: 'static { + /// The single path of the attribute this parser accepts. + /// + /// If you need the parser to accept more than one path, use [`AttributeParser`] instead const PATH: &[Symbol]; + + /// Configures the precedence of attributes with the same `PATH` on a syntax node. const ATTRIBUTE_ORDER: AttributeOrder; + + /// Configures what to do when when the same attribute is + /// applied more than once on the same syntax node. + /// + /// [`ATTRIBUTE_ORDER`](Self::ATTRIBUTE_ORDER) specified which one is assumed to be correct, + /// and this specified whether to, for example, warn or error on the other one. const ON_DUPLICATE: OnDuplicate; /// The template this attribute parser should implement. Used for diagnostics. @@ -97,6 +112,8 @@ pub(crate) trait SingleAttributeParser: 'static { fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option; } +/// Use in combination with [`SingleAttributeParser`]. +/// `Single` implements [`AttributeParser`]. 
pub(crate) struct Single, S: Stage>( PhantomData<(S, T)>, Option<(AttributeKind, Span)>, @@ -173,14 +190,8 @@ impl OnDuplicate { unused: Span, ) { match self { - OnDuplicate::Warn => cx.emit_lint( - AttributeLintKind::UnusedDuplicate { this: unused, other: used, warning: false }, - unused, - ), - OnDuplicate::WarnButFutureError => cx.emit_lint( - AttributeLintKind::UnusedDuplicate { this: unused, other: used, warning: true }, - unused, - ), + OnDuplicate::Warn => cx.warn_unused_duplicate(used, unused), + OnDuplicate::WarnButFutureError => cx.warn_unused_duplicate_future_error(used, unused), OnDuplicate::Error => { cx.emit_err(UnusedMultiple { this: used, @@ -229,6 +240,10 @@ pub(crate) trait CombineAttributeParser: 'static { const PATH: &[rustc_span::Symbol]; type Item; + /// A function that converts individual items (of type [`Item`](Self::Item)) into the final attribute. + /// + /// For example, individual representations fomr `#[repr(...)]` attributes into an `AttributeKind::Repr(x)`, + /// where `x` is a vec of these individual reprs. const CONVERT: ConvertFn; /// The template this attribute parser should implement. Used for diagnostics. @@ -241,6 +256,8 @@ pub(crate) trait CombineAttributeParser: 'static { ) -> impl IntoIterator + 'c; } +/// Use in combination with [`CombineAttributeParser`]. +/// `Combine` implements [`AttributeParser`]. 
pub(crate) struct Combine, S: Stage>( PhantomData<(S, T)>, ThinVec<>::Item>, diff --git a/compiler/rustc_attr_parsing/src/attributes/must_use.rs b/compiler/rustc_attr_parsing/src/attributes/must_use.rs new file mode 100644 index 000000000000..a672d9561274 --- /dev/null +++ b/compiler/rustc_attr_parsing/src/attributes/must_use.rs @@ -0,0 +1,40 @@ +use rustc_attr_data_structures::AttributeKind; +use rustc_errors::DiagArgValue; +use rustc_feature::{AttributeTemplate, template}; +use rustc_span::{Symbol, sym}; + +use crate::attributes::{AttributeOrder, OnDuplicate, SingleAttributeParser}; +use crate::context::{AcceptContext, Stage}; +use crate::parser::ArgParser; +use crate::session_diagnostics; + +pub(crate) struct MustUseParser; + +impl SingleAttributeParser for MustUseParser { + const PATH: &[Symbol] = &[sym::must_use]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepLast; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::WarnButFutureError; + const TEMPLATE: AttributeTemplate = template!(Word, NameValueStr: "reason"); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + Some(AttributeKind::MustUse { + span: cx.attr_span, + reason: match args { + ArgParser::NoArgs => None, + ArgParser::NameValue(name_value) => name_value.value_as_str(), + ArgParser::List(_) => { + let suggestions = + >::TEMPLATE.suggestions(false, "must_use"); + cx.emit_err(session_diagnostics::MustUseIllFormedAttributeInput { + num_suggestions: suggestions.len(), + suggestions: DiagArgValue::StrListSepByAnd( + suggestions.into_iter().map(|s| format!("`{s}`").into()).collect(), + ), + span: cx.attr_span, + }); + return None; + } + }, + }) + } +} diff --git a/compiler/rustc_attr_parsing/src/attributes/repr.rs b/compiler/rustc_attr_parsing/src/attributes/repr.rs index c9f9f34bdb7c..4aa27043e989 100644 --- a/compiler/rustc_attr_parsing/src/attributes/repr.rs +++ b/compiler/rustc_attr_parsing/src/attributes/repr.rs @@ -25,7 +25,8 @@ impl 
CombineAttributeParser for ReprParser { const PATH: &[Symbol] = &[sym::repr]; const CONVERT: ConvertFn = AttributeKind::Repr; // FIXME(jdonszelmann): never used - const TEMPLATE: AttributeTemplate = template!(List: "C"); + const TEMPLATE: AttributeTemplate = + template!(List: "C | Rust | align(...) | packed(...) | | transparent"); fn extend<'c>( cx: &'c mut AcceptContext<'_, '_, S>, @@ -273,7 +274,7 @@ pub(crate) struct AlignParser(Option<(Align, Span)>); impl AlignParser { const PATH: &'static [Symbol] = &[sym::align]; - const TEMPLATE: AttributeTemplate = template!(Word, List: ""); + const TEMPLATE: AttributeTemplate = template!(List: ""); fn parse<'c, S: Stage>( &mut self, diff --git a/compiler/rustc_attr_parsing/src/attributes/semantics.rs b/compiler/rustc_attr_parsing/src/attributes/semantics.rs new file mode 100644 index 000000000000..54f50445fbdf --- /dev/null +++ b/compiler/rustc_attr_parsing/src/attributes/semantics.rs @@ -0,0 +1,22 @@ +use rustc_attr_data_structures::AttributeKind; +use rustc_feature::{AttributeTemplate, template}; +use rustc_span::{Symbol, sym}; + +use crate::attributes::{AttributeOrder, OnDuplicate, SingleAttributeParser}; +use crate::context::{AcceptContext, Stage}; +use crate::parser::ArgParser; + +pub(crate) struct MayDangleParser; +impl SingleAttributeParser for MayDangleParser { + const PATH: &[Symbol] = &[sym::may_dangle]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepFirst; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Warn; + const TEMPLATE: AttributeTemplate = template!(Word); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + } + Some(AttributeKind::MayDangle(cx.attr_span)) + } +} diff --git a/compiler/rustc_attr_parsing/src/attributes/stability.rs b/compiler/rustc_attr_parsing/src/attributes/stability.rs index 6871ff4ec9f3..37104855623f 100644 --- a/compiler/rustc_attr_parsing/src/attributes/stability.rs 
+++ b/compiler/rustc_attr_parsing/src/attributes/stability.rs @@ -139,7 +139,10 @@ impl SingleAttributeParser for ConstStabilityIndirectParser { const ON_DUPLICATE: OnDuplicate = OnDuplicate::Ignore; const TEMPLATE: AttributeTemplate = template!(Word); - fn convert(_cx: &mut AcceptContext<'_, '_, S>, _args: &ArgParser<'_>) -> Option { + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + if let Err(span) = args.no_args() { + cx.expected_no_args(span); + } Some(AttributeKind::ConstStabilityIndirect) } } @@ -361,8 +364,8 @@ pub(crate) fn parse_unstability( }; } Some(sym::soft) => { - if !param.args().no_args() { - cx.emit_err(session_diagnostics::SoftNoArgs { span: param.span() }); + if let Err(span) = args.no_args() { + cx.emit_err(session_diagnostics::SoftNoArgs { span }); } is_soft = true; } diff --git a/compiler/rustc_attr_parsing/src/attributes/traits.rs b/compiler/rustc_attr_parsing/src/attributes/traits.rs new file mode 100644 index 000000000000..83a98c53c7f7 --- /dev/null +++ b/compiler/rustc_attr_parsing/src/attributes/traits.rs @@ -0,0 +1,54 @@ +use core::mem; + +use rustc_attr_data_structures::AttributeKind; +use rustc_feature::{AttributeTemplate, template}; +use rustc_span::{Symbol, sym}; + +use crate::attributes::{AttributeOrder, OnDuplicate, SingleAttributeParser}; +use crate::context::{AcceptContext, Stage}; +use crate::parser::ArgParser; + +pub(crate) struct SkipDuringMethodDispatchParser; + +impl SingleAttributeParser for SkipDuringMethodDispatchParser { + const PATH: &[Symbol] = &[sym::rustc_skip_during_method_dispatch]; + const ATTRIBUTE_ORDER: AttributeOrder = AttributeOrder::KeepFirst; + const ON_DUPLICATE: OnDuplicate = OnDuplicate::Error; + + const TEMPLATE: AttributeTemplate = template!(List: "array, boxed_slice"); + + fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser<'_>) -> Option { + let mut array = false; + let mut boxed_slice = false; + let Some(args) = args.list() else { + 
cx.expected_list(cx.attr_span); + return None; + }; + if args.is_empty() { + cx.expected_at_least_one_argument(args.span); + return None; + } + for arg in args.mixed() { + let Some(arg) = arg.meta_item() else { + cx.unexpected_literal(arg.span()); + continue; + }; + if let Err(span) = arg.args().no_args() { + cx.expected_no_args(span); + } + let path = arg.path(); + let (key, skip): (Symbol, &mut bool) = match path.word_sym() { + Some(key @ sym::array) => (key, &mut array), + Some(key @ sym::boxed_slice) => (key, &mut boxed_slice), + _ => { + cx.expected_specific_argument(path.span(), vec!["array", "boxed_slice"]); + continue; + } + }; + if mem::replace(skip, true) { + cx.duplicate_key(arg.span(), key); + } + } + Some(AttributeKind::SkipDuringMethodDispatch { array, boxed_slice, span: cx.attr_span }) + } +} diff --git a/compiler/rustc_attr_parsing/src/context.rs b/compiler/rustc_attr_parsing/src/context.rs index d7570634c1f7..71bb86ca3d3b 100644 --- a/compiler/rustc_attr_parsing/src/context.rs +++ b/compiler/rustc_attr_parsing/src/context.rs @@ -15,17 +15,25 @@ use rustc_session::Session; use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, Symbol, sym}; use crate::attributes::allow_unstable::{AllowConstFnUnstableParser, AllowInternalUnstableParser}; +use crate::attributes::codegen_attrs::{ + ColdParser, ExportNameParser, NakedParser, NoMangleParser, OptimizeParser, TrackCallerParser, + UsedParser, +}; use crate::attributes::confusables::ConfusablesParser; use crate::attributes::deprecation::DeprecationParser; use crate::attributes::inline::{InlineParser, RustcForceInlineParser}; -use crate::attributes::lint_helpers::AsPtrParser; +use crate::attributes::lint_helpers::{AsPtrParser, PubTransparentParser}; +use crate::attributes::loop_match::{ConstContinueParser, LoopMatchParser}; +use crate::attributes::must_use::MustUseParser; use crate::attributes::repr::{AlignParser, ReprParser}; +use crate::attributes::semantics::MayDangleParser; use crate::attributes::stability::{ 
BodyStabilityParser, ConstStabilityIndirectParser, ConstStabilityParser, StabilityParser, }; +use crate::attributes::traits::SkipDuringMethodDispatchParser; use crate::attributes::transparency::TransparencyParser; use crate::attributes::{AttributeParser as _, Combine, Single}; -use crate::parser::{ArgParser, MetaItemParser}; +use crate::parser::{ArgParser, MetaItemParser, PathParser}; use crate::session_diagnostics::{AttributeParseError, AttributeParseErrorReason, UnknownMetaItem}; macro_rules! group_type { @@ -94,7 +102,9 @@ attribute_parsers!( BodyStabilityParser, ConfusablesParser, ConstStabilityParser, + NakedParser, StabilityParser, + UsedParser, // tidy-alphabetical-end // tidy-alphabetical-start @@ -105,10 +115,21 @@ attribute_parsers!( // tidy-alphabetical-start Single, + Single, + Single, Single, Single, + Single, Single, + Single, + Single, + Single, + Single, + Single, + Single, Single, + Single, + Single, Single, // tidy-alphabetical-end ]; @@ -165,7 +186,7 @@ pub struct Late; /// /// Gives [`AttributeParser`]s enough information to create errors, for example. 
pub(crate) struct AcceptContext<'f, 'sess, S: Stage> { - pub(crate) finalize_cx: FinalizeContext<'f, 'sess, S>, + pub(crate) shared: SharedContext<'f, 'sess, S>, /// The span of the attribute currently being parsed pub(crate) attr_span: Span, @@ -178,7 +199,7 @@ pub(crate) struct AcceptContext<'f, 'sess, S: Stage> { pub(crate) attr_path: AttrPath, } -impl<'f, 'sess: 'f, S: Stage> AcceptContext<'f, 'sess, S> { +impl<'f, 'sess: 'f, S: Stage> SharedContext<'f, 'sess, S> { pub(crate) fn emit_err(&self, diag: impl for<'x> Diagnostic<'x>) -> ErrorGuaranteed { S::emit_err(&self.sess, diag) } @@ -191,6 +212,34 @@ impl<'f, 'sess: 'f, S: Stage> AcceptContext<'f, 'sess, S> { (self.emit_lint)(AttributeLint { id, span, kind: lint }); } + pub(crate) fn warn_unused_duplicate(&mut self, used_span: Span, unused_span: Span) { + self.emit_lint( + AttributeLintKind::UnusedDuplicate { + this: unused_span, + other: used_span, + warning: false, + }, + unused_span, + ) + } + + pub(crate) fn warn_unused_duplicate_future_error( + &mut self, + used_span: Span, + unused_span: Span, + ) { + self.emit_lint( + AttributeLintKind::UnusedDuplicate { + this: unused_span, + other: used_span, + warning: true, + }, + unused_span, + ) + } +} + +impl<'f, 'sess: 'f, S: Stage> AcceptContext<'f, 'sess, S> { pub(crate) fn unknown_key( &self, span: Span, @@ -232,6 +281,16 @@ impl<'f, 'sess: 'f, S: Stage> AcceptContext<'f, 'sess, S> { }) } + pub(crate) fn expected_no_args(&self, args_span: Span) -> ErrorGuaranteed { + self.emit_err(AttributeParseError { + span: args_span, + attr_span: self.attr_span, + template: self.template.clone(), + attribute: self.attr_path.clone(), + reason: AttributeParseErrorReason::ExpectedNoArgs, + }) + } + /// emit an error that a `name = value` pair was expected at this span. The symbol can be given for /// a nicer error message talking about the specific name that was found lacking a value. 
pub(crate) fn expected_name_value(&self, span: Span, name: Option) -> ErrorGuaranteed { @@ -277,6 +336,16 @@ impl<'f, 'sess: 'f, S: Stage> AcceptContext<'f, 'sess, S> { }) } + pub(crate) fn expected_at_least_one_argument(&self, span: Span) -> ErrorGuaranteed { + self.emit_err(AttributeParseError { + span, + attr_span: self.attr_span, + template: self.template.clone(), + attribute: self.attr_path.clone(), + reason: AttributeParseErrorReason::ExpectedAtLeastOneArgument, + }) + } + pub(crate) fn expected_specific_argument( &self, span: Span, @@ -313,16 +382,16 @@ impl<'f, 'sess: 'f, S: Stage> AcceptContext<'f, 'sess, S> { } impl<'f, 'sess, S: Stage> Deref for AcceptContext<'f, 'sess, S> { - type Target = FinalizeContext<'f, 'sess, S>; + type Target = SharedContext<'f, 'sess, S>; fn deref(&self) -> &Self::Target { - &self.finalize_cx + &self.shared } } impl<'f, 'sess, S: Stage> DerefMut for AcceptContext<'f, 'sess, S> { fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.finalize_cx + &mut self.shared } } @@ -330,7 +399,7 @@ impl<'f, 'sess, S: Stage> DerefMut for AcceptContext<'f, 'sess, S> { /// /// Gives [`AttributeParser`](crate::attributes::AttributeParser)s enough information to create /// errors, for example. -pub(crate) struct FinalizeContext<'p, 'sess, S: Stage> { +pub(crate) struct SharedContext<'p, 'sess, S: Stage> { /// The parse context, gives access to the session and the /// diagnostics context. pub(crate) cx: &'p mut AttributeParser<'sess, S>, @@ -339,10 +408,40 @@ pub(crate) struct FinalizeContext<'p, 'sess, S: Stage> { /// The id ([`NodeId`] if `S` is `Early`, [`HirId`] if `S` is `Late`) of the syntactical component this attribute was applied to pub(crate) target_id: S::Id, - pub(crate) emit_lint: &'p mut dyn FnMut(AttributeLint), + emit_lint: &'p mut dyn FnMut(AttributeLint), +} + +/// Context given to every attribute parser during finalization. 
+/// +/// Gives [`AttributeParser`](crate::attributes::AttributeParser)s enough information to create +/// errors, for example. +pub(crate) struct FinalizeContext<'p, 'sess, S: Stage> { + pub(crate) shared: SharedContext<'p, 'sess, S>, + + /// A list of all attribute on this syntax node. + /// + /// Useful for compatibility checks with other attributes in [`finalize`](crate::attributes::AttributeParser::finalize) + /// + /// Usually, you should use normal attribute parsing logic instead, + /// especially when making a *denylist* of other attributes. + pub(crate) all_attrs: &'p [PathParser<'p>], } impl<'p, 'sess: 'p, S: Stage> Deref for FinalizeContext<'p, 'sess, S> { + type Target = SharedContext<'p, 'sess, S>; + + fn deref(&self) -> &Self::Target { + &self.shared + } +} + +impl<'p, 'sess: 'p, S: Stage> DerefMut for FinalizeContext<'p, 'sess, S> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.shared + } +} + +impl<'p, 'sess: 'p, S: Stage> Deref for SharedContext<'p, 'sess, S> { type Target = AttributeParser<'sess, S>; fn deref(&self) -> &Self::Target { @@ -350,7 +449,7 @@ impl<'p, 'sess: 'p, S: Stage> Deref for FinalizeContext<'p, 'sess, S> { } } -impl<'p, 'sess: 'p, S: Stage> DerefMut for FinalizeContext<'p, 'sess, S> { +impl<'p, 'sess: 'p, S: Stage> DerefMut for SharedContext<'p, 'sess, S> { fn deref_mut(&mut self) -> &mut Self::Target { self.cx } @@ -365,8 +464,7 @@ pub enum OmitDoc { /// Context created once, for example as part of the ast lowering /// context, through which all attributes can be lowered. 
pub struct AttributeParser<'sess, S: Stage = Late> { - #[expect(dead_code)] // FIXME(jdonszelmann): needed later to verify we parsed all attributes - tools: Vec, + pub(crate) tools: Vec, features: Option<&'sess Features>, sess: &'sess Session, stage: PhantomData, @@ -420,19 +518,13 @@ impl<'sess> AttributeParser<'sess, Early> { parsed.pop() } - - pub fn new_early(sess: &'sess Session, features: &'sess Features, tools: Vec) -> Self { - Self { features: Some(features), tools, parse_only: None, sess, stage: PhantomData } - } -} - -impl<'sess> AttributeParser<'sess, Late> { - pub fn new(sess: &'sess Session, features: &'sess Features, tools: Vec) -> Self { - Self { features: Some(features), tools, parse_only: None, sess, stage: PhantomData } - } } impl<'sess, S: Stage> AttributeParser<'sess, S> { + pub fn new(sess: &'sess Session, features: &'sess Features, tools: Vec) -> Self { + Self { features: Some(features), tools, parse_only: None, sess, stage: PhantomData } + } + pub(crate) fn sess(&self) -> &'sess Session { &self.sess } @@ -460,6 +552,7 @@ impl<'sess, S: Stage> AttributeParser<'sess, S> { mut emit_lint: impl FnMut(AttributeLint), ) -> Vec { let mut attributes = Vec::new(); + let mut attr_paths = Vec::new(); for attr in attrs { // If we're only looking for a single attribute, skip all the ones we don't care about. 
@@ -503,6 +596,8 @@ impl<'sess, S: Stage> AttributeParser<'sess, S> { // })) // } ast::AttrKind::Normal(n) => { + attr_paths.push(PathParser::Ast(&n.item.path)); + let parser = MetaItemParser::from_attr(n, self.dcx()); let path = parser.path(); let args = parser.args(); @@ -511,7 +606,7 @@ impl<'sess, S: Stage> AttributeParser<'sess, S> { if let Some(accepts) = S::parsers().0.get(parts.as_slice()) { for (template, accept) in accepts { let mut cx: AcceptContext<'_, 'sess, S> = AcceptContext { - finalize_cx: FinalizeContext { + shared: SharedContext { cx: self, target_span, target_id, @@ -555,10 +650,13 @@ impl<'sess, S: Stage> AttributeParser<'sess, S> { let mut parsed_attributes = Vec::new(); for f in &S::parsers().1 { if let Some(attr) = f(&mut FinalizeContext { - cx: self, - target_span, - target_id, - emit_lint: &mut emit_lint, + shared: SharedContext { + cx: self, + target_span, + target_id, + emit_lint: &mut emit_lint, + }, + all_attrs: &attr_paths, }) { parsed_attributes.push(Attribute::Parsed(attr)); } diff --git a/compiler/rustc_attr_parsing/src/parser.rs b/compiler/rustc_attr_parsing/src/parser.rs index 1edbe3a9d27a..aecaae947c99 100644 --- a/compiler/rustc_attr_parsing/src/parser.rs +++ b/compiler/rustc_attr_parsing/src/parser.rs @@ -87,6 +87,14 @@ impl<'a> PathParser<'a> { pub fn word_is(&self, sym: Symbol) -> bool { self.word().map(|i| i.name == sym).unwrap_or(false) } + + /// Checks whether the first segments match the givens. + /// + /// Unlike [`segments_is`](Self::segments_is), + /// `self` may contain more segments than the number matched against. + pub fn starts_with(&self, segments: &[Symbol]) -> bool { + segments.len() < self.len() && self.segments().zip(segments).all(|(a, b)| a.name == *b) + } } impl Display for PathParser<'_> { @@ -161,9 +169,15 @@ impl<'a> ArgParser<'a> { } } - /// Asserts that there are no arguments - pub fn no_args(&self) -> bool { - matches!(self, Self::NoArgs) + /// Assert that there were no args. 
+ /// If there were, get a span to the arguments + /// (to pass to [`AcceptContext::expected_no_args`](crate::context::AcceptContext::expected_no_args)). + pub fn no_args(&self) -> Result<(), Span> { + match self { + Self::NoArgs => Ok(()), + Self::List(args) => Err(args.span), + Self::NameValue(args) => Err(args.eq_span.to(args.value_span)), + } } } diff --git a/compiler/rustc_attr_parsing/src/session_diagnostics.rs b/compiler/rustc_attr_parsing/src/session_diagnostics.rs index 337921a318c3..7cfce5799792 100644 --- a/compiler/rustc_attr_parsing/src/session_diagnostics.rs +++ b/compiler/rustc_attr_parsing/src/session_diagnostics.rs @@ -436,6 +436,22 @@ pub(crate) struct IllFormedAttributeInput { pub suggestions: DiagArgValue, } +#[derive(Diagnostic)] +#[diag(attr_parsing_ill_formed_attribute_input)] +pub(crate) struct MustUseIllFormedAttributeInput { + #[primary_span] + pub span: Span, + pub num_suggestions: usize, + pub suggestions: DiagArgValue, +} + +#[derive(Diagnostic)] +#[diag(attr_parsing_null_on_export, code = E0648)] +pub(crate) struct NullOnExport { + #[primary_span] + pub span: Span, +} + #[derive(Diagnostic)] #[diag(attr_parsing_stability_outside_std, code = E0734)] pub(crate) struct StabilityOutsideStd { @@ -473,8 +489,21 @@ pub(crate) struct UnrecognizedReprHint { pub span: Span, } +#[derive(Diagnostic)] +#[diag(attr_parsing_naked_functions_incompatible_attribute, code = E0736)] +pub(crate) struct NakedFunctionIncompatibleAttribute { + #[primary_span] + #[label] + pub span: Span, + #[label(attr_parsing_naked_attribute)] + pub naked_span: Span, + pub attr: String, +} + pub(crate) enum AttributeParseErrorReason { + ExpectedNoArgs, ExpectedStringLiteral { byte_string: Option }, + ExpectedAtLeastOneArgument, ExpectedSingleArgument, ExpectedList, UnexpectedLiteral, @@ -518,6 +547,9 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for AttributeParseError { diag.span_label(self.span, "expected a single argument here"); diag.code(E0805); } + 
AttributeParseErrorReason::ExpectedAtLeastOneArgument => { + diag.span_label(self.span, "expected at least 1 argument here"); + } AttributeParseErrorReason::ExpectedList => { diag.span_label(self.span, "expected this to be a list"); } @@ -529,6 +561,10 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for AttributeParseError { diag.span_label(self.span, format!("didn't expect a literal here")); diag.code(E0565); } + AttributeParseErrorReason::ExpectedNoArgs => { + diag.span_label(self.span, format!("didn't expect any arguments here")); + diag.code(E0565); + } AttributeParseErrorReason::ExpectedNameValue(None) => { diag.span_label( self.span, diff --git a/compiler/rustc_borrowck/src/diagnostics/conflict_errors.rs b/compiler/rustc_borrowck/src/diagnostics/conflict_errors.rs index 98dc898db232..d1dac1c7145d 100644 --- a/compiler/rustc_borrowck/src/diagnostics/conflict_errors.rs +++ b/compiler/rustc_borrowck/src/diagnostics/conflict_errors.rs @@ -518,11 +518,11 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { } = move_spans && can_suggest_clone { - self.suggest_cloning(err, ty, expr, Some(move_spans)); + self.suggest_cloning(err, place.as_ref(), ty, expr, Some(move_spans)); } else if self.suggest_hoisting_call_outside_loop(err, expr) && can_suggest_clone { // The place where the type moves would be misleading to suggest clone. 
// #121466 - self.suggest_cloning(err, ty, expr, Some(move_spans)); + self.suggest_cloning(err, place.as_ref(), ty, expr, Some(move_spans)); } } @@ -1224,6 +1224,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { pub(crate) fn suggest_cloning( &self, err: &mut Diag<'_>, + place: PlaceRef<'tcx>, ty: Ty<'tcx>, expr: &'tcx hir::Expr<'tcx>, use_spans: Option>, @@ -1238,7 +1239,13 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { } if self.implements_clone(ty) { - self.suggest_cloning_inner(err, ty, expr); + if self.in_move_closure(expr) { + if let Some(name) = self.describe_place(place) { + self.suggest_clone_of_captured_var_in_move_closure(err, &name, use_spans); + } + } else { + self.suggest_cloning_inner(err, ty, expr); + } } else if let ty::Adt(def, args) = ty.kind() && def.did().as_local().is_some() && def.variants().iter().all(|variant| { @@ -1505,7 +1512,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { if let hir::ExprKind::AddrOf(_, _, borrowed_expr) = expr.kind && let Some(ty) = typeck_results.expr_ty_opt(borrowed_expr) { - self.suggest_cloning(&mut err, ty, borrowed_expr, Some(move_spans)); + self.suggest_cloning(&mut err, place.as_ref(), ty, borrowed_expr, Some(move_spans)); } else if typeck_results.expr_adjustments(expr).first().is_some_and(|adj| { matches!( adj.kind, @@ -1518,7 +1525,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { ) }) && let Some(ty) = typeck_results.expr_ty_opt(expr) { - self.suggest_cloning(&mut err, ty, expr, Some(move_spans)); + self.suggest_cloning(&mut err, place.as_ref(), ty, expr, Some(move_spans)); } } self.buffer_error(err); diff --git a/compiler/rustc_borrowck/src/diagnostics/explain_borrow.rs b/compiler/rustc_borrowck/src/diagnostics/explain_borrow.rs index 095c0df98acc..f9e52239d6f6 100644 --- a/compiler/rustc_borrowck/src/diagnostics/explain_borrow.rs +++ b/compiler/rustc_borrowck/src/diagnostics/explain_borrow.rs @@ -71,7 +71,6 @@ impl<'tcx> BorrowExplanation<'tcx> { ) { let tcx = 
cx.infcx.tcx; let body = cx.body; - let local_names = &cx.local_names; if let Some(span) = borrow_span { let def_id = body.source.def_id(); @@ -220,7 +219,7 @@ impl<'tcx> BorrowExplanation<'tcx> { _ => ("destructor", format!("type `{}`", local_decl.ty)), }; - match local_names[dropped_local] { + match cx.local_name(dropped_local) { Some(local_name) if !local_decl.from_compiler_desugaring() => { let message = format!( "{borrow_desc}borrow might be used here, when `{local_name}` is dropped \ @@ -670,10 +669,10 @@ impl<'tcx> MirBorrowckCtxt<'_, '_, 'tcx> { Some(Cause::DropVar(local, location)) => { let mut should_note_order = false; - if self.local_names[local].is_some() + if self.local_name(local).is_some() && let Some((WriteKind::StorageDeadOrDrop, place)) = kind_place && let Some(borrowed_local) = place.as_local() - && self.local_names[borrowed_local].is_some() + && self.local_name(borrowed_local).is_some() && local != borrowed_local { should_note_order = true; @@ -748,7 +747,7 @@ impl<'tcx> MirBorrowckCtxt<'_, '_, 'tcx> { Operand::Copy(place) | Operand::Move(place) => { if let Some(l) = place.as_local() { let local_decl = &self.body.local_decls[l]; - if self.local_names[l].is_none() { + if self.local_name(l).is_none() { local_decl.source_info.span } else { span @@ -793,7 +792,7 @@ impl<'tcx> MirBorrowckCtxt<'_, '_, 'tcx> { Operand::Copy(place) | Operand::Move(place) => { if let Some(l) = place.as_local() { let local_decl = &self.body.local_decls[l]; - if self.local_names[l].is_none() { + if self.local_name(l).is_none() { local_decl.source_info.span } else { span diff --git a/compiler/rustc_borrowck/src/diagnostics/mod.rs b/compiler/rustc_borrowck/src/diagnostics/mod.rs index 5e3f3ffa2ea8..9ad91d605a77 100644 --- a/compiler/rustc_borrowck/src/diagnostics/mod.rs +++ b/compiler/rustc_borrowck/src/diagnostics/mod.rs @@ -7,17 +7,17 @@ use rustc_data_structures::fx::FxIndexMap; use rustc_errors::{Applicability, Diag, EmissionGuarantee, MultiSpan, listify}; use 
rustc_hir::def::{CtorKind, Namespace}; use rustc_hir::{self as hir, CoroutineKind, LangItem}; -use rustc_index::IndexSlice; +use rustc_index::{IndexSlice, IndexVec}; use rustc_infer::infer::{BoundRegionConversionTime, NllRegionVariableOrigin}; use rustc_infer::traits::SelectionError; -use rustc_middle::bug; use rustc_middle::mir::{ AggregateKind, CallSource, ConstOperand, ConstraintCategory, FakeReadCause, Local, LocalInfo, LocalKind, Location, Operand, Place, PlaceRef, PlaceTy, ProjectionElem, Rvalue, Statement, - StatementKind, Terminator, TerminatorKind, find_self_call, + StatementKind, Terminator, TerminatorKind, VarDebugInfoContents, find_self_call, }; use rustc_middle::ty::print::Print; use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_middle::{bug, span_bug}; use rustc_mir_dataflow::move_paths::{InitLocation, LookupResult, MoveOutIndex}; use rustc_span::def_id::LocalDefId; use rustc_span::source_map::Spanned; @@ -190,6 +190,36 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { ) -> Option<&(PlaceRef<'tcx>, Diag<'infcx>)> { self.diags_buffer.buffered_move_errors.get(move_out_indices) } + + /// Uses `body.var_debug_info` to find the symbol + fn local_name(&self, index: Local) -> Option { + *self.local_names().get(index)? 
+ } + + fn local_names(&self) -> &IndexSlice> { + self.local_names.get_or_init(|| { + let mut local_names = IndexVec::from_elem(None, &self.body.local_decls); + for var_debug_info in &self.body.var_debug_info { + if let VarDebugInfoContents::Place(place) = var_debug_info.value { + if let Some(local) = place.as_local() { + if let Some(prev_name) = local_names[local] + && var_debug_info.name != prev_name + { + span_bug!( + var_debug_info.source_info.span, + "local {:?} has many names (`{}` vs `{}`)", + local, + prev_name, + var_debug_info.name + ); + } + local_names[local] = Some(var_debug_info.name); + } + } + } + local_names + }) + } } impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { @@ -430,7 +460,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { /// a name, or its name was generated by the compiler, then `Err` is returned fn append_local_to_string(&self, local: Local, buf: &mut String) -> Result<(), ()> { let decl = &self.body.local_decls[local]; - match self.local_names[local] { + match self.local_name(local) { Some(name) if !decl.from_compiler_desugaring() => { buf.push_str(name.as_str()); Ok(()) @@ -1254,8 +1284,14 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { && !spans.is_empty() { let mut span: MultiSpan = spans.clone().into(); + err.arg("ty", param_ty.to_string()); + let msg = err.dcx.eagerly_translate_to_string( + fluent::borrowck_moved_a_fn_once_in_call_def, + err.args.iter(), + ); + err.remove_arg("ty"); for sp in spans { - span.push_span_label(sp, fluent::borrowck_moved_a_fn_once_in_call_def); + span.push_span_label(sp, msg.clone()); } span.push_span_label( fn_call_span, @@ -1500,4 +1536,9 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { } } } + + /// Skip over locals that begin with an underscore or have no name + pub(crate) fn local_excluded_from_unused_mut_lint(&self, index: Local) -> bool { + self.local_name(index).is_none_or(|name| name.as_str().starts_with('_')) + } } diff --git 
a/compiler/rustc_borrowck/src/diagnostics/move_errors.rs b/compiler/rustc_borrowck/src/diagnostics/move_errors.rs index 0394a42ea9c7..92ca868eb992 100644 --- a/compiler/rustc_borrowck/src/diagnostics/move_errors.rs +++ b/compiler/rustc_borrowck/src/diagnostics/move_errors.rs @@ -325,25 +325,17 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { self.cannot_move_out_of(span, &description) } - fn suggest_clone_of_captured_var_in_move_closure( + pub(in crate::diagnostics) fn suggest_clone_of_captured_var_in_move_closure( &self, err: &mut Diag<'_>, - upvar_hir_id: HirId, upvar_name: &str, use_spans: Option>, ) { let tcx = self.infcx.tcx; - let typeck_results = tcx.typeck(self.mir_def_id()); let Some(use_spans) = use_spans else { return }; // We only care about the case where a closure captured a binding. let UseSpans::ClosureUse { args_span, .. } = use_spans else { return }; let Some(body_id) = tcx.hir_node(self.mir_hir_id()).body_id() else { return }; - // Fetch the type of the expression corresponding to the closure-captured binding. - let Some(captured_ty) = typeck_results.node_type_opt(upvar_hir_id) else { return }; - if !self.implements_clone(captured_ty) { - // We only suggest cloning the captured binding if the type can actually be cloned. - return; - }; // Find the closure that captured the binding. 
let mut expr_finder = FindExprBySpan::new(args_span, tcx); expr_finder.include_closures = true; @@ -396,7 +388,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { .indentation_before(stmt.span) .unwrap_or_else(|| " ".to_string()); err.multipart_suggestion_verbose( - "clone the value before moving it into the closure", + "consider cloning the value before moving it into the closure", vec![ ( stmt.span.shrink_to_lo(), @@ -426,7 +418,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { .indentation_before(closure_expr.span) .unwrap_or_else(|| " ".to_string()); err.multipart_suggestion_verbose( - "clone the value before moving it into the closure", + "consider cloning the value before moving it into the closure", vec![ ( closure_expr.span.shrink_to_lo(), @@ -465,11 +457,15 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { if let PlaceRef { local, projection: [] } = deref_base { let decl = &self.body.local_decls[local]; + let local_name = self.local_name(local).map(|sym| format!("`{sym}`")); if decl.is_ref_for_guard() { return self .cannot_move_out_of( span, - &format!("`{}` in pattern guard", self.local_names[local].unwrap()), + &format!( + "{} in pattern guard", + local_name.as_deref().unwrap_or("the place") + ), ) .with_note( "variables bound in patterns cannot be moved from \ @@ -519,20 +515,12 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { ); let closure_span = tcx.def_span(def_id); - let mut err = self - .cannot_move_out_of(span, &place_description) + self.cannot_move_out_of(span, &place_description) .with_span_label(upvar_span, "captured outer variable") .with_span_label( closure_span, format!("captured by this `{closure_kind}` closure"), - ); - self.suggest_clone_of_captured_var_in_move_closure( - &mut err, - upvar_hir_id, - &upvar_name, - use_spans, - ); - err + ) } _ => { let source = self.borrowed_content_source(deref_base); @@ -593,7 +581,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { }; if let Some(expr) = 
self.find_expr(span) { - self.suggest_cloning(err, place_ty, expr, None); + self.suggest_cloning(err, move_from.as_ref(), place_ty, expr, None); } err.subdiagnostic(crate::session_diagnostics::TypeNoCopy::Label { @@ -625,7 +613,13 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { }; if let Some(expr) = self.find_expr(use_span) { - self.suggest_cloning(err, place_ty, expr, Some(use_spans)); + self.suggest_cloning( + err, + original_path.as_ref(), + place_ty, + expr, + Some(use_spans), + ); } err.subdiagnostic(crate::session_diagnostics::TypeNoCopy::Label { @@ -825,16 +819,17 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { } if binds_to.len() == 1 { - let place_desc = &format!("`{}`", self.local_names[*local].unwrap()); + let place_desc = self.local_name(*local).map(|sym| format!("`{sym}`")); if let Some(expr) = self.find_expr(binding_span) { - self.suggest_cloning(err, bind_to.ty, expr, None); + let local_place: PlaceRef<'tcx> = (*local).into(); + self.suggest_cloning(err, local_place, bind_to.ty, expr, None); } err.subdiagnostic(crate::session_diagnostics::TypeNoCopy::Label { is_partial_move: false, ty: bind_to.ty, - place: place_desc, + place: place_desc.as_deref().unwrap_or("the place"), span: binding_span, }); } diff --git a/compiler/rustc_borrowck/src/diagnostics/mutability_errors.rs b/compiler/rustc_borrowck/src/diagnostics/mutability_errors.rs index a5c9bad3ac2d..fd8a2a6bc354 100644 --- a/compiler/rustc_borrowck/src/diagnostics/mutability_errors.rs +++ b/compiler/rustc_borrowck/src/diagnostics/mutability_errors.rs @@ -60,7 +60,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { if access_place.as_local().is_some() { reason = ", as it is not declared as mutable".to_string(); } else { - let name = self.local_names[local].expect("immutable unnamed local"); + let name = self.local_name(local).expect("immutable unnamed local"); reason = format!(", as `{name}` is not declared as mutable"); } } @@ -285,7 +285,7 @@ impl<'infcx, 'tcx> 
MirBorrowckCtxt<'_, 'infcx, 'tcx> { .body .local_decls .get(local) - .is_some_and(|l| mut_borrow_of_mutable_ref(l, self.local_names[local])) => + .is_some_and(|l| mut_borrow_of_mutable_ref(l, self.local_name(local))) => { let decl = &self.body.local_decls[local]; err.span_label(span, format!("cannot {act}")); @@ -481,7 +481,7 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { let (pointer_sigil, pointer_desc) = if local_decl.ty.is_ref() { ("&", "reference") } else { ("*const", "pointer") }; - match self.local_names[local] { + match self.local_name(local) { Some(name) if !local_decl.from_compiler_desugaring() => { err.span_label( span, diff --git a/compiler/rustc_borrowck/src/diagnostics/region_errors.rs b/compiler/rustc_borrowck/src/diagnostics/region_errors.rs index 3bec07afa0fe..a611557dc924 100644 --- a/compiler/rustc_borrowck/src/diagnostics/region_errors.rs +++ b/compiler/rustc_borrowck/src/diagnostics/region_errors.rs @@ -10,7 +10,7 @@ use rustc_hir::def::Res::Def; use rustc_hir::def_id::DefId; use rustc_hir::intravisit::VisitorExt; use rustc_hir::{PolyTraitRef, TyKind, WhereBoundPredicate}; -use rustc_infer::infer::{NllRegionVariableOrigin, RelateParamBound}; +use rustc_infer::infer::{NllRegionVariableOrigin, SubregionOrigin}; use rustc_middle::bug; use rustc_middle::hir::place::PlaceBase; use rustc_middle::mir::{AnnotationSource, ConstraintCategory, ReturnConstraint}; @@ -329,7 +329,8 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { self.infcx.tcx, type_test.generic_kind.to_ty(self.infcx.tcx), ); - let origin = RelateParamBound(type_test_span, generic_ty, None); + let origin = + SubregionOrigin::RelateParamBound(type_test_span, generic_ty, None); self.buffer_error(self.infcx.err_ctxt().construct_generic_bound_failure( self.body.source.def_id().expect_local(), type_test_span, @@ -664,14 +665,14 @@ impl<'infcx, 'tcx> MirBorrowckCtxt<'_, 'infcx, 'tcx> { let fr_name_and_span = self.regioncx.get_var_name_and_span_for_region( self.infcx.tcx, 
self.body, - &self.local_names, + &self.local_names(), &self.upvars, errci.fr, ); let outlived_fr_name_and_span = self.regioncx.get_var_name_and_span_for_region( self.infcx.tcx, self.body, - &self.local_names, + &self.local_names(), &self.upvars, errci.outlived_fr, ); diff --git a/compiler/rustc_borrowck/src/diagnostics/region_name.rs b/compiler/rustc_borrowck/src/diagnostics/region_name.rs index 487f78058a8c..1ad629ad167d 100644 --- a/compiler/rustc_borrowck/src/diagnostics/region_name.rs +++ b/compiler/rustc_borrowck/src/diagnostics/region_name.rs @@ -399,7 +399,7 @@ impl<'tcx> MirBorrowckCtxt<'_, '_, 'tcx> { [implicit_inputs + argument_index]; let (_, span) = self.regioncx.get_argument_name_and_span_for_region( self.body, - &self.local_names, + self.local_names(), argument_index, ); @@ -973,7 +973,7 @@ impl<'tcx> MirBorrowckCtxt<'_, '_, 'tcx> { { let (arg_name, arg_span) = self.regioncx.get_argument_name_and_span_for_region( self.body, - &self.local_names, + self.local_names(), arg_index, ); let region_name = self.synthesize_region_name(); diff --git a/compiler/rustc_borrowck/src/lib.rs b/compiler/rustc_borrowck/src/lib.rs index 4d85f1090201..82b300dcb17d 100644 --- a/compiler/rustc_borrowck/src/lib.rs +++ b/compiler/rustc_borrowck/src/lib.rs @@ -16,7 +16,7 @@ // tidy-alphabetical-end use std::borrow::Cow; -use std::cell::RefCell; +use std::cell::{OnceCell, RefCell}; use std::marker::PhantomData; use std::ops::{ControlFlow, Deref}; @@ -391,7 +391,7 @@ fn do_mir_borrowck<'tcx>( used_mut_upvars: SmallVec::new(), borrow_set: &borrow_set, upvars: &[], - local_names: IndexVec::from_elem(None, &promoted_body.local_decls), + local_names: OnceCell::from(IndexVec::from_elem(None, &promoted_body.local_decls)), region_names: RefCell::default(), next_region_name: RefCell::new(1), polonius_output: None, @@ -414,26 +414,6 @@ fn do_mir_borrowck<'tcx>( promoted_mbcx.report_move_errors(); } - let mut local_names = IndexVec::from_elem(None, &body.local_decls); - for 
var_debug_info in &body.var_debug_info { - if let VarDebugInfoContents::Place(place) = var_debug_info.value { - if let Some(local) = place.as_local() { - if let Some(prev_name) = local_names[local] - && var_debug_info.name != prev_name - { - span_bug!( - var_debug_info.source_info.span, - "local {:?} has many names (`{}` vs `{}`)", - local, - prev_name, - var_debug_info.name - ); - } - local_names[local] = Some(var_debug_info.name); - } - } - } - let mut mbcx = MirBorrowckCtxt { root_cx, infcx: &infcx, @@ -450,7 +430,7 @@ fn do_mir_borrowck<'tcx>( used_mut_upvars: SmallVec::new(), borrow_set: &borrow_set, upvars: tcx.closure_captures(def), - local_names, + local_names: OnceCell::new(), region_names: RefCell::default(), next_region_name: RefCell::new(1), move_errors: Vec::new(), @@ -682,7 +662,7 @@ struct MirBorrowckCtxt<'a, 'infcx, 'tcx> { upvars: &'tcx [&'tcx ty::CapturedPlace<'tcx>], /// Names of local (user) variables (extracted from `var_debug_info`). - local_names: IndexVec>, + local_names: OnceCell>>, /// Record the region names generated for each region in the given /// MIR def so that we can reuse them later in help/error messages. 
@@ -2610,7 +2590,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { }; // Skip over locals that begin with an underscore or have no name - if self.local_names[local].is_none_or(|name| name.as_str().starts_with('_')) { + if self.local_excluded_from_unused_mut_lint(local) { continue; } diff --git a/compiler/rustc_borrowck/src/type_check/constraint_conversion.rs b/compiler/rustc_borrowck/src/type_check/constraint_conversion.rs index 0a114467f432..8ed552cfa4f0 100644 --- a/compiler/rustc_borrowck/src/type_check/constraint_conversion.rs +++ b/compiler/rustc_borrowck/src/type_check/constraint_conversion.rs @@ -3,7 +3,7 @@ use rustc_infer::infer::canonical::QueryRegionConstraints; use rustc_infer::infer::outlives::env::RegionBoundPairs; use rustc_infer::infer::outlives::obligations::{TypeOutlives, TypeOutlivesDelegate}; use rustc_infer::infer::region_constraints::{GenericKind, VerifyBound}; -use rustc_infer::infer::{self, InferCtxt, SubregionOrigin}; +use rustc_infer::infer::{InferCtxt, SubregionOrigin}; use rustc_infer::traits::query::type_op::DeeplyNormalize; use rustc_middle::bug; use rustc_middle::ty::{ @@ -172,7 +172,7 @@ impl<'a, 'tcx> ConstraintConversion<'a, 'tcx> { ty::Region::new_var(tcx, universal_regions.implicit_region_bound()); // we don't actually use this for anything, but // the `TypeOutlives` code needs an origin. 
- let origin = infer::RelateParamBound(self.span, t1, None); + let origin = SubregionOrigin::RelateParamBound(self.span, t1, None); TypeOutlives::new( &mut *self, tcx, diff --git a/compiler/rustc_borrowck/src/type_check/input_output.rs b/compiler/rustc_borrowck/src/type_check/input_output.rs index 0c46e0c0c220..99392ea19151 100644 --- a/compiler/rustc_borrowck/src/type_check/input_output.rs +++ b/compiler/rustc_borrowck/src/type_check/input_output.rs @@ -66,10 +66,9 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { Ty::new_tup(self.tcx(), user_provided_sig.inputs()), args.tupled_upvars_ty(), args.coroutine_captures_by_ref_ty(), - self.infcx - .next_region_var(RegionVariableOrigin::MiscVariable(self.body.span), || { - RegionCtxt::Unknown - }), + self.infcx.next_region_var(RegionVariableOrigin::Misc(self.body.span), || { + RegionCtxt::Unknown + }), ); let next_ty_var = || self.infcx.next_ty_var(self.body.span); diff --git a/compiler/rustc_borrowck/src/type_check/mod.rs b/compiler/rustc_borrowck/src/type_check/mod.rs index 9b6dcfd17c62..e37b5a33af82 100644 --- a/compiler/rustc_borrowck/src/type_check/mod.rs +++ b/compiler/rustc_borrowck/src/type_check/mod.rs @@ -16,7 +16,7 @@ use rustc_infer::infer::canonical::QueryRegionConstraints; use rustc_infer::infer::outlives::env::RegionBoundPairs; use rustc_infer::infer::region_constraints::RegionConstraintData; use rustc_infer::infer::{ - BoundRegion, BoundRegionConversionTime, InferCtxt, NllRegionVariableOrigin, + BoundRegionConversionTime, InferCtxt, NllRegionVariableOrigin, RegionVariableOrigin, }; use rustc_infer::traits::PredicateObligations; use rustc_middle::mir::visit::{NonMutatingUseContext, PlaceContext, Visitor}; @@ -794,7 +794,7 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> { }; self.infcx.next_region_var( - BoundRegion( + RegionVariableOrigin::BoundRegion( term.source_info.span, br.kind, BoundRegionConversionTime::FnCall, diff --git a/compiler/rustc_builtin_macros/messages.ftl 
b/compiler/rustc_builtin_macros/messages.ftl index d32e6f1558e4..3594c7ec2104 100644 --- a/compiler/rustc_builtin_macros/messages.ftl +++ b/compiler/rustc_builtin_macros/messages.ftl @@ -104,6 +104,8 @@ builtin_macros_concat_bytes_bad_repeat = repeat count is not a positive number builtin_macros_concat_bytes_invalid = cannot concatenate {$lit_kind} literals .byte_char = try using a byte character .byte_str = try using a byte string + .c_str = try using a null-terminated byte string + .c_str_note = concatenating C strings is ambiguous about including the '\0' .number_array = try wrapping the number in an array builtin_macros_concat_bytes_missing_literal = expected a byte literal @@ -116,10 +118,6 @@ builtin_macros_concat_bytes_oob = numeric literal is out of bounds builtin_macros_concat_bytestr = cannot concatenate a byte string literal builtin_macros_concat_c_str_lit = cannot concatenate a C string literal -builtin_macros_concat_idents_ident_args = `concat_idents!()` requires ident args - -builtin_macros_concat_idents_missing_args = `concat_idents!()` takes 1 or more arguments -builtin_macros_concat_idents_missing_comma = `concat_idents!()` expecting comma builtin_macros_concat_missing_literal = expected a literal .note = only literals (like `"foo"`, `-42` and `3.14`) can be passed to `concat!()` diff --git a/compiler/rustc_builtin_macros/src/alloc_error_handler.rs b/compiler/rustc_builtin_macros/src/alloc_error_handler.rs index ea406e706660..e75bc944d7ec 100644 --- a/compiler/rustc_builtin_macros/src/alloc_error_handler.rs +++ b/compiler/rustc_builtin_macros/src/alloc_error_handler.rs @@ -62,8 +62,8 @@ pub(crate) fn expand( fn generate_handler(cx: &ExtCtxt<'_>, handler: Ident, span: Span, sig_span: Span) -> Stmt { let usize = cx.path_ident(span, Ident::new(sym::usize, span)); let ty_usize = cx.ty_path(usize); - let size = Ident::from_str_and_span("size", span); - let align = Ident::from_str_and_span("align", span); + let size = Ident::new(sym::size, span); + let 
align = Ident::new(sym::align, span); let layout_new = cx.std_path(&[sym::alloc, sym::Layout, sym::from_size_align_unchecked]); let layout_new = cx.expr_path(cx.path(span, layout_new)); diff --git a/compiler/rustc_builtin_macros/src/autodiff.rs b/compiler/rustc_builtin_macros/src/autodiff.rs index dc3bb8ab52a5..df1b1eb60e18 100644 --- a/compiler/rustc_builtin_macros/src/autodiff.rs +++ b/compiler/rustc_builtin_macros/src/autodiff.rs @@ -652,8 +652,10 @@ mod llvm_enzyme { exprs = ecx.expr_call(new_decl_span, bb_call_expr, thin_vec![exprs]); } else { let q = QSelf { ty: d_ret_ty, path_span: span, position: 0 }; - let y = - ExprKind::Path(Some(P(q)), ecx.path_ident(span, Ident::from_str("default"))); + let y = ExprKind::Path( + Some(P(q)), + ecx.path_ident(span, Ident::with_dummy_span(kw::Default)), + ); let default_call_expr = ecx.expr(span, y); let default_call_expr = ecx.expr_call(new_decl_span, default_call_expr, thin_vec![]); diff --git a/compiler/rustc_builtin_macros/src/cfg_eval.rs b/compiler/rustc_builtin_macros/src/cfg_eval.rs index fe44350863c9..ec3b87467a9b 100644 --- a/compiler/rustc_builtin_macros/src/cfg_eval.rs +++ b/compiler/rustc_builtin_macros/src/cfg_eval.rs @@ -161,7 +161,7 @@ impl MutVisitor for CfgEval<'_> { } #[instrument(level = "trace", skip(self))] - fn visit_method_receiver_expr(&mut self, expr: &mut P) { + fn visit_method_receiver_expr(&mut self, expr: &mut ast::Expr) { self.0.configure_expr(expr, true); mut_visit::walk_expr(self, expr); } diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 456f2b9ab31d..92d011fb9d1f 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,6 +1,6 @@ use rustc_ast::ptr::P; use rustc_ast::tokenstream::TokenStream; -use rustc_ast::{ExprKind, LitIntType, LitKind, UintTy, token}; +use rustc_ast::{ExprKind, LitIntType, LitKind, StrStyle, UintTy, token}; use 
rustc_expand::base::{DummyResult, ExpandResult, ExtCtxt, MacEager, MacroExpanderResult}; use rustc_session::errors::report_lit_error; use rustc_span::{ErrorGuaranteed, Span}; @@ -21,15 +21,32 @@ fn invalid_type_err( let snippet = cx.sess.source_map().span_to_snippet(span).ok(); let dcx = cx.dcx(); match LitKind::from_token_lit(token_lit) { - Ok(LitKind::CStr(_, _)) => { + Ok(LitKind::CStr(_, style)) => { // Avoid ambiguity in handling of terminal `NUL` by refusing to // concatenate C string literals as bytes. - dcx.emit_err(errors::ConcatCStrLit { span }) + let sugg = if let Some(mut as_bstr) = snippet + && style == StrStyle::Cooked + && as_bstr.starts_with('c') + && as_bstr.ends_with('"') + { + // Suggest`c"foo"` -> `b"foo\0"` if we can + as_bstr.replace_range(0..1, "b"); + as_bstr.pop(); + as_bstr.push_str(r#"\0""#); + Some(ConcatBytesInvalidSuggestion::CStrLit { span, as_bstr }) + } else { + // No suggestion for a missing snippet, raw strings, or if for some reason we have + // a span that doesn't match `c"foo"` (possible if a proc macro assigns a span + // that doesn't actually point to a C string). 
+ None + }; + // We can only provide a suggestion if we have a snip and it is not a raw string + dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "C string", sugg, cs_note: Some(()) }) } Ok(LitKind::Char(_)) => { let sugg = snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet }); - dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "character", sugg }) + dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "character", sugg, cs_note: None }) } Ok(LitKind::Str(_, _)) => { // suggestion would be invalid if we are nested @@ -38,18 +55,21 @@ fn invalid_type_err( } else { None }; - dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "string", sugg }) + dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "string", sugg, cs_note: None }) } Ok(LitKind::Float(_, _)) => { - dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "float", sugg: None }) - } - Ok(LitKind::Bool(_)) => { - dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "boolean", sugg: None }) + dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "float", sugg: None, cs_note: None }) } + Ok(LitKind::Bool(_)) => dcx.emit_err(ConcatBytesInvalid { + span, + lit_kind: "boolean", + sugg: None, + cs_note: None, + }), Ok(LitKind::Int(_, _)) if !is_nested => { let sugg = snippet.map(|snippet| ConcatBytesInvalidSuggestion::IntLit { span, snippet }); - dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "numeric", sugg }) + dcx.emit_err(ConcatBytesInvalid { span, lit_kind: "numeric", sugg, cs_note: None }) } Ok(LitKind::Int(val, LitIntType::Unsuffixed | LitIntType::Unsigned(UintTy::U8))) => { assert!(val.get() > u8::MAX.into()); // must be an error diff --git a/compiler/rustc_builtin_macros/src/concat_idents.rs b/compiler/rustc_builtin_macros/src/concat_idents.rs deleted file mode 100644 index a721f5b84c55..000000000000 --- a/compiler/rustc_builtin_macros/src/concat_idents.rs +++ /dev/null @@ -1,71 +0,0 @@ -use rustc_ast::ptr::P; -use rustc_ast::token::{self, Token}; -use rustc_ast::tokenstream::{TokenStream, 
TokenTree}; -use rustc_ast::{AttrVec, DUMMY_NODE_ID, Expr, ExprKind, Path, Ty, TyKind}; -use rustc_expand::base::{DummyResult, ExpandResult, ExtCtxt, MacResult, MacroExpanderResult}; -use rustc_span::{Ident, Span, Symbol}; - -use crate::errors; - -pub(crate) fn expand_concat_idents<'cx>( - cx: &'cx mut ExtCtxt<'_>, - sp: Span, - tts: TokenStream, -) -> MacroExpanderResult<'cx> { - if tts.is_empty() { - let guar = cx.dcx().emit_err(errors::ConcatIdentsMissingArgs { span: sp }); - return ExpandResult::Ready(DummyResult::any(sp, guar)); - } - - let mut res_str = String::new(); - for (i, e) in tts.iter().enumerate() { - if i & 1 == 1 { - match e { - TokenTree::Token(Token { kind: token::Comma, .. }, _) => {} - _ => { - let guar = cx.dcx().emit_err(errors::ConcatIdentsMissingComma { span: sp }); - return ExpandResult::Ready(DummyResult::any(sp, guar)); - } - } - } else { - if let TokenTree::Token(token, _) = e { - if let Some((ident, _)) = token.ident() { - res_str.push_str(ident.name.as_str()); - continue; - } - } - - let guar = cx.dcx().emit_err(errors::ConcatIdentsIdentArgs { span: sp }); - return ExpandResult::Ready(DummyResult::any(sp, guar)); - } - } - - let ident = Ident::new(Symbol::intern(&res_str), cx.with_call_site_ctxt(sp)); - - struct ConcatIdentsResult { - ident: Ident, - } - - impl MacResult for ConcatIdentsResult { - fn make_expr(self: Box) -> Option> { - Some(P(Expr { - id: DUMMY_NODE_ID, - kind: ExprKind::Path(None, Path::from_ident(self.ident)), - span: self.ident.span, - attrs: AttrVec::new(), - tokens: None, - })) - } - - fn make_ty(self: Box) -> Option> { - Some(P(Ty { - id: DUMMY_NODE_ID, - kind: TyKind::Path(None, Path::from_ident(self.ident)), - span: self.ident.span, - tokens: None, - })) - } - } - - ExpandResult::Ready(Box::new(ConcatIdentsResult { ident })) -} diff --git a/compiler/rustc_builtin_macros/src/errors.rs b/compiler/rustc_builtin_macros/src/errors.rs index 3a2e96a5e5af..a5ee7349fc68 100644 --- 
a/compiler/rustc_builtin_macros/src/errors.rs +++ b/compiler/rustc_builtin_macros/src/errors.rs @@ -215,6 +215,8 @@ pub(crate) struct ConcatBytesInvalid { pub(crate) lit_kind: &'static str, #[subdiagnostic] pub(crate) sugg: Option, + #[note(builtin_macros_c_str_note)] + pub(crate) cs_note: Option<()>, } #[derive(Subdiagnostic)] @@ -239,6 +241,13 @@ pub(crate) enum ConcatBytesInvalidSuggestion { span: Span, snippet: String, }, + #[note(builtin_macros_c_str_note)] + #[suggestion(builtin_macros_c_str, code = "{as_bstr}", applicability = "machine-applicable")] + CStrLit { + #[primary_span] + span: Span, + as_bstr: String, + }, #[suggestion( builtin_macros_number_array, code = "[{snippet}]", @@ -290,27 +299,6 @@ pub(crate) struct ConcatBytesBadRepeat { pub(crate) span: Span, } -#[derive(Diagnostic)] -#[diag(builtin_macros_concat_idents_missing_args)] -pub(crate) struct ConcatIdentsMissingArgs { - #[primary_span] - pub(crate) span: Span, -} - -#[derive(Diagnostic)] -#[diag(builtin_macros_concat_idents_missing_comma)] -pub(crate) struct ConcatIdentsMissingComma { - #[primary_span] - pub(crate) span: Span, -} - -#[derive(Diagnostic)] -#[diag(builtin_macros_concat_idents_ident_args)] -pub(crate) struct ConcatIdentsIdentArgs { - #[primary_span] - pub(crate) span: Span, -} - #[derive(Diagnostic)] #[diag(builtin_macros_bad_derive_target, code = E0774)] pub(crate) struct BadDeriveTarget { @@ -672,6 +660,7 @@ impl Subdiagnostic for FormatUnusedArg { fn add_to_diag(self, diag: &mut Diag<'_, G>) { diag.arg("named", self.named); let msg = diag.eagerly_translate(crate::fluent_generated::builtin_macros_format_unused_arg); + diag.remove_arg("named"); diag.span_label(self.span, msg); } } diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index 39f9d5f90051..6785cb6aef5a 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -606,6 +606,7 @@ fn make_format_args( template, 
arguments: args, uncooked_fmt_str, + is_source_literal, })) } diff --git a/compiler/rustc_builtin_macros/src/lib.rs b/compiler/rustc_builtin_macros/src/lib.rs index 9e7d0ec9e814..9b6dea214385 100644 --- a/compiler/rustc_builtin_macros/src/lib.rs +++ b/compiler/rustc_builtin_macros/src/lib.rs @@ -36,7 +36,6 @@ mod cfg_eval; mod compile_error; mod concat; mod concat_bytes; -mod concat_idents; mod define_opaque; mod derive; mod deriving; @@ -84,7 +83,6 @@ pub fn register_builtin_macros(resolver: &mut dyn ResolverExpand) { compile_error: compile_error::expand_compile_error, concat: concat::expand_concat, concat_bytes: concat_bytes::expand_concat_bytes, - concat_idents: concat_idents::expand_concat_idents, const_format_args: format::expand_format_args, core_panic: edition_panic::expand_panic, env: env::expand_env, diff --git a/compiler/rustc_builtin_macros/src/proc_macro_harness.rs b/compiler/rustc_builtin_macros/src/proc_macro_harness.rs index daf480a9ce47..42b7e0e06d1f 100644 --- a/compiler/rustc_builtin_macros/src/proc_macro_harness.rs +++ b/compiler/rustc_builtin_macros/src/proc_macro_harness.rs @@ -56,7 +56,7 @@ pub fn inject( is_test_crate: bool, dcx: DiagCtxtHandle<'_>, ) { - let ecfg = ExpansionConfig::default("proc_macro".to_string(), features); + let ecfg = ExpansionConfig::default(sym::proc_macro, features); let mut cx = ExtCtxt::new(sess, ecfg, resolver, None); let mut collect = CollectProcMacros { diff --git a/compiler/rustc_builtin_macros/src/standard_library_imports.rs b/compiler/rustc_builtin_macros/src/standard_library_imports.rs index a1ee53b7ca21..682e7c9b17ae 100644 --- a/compiler/rustc_builtin_macros/src/standard_library_imports.rs +++ b/compiler/rustc_builtin_macros/src/standard_library_imports.rs @@ -36,7 +36,7 @@ pub fn inject( let span = DUMMY_SP.with_def_site_ctxt(expn_id.to_expn_id()); let call_site = DUMMY_SP.with_call_site_ctxt(expn_id.to_expn_id()); - let ecfg = ExpansionConfig::default("std_lib_injection".to_string(), features); + let ecfg 
= ExpansionConfig::default(sym::std_lib_injection, features); let cx = ExtCtxt::new(sess, ecfg, resolver, None); let ident_span = if edition >= Edition2018 { span } else { call_site }; diff --git a/compiler/rustc_builtin_macros/src/test_harness.rs b/compiler/rustc_builtin_macros/src/test_harness.rs index 0bc313cbdacb..111c85d49eb0 100644 --- a/compiler/rustc_builtin_macros/src/test_harness.rs +++ b/compiler/rustc_builtin_macros/src/test_harness.rs @@ -6,7 +6,7 @@ use rustc_ast as ast; use rustc_ast::entry::EntryPointType; use rustc_ast::mut_visit::*; use rustc_ast::ptr::P; -use rustc_ast::visit::{Visitor, walk_item}; +use rustc_ast::visit::Visitor; use rustc_ast::{ModKind, attr}; use rustc_errors::DiagCtxtHandle; use rustc_expand::base::{ExtCtxt, ResolverExpand}; @@ -146,11 +146,11 @@ impl<'a> MutVisitor for TestHarnessGenerator<'a> { ) = item.kind { let prev_tests = mem::take(&mut self.tests); - walk_item_kind(&mut item.kind, item.span, item.id, &mut item.vis, (), self); + ast::mut_visit::walk_item(self, item); self.add_test_cases(item.id, span, prev_tests); } else { // But in those cases, we emit a lint to warn the user of these missing tests. 
- walk_item(&mut InnerItemLinter { sess: self.cx.ext_cx.sess }, &item); + ast::visit::walk_item(&mut InnerItemLinter { sess: self.cx.ext_cx.sess }, &item); } } } @@ -227,7 +227,7 @@ fn generate_test_harness( panic_strategy: PanicStrategy, test_runner: Option, ) { - let econfig = ExpansionConfig::default("test".to_string(), features); + let econfig = ExpansionConfig::default(sym::test, features); let ext_cx = ExtCtxt::new(sess, econfig, resolver, None); let expn_id = ext_cx.resolver.expansion_for_ast_pass( diff --git a/compiler/rustc_codegen_cranelift/Cargo.lock b/compiler/rustc_codegen_cranelift/Cargo.lock index a906bec8b7e2..b893a2be9a2c 100644 --- a/compiler/rustc_codegen_cranelift/Cargo.lock +++ b/compiler/rustc_codegen_cranelift/Cargo.lock @@ -43,42 +43,42 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cranelift-assembler-x64" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff8e35182c7372df00447cb90a04e584e032c42b9b9b6e8c50ddaaf0d7900d5" +checksum = "f6f53499803b1607b6ee0ba0de4ba036e6da700c2e489fe8f9d0f683d0b84d31" dependencies = [ "cranelift-assembler-x64-meta", ] [[package]] name = "cranelift-assembler-x64-meta" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14220f9c2698015c3b94dc6b84ae045c1c45509ddc406e43c6139252757fdb7a" +checksum = "1aadaa5bc8430d0e7bb999459369bedd0e5816ad4a82a0e20748341c4e333eda" dependencies = [ "cranelift-srcgen", ] [[package]] name = "cranelift-bforest" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d372ef2777ceefd75829e1390211ac240e9196bc60699218f7ea2419038288ee" +checksum = "2005fda2fc52a2dbce58229b4fb4483b70cbc806ba8ecc11b3f050c1a2d26cac" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-bitset" -version = "0.120.0" +version = "0.121.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "56323783e423818fa89ce8078e90a3913d2a6e0810399bfce8ebd7ee87baa81f" +checksum = "56935e02452ca1249d39ad5c45a96304d0b4300a158a391fd113451e0cd4483d" [[package]] name = "cranelift-codegen" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74ffb780aab6186c6e9ba26519654b1ac55a09c0a866f6088a4efbbd84da68ed" +checksum = "62612786bf00e10999f50217d6f455d02b31591155881a45a903d1a95d1a4043" dependencies = [ "bumpalo", "cranelift-assembler-x64", @@ -97,13 +97,14 @@ dependencies = [ "serde", "smallvec", "target-lexicon", + "wasmtime-math", ] [[package]] name = "cranelift-codegen-meta" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c23ef13814d3b39c869650d5961128cbbecad83fbdff4e6836a03ecf6862d7ed" +checksum = "07bae789df91ef236079733af9df11d852256c64af196f0bc6471ea0f5f301be" dependencies = [ "cranelift-assembler-x64-meta", "cranelift-codegen-shared", @@ -112,33 +113,33 @@ dependencies = [ [[package]] name = "cranelift-codegen-shared" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f623300657679f847803ce80811454bfff89cea4f6bf684be5c468d4a73631" +checksum = "1be319616d36527782558a8312508757815f64deb19b094c7b8f4337229a9bc6" [[package]] name = "cranelift-control" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f4168af69989aa6b91fab46799ed4df6096f3209f4a6c8fb4358f49c60188f" +checksum = "8810ee1ab5e9bd5cff4c0c8d240e2009cb5c2b79888fde1d5256d605712314b7" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6fa9bae1c8de26d71ac2162f069447610fd91e7780cb480ee0d76ac81eabb8" +checksum = 
"086452c97cfbe116bf17dbe622dc5fdf2ea97299c7d4ce42460f284387c9928a" dependencies = [ "cranelift-bitset", ] [[package]] name = "cranelift-frontend" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8219205608aa0b0e6769b580284a7e055c7e0c323c1041cde7ca078add3e412" +checksum = "4c27947010ab759330f252610c17a8cd64d123358be4f33164233d04fcd77b80" dependencies = [ "cranelift-codegen", "log", @@ -148,15 +149,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588d0c5964f10860b04043e55aab26d7f7a206b0fd4f10c5260e8aa5773832bd" +checksum = "ec67bfb8bd55b1e9760eb9f5186dca8d81bd4d86110f8d5af01154a044c91802" [[package]] name = "cranelift-jit" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56bd917ddc524f84f4066f954062875bdfc0dffea068ee94e906d98de5ac7c33" +checksum = "d67cdfc447f2abdb46bb30a6582cce189539c3c051c1d5330692376e1400edff" dependencies = [ "anyhow", "cranelift-codegen", @@ -174,9 +175,9 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68a03c057d8a992e06596c871341e446af43ff9224f941e5b8adea39137a5391" +checksum = "e4597eaa52bca1ed111986c7a7f70cdbe192f83d271d627201365078e37b7e84" dependencies = [ "anyhow", "cranelift-codegen", @@ -185,9 +186,9 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ed3c94cb97b14f92b6a94a1d45ef8c851f6a2ad9114e5d91d233f7da638fed" +checksum = "75a9b63edea46e013fce459c46e500462cb03a0490fdd9c18fe42b1dd7b93aa1" dependencies = [ "cranelift-codegen", "libc", @@ -196,9 +197,9 @@ dependencies = [ [[package]] name = "cranelift-object" 
-version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a64dacef362a69375a604f6636e5e9a174fb96dba3b273646fcd9fa85c1d0997" +checksum = "ce706f0166d5b7f31693dff521e87cb9858e12adf22ffcde93c4a2826f8f04a9" dependencies = [ "anyhow", "cranelift-codegen", @@ -211,9 +212,9 @@ dependencies = [ [[package]] name = "cranelift-srcgen" -version = "0.120.0" +version = "0.121.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85256fac1519a7d25a040c1d850fba67478f3f021ad5fdf738ba4425ee862dbf" +checksum = "7d5870e266df8237b56cc98b04f5739c228565c92dd629ec6c66efa87271a158" [[package]] name = "crc32fast" @@ -288,6 +289,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + [[package]] name = "log" version = "0.4.22" @@ -446,9 +453,9 @@ checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "wasmtime-jit-icache-coherence" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175e924dbc944c185808466d1e90b5a7feb610f3b9abdfe26f8ee25fd1086d1c" +checksum = "2eedc0324e37cf39b049f4dca0c30997eaab49f09006d5f4c1994e64e7b7dba8" dependencies = [ "anyhow", "cfg-if", @@ -456,6 +463,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "wasmtime-math" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd35fae4cf51d2b4a9bd2ef04b0eb309fa1849cab6a6ab5ac27cbd054ea284d" +dependencies = [ + "libm", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/compiler/rustc_codegen_cranelift/Cargo.toml b/compiler/rustc_codegen_cranelift/Cargo.toml index 94fcbd0a5023..9066e4dbbb52 100644 --- a/compiler/rustc_codegen_cranelift/Cargo.toml +++ 
b/compiler/rustc_codegen_cranelift/Cargo.toml @@ -8,12 +8,12 @@ crate-type = ["dylib"] [dependencies] # These have to be in sync with each other -cranelift-codegen = { version = "0.120.0", default-features = false, features = ["std", "timing", "unwind", "all-native-arch"] } -cranelift-frontend = { version = "0.120.0" } -cranelift-module = { version = "0.120.0" } -cranelift-native = { version = "0.120.0" } -cranelift-jit = { version = "0.120.0", optional = true } -cranelift-object = { version = "0.120.0" } +cranelift-codegen = { version = "0.121.0", default-features = false, features = ["std", "timing", "unwind", "all-native-arch"] } +cranelift-frontend = { version = "0.121.0" } +cranelift-module = { version = "0.121.0" } +cranelift-native = { version = "0.121.0" } +cranelift-jit = { version = "0.121.0", optional = true } +cranelift-object = { version = "0.121.0" } target-lexicon = "0.13" gimli = { version = "0.31", default-features = false, features = ["write"] } object = { version = "0.36", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } @@ -24,12 +24,12 @@ smallvec = "1.8.1" [patch.crates-io] # Uncomment to use an unreleased version of cranelift -#cranelift-codegen = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-33.0.0", version = "0.120.0" } -#cranelift-frontend = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-33.0.0", version = "0.120.0" } -#cranelift-module = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-33.0.0", version = "0.120.0" } -#cranelift-native = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-33.0.0", version = "0.120.0" } -#cranelift-jit = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-33.0.0", version = "0.120.0" } -#cranelift-object = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-33.0.0", version = 
"0.120.0" } +#cranelift-codegen = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-34.0.0", version = "0.121.0" } +#cranelift-frontend = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-34.0.0", version = "0.121.0" } +#cranelift-module = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-34.0.0", version = "0.121.0" } +#cranelift-native = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-34.0.0", version = "0.121.0" } +#cranelift-jit = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-34.0.0", version = "0.121.0" } +#cranelift-object = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "release-34.0.0", version = "0.121.0" } # Uncomment to use local checkout of cranelift #cranelift-codegen = { path = "../wasmtime/cranelift/codegen" } diff --git a/compiler/rustc_codegen_cranelift/build_system/abi_cafe.rs b/compiler/rustc_codegen_cranelift/build_system/abi_cafe.rs index 674acfbd3097..43025137bc6b 100644 --- a/compiler/rustc_codegen_cranelift/build_system/abi_cafe.rs +++ b/compiler/rustc_codegen_cranelift/build_system/abi_cafe.rs @@ -6,8 +6,8 @@ use crate::{CodegenBackend, SysrootKind, build_sysroot}; static ABI_CAFE_REPO: GitRepo = GitRepo::github( "Gankra", "abi-cafe", - "f1220cfd13b57f5c0082c26529163865ee25e115", - "fe93a9acd461425d", + "94d38030419eb00a1ba80e5e2b4d763dcee58db4", + "6efb4457893c8670", "abi-cafe", ); @@ -46,6 +46,10 @@ pub(crate) fn run( let mut cmd = ABI_CAFE.run(bootstrap_host_compiler, dirs); cmd.arg("--"); + cmd.arg("--debug"); + + cmd.arg("--rules").arg(dirs.source_dir.join("scripts/abi-cafe-rules.toml")); + // stdcall, vectorcall and such don't work yet cmd.arg("--conventions").arg("c").arg("--conventions").arg("rust"); diff --git a/compiler/rustc_codegen_cranelift/patches/0002-abi-cafe-Disable-broken-tests.patch 
b/compiler/rustc_codegen_cranelift/patches/0002-abi-cafe-Disable-broken-tests.patch deleted file mode 100644 index 01b6a990b720..000000000000 --- a/compiler/rustc_codegen_cranelift/patches/0002-abi-cafe-Disable-broken-tests.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 236df390f3bc4ed69c26f4d51d584bea246da886 Mon Sep 17 00:00:00 2001 -From: bjorn3 <17426603+bjorn3@users.noreply.github.com> -Date: Tue, 9 Jul 2024 11:25:14 +0000 -Subject: [PATCH] Disable broken tests - ---- - src/report.rs | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/src/toolchains/rust.rs b/src/toolchains/rust.rs -index 0c50f7a..bfde2b1 100644 ---- a/src/toolchains/rust.rs -+++ b/src/toolchains/rust.rs -@@ -83,6 +83,7 @@ impl Toolchain for RustcToolchain { - .arg(out_dir) - .arg("--target") - .arg(built_info::TARGET) -+ .arg("-g") - .arg(format!("-Cmetadata={lib_name}")) - .arg(src_path); - if let Some(codegen_backend) = &self.codegen_backend { -diff --git a/src/report.rs b/src/report.rs -index 958ab43..dcf1044 100644 ---- a/src/report.rs -+++ b/src/report.rs -@@ -48,6 +48,40 @@ pub fn get_test_rules(test: &TestKey, caller: &dyn Toolchain, callee: &dyn Toolc - // - // THIS AREA RESERVED FOR VENDORS TO APPLY PATCHES - -+ if cfg!(all(target_arch = "aarch64", target_os = "linux")) { -+ if test.test == "F32Array" && test.options.convention == CallingConvention::C { -+ result.check = Busted(Check); -+ } -+ } -+ -+ if cfg!(all(target_arch = "aarch64", target_os = "macos")) { -+ if test.test == "SingleVariantUnion" && test.options.convention == CallingConvention::C && test.options.repr == LangRepr::C { -+ result.check = Busted(Check); -+ } -+ -+ if test.test == "OptionU128" && test.caller == "rustc" && test.options.convention == CallingConvention::Rust && test.options.repr == LangRepr::C { -+ result.check = Busted(Run); -+ } -+ -+ if test.test == "OptionU128" && test.caller == "cgclif" && test.options.convention == CallingConvention::Rust && test.options.repr == 
LangRepr::C { -+ result.check = Busted(Check); -+ } -+ } -+ -+ if cfg!(all(target_arch = "x86_64", windows)) { -+ if test.test == "simple" && test.options.convention == CallingConvention::Rust { -+ result.check = Busted(Check); -+ } -+ -+ if test.test == "simple" && test.options.convention == CallingConvention::Rust && test.caller == "rustc" { -+ result.check = Busted(Run); -+ } -+ } -+ -+ if test.test == "f16" || test.test == "f128" { -+ result.run = Skip; -+ } -+ - // END OF VENDOR RESERVED AREA - // - // --- -2.34.1 - diff --git a/compiler/rustc_codegen_cranelift/rust-toolchain b/compiler/rustc_codegen_cranelift/rust-toolchain index af4bd6dc6b85..150bb562f74a 100644 --- a/compiler/rustc_codegen_cranelift/rust-toolchain +++ b/compiler/rustc_codegen_cranelift/rust-toolchain @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2025-05-25" +channel = "nightly-2025-06-24" components = ["rust-src", "rustc-dev", "llvm-tools"] profile = "minimal" diff --git a/compiler/rustc_codegen_cranelift/scripts/abi-cafe-rules.toml b/compiler/rustc_codegen_cranelift/scripts/abi-cafe-rules.toml new file mode 100644 index 000000000000..54f9445c8e52 --- /dev/null +++ b/compiler/rustc_codegen_cranelift/scripts/abi-cafe-rules.toml @@ -0,0 +1,17 @@ +[target.'cfg(all(target_arch = "aarch64", target_os = "linux"))'] +'F32Array::conv_c'.busted = "check" + +[target.'cfg(all(target_arch = "aarch64", target_os = "macos"))'] +'SingleVariantUnion::conv_c::repr_c'.busted = "check" +'OptionU128::conv_rust::repr_c::rustc_caller'.busted = "run" +'OptionU128::conv_rust::repr_c::cgclif_caller'.busted = "check" + +[target.'cfg(all(target_arch = "x86_64", windows))'] +'simple::conv_rust'.busted = "check" +'simple::conv_rust::rustc_caller'.busted = "run" + +[target.'*'.'f16'] +run = "skip" + +[target.'*'.'f128'] +run = "skip" diff --git a/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh b/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh index 32c71f433b0f..7e356b4b462b 100755 --- 
a/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh +++ b/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh @@ -151,20 +151,6 @@ rm tests/ui/process/process-panic-after-fork.rs # same cp ../dist/bin/rustdoc-clif ../dist/bin/rustdoc # some tests expect bin/rustdoc to exist cat <( crate::pointer::Pointer::new(global_ptr) } -pub(crate) fn data_id_for_alloc_id( +fn data_id_for_alloc_id( cx: &mut ConstantCx, module: &mut dyn Module, alloc_id: AllocId, diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs index 615f6c47d902..37fbe4be1b0f 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -202,9 +202,10 @@ pub(super) fn codegen_x86_llvm_intrinsic_call<'tcx>( }; let x = codegen_operand(fx, &x.node); let y = codegen_operand(fx, &y.node); - let kind = match &kind.node { - Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0, - Operand::Copy(_) | Operand::Move(_) => unreachable!("{kind:?}"), + let kind = if let Some(const_) = kind.node.constant() { + crate::constant::eval_mir_constant(fx, const_).0 + } else { + unreachable!("{kind:?}") }; let flt_cc = match kind diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index 46a441488fa6..68ff0b622c8f 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -205,9 +205,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( // Find a way to reuse `immediate_const_vector` from `codegen_ssa` instead. 
let indexes = { use rustc_middle::mir::interpret::*; - let idx_const = match &idx.node { - Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0, - Operand::Copy(_) | Operand::Move(_) => unreachable!("{idx:?}"), + let idx_const = if let Some(const_) = idx.node.constant() { + crate::constant::eval_mir_constant(fx, const_).0 + } else { + unreachable!("{idx:?}") }; let idx_bytes = match idx_const { diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs index 07ea29f3024e..8e34436fb5e0 100644 --- a/compiler/rustc_codegen_cranelift/src/lib.rs +++ b/compiler/rustc_codegen_cranelift/src/lib.rs @@ -184,7 +184,7 @@ impl CodegenBackend for CraneliftCodegenBackend { // FIXME return the actually used target features. this is necessary for #[cfg(target_feature)] let target_features = if sess.target.arch == "x86_64" && sess.target.os != "none" { // x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled - vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")] + vec![sym::fxsr, sym::sse, sym::sse2, Symbol::intern("x87")] } else if sess.target.arch == "aarch64" { match &*sess.target.os { "none" => vec![], diff --git a/compiler/rustc_codegen_gcc/messages.ftl b/compiler/rustc_codegen_gcc/messages.ftl index 18a8a5a1e048..55a28bc9493e 100644 --- a/compiler/rustc_codegen_gcc/messages.ftl +++ b/compiler/rustc_codegen_gcc/messages.ftl @@ -1,7 +1,3 @@ -codegen_gcc_unknown_ctarget_feature_prefix = - unknown feature specified for `-Ctarget-feature`: `{$feature}` - .note = features must begin with a `+` to enable or `-` to disable it - codegen_gcc_unwinding_inline_asm = GCC backend does not support unwinding from inline asm @@ -16,15 +12,3 @@ codegen_gcc_lto_disallowed = lto can only be run for executables, cdylibs and st codegen_gcc_lto_dylib = lto cannot be used for `dylib` crate type without `-Zdylib-lto` codegen_gcc_lto_bitcode_from_rlib = failed to get bitcode from object file for LTO 
({$gcc_err}) - -codegen_gcc_unknown_ctarget_feature = - unknown and unstable feature specified for `-Ctarget-feature`: `{$feature}` - .note = it is still passed through to the codegen backend, but use of this feature might be unsound and the behavior of this feature can change in the future - .possible_feature = you might have meant: `{$rust_feature}` - .consider_filing_feature_request = consider filing a feature request - -codegen_gcc_missing_features = - add the missing features in a `target_feature` attribute - -codegen_gcc_target_feature_disable_or_enable = - the target features {$features} must all be either enabled or disabled together diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index a2e34d1f8fbc..7852aebe0c23 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -1591,9 +1591,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { (value1, value2) } - fn filter_landing_pad(&mut self, pers_fn: RValue<'gcc>) -> (RValue<'gcc>, RValue<'gcc>) { + fn filter_landing_pad(&mut self, pers_fn: RValue<'gcc>) { // TODO(antoyo): generate the correct landing pad - self.cleanup_landing_pad(pers_fn) + self.cleanup_landing_pad(pers_fn); } #[cfg(feature = "master")] diff --git a/compiler/rustc_codegen_gcc/src/errors.rs b/compiler/rustc_codegen_gcc/src/errors.rs index 7786be9ae5de..b7e7343460fb 100644 --- a/compiler/rustc_codegen_gcc/src/errors.rs +++ b/compiler/rustc_codegen_gcc/src/errors.rs @@ -1,30 +1,6 @@ -use rustc_macros::{Diagnostic, Subdiagnostic}; +use rustc_macros::Diagnostic; use rustc_span::Span; -#[derive(Diagnostic)] -#[diag(codegen_gcc_unknown_ctarget_feature_prefix)] -#[note] -pub(crate) struct UnknownCTargetFeaturePrefix<'a> { - pub feature: &'a str, -} - -#[derive(Diagnostic)] -#[diag(codegen_gcc_unknown_ctarget_feature)] -#[note] -pub(crate) struct UnknownCTargetFeature<'a> { - pub feature: &'a str, - #[subdiagnostic] - pub 
rust_feature: PossibleFeature<'a>, -} - -#[derive(Subdiagnostic)] -pub(crate) enum PossibleFeature<'a> { - #[help(codegen_gcc_possible_feature)] - Some { rust_feature: &'a str }, - #[help(codegen_gcc_consider_filing_feature_request)] - None, -} - #[derive(Diagnostic)] #[diag(codegen_gcc_unwinding_inline_asm)] pub(crate) struct UnwindingInlineAsm { diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs index 2e00d5fcb612..42ba40692b75 100644 --- a/compiler/rustc_codegen_gcc/src/gcc_util.rs +++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs @@ -1,20 +1,12 @@ #[cfg(feature = "master")] use gccjit::Context; -use rustc_codegen_ssa::codegen_attrs::check_tied_features; -use rustc_codegen_ssa::errors::TargetFeatureDisableOrEnable; -use rustc_data_structures::fx::FxHashMap; -use rustc_data_structures::unord::UnordSet; +use rustc_codegen_ssa::target_features; use rustc_session::Session; -use rustc_session::features::{StabilityExt, retpoline_features_by_flags}; -use rustc_target::target_features::RUSTC_SPECIFIC_FEATURES; use smallvec::{SmallVec, smallvec}; -use crate::errors::{PossibleFeature, UnknownCTargetFeature, UnknownCTargetFeaturePrefix}; - -fn gcc_features_by_flags(sess: &Session) -> Vec<&str> { - let mut features: Vec<&str> = Vec::new(); - retpoline_features_by_flags(sess, &mut features); - features +fn gcc_features_by_flags(sess: &Session, features: &mut Vec) { + target_features::retpoline_features_by_flags(sess, features); + // FIXME: LLVM also sets +reserve-x18 here under some conditions. } /// The list of GCC features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`, @@ -44,98 +36,29 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec. - all_rust_features.push((false, feature)); - } else if !feature.is_empty() && diagnostics { - sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature }); - } - } - // Remove features that are meant for rustc, not codegen. 
- all_rust_features.retain(|&(_, feature)| { - // Retain if it is not a rustc feature - !RUSTC_SPECIFIC_FEATURES.contains(&feature) - }); - - // Check feature validity. - if diagnostics { - for &(enable, feature) in &all_rust_features { - let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature); - match feature_state { - None => { - let rust_feature = known_features.iter().find_map(|&(rust_feature, _, _)| { - let gcc_features = to_gcc_features(sess, rust_feature); - if gcc_features.contains(&feature) && !gcc_features.contains(&rust_feature) - { - Some(rust_feature) - } else { - None - } - }); - let unknown_feature = if let Some(rust_feature) = rust_feature { - UnknownCTargetFeature { - feature, - rust_feature: PossibleFeature::Some { rust_feature }, - } - } else { - UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None } - }; - sess.dcx().emit_warn(unknown_feature); - } - Some(&(_, stability, _)) => { - stability.verify_feature_enabled_by_flag(sess, enable, feature); - } - } - - // FIXME(nagisa): figure out how to not allocate a full hashset here. - featsmap.insert(feature, enable); - } - } - - // Translate this into GCC features. - let feats = - all_rust_features.iter().flat_map(|&(enable, feature)| { - let enable_disable = if enable { '+' } else { '-' }; + target_features::flag_to_backend_features( + sess, + diagnostics, + |feature| to_gcc_features(sess, feature), + |feature, enable| { // We run through `to_gcc_features` when // passing requests down to GCC. This means that all in-language // features also work on the command line instead of having two // different names when the GCC name and the Rust name differ. 
- to_gcc_features(sess, feature) - .iter() - .flat_map(|feat| to_gcc_features(sess, feat).into_iter()) - .map(|feature| { - if enable_disable == '-' { - format!("-{}", feature) - } else { - feature.to_string() - } - }) - .collect::>() - }); - features.extend(feats); + features.extend( + to_gcc_features(sess, feature) + .iter() + .flat_map(|feat| to_gcc_features(sess, feat).into_iter()) + .map( + |feature| { + if !enable { format!("-{}", feature) } else { feature.to_string() } + }, + ), + ); + }, + ); - if diagnostics && let Some(f) = check_tied_features(sess, &featsmap) { - sess.dcx().emit_err(TargetFeatureDisableOrEnable { - features: f, - span: None, - missing_features: None, - }); - } + gcc_features_by_flags(sess, &mut features); features } diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index 6f6bc93b8b26..ac8b7f4ea482 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -61,7 +61,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let (len, _) = args[1].layout.ty.simd_size_and_type(bx.tcx()); let expected_int_bits = (len.max(8) - 1).next_power_of_two(); - let expected_bytes = len / 8 + ((len % 8 > 0) as u64); + let expected_bytes = len / 8 + ((!len.is_multiple_of(8)) as u64); let mask_ty = args[0].layout.ty; let mut mask = match *mask_ty.kind() { @@ -676,7 +676,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let elem_type = vector_type.get_element_type(); let expected_int_bits = in_len.max(8); - let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64); + let expected_bytes = + expected_int_bits / 8 + ((!expected_int_bits.is_multiple_of(8)) as u64); // FIXME(antoyo): that's not going to work for masks bigger than 128 bits. 
let result_type = bx.type_ix(expected_int_bits); diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index aa57655921d4..a912678ef2a1 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -102,6 +102,7 @@ use rustc_codegen_ssa::back::write::{ CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn, }; use rustc_codegen_ssa::base::codegen_crate; +use rustc_codegen_ssa::target_features::cfg_target_feature; use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods}; use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig}; use rustc_data_structures::fx::FxIndexMap; @@ -476,42 +477,21 @@ fn to_gcc_opt_level(optlevel: Option) -> OptimizationLevel { /// Returns the features that should be set in `cfg(target_feature)`. fn target_config(sess: &Session, target_info: &LockedTargetInfo) -> TargetConfig { - // TODO(antoyo): use global_gcc_features. - let f = |allow_unstable| { - sess.target - .rust_target_features() - .iter() - .filter_map(|&(feature, gate, _)| { - if allow_unstable - || (gate.in_cfg() - && (sess.is_nightly_build() || gate.requires_nightly().is_none())) - { - Some(feature) - } else { - None - } - }) - .filter(|feature| { - // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it. 
- if *feature == "neon" { - return false; - } - target_info.cpu_supports(feature) - // cSpell:disable - /* - adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma, - avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq, - bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm, - sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves - */ - // cSpell:enable - }) - .map(Symbol::intern) - .collect() - }; - - let target_features = f(false); - let unstable_target_features = f(true); + let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| { + // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it. + if feature == "neon" { + return false; + } + target_info.cpu_supports(feature) + // cSpell:disable + /* + adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma, + avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq, + bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm, + sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves + */ + // cSpell:enable + }); let has_reliable_f16 = target_info.supports_target_dependent_type(CType::Float16); let has_reliable_f128 = target_info.supports_target_dependent_type(CType::Float128); diff --git a/compiler/rustc_codegen_llvm/messages.ftl b/compiler/rustc_codegen_llvm/messages.ftl index 3faeb9b3b221..3885f18271f1 100644 --- a/compiler/rustc_codegen_llvm/messages.ftl +++ b/compiler/rustc_codegen_llvm/messages.ftl @@ -59,16 +59,6 @@ codegen_llvm_symbol_already_defined = codegen_llvm_target_machine = could not create LLVM TargetMachine for triple: {$triple} 
codegen_llvm_target_machine_with_llvm_err = could not create LLVM TargetMachine for triple: {$triple}: {$llvm_err} -codegen_llvm_unknown_ctarget_feature = - unknown and unstable feature specified for `-Ctarget-feature`: `{$feature}` - .note = it is still passed through to the codegen backend, but use of this feature might be unsound and the behavior of this feature can change in the future - .possible_feature = you might have meant: `{$rust_feature}` - .consider_filing_feature_request = consider filing a feature request - -codegen_llvm_unknown_ctarget_feature_prefix = - unknown feature specified for `-Ctarget-feature`: `{$feature}` - .note = features must begin with a `+` to enable or `-` to disable it - codegen_llvm_unknown_debuginfo_compression = unknown debuginfo compression algorithm {$algorithm} - will fall back to uncompressed debuginfo codegen_llvm_write_bytecode = failed to write bytecode to {$path}: {$err} diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 27fd09745ff0..adb53e0b66c2 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -491,11 +491,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( let allocated_pointer = AttributeKind::AllocatedPointer.create_attr(cx.llcx); attributes::apply_to_llfn(llfn, AttributePlace::Argument(0), &[allocated_pointer]); } - // function alignment can be set globally with the `-Zmin-function-alignment=` flag; - // the alignment from a `#[repr(align())]` is used if it specifies a higher alignment. 
- if let Some(align) = - Ord::max(cx.tcx.sess.opts.unstable_opts.min_function_alignment, codegen_fn_attrs.alignment) - { + if let Some(align) = codegen_fn_attrs.alignment { llvm::set_alignment(llfn, align); } if let Some(backchain) = backchain_attr(cx) { diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index ee46b49a094c..9c62244f3c9f 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -587,7 +587,7 @@ fn thin_lto( } fn enable_autodiff_settings(ad: &[config::AutoDiff]) { - for &val in ad { + for val in ad { // We intentionally don't use a wildcard, to not forget handling anything new. match val { config::AutoDiff::PrintPerf => { @@ -599,6 +599,10 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff]) { config::AutoDiff::PrintTA => { llvm::set_print_type(true); } + config::AutoDiff::PrintTAFn(fun) => { + llvm::set_print_type(true); // Enable general type printing + llvm::set_print_type_fun(&fun); // Set specific function to analyze + } config::AutoDiff::Inline => { llvm::set_inline(true); } diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 5e9594dd06bb..d0aa7320b4b6 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1166,11 +1166,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1)) } - fn filter_landing_pad(&mut self, pers_fn: &'ll Value) -> (&'ll Value, &'ll Value) { + fn filter_landing_pad(&mut self, pers_fn: &'ll Value) { let ty = self.type_struct(&[self.type_ptr(), self.type_i32()], false); let landing_pad = self.landing_pad(ty, pers_fn, 1); self.add_clause(landing_pad, self.const_array(self.type_ptr(), &[])); - (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1)) } fn resume(&mut self, exn0: &'ll Value, exn1: &'ll 
Value) { diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs index 8bc74fbec7ec..d50ad8a1a9cb 100644 --- a/compiler/rustc_codegen_llvm/src/errors.rs +++ b/compiler/rustc_codegen_llvm/src/errors.rs @@ -3,35 +3,11 @@ use std::path::Path; use rustc_data_structures::small_c_str::SmallCStr; use rustc_errors::{Diag, DiagCtxtHandle, Diagnostic, EmissionGuarantee, Level}; -use rustc_macros::{Diagnostic, Subdiagnostic}; +use rustc_macros::Diagnostic; use rustc_span::Span; use crate::fluent_generated as fluent; -#[derive(Diagnostic)] -#[diag(codegen_llvm_unknown_ctarget_feature_prefix)] -#[note] -pub(crate) struct UnknownCTargetFeaturePrefix<'a> { - pub feature: &'a str, -} - -#[derive(Diagnostic)] -#[diag(codegen_llvm_unknown_ctarget_feature)] -#[note] -pub(crate) struct UnknownCTargetFeature<'a> { - pub feature: &'a str, - #[subdiagnostic] - pub rust_feature: PossibleFeature<'a>, -} - -#[derive(Subdiagnostic)] -pub(crate) enum PossibleFeature<'a> { - #[help(codegen_llvm_possible_feature)] - Some { rust_feature: &'a str }, - #[help(codegen_llvm_consider_filing_feature_request)] - None, -} - #[derive(Diagnostic)] #[diag(codegen_llvm_symbol_already_defined)] pub(crate) struct SymbolAlreadyDefined<'a> { diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs index 2ad39fc85381..b94716b89d61 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs @@ -57,14 +57,19 @@ pub(crate) use self::Enzyme_AD::*; #[cfg(llvm_enzyme)] pub(crate) mod Enzyme_AD { + use std::ffi::{CString, c_char}; + use libc::c_void; + unsafe extern "C" { pub(crate) fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8); + pub(crate) fn EnzymeSetCLString(arg1: *mut ::std::os::raw::c_void, arg2: *const c_char); } unsafe extern "C" { static mut EnzymePrintPerf: c_void; static mut EnzymePrintActivity: c_void; static mut 
EnzymePrintType: c_void; + static mut EnzymeFunctionToAnalyze: c_void; static mut EnzymePrint: c_void; static mut EnzymeStrictAliasing: c_void; static mut looseTypeAnalysis: c_void; @@ -86,6 +91,15 @@ pub(crate) mod Enzyme_AD { EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8); } } + pub(crate) fn set_print_type_fun(fun_name: &str) { + let c_fun_name = CString::new(fun_name).unwrap(); + unsafe { + EnzymeSetCLString( + std::ptr::addr_of_mut!(EnzymeFunctionToAnalyze), + c_fun_name.as_ptr() as *const c_char, + ); + } + } pub(crate) fn set_print(print: bool) { unsafe { EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8); @@ -132,6 +146,9 @@ pub(crate) mod Fallback_AD { pub(crate) fn set_print_type(print: bool) { unimplemented!() } + pub(crate) fn set_print_type_fun(fun_name: &str) { + unimplemented!() + } pub(crate) fn set_print(print: bool) { unimplemented!() } diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 0e77bc43df80..6fd07d562afd 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -6,27 +6,20 @@ use std::sync::Once; use std::{ptr, slice, str}; use libc::c_int; -use rustc_codegen_ssa::TargetConfig; use rustc_codegen_ssa::base::wants_wasm_eh; -use rustc_codegen_ssa::codegen_attrs::check_tied_features; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_codegen_ssa::target_features::cfg_target_feature; +use rustc_codegen_ssa::{TargetConfig, target_features}; +use rustc_data_structures::fx::FxHashSet; use rustc_data_structures::small_c_str::SmallCStr; -use rustc_data_structures::unord::UnordSet; use rustc_fs_util::path_to_c_string; use rustc_middle::bug; use rustc_session::Session; use rustc_session::config::{PrintKind, PrintRequest}; -use rustc_session::features::{StabilityExt, retpoline_features_by_flags}; -use rustc_span::Symbol; use rustc_target::spec::{MergeFunctions, PanicStrategy, 
SmallDataThresholdSupport}; -use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES}; use smallvec::{SmallVec, smallvec}; use crate::back::write::create_informational_target_machine; -use crate::errors::{ - FixedX18InvalidArch, PossibleFeature, UnknownCTargetFeature, UnknownCTargetFeaturePrefix, -}; -use crate::llvm; +use crate::{errors, llvm}; static INIT: Once = Once::new(); @@ -195,15 +188,6 @@ impl<'a> LLVMFeature<'a> { ) -> Self { Self { llvm_feature_name, dependencies } } - - fn contains(&'a self, feat: &str) -> bool { - self.iter().any(|dep| dep == feat) - } - - fn iter(&'a self) -> impl Iterator { - let dependencies = self.dependencies.iter().map(|feat| feat.as_str()); - std::iter::once(self.llvm_feature_name).chain(dependencies) - } } impl<'a> IntoIterator for LLVMFeature<'a> { @@ -216,18 +200,22 @@ impl<'a> IntoIterator for LLVMFeature<'a> { } } -// WARNING: the features after applying `to_llvm_features` must be known -// to LLVM or the feature detection code will walk past the end of the feature -// array, leading to crashes. -// -// To find a list of LLVM's names, see llvm-project/llvm/lib/Target/{ARCH}/*.td -// where `{ARCH}` is the architecture name. Look for instances of `SubtargetFeature`. -// -// Check the current rustc fork of LLVM in the repo at https://github.com/rust-lang/llvm-project/. -// The commit in use can be found via the `llvm-project` submodule in -// https://github.com/rust-lang/rust/tree/master/src Though note that Rust can also be build with -// an external precompiled version of LLVM which might lead to failures if the oldest tested / -// supported LLVM version doesn't yet support the relevant intrinsics. +/// Convert a Rust feature name to an LLVM feature name. Returning `None` means the +/// feature should be skipped, usually because it is not supported by the current +/// LLVM version. 
+/// +/// WARNING: the features after applying `to_llvm_features` must be known +/// to LLVM or the feature detection code will walk past the end of the feature +/// array, leading to crashes. +/// +/// To find a list of LLVM's names, see llvm-project/llvm/lib/Target/{ARCH}/*.td +/// where `{ARCH}` is the architecture name. Look for instances of `SubtargetFeature`. +/// +/// Check the current rustc fork of LLVM in the repo at +/// . The commit in use can be found via the +/// `llvm-project` submodule in Though note that +/// Rust can also be build with an external precompiled version of LLVM which might lead to failures +/// if the oldest tested / supported LLVM version doesn't yet support the relevant intrinsics. pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option> { let arch = if sess.target.arch == "x86_64" { "x86" @@ -343,98 +331,25 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig { // the target CPU, that is still expanded to target features (with all their implied features) // by LLVM. let target_machine = create_informational_target_machine(sess, true); - // Compute which of the known target features are enabled in the 'base' target machine. We only - // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now. - let mut features: FxHashSet = sess - .target - .rust_target_features() - .iter() - .filter(|(feature, _, _)| { - // skip checking special features, as LLVM may not understand them - if RUSTC_SPECIAL_FEATURES.contains(feature) { - return true; - } - if let Some(feat) = to_llvm_features(sess, feature) { - for llvm_feature in feat { - let cstr = SmallCStr::new(llvm_feature); - // `LLVMRustHasFeature` is moderately expensive. On targets with many - // features (e.g. x86) these calls take a non-trivial fraction of runtime - // when compiling very small programs. 
- if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } { - return false; - } + + let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| { + if let Some(feat) = to_llvm_features(sess, feature) { + // All the LLVM features this expands to must be enabled. + for llvm_feature in feat { + let cstr = SmallCStr::new(llvm_feature); + // `LLVMRustHasFeature` is moderately expensive. On targets with many + // features (e.g. x86) these calls take a non-trivial fraction of runtime + // when compiling very small programs. + if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } { + return false; } - true - } else { - false } - }) - .map(|(feature, _, _)| Symbol::intern(feature)) - .collect(); - - // Add enabled and remove disabled features. - for (enabled, feature) in - sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() { - Some('+') => Some((true, Symbol::intern(&s[1..]))), - Some('-') => Some((false, Symbol::intern(&s[1..]))), - _ => None, - }) - { - if enabled { - // Also add all transitively implied features. - - // We don't care about the order in `features` since the only thing we use it for is the - // `features.contains` below. - #[allow(rustc::potential_query_instability)] - features.extend( - sess.target - .implied_target_features(feature.as_str()) - .iter() - .map(|s| Symbol::intern(s)), - ); + true } else { - // Remove transitively reverse-implied features. - - // We don't care about the order in `features` since the only thing we use it for is the - // `features.contains` below. - #[allow(rustc::potential_query_instability)] - features.retain(|f| { - if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) { - // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to - // remove `f`. (This is the standard logical contraposition principle.) - false - } else { - // We can keep `f`. 
- true - } - }); + false } - } + }); - // Filter enabled features based on feature gates. - let f = |allow_unstable| { - sess.target - .rust_target_features() - .iter() - .filter_map(|(feature, gate, _)| { - // The `allow_unstable` set is used by rustc internally to determined which target - // features are truly available, so we want to return even perma-unstable - // "forbidden" features. - if allow_unstable - || (gate.in_cfg() - && (sess.is_nightly_build() || gate.requires_nightly().is_none())) - { - Some(Symbol::intern(feature)) - } else { - None - } - }) - .filter(|feature| features.contains(&feature)) - .collect() - }; - - let target_features = f(false); - let unstable_target_features = f(true); let mut cfg = TargetConfig { target_features, unstable_target_features, @@ -707,10 +622,18 @@ pub(crate) fn target_cpu(sess: &Session) -> &str { handle_native(cpu_name) } -fn llvm_features_by_flags(sess: &Session) -> Vec<&str> { - let mut features: Vec<&str> = Vec::new(); - retpoline_features_by_flags(sess, &mut features); - features +/// The target features for compiler flags other than `-Ctarget-features`. +fn llvm_features_by_flags(sess: &Session, features: &mut Vec) { + target_features::retpoline_features_by_flags(sess, features); + + // -Zfixed-x18 + if sess.opts.unstable_opts.fixed_x18 { + if sess.target.arch != "aarch64" { + sess.dcx().emit_fatal(errors::FixedX18InvalidArch { arch: &sess.target.arch }); + } else { + features.push("+reserve-x18".into()); + } + } } /// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`, @@ -777,6 +700,8 @@ pub(crate) fn global_llvm_features( .split(',') .filter(|v| !v.is_empty()) // Drop +v8plus feature introduced in LLVM 20. + // (Hard-coded target features do not go through `to_llvm_feature` since they already + // are LLVM feature names, hence we need a special case here.) 
.filter(|v| *v != "+v8plus" || get_version() >= (20, 0, 0)) .map(String::from), ); @@ -787,86 +712,23 @@ pub(crate) fn global_llvm_features( // -Ctarget-features if !only_base_features { - let known_features = sess.target.rust_target_features(); - // Will only be filled when `diagnostics` is set! - let mut featsmap = FxHashMap::default(); - - // Compute implied features - let mut all_rust_features = vec![]; - for feature in sess.opts.cg.target_feature.split(',').chain(llvm_features_by_flags(sess)) { - if let Some(feature) = feature.strip_prefix('+') { - all_rust_features.extend( - UnordSet::from(sess.target.implied_target_features(feature)) - .to_sorted_stable_ord() - .iter() - .map(|&&s| (true, s)), - ) - } else if let Some(feature) = feature.strip_prefix('-') { - // FIXME: Why do we not remove implied features on "-" here? - // We do the equivalent above in `target_config`. - // See . - all_rust_features.push((false, feature)); - } else if !feature.is_empty() { - if diagnostics { - sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature }); - } - } - } - // Remove features that are meant for rustc, not LLVM. - all_rust_features.retain(|(_, feature)| { - // Retain if it is not a rustc feature - !RUSTC_SPECIFIC_FEATURES.contains(feature) - }); - - // Check feature validity. 
- if diagnostics { - for &(enable, feature) in &all_rust_features { - let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature); - match feature_state { - None => { - let rust_feature = - known_features.iter().find_map(|&(rust_feature, _, _)| { - let llvm_features = to_llvm_features(sess, rust_feature)?; - if llvm_features.contains(feature) - && !llvm_features.contains(rust_feature) - { - Some(rust_feature) - } else { - None - } - }); - let unknown_feature = if let Some(rust_feature) = rust_feature { - UnknownCTargetFeature { - feature, - rust_feature: PossibleFeature::Some { rust_feature }, - } - } else { - UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None } - }; - sess.dcx().emit_warn(unknown_feature); - } - Some((_, stability, _)) => { - stability.verify_feature_enabled_by_flag(sess, enable, feature); - } - } - - // FIXME(nagisa): figure out how to not allocate a full hashset here. - featsmap.insert(feature, enable); - } - } - - // Translate this into LLVM features. - let feats = all_rust_features - .iter() - .filter_map(|&(enable, feature)| { + target_features::flag_to_backend_features( + sess, + diagnostics, + |feature| { + to_llvm_features(sess, feature) + .map(|f| SmallVec::<[&str; 2]>::from_iter(f.into_iter())) + .unwrap_or_default() + }, + |feature, enable| { let enable_disable = if enable { '+' } else { '-' }; // We run through `to_llvm_features` when // passing requests down to LLVM. This means that all in-language // features also work on the command line instead of having two // different names when the LLVM name and the Rust name differ. 
- let llvm_feature = to_llvm_features(sess, feature)?; + let Some(llvm_feature) = to_llvm_features(sess, feature) else { return }; - Some( + features.extend( std::iter::once(format!( "{}{}", enable_disable, llvm_feature.llvm_feature_name @@ -881,27 +743,12 @@ pub(crate) fn global_llvm_features( }, )), ) - }) - .flatten(); - features.extend(feats); - - if diagnostics && let Some(f) = check_tied_features(sess, &featsmap) { - sess.dcx().emit_err(rustc_codegen_ssa::errors::TargetFeatureDisableOrEnable { - features: f, - span: None, - missing_features: None, - }); - } + }, + ); } - // -Zfixed-x18 - if sess.opts.unstable_opts.fixed_x18 { - if sess.target.arch != "aarch64" { - sess.dcx().emit_fatal(FixedX18InvalidArch { arch: &sess.target.arch }); - } else { - features.push("+reserve-x18".into()); - } - } + // We add this in the "base target" so that these show up in `sess.unstable_target_features`. + llvm_features_by_flags(sess, &mut features); features } diff --git a/compiler/rustc_codegen_ssa/messages.ftl b/compiler/rustc_codegen_ssa/messages.ftl index 5322fe58cf33..84d638193435 100644 --- a/compiler/rustc_codegen_ssa/messages.ftl +++ b/compiler/rustc_codegen_ssa/messages.ftl @@ -48,10 +48,6 @@ codegen_ssa_error_writing_def_file = codegen_ssa_expected_name_value_pair = expected name value pair -codegen_ssa_expected_one_argument = expected one argument - -codegen_ssa_expected_used_symbol = expected `used`, `used(compiler)` or `used(linker)` - codegen_ssa_extern_funcs_not_found = some `extern` functions couldn't be found; some native libraries may need to be installed or have their path specified codegen_ssa_extract_bundled_libs_archive_member = failed to get data from archive member '{$rlib}': {$error} @@ -68,6 +64,11 @@ codegen_ssa_failed_to_write = failed to write {$path}: {$error} codegen_ssa_field_associated_value_expected = associated value expected for `{$name}` +codegen_ssa_forbidden_ctarget_feature = + target feature `{$feature}` cannot be {$enabled} with 
`-Ctarget-feature`: {$reason} + .note = this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! +codegen_ssa_forbidden_ctarget_feature_issue = for more information, see issue #116344 + codegen_ssa_forbidden_target_feature_attr = target feature `{$feature}` cannot be enabled with `#[target_feature]`: {$reason} @@ -86,9 +87,6 @@ codegen_ssa_incorrect_cgu_reuse_type = codegen_ssa_insufficient_vs_code_product = VS Code is a different product, and is not sufficient. -codegen_ssa_invalid_argument = invalid argument - .help = valid inline arguments are `always` and `never` - codegen_ssa_invalid_instruction_set = invalid instruction set specified codegen_ssa_invalid_link_ordinal_nargs = incorrect number of arguments to `#[link_ordinal]` @@ -205,11 +203,6 @@ codegen_ssa_missing_features = add the missing features in a `target_feature` at codegen_ssa_missing_query_depgraph = found CGU-reuse attribute but `-Zquery-dep-graph` was not specified -codegen_ssa_mixed_export_name_and_no_mangle = `{$no_mangle_attr}` attribute may not be used in combination with `#[export_name]` - .label = `{$no_mangle_attr}` is ignored - .note = `#[export_name]` takes precedence - .suggestion = remove the `{$no_mangle_attr}` attribute - codegen_ssa_msvc_missing_linker = the msvc targets depend on the msvc linker but `link.exe` was not found codegen_ssa_multiple_external_func_decl = multiple declarations of external function `{$function}` from library `{$library_name}` have different calling conventions @@ -221,6 +214,8 @@ codegen_ssa_multiple_main_functions = entry symbol `main` declared multiple time codegen_ssa_no_field = no field `{$name}` +codegen_ssa_no_mangle_nameless = `#[no_mangle]` cannot be used on {$definition} as it has no name + codegen_ssa_no_module_named = no module named `{$user_path}` (mangled: {$cgu_name}). 
available modules: {$cgu_names} @@ -228,8 +223,6 @@ codegen_ssa_no_natvis_directory = error enumerating natvis directory: {$error} codegen_ssa_no_saved_object_file = cached cgu {$cgu_name} should have an object file, but doesn't -codegen_ssa_null_on_export = `export_name` may not contain null characters - codegen_ssa_out_of_range_integer = integer value out of range .label = value must be between `0` and `255` @@ -368,8 +361,22 @@ codegen_ssa_unexpected_parameter_name = unexpected parameter name codegen_ssa_unknown_archive_kind = Don't know how to build archive of type: {$kind} +codegen_ssa_unknown_ctarget_feature = + unknown and unstable feature specified for `-Ctarget-feature`: `{$feature}` + .note = it is still passed through to the codegen backend, but use of this feature might be unsound and the behavior of this feature can change in the future + .possible_feature = you might have meant: `{$rust_feature}` + .consider_filing_feature_request = consider filing a feature request + +codegen_ssa_unknown_ctarget_feature_prefix = + unknown feature specified for `-Ctarget-feature`: `{$feature}` + .note = features must begin with a `+` to enable or `-` to disable it + codegen_ssa_unknown_reuse_kind = unknown cgu-reuse-kind `{$kind}` specified +codegen_ssa_unstable_ctarget_feature = + unstable feature specified for `-Ctarget-feature`: `{$feature}` + .note = this feature is not stably supported; its behavior can change in the future + codegen_ssa_unsupported_instruction_set = target does not support `#[instruction_set]` codegen_ssa_unsupported_link_self_contained = option `-C link-self-contained` is not supported on this target diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 8882ba359b77..4a2425967e4f 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -865,7 +865,7 @@ fn link_natively( command: cmd, escaped_output, verbose: sess.opts.verbose, - 
sysroot_dir: sess.sysroot.clone(), + sysroot_dir: sess.opts.sysroot.path().to_owned(), }; sess.dcx().emit_err(err); // If MSVC's `link.exe` was expected but the return code @@ -1249,10 +1249,10 @@ fn link_sanitizer_runtime( if path.exists() { sess.target_tlib_path.dir.clone() } else { - let default_sysroot = filesearch::get_or_default_sysroot(); - let default_tlib = - filesearch::make_target_lib_path(&default_sysroot, sess.opts.target_triple.tuple()); - default_tlib + filesearch::make_target_lib_path( + &sess.opts.sysroot.default, + sess.opts.target_triple.tuple(), + ) } } @@ -1758,7 +1758,7 @@ fn detect_self_contained_mingw(sess: &Session, linker: &Path) -> bool { for dir in env::split_paths(&env::var_os("PATH").unwrap_or_default()) { let full_path = dir.join(&linker_with_extension); // If linker comes from sysroot assume self-contained mode - if full_path.is_file() && !full_path.starts_with(&sess.sysroot) { + if full_path.is_file() && !full_path.starts_with(sess.opts.sysroot.path()) { return false; } } diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index 8fc83908efbc..fba84dec0973 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -337,7 +337,12 @@ pub(crate) trait Linker { fn debuginfo(&mut self, strip: Strip, natvis_debugger_visualizers: &[PathBuf]); fn no_crt_objects(&mut self); fn no_default_libraries(&mut self); - fn export_symbols(&mut self, tmpdir: &Path, crate_type: CrateType, symbols: &[String]); + fn export_symbols( + &mut self, + tmpdir: &Path, + crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ); fn subsystem(&mut self, subsystem: &str); fn linker_plugin_lto(&mut self); fn add_eh_frame_header(&mut self) {} @@ -770,7 +775,12 @@ impl<'a> Linker for GccLinker<'a> { } } - fn export_symbols(&mut self, tmpdir: &Path, crate_type: CrateType, symbols: &[String]) { + fn export_symbols( + &mut self, + tmpdir: &Path, + 
crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { // Symbol visibility in object files typically takes care of this. if crate_type == CrateType::Executable { let should_export_executable_symbols = @@ -799,7 +809,7 @@ impl<'a> Linker for GccLinker<'a> { // Write a plain, newline-separated list of symbols let res: io::Result<()> = try { let mut f = File::create_buffered(&path)?; - for sym in symbols { + for (sym, _) in symbols { debug!(" _{sym}"); writeln!(f, "_{sym}")?; } @@ -814,11 +824,12 @@ impl<'a> Linker for GccLinker<'a> { // .def file similar to MSVC one but without LIBRARY section // because LD doesn't like when it's empty writeln!(f, "EXPORTS")?; - for symbol in symbols { + for (symbol, kind) in symbols { + let kind_marker = if *kind == SymbolExportKind::Data { " DATA" } else { "" }; debug!(" _{symbol}"); // Quote the name in case it's reserved by linker in some way // (this accounts for names with dots in particular). - writeln!(f, " \"{symbol}\"")?; + writeln!(f, " \"{symbol}\"{kind_marker}")?; } }; if let Err(error) = res { @@ -831,7 +842,7 @@ impl<'a> Linker for GccLinker<'a> { writeln!(f, "{{")?; if !symbols.is_empty() { writeln!(f, " global:")?; - for sym in symbols { + for (sym, _) in symbols { debug!(" {sym};"); writeln!(f, " {sym};")?; } @@ -1059,7 +1070,7 @@ impl<'a> Linker for MsvcLinker<'a> { self.link_arg("/PDBALTPATH:%_PDB%"); // This will cause the Microsoft linker to embed .natvis info into the PDB file - let natvis_dir_path = self.sess.sysroot.join("lib\\rustlib\\etc"); + let natvis_dir_path = self.sess.opts.sysroot.path().join("lib\\rustlib\\etc"); if let Ok(natvis_dir) = fs::read_dir(&natvis_dir_path) { for entry in natvis_dir { match entry { @@ -1098,7 +1109,12 @@ impl<'a> Linker for MsvcLinker<'a> { // crates. Upstream rlibs may be linked statically to this dynamic library, // in which case they may continue to transitively be used and hence need // their symbols exported. 
- fn export_symbols(&mut self, tmpdir: &Path, crate_type: CrateType, symbols: &[String]) { + fn export_symbols( + &mut self, + tmpdir: &Path, + crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { // Symbol visibility takes care of this typically if crate_type == CrateType::Executable { let should_export_executable_symbols = @@ -1116,9 +1132,10 @@ impl<'a> Linker for MsvcLinker<'a> { // straight to exports. writeln!(f, "LIBRARY")?; writeln!(f, "EXPORTS")?; - for symbol in symbols { + for (symbol, kind) in symbols { + let kind_marker = if *kind == SymbolExportKind::Data { " DATA" } else { "" }; debug!(" _{symbol}"); - writeln!(f, " {symbol}")?; + writeln!(f, " {symbol}{kind_marker}")?; } }; if let Err(error) = res { @@ -1259,14 +1276,19 @@ impl<'a> Linker for EmLinker<'a> { self.cc_arg("-nodefaultlibs"); } - fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType, symbols: &[String]) { + fn export_symbols( + &mut self, + _tmpdir: &Path, + _crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { debug!("EXPORTED SYMBOLS:"); self.cc_arg("-s"); let mut arg = OsString::from("EXPORTED_FUNCTIONS="); let encoded = serde_json::to_string( - &symbols.iter().map(|sym| "_".to_owned() + sym).collect::>(), + &symbols.iter().map(|(sym, _)| "_".to_owned() + sym).collect::>(), ) .unwrap(); debug!("{encoded}"); @@ -1428,8 +1450,13 @@ impl<'a> Linker for WasmLd<'a> { fn no_default_libraries(&mut self) {} - fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType, symbols: &[String]) { - for sym in symbols { + fn export_symbols( + &mut self, + _tmpdir: &Path, + _crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { + for (sym, _) in symbols { self.link_args(&["--export", sym]); } @@ -1563,7 +1590,7 @@ impl<'a> Linker for L4Bender<'a> { self.cc_arg("-nostdlib"); } - fn export_symbols(&mut self, _: &Path, _: CrateType, _: &[String]) { + fn export_symbols(&mut self, _: &Path, _: CrateType, _: &[(String, 
SymbolExportKind)]) { // ToDo, not implemented, copy from GCC self.sess.dcx().emit_warn(errors::L4BenderExportingSymbolsUnimplemented); } @@ -1720,12 +1747,17 @@ impl<'a> Linker for AixLinker<'a> { fn no_default_libraries(&mut self) {} - fn export_symbols(&mut self, tmpdir: &Path, _crate_type: CrateType, symbols: &[String]) { + fn export_symbols( + &mut self, + tmpdir: &Path, + _crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { let path = tmpdir.join("list.exp"); let res: io::Result<()> = try { let mut f = File::create_buffered(&path)?; // FIXME: use llvm-nm to generate export list. - for symbol in symbols { + for (symbol, _) in symbols { debug!(" _{symbol}"); writeln!(f, " {symbol}")?; } @@ -1769,9 +1801,23 @@ fn for_each_exported_symbols_include_dep<'tcx>( } } -pub(crate) fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec { +pub(crate) fn exported_symbols( + tcx: TyCtxt<'_>, + crate_type: CrateType, +) -> Vec<(String, SymbolExportKind)> { if let Some(ref exports) = tcx.sess.target.override_export_symbols { - return exports.iter().map(ToString::to_string).collect(); + return exports + .iter() + .map(|name| { + ( + name.to_string(), + // FIXME use the correct export kind for this symbol. override_export_symbols + // can't directly specify the SymbolExportKind as it is defined in rustc_middle + // which rustc_target can't depend on. 
+ SymbolExportKind::Text, + ) + }) + .collect(); } if let CrateType::ProcMacro = crate_type { @@ -1781,7 +1827,10 @@ pub(crate) fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec, crate_type: CrateType) -> Vec { +fn exported_symbols_for_non_proc_macro( + tcx: TyCtxt<'_>, + crate_type: CrateType, +) -> Vec<(String, SymbolExportKind)> { let mut symbols = Vec::new(); let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| { @@ -1789,8 +1838,9 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) - // from any cdylib. The latter doesn't work anyway as we use hidden visibility for // compiler-builtins. Most linkers silently ignore it, but ld64 gives a warning. if info.level.is_below_threshold(export_threshold) && !tcx.is_compiler_builtins(cnum) { - symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate( - tcx, symbol, cnum, + symbols.push(( + symbol_export::exporting_symbol_name_for_instance_in_crate(tcx, symbol, cnum), + info.kind, )); symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum); } @@ -1799,7 +1849,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) - symbols } -fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec { +fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec<(String, SymbolExportKind)> { // `exported_symbols` will be empty when !should_codegen. 
if !tcx.sess.opts.output_types.should_codegen() { return Vec::new(); @@ -1809,7 +1859,10 @@ fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec { let proc_macro_decls_name = tcx.sess.generate_proc_macro_decls_symbol(stable_crate_id); let metadata_symbol_name = exported_symbols::metadata_symbol_name(tcx); - vec![proc_macro_decls_name, metadata_symbol_name] + vec![ + (proc_macro_decls_name, SymbolExportKind::Data), + (metadata_symbol_name, SymbolExportKind::Data), + ] } pub(crate) fn linked_symbols( @@ -1823,15 +1876,33 @@ pub(crate) fn linked_symbols( } } + match tcx.sess.lto() { + Lto::No | Lto::ThinLocal => {} + Lto::Thin | Lto::Fat => { + // We really only need symbols from upstream rlibs to end up in the linked symbols list. + // The rest are in separate object files which the linker will always link in and + // doesn't have rules around the order in which they need to appear. + // When doing LTO, some of the symbols in the linked symbols list happen to be + // internalized by LTO, which then prevents referencing them from symbols.o. When doing + // LTO, all object files that get linked in will be local object files rather than + // pulled in from rlibs, so an empty linked symbols list works fine to avoid referencing + // all those internalized symbols from symbols.o. 
+ return Vec::new(); + } + } + let mut symbols = Vec::new(); let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| { if info.level.is_below_threshold(export_threshold) && !tcx.is_compiler_builtins(cnum) || info.used + || info.rustc_std_internal_symbol { symbols.push(( - symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum), + symbol_export::linking_symbol_name_for_instance_in_crate( + tcx, symbol, info.kind, cnum, + ), info.kind, )); } @@ -1906,7 +1977,13 @@ impl<'a> Linker for PtxLinker<'a> { fn ehcont_guard(&mut self) {} - fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType, _symbols: &[String]) {} + fn export_symbols( + &mut self, + _tmpdir: &Path, + _crate_type: CrateType, + _symbols: &[(String, SymbolExportKind)], + ) { + } fn subsystem(&mut self, _subsystem: &str) {} @@ -1975,10 +2052,15 @@ impl<'a> Linker for LlbcLinker<'a> { fn ehcont_guard(&mut self) {} - fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType, symbols: &[String]) { + fn export_symbols( + &mut self, + _tmpdir: &Path, + _crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { match _crate_type { CrateType::Cdylib => { - for sym in symbols { + for (sym, _) in symbols { self.link_args(&["--export-symbol", sym]); } } @@ -2052,11 +2134,16 @@ impl<'a> Linker for BpfLinker<'a> { fn ehcont_guard(&mut self) {} - fn export_symbols(&mut self, tmpdir: &Path, _crate_type: CrateType, symbols: &[String]) { + fn export_symbols( + &mut self, + tmpdir: &Path, + _crate_type: CrateType, + symbols: &[(String, SymbolExportKind)], + ) { let path = tmpdir.join("symbols"); let res: io::Result<()> = try { let mut f = File::create_buffered(&path)?; - for sym in symbols { + for (sym, _) in symbols { writeln!(f, "{sym}")?; } }; diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index 
d0b6c7470fb9..75f7a4635565 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -131,6 +131,9 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap, _: LocalCrate) -> DefIdMap( level: info.level, kind: SymbolExportKind::Text, used: info.used, + rustc_std_internal_symbol: info.rustc_std_internal_symbol, }, ) }) @@ -207,6 +212,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::C, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: false, }, )); } @@ -230,6 +236,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::Rust, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: true, }, )); } @@ -250,6 +257,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::C, kind: SymbolExportKind::Data, used: false, + rustc_std_internal_symbol: false, }, ) })); @@ -275,6 +283,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::C, kind: SymbolExportKind::Data, used: false, + rustc_std_internal_symbol: false, }, ) })); @@ -292,6 +301,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::C, kind: SymbolExportKind::Data, used: true, + rustc_std_internal_symbol: false, }, )); } @@ -367,6 +377,8 @@ fn exported_symbols_provider_local<'tcx>( } } + // Note: These all set rustc_std_internal_symbol to false as generic functions must not + // be marked with this attribute and we are only handling generic functions here. 
match *mono_item { MonoItem::Fn(Instance { def: InstanceKind::Item(def), args }) => { let has_generics = args.non_erasable_generics().next().is_some(); @@ -382,6 +394,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::Rust, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: false, }, )); } @@ -404,6 +417,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::Rust, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: false, }, )); } @@ -420,6 +434,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::Rust, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: false, }, )); } @@ -430,6 +445,7 @@ fn exported_symbols_provider_local<'tcx>( level: SymbolExportLevel::Rust, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: false, }, )); } @@ -680,6 +696,7 @@ fn calling_convention_for_symbol<'tcx>( pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>( tcx: TyCtxt<'tcx>, symbol: ExportedSymbol<'tcx>, + export_kind: SymbolExportKind, instantiating_crate: CrateNum, ) -> String { let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate); @@ -700,8 +717,9 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>( let prefix = match &target.arch[..] { "x86" => Some('_'), "x86_64" => None, - "arm64ec" => Some('#'), - // Only x86/64 use symbol decorations. + // Only functions are decorated for arm64ec. + "arm64ec" if export_kind == SymbolExportKind::Text => Some('#'), + // Only x86/64 and arm64ec use symbol decorations. _ => return undecorated, }; @@ -741,7 +759,7 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>( /// Add it to the symbols list for all kernel functions, so that it is exported in the linked /// object. 
pub(crate) fn extend_exported_symbols<'tcx>( - symbols: &mut Vec, + symbols: &mut Vec<(String, SymbolExportKind)>, tcx: TyCtxt<'tcx>, symbol: ExportedSymbol<'tcx>, instantiating_crate: CrateNum, @@ -755,7 +773,9 @@ pub(crate) fn extend_exported_symbols<'tcx>( let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate); // Add the symbol for the kernel descriptor (with .kd suffix) - symbols.push(format!("{undecorated}.kd")); + // Per https://llvm.org/docs/AMDGPUUsage.html#symbols these will always be `STT_OBJECT` so + // export as data. + symbols.push((format!("{undecorated}.kd"), SymbolExportKind::Data)); } fn maybe_emutls_symbol_name<'tcx>( diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index bbf9cceef2a0..c3bfe4c13cdf 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -14,10 +14,10 @@ use rustc_data_structures::jobserver::{self, Acquired}; use rustc_data_structures::memmap::Mmap; use rustc_data_structures::profiling::{SelfProfilerRef, VerboseTimingGuard}; use rustc_errors::emitter::Emitter; -use rustc_errors::translation::Translate; +use rustc_errors::translation::Translator; use rustc_errors::{ - Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalError, FluentBundle, Level, MultiSpan, - Style, Suggestions, + Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalError, Level, MultiSpan, Style, + Suggestions, }; use rustc_fs_util::link_or_copy; use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; @@ -1889,16 +1889,6 @@ impl SharedEmitter { } } -impl Translate for SharedEmitter { - fn fluent_bundle(&self) -> Option<&FluentBundle> { - None - } - - fn fallback_fluent_bundle(&self) -> &FluentBundle { - panic!("shared emitter attempted to translate a diagnostic"); - } -} - impl Emitter for SharedEmitter { fn emit_diagnostic( &mut self, @@ -1932,6 +1922,10 @@ impl Emitter for SharedEmitter { fn source_map(&self) -> 
Option<&SourceMap> { None } + + fn translator(&self) -> &Translator { + panic!("shared emitter attempted to translate a diagnostic"); + } } impl SharedEmitterMain { diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index a3d6c73ba856..b06cfd1e4730 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -6,15 +6,15 @@ use std::time::{Duration, Instant}; use itertools::Itertools; use rustc_abi::FIRST_VARIANT; use rustc_ast as ast; -use rustc_ast::expand::allocator::{ALLOCATOR_METHODS, AllocatorKind, global_fn_name}; +use rustc_ast::expand::allocator::AllocatorKind; use rustc_attr_data_structures::OptimizeAttr; use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_data_structures::profiling::{get_resident_set_size, print_time_passes_entry}; use rustc_data_structures::sync::{IntoDynSyncSend, par_map}; use rustc_data_structures::unord::UnordMap; -use rustc_hir::ItemId; use rustc_hir::def_id::{DefId, LOCAL_CRATE}; use rustc_hir::lang_items::LangItem; +use rustc_hir::{ItemId, Target}; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::middle::debugger_visualizer::{DebuggerVisualizerFile, DebuggerVisualizerType}; use rustc_middle::middle::exported_symbols::{self, SymbolExportKind}; @@ -1003,21 +1003,35 @@ impl CrateInfo { // by the compiler, but that's ok because all this stuff is unstable anyway. let target = &tcx.sess.target; if !are_upstream_rust_objects_already_included(tcx.sess) { - let missing_weak_lang_items: FxIndexSet = info + let add_prefix = match (target.is_like_windows, target.arch.as_ref()) { + (true, "x86") => |name: String, _: SymbolExportKind| format!("_{name}"), + (true, "arm64ec") => { + // Only functions are decorated for arm64ec. 
+ |name: String, export_kind: SymbolExportKind| match export_kind { + SymbolExportKind::Text => format!("#{name}"), + _ => name, + } + } + _ => |name: String, _: SymbolExportKind| name, + }; + let missing_weak_lang_items: FxIndexSet<(Symbol, SymbolExportKind)> = info .used_crates .iter() .flat_map(|&cnum| tcx.missing_lang_items(cnum)) .filter(|l| l.is_weak()) .filter_map(|&l| { let name = l.link_name()?; - lang_items::required(tcx, l).then_some(name) + let export_kind = match l.target() { + Target::Fn => SymbolExportKind::Text, + Target::Static => SymbolExportKind::Data, + _ => bug!( + "Don't know what the export kind is for lang item of kind {:?}", + l.target() + ), + }; + lang_items::required(tcx, l).then_some((name, export_kind)) }) .collect(); - let prefix = match (target.is_like_windows, target.arch.as_ref()) { - (true, "x86") => "_", - (true, "arm64ec") => "#", - _ => "", - }; // This loop only adds new items to values of the hash map, so the order in which we // iterate over the values is not important. @@ -1030,35 +1044,18 @@ impl CrateInfo { .for_each(|(_, linked_symbols)| { let mut symbols = missing_weak_lang_items .iter() - .map(|item| { + .map(|(item, export_kind)| { ( - format!("{prefix}{}", mangle_internal_symbol(tcx, item.as_str())), - SymbolExportKind::Text, + add_prefix( + mangle_internal_symbol(tcx, item.as_str()), + *export_kind, + ), + *export_kind, ) }) .collect::>(); symbols.sort_unstable_by(|a, b| a.0.cmp(&b.0)); linked_symbols.extend(symbols); - if tcx.allocator_kind(()).is_some() { - // At least one crate needs a global allocator. This crate may be placed - // after the crate that defines it in the linker order, in which case some - // linkers return an error. By adding the global allocator shim methods to - // the linked_symbols list, linking the generated symbols.o will ensure that - // circular dependencies involving the global allocator don't lead to linker - // errors. 
- linked_symbols.extend(ALLOCATOR_METHODS.iter().map(|method| { - ( - format!( - "{prefix}{}", - mangle_internal_symbol( - tcx, - global_fn_name(method.name).as_str() - ) - ), - SymbolExportKind::Text, - ) - })); - } }); } diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs index 98742255063f..acdda32d58a3 100644 --- a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs +++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs @@ -4,13 +4,12 @@ use rustc_abi::ExternAbi; use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, DiffActivity, DiffMode}; use rustc_ast::{LitKind, MetaItem, MetaItemInner, attr}; use rustc_attr_data_structures::{ - AttributeKind, InlineAttr, InstructionSetAttr, OptimizeAttr, find_attr, + AttributeKind, InlineAttr, InstructionSetAttr, OptimizeAttr, ReprAttr, UsedBy, find_attr, }; -use rustc_data_structures::fx::FxHashMap; use rustc_hir::def::DefKind; use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId}; use rustc_hir::weak_lang_items::WEAK_LANG_ITEMS; -use rustc_hir::{self as hir, HirId, LangItem, lang_items}; +use rustc_hir::{self as hir, LangItem, lang_items}; use rustc_middle::middle::codegen_fn_attrs::{ CodegenFnAttrFlags, CodegenFnAttrs, PatchableFunctionEntry, }; @@ -18,13 +17,16 @@ use rustc_middle::mir::mono::Linkage; use rustc_middle::query::Providers; use rustc_middle::span_bug; use rustc_middle::ty::{self as ty, TyCtxt}; +use rustc_session::lint; use rustc_session::parse::feature_err; -use rustc_session::{Session, lint}; use rustc_span::{Ident, Span, sym}; use rustc_target::spec::SanitizerSet; use crate::errors; -use crate::target_features::{check_target_feature_trait_unsafe, from_target_feature_attr}; +use crate::errors::NoMangleNameless; +use crate::target_features::{ + check_target_feature_trait_unsafe, check_tied_features, from_target_feature_attr, +}; fn linkage_by_name(tcx: TyCtxt<'_>, def_id: LocalDefId, name: &str) -> Linkage { use rustc_middle::mir::mono::Linkage::*; 
@@ -85,8 +87,6 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { let mut link_ordinal_span = None; let mut no_sanitize_span = None; - let mut mixed_export_name_no_mangle_lint_state = MixedExportNameAndNoMangleState::default(); - let mut no_mangle_span = None; for attr in attrs.iter() { // In some cases, attribute are only valid on functions, but it's the `check_attr` @@ -94,23 +94,78 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { // In these cases, we bail from performing further checks that are only meaningful for // functions (such as calling `fn_sig`, which ICEs if given a non-function). We also // report a delayed bug, just in case `check_attr` isn't doing its job. - let fn_sig = || { + let fn_sig = |attr_span| { use DefKind::*; let def_kind = tcx.def_kind(did); if let Fn | AssocFn | Variant | Ctor(..) = def_kind { Some(tcx.fn_sig(did)) } else { - tcx.dcx().span_delayed_bug( - attr.span(), - "this attribute can only be applied to functions", - ); + tcx.dcx() + .span_delayed_bug(attr_span, "this attribute can only be applied to functions"); None } }; - if let hir::Attribute::Parsed(AttributeKind::Align { align, .. }) = attr { - codegen_fn_attrs.alignment = Some(*align); + if let hir::Attribute::Parsed(p) = attr { + match p { + AttributeKind::Repr(reprs) => { + codegen_fn_attrs.alignment = reprs + .iter() + .filter_map( + |(r, _)| if let ReprAttr::ReprAlign(x) = r { Some(*x) } else { None }, + ) + .max(); + } + AttributeKind::Cold(_) => codegen_fn_attrs.flags |= CodegenFnAttrFlags::COLD, + AttributeKind::ExportName { name, .. } => { + codegen_fn_attrs.export_name = Some(*name); + } + AttributeKind::Naked(_) => codegen_fn_attrs.flags |= CodegenFnAttrFlags::NAKED, + AttributeKind::Align { align, .. 
} => codegen_fn_attrs.alignment = Some(*align), + AttributeKind::NoMangle(attr_span) => { + if tcx.opt_item_name(did.to_def_id()).is_some() { + codegen_fn_attrs.flags |= CodegenFnAttrFlags::NO_MANGLE; + } else { + tcx.dcx().emit_err(NoMangleNameless { + span: *attr_span, + definition: format!( + "{} {}", + tcx.def_descr_article(did.to_def_id()), + tcx.def_descr(did.to_def_id()) + ), + }); + } + } + AttributeKind::TrackCaller(attr_span) => { + let is_closure = tcx.is_closure_like(did.to_def_id()); + + if !is_closure + && let Some(fn_sig) = fn_sig(*attr_span) + && fn_sig.skip_binder().abi() != ExternAbi::Rust + { + tcx.dcx().emit_err(errors::RequiresRustAbi { span: *attr_span }); + } + if is_closure + && !tcx.features().closure_track_caller() + && !attr_span.allows_unstable(sym::closure_track_caller) + { + feature_err( + &tcx.sess, + sym::closure_track_caller, + *attr_span, + "`#[track_caller]` on closures is currently unstable", + ) + .emit(); + } + codegen_fn_attrs.flags |= CodegenFnAttrFlags::TRACK_CALLER + } + AttributeKind::Used { used_by, .. } => match used_by { + UsedBy::Compiler => codegen_fn_attrs.flags |= CodegenFnAttrFlags::USED_COMPILER, + UsedBy::Linker => codegen_fn_attrs.flags |= CodegenFnAttrFlags::USED_LINKER, + }, + _ => {} + } } let Some(Ident { name, .. 
}) = attr.ident() else { @@ -118,7 +173,6 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { }; match name { - sym::cold => codegen_fn_attrs.flags |= CodegenFnAttrFlags::COLD, sym::rustc_allocator => codegen_fn_attrs.flags |= CodegenFnAttrFlags::ALLOCATOR, sym::ffi_pure => codegen_fn_attrs.flags |= CodegenFnAttrFlags::FFI_PURE, sym::ffi_const => codegen_fn_attrs.flags |= CodegenFnAttrFlags::FFI_CONST, @@ -128,105 +182,10 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { sym::rustc_allocator_zeroed => { codegen_fn_attrs.flags |= CodegenFnAttrFlags::ALLOCATOR_ZEROED } - sym::naked => codegen_fn_attrs.flags |= CodegenFnAttrFlags::NAKED, - sym::no_mangle => { - no_mangle_span = Some(attr.span()); - if tcx.opt_item_name(did.to_def_id()).is_some() { - codegen_fn_attrs.flags |= CodegenFnAttrFlags::NO_MANGLE; - mixed_export_name_no_mangle_lint_state.track_no_mangle( - attr.span(), - tcx.local_def_id_to_hir_id(did), - attr, - ); - } else { - tcx.dcx() - .struct_span_err( - attr.span(), - format!( - "`#[no_mangle]` cannot be used on {} {} as it has no name", - tcx.def_descr_article(did.to_def_id()), - tcx.def_descr(did.to_def_id()), - ), - ) - .emit(); - } - } sym::rustc_std_internal_symbol => { codegen_fn_attrs.flags |= CodegenFnAttrFlags::RUSTC_STD_INTERNAL_SYMBOL } - sym::used => { - let inner = attr.meta_item_list(); - match inner.as_deref() { - Some([item]) if item.has_name(sym::linker) => { - if !tcx.features().used_with_arg() { - feature_err( - &tcx.sess, - sym::used_with_arg, - attr.span(), - "`#[used(linker)]` is currently unstable", - ) - .emit(); - } - codegen_fn_attrs.flags |= CodegenFnAttrFlags::USED_LINKER; - } - Some([item]) if item.has_name(sym::compiler) => { - if !tcx.features().used_with_arg() { - feature_err( - &tcx.sess, - sym::used_with_arg, - attr.span(), - "`#[used(compiler)]` is currently unstable", - ) - .emit(); - } - codegen_fn_attrs.flags |= CodegenFnAttrFlags::USED_COMPILER; - } - Some(_) => { 
- tcx.dcx().emit_err(errors::ExpectedUsedSymbol { span: attr.span() }); - } - None => { - // Unconditionally using `llvm.used` causes issues in handling - // `.init_array` with the gold linker. Luckily gold has been - // deprecated with GCC 15 and rustc now warns about using gold. - codegen_fn_attrs.flags |= CodegenFnAttrFlags::USED_LINKER - } - } - } sym::thread_local => codegen_fn_attrs.flags |= CodegenFnAttrFlags::THREAD_LOCAL, - sym::track_caller => { - let is_closure = tcx.is_closure_like(did.to_def_id()); - - if !is_closure - && let Some(fn_sig) = fn_sig() - && fn_sig.skip_binder().abi() != ExternAbi::Rust - { - tcx.dcx().emit_err(errors::RequiresRustAbi { span: attr.span() }); - } - if is_closure - && !tcx.features().closure_track_caller() - && !attr.span().allows_unstable(sym::closure_track_caller) - { - feature_err( - &tcx.sess, - sym::closure_track_caller, - attr.span(), - "`#[track_caller]` on closures is currently unstable", - ) - .emit(); - } - codegen_fn_attrs.flags |= CodegenFnAttrFlags::TRACK_CALLER - } - sym::export_name => { - if let Some(s) = attr.value_str() { - if s.as_str().contains('\0') { - // `#[export_name = ...]` will be converted to a null-terminated string, - // so it may not contain any null characters. - tcx.dcx().emit_err(errors::NullOnExport { span: attr.span() }); - } - codegen_fn_attrs.export_name = Some(s); - mixed_export_name_no_mangle_lint_state.track_export_name(attr.span()); - } - } sym::target_feature => { let Some(sig) = tcx.hir_node_by_def_id(did).fn_sig() else { tcx.dcx().span_delayed_bug(attr.span(), "target_feature applied to non-fn"); @@ -437,7 +396,11 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { } } - mixed_export_name_no_mangle_lint_state.lint_if_mixed(tcx); + // Apply the minimum function alignment here. 
This ensures that a function's alignment is + // determined by the `-C` flags of the crate it is defined in, not the `-C` flags of the crate + // it happens to be codegen'd (or const-eval'd) in. + codegen_fn_attrs.alignment = + Ord::max(codegen_fn_attrs.alignment, tcx.sess.opts.unstable_opts.min_function_alignment); let inline_span; (codegen_fn_attrs.inline, inline_span) = if let Some((inline_attr, span)) = @@ -455,33 +418,8 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { codegen_fn_attrs.inline = InlineAttr::Never; } - codegen_fn_attrs.optimize = attrs.iter().fold(OptimizeAttr::Default, |ia, attr| { - if !attr.has_name(sym::optimize) { - return ia; - } - if attr.is_word() { - tcx.dcx().emit_err(errors::ExpectedOneArgumentOptimize { span: attr.span() }); - return ia; - } - let Some(ref items) = attr.meta_item_list() else { - return OptimizeAttr::Default; - }; - - let [item] = &items[..] else { - tcx.dcx().emit_err(errors::ExpectedOneArgumentOptimize { span: attr.span() }); - return OptimizeAttr::Default; - }; - if item.has_name(sym::size) { - OptimizeAttr::Size - } else if item.has_name(sym::speed) { - OptimizeAttr::Speed - } else if item.has_name(sym::none) { - OptimizeAttr::DoNotOptimize - } else { - tcx.dcx().emit_err(errors::InvalidArgumentOptimize { span: item.span() }); - OptimizeAttr::Default - } - }); + codegen_fn_attrs.optimize = + find_attr!(attrs, AttributeKind::Optimize(i, _) => *i).unwrap_or(OptimizeAttr::Default); // #73631: closures inherit `#[target_feature]` annotations // @@ -557,12 +495,15 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::RUSTC_STD_INTERNAL_SYMBOL) && codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) { + let no_mangle_span = + find_attr!(attrs, AttributeKind::NoMangle(no_mangle_span) => *no_mangle_span) + .unwrap_or_default(); let lang_item = lang_items::extract(attrs).map_or(None, |(name, _span)| 
LangItem::from_name(name)); let mut err = tcx .dcx() .struct_span_err( - no_mangle_span.unwrap_or_default(), + no_mangle_span, "`#[no_mangle]` cannot be used on internal language items", ) .with_note("Rustc requires this item to have a specific mangled name.") @@ -615,25 +556,6 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { codegen_fn_attrs } -/// Given a map from target_features to whether they are enabled or disabled, ensure only valid -/// combinations are allowed. -pub fn check_tied_features( - sess: &Session, - features: &FxHashMap<&str, bool>, -) -> Option<&'static [&'static str]> { - if !features.is_empty() { - for tied in sess.target.tied_target_features() { - // Tied features must be set to the same value, or not set at all - let mut tied_iter = tied.iter(); - let enabled = features.get(tied_iter.next().unwrap()); - if tied_iter.any(|f| enabled != features.get(f)) { - return Some(tied); - } - } - } - None -} - /// Checks if the provided DefId is a method in a trait impl for a trait which has track_caller /// applied to the method prototype. fn should_inherit_track_caller(tcx: TyCtxt<'_>, def_id: DefId) -> bool { @@ -702,49 +624,6 @@ fn check_link_name_xor_ordinal( } } -#[derive(Default)] -struct MixedExportNameAndNoMangleState<'a> { - export_name: Option, - hir_id: Option, - no_mangle: Option, - no_mangle_attr: Option<&'a hir::Attribute>, -} - -impl<'a> MixedExportNameAndNoMangleState<'a> { - fn track_export_name(&mut self, span: Span) { - self.export_name = Some(span); - } - - fn track_no_mangle(&mut self, span: Span, hir_id: HirId, attr_name: &'a hir::Attribute) { - self.no_mangle = Some(span); - self.hir_id = Some(hir_id); - self.no_mangle_attr = Some(attr_name); - } - - /// Emit diagnostics if the lint condition is met. 
- fn lint_if_mixed(self, tcx: TyCtxt<'_>) { - if let Self { - export_name: Some(export_name), - no_mangle: Some(no_mangle), - hir_id: Some(hir_id), - no_mangle_attr: Some(_), - } = self - { - tcx.emit_node_span_lint( - lint::builtin::UNUSED_ATTRIBUTES, - hir_id, - no_mangle, - errors::MixedExportNameAndNoMangle { - no_mangle, - no_mangle_attr: "#[unsafe(no_mangle)]".to_string(), - export_name, - removal_span: no_mangle, - }, - ); - } - } -} - /// We now check the #\[rustc_autodiff\] attributes which we generated from the #[autodiff(...)] /// macros. There are two forms. The pure one without args to mark primal functions (the functions /// being differentiated). The other form is #[rustc_autodiff(Mode, ActivityList)] on top of the diff --git a/compiler/rustc_codegen_ssa/src/errors.rs b/compiler/rustc_codegen_ssa/src/errors.rs index 5387b2a7f818..1950a35b364d 100644 --- a/compiler/rustc_codegen_ssa/src/errors.rs +++ b/compiler/rustc_codegen_ssa/src/errors.rs @@ -140,13 +140,6 @@ pub(crate) struct RequiresRustAbi { pub span: Span, } -#[derive(Diagnostic)] -#[diag(codegen_ssa_null_on_export, code = E0648)] -pub(crate) struct NullOnExport { - #[primary_span] - pub span: Span, -} - #[derive(Diagnostic)] #[diag(codegen_ssa_unsupported_instruction_set, code = E0779)] pub(crate) struct UnsupportedInstructionSet { @@ -208,20 +201,6 @@ pub(crate) struct OutOfRangeInteger { pub span: Span, } -#[derive(Diagnostic)] -#[diag(codegen_ssa_expected_one_argument, code = E0722)] -pub(crate) struct ExpectedOneArgumentOptimize { - #[primary_span] - pub span: Span, -} - -#[derive(Diagnostic)] -#[diag(codegen_ssa_invalid_argument, code = E0722)] -pub(crate) struct InvalidArgumentOptimize { - #[primary_span] - pub span: Span, -} - #[derive(Diagnostic)] #[diag(codegen_ssa_copy_path_buf)] pub(crate) struct CopyPathBuf { @@ -747,13 +726,6 @@ pub struct UnknownArchiveKind<'a> { pub kind: &'a str, } -#[derive(Diagnostic)] -#[diag(codegen_ssa_expected_used_symbol)] -pub(crate) struct 
ExpectedUsedSymbol { - #[primary_span] - pub span: Span, -} - #[derive(Diagnostic)] #[diag(codegen_ssa_multiple_main_functions)] #[help] @@ -1217,46 +1189,10 @@ pub(crate) struct ErrorCreatingImportLibrary<'a> { pub error: String, } -pub struct TargetFeatureDisableOrEnable<'a> { - pub features: &'a [&'a str], - pub span: Option, - pub missing_features: Option, -} - -#[derive(Subdiagnostic)] -#[help(codegen_ssa_missing_features)] -pub struct MissingFeatures; - -impl Diagnostic<'_, G> for TargetFeatureDisableOrEnable<'_> { - fn into_diag(self, dcx: DiagCtxtHandle<'_>, level: Level) -> Diag<'_, G> { - let mut diag = Diag::new(dcx, level, fluent::codegen_ssa_target_feature_disable_or_enable); - if let Some(span) = self.span { - diag.span(span); - }; - if let Some(missing_features) = self.missing_features { - diag.subdiagnostic(missing_features); - } - diag.arg("features", self.features.join(", ")); - diag - } -} - #[derive(Diagnostic)] #[diag(codegen_ssa_aix_strip_not_used)] pub(crate) struct AixStripNotUsed; -#[derive(LintDiagnostic)] -#[diag(codegen_ssa_mixed_export_name_and_no_mangle)] -pub(crate) struct MixedExportNameAndNoMangle { - #[label] - pub no_mangle: Span, - pub no_mangle_attr: String, - #[note] - pub export_name: Span, - #[suggestion(style = "verbose", code = "", applicability = "machine-applicable")] - pub removal_span: Span, -} - #[derive(Diagnostic, Debug)] pub(crate) enum XcrunError { #[diag(codegen_ssa_xcrun_failed_invoking)] @@ -1283,3 +1219,76 @@ pub(crate) struct XcrunSdkPathWarning { #[derive(LintDiagnostic)] #[diag(codegen_ssa_aarch64_softfloat_neon)] pub(crate) struct Aarch64SoftfloatNeon; + +#[derive(Diagnostic)] +#[diag(codegen_ssa_unknown_ctarget_feature_prefix)] +#[note] +pub(crate) struct UnknownCTargetFeaturePrefix<'a> { + pub feature: &'a str, +} + +#[derive(Subdiagnostic)] +pub(crate) enum PossibleFeature<'a> { + #[help(codegen_ssa_possible_feature)] + Some { rust_feature: &'a str }, + 
#[help(codegen_ssa_consider_filing_feature_request)] + None, +} + +#[derive(Diagnostic)] +#[diag(codegen_ssa_unknown_ctarget_feature)] +#[note] +pub(crate) struct UnknownCTargetFeature<'a> { + pub feature: &'a str, + #[subdiagnostic] + pub rust_feature: PossibleFeature<'a>, +} + +#[derive(Diagnostic)] +#[diag(codegen_ssa_unstable_ctarget_feature)] +#[note] +pub(crate) struct UnstableCTargetFeature<'a> { + pub feature: &'a str, +} + +#[derive(Diagnostic)] +#[diag(codegen_ssa_forbidden_ctarget_feature)] +#[note] +#[note(codegen_ssa_forbidden_ctarget_feature_issue)] +pub(crate) struct ForbiddenCTargetFeature<'a> { + pub feature: &'a str, + pub enabled: &'a str, + pub reason: &'a str, +} + +pub struct TargetFeatureDisableOrEnable<'a> { + pub features: &'a [&'a str], + pub span: Option, + pub missing_features: Option, +} + +#[derive(Subdiagnostic)] +#[help(codegen_ssa_missing_features)] +pub struct MissingFeatures; + +impl Diagnostic<'_, G> for TargetFeatureDisableOrEnable<'_> { + fn into_diag(self, dcx: DiagCtxtHandle<'_>, level: Level) -> Diag<'_, G> { + let mut diag = Diag::new(dcx, level, fluent::codegen_ssa_target_feature_disable_or_enable); + if let Some(span) = self.span { + diag.span(span); + }; + if let Some(missing_features) = self.missing_features { + diag.subdiagnostic(missing_features); + } + diag.arg("features", self.features.join(", ")); + diag + } +} + +#[derive(Diagnostic)] +#[diag(codegen_ssa_no_mangle_nameless)] +pub(crate) struct NoMangleNameless { + #[primary_span] + pub span: Span, + pub definition: String, +} diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 523c9f2ad1ce..23ed387a3ff9 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -218,7 +218,7 @@ pub struct CrateInfo { pub target_cpu: String, pub target_features: Vec, pub crate_types: Vec, - pub exported_symbols: UnordMap>, + pub exported_symbols: UnordMap>, pub linked_symbols: FxIndexMap>, pub 
local_crate_name: Symbol, pub compiler_builtins: Option, diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs index 9f66457a7400..9da4b8cc8fd4 100644 --- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs +++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs @@ -131,12 +131,8 @@ fn prefix_and_suffix<'tcx>( let attrs = tcx.codegen_fn_attrs(instance.def_id()); let link_section = attrs.link_section.map(|symbol| symbol.as_str().to_string()); - // function alignment can be set globally with the `-Zmin-function-alignment=` flag; - // the alignment from a `#[repr(align())]` is used if it specifies a higher alignment. - // if no alignment is specified, an alignment of 4 bytes is used. - let min_function_alignment = tcx.sess.opts.unstable_opts.min_function_alignment; - let align_bytes = - Ord::max(min_function_alignment, attrs.alignment).map(|a| a.bytes()).unwrap_or(4); + // If no alignment is specified, an alignment of 4 bytes is used. + let align_bytes = attrs.alignment.map(|a| a.bytes()).unwrap_or(4); // In particular, `.arm` can also be written `.code 32` and `.thumb` as `.code 16`. let (arch_prefix, arch_suffix) = if is_arm { diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs index e1d8b7546cf4..db5ac6a514fb 100644 --- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs +++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs @@ -1123,7 +1123,7 @@ pub(super) fn transmute_immediate<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( // While optimizations will remove no-op transmutes, they might still be // there in debug or things that aren't no-op in MIR because they change // the Rust type but not the underlying layout/niche. 
- if from_scalar == to_scalar && from_backend_ty == to_backend_ty { + if from_scalar == to_scalar { return imm; } @@ -1142,7 +1142,13 @@ pub(super) fn transmute_immediate<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( assume_scalar_range(bx, imm, from_scalar, from_backend_ty); imm = match (from_scalar.primitive(), to_scalar.primitive()) { - (Int(..) | Float(_), Int(..) | Float(_)) => bx.bitcast(imm, to_backend_ty), + (Int(..) | Float(_), Int(..) | Float(_)) => { + if from_backend_ty == to_backend_ty { + imm + } else { + bx.bitcast(imm, to_backend_ty) + } + } (Pointer(..), Pointer(..)) => bx.pointercast(imm, to_backend_ty), (Int(..), Pointer(..)) => bx.ptradd(bx.const_null(bx.type_ptr()), imm), (Pointer(..), Int(..)) => { diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index 640d197c219a..67ac619091be 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -1,5 +1,5 @@ use rustc_attr_data_structures::InstructionSetAttr; -use rustc_data_structures::fx::FxIndexSet; +use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet}; use rustc_data_structures::unord::{UnordMap, UnordSet}; use rustc_errors::Applicability; use rustc_hir as hir; @@ -8,11 +8,12 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId}; use rustc_middle::middle::codegen_fn_attrs::TargetFeature; use rustc_middle::query::Providers; use rustc_middle::ty::TyCtxt; -use rustc_session::features::StabilityExt; +use rustc_session::Session; use rustc_session::lint::builtin::AARCH64_SOFTFLOAT_NEON; use rustc_session::parse::feature_err; use rustc_span::{Span, Symbol, sym}; -use rustc_target::target_features::{self, Stability}; +use rustc_target::target_features::{self, RUSTC_SPECIFIC_FEATURES, Stability}; +use smallvec::SmallVec; use crate::errors; @@ -67,7 +68,7 @@ pub(crate) fn from_target_feature_attr( // Only allow target features whose feature gates have been enabled // 
and which are permitted to be toggled. - if let Err(reason) = stability.is_toggle_permitted(tcx.sess) { + if let Err(reason) = stability.toggle_allowed() { tcx.dcx().emit_err(errors::ForbiddenTargetFeatureAttr { span: item.span(), feature, @@ -88,7 +89,7 @@ pub(crate) fn from_target_feature_attr( let feature_sym = Symbol::intern(feature); for &name in tcx.implied_target_features(feature_sym) { // But ensure the ABI does not forbid enabling this. - // Here we do assume that LLVM doesn't add even more implied features + // Here we do assume that the backend doesn't add even more implied features // we don't know about, at least no features that would have ABI effects! // We skip this logic in rustdoc, where we want to allow all target features of // all targets, so we can't check their ABI compatibility and anyway we are not @@ -156,6 +157,276 @@ pub(crate) fn check_target_feature_trait_unsafe(tcx: TyCtxt<'_>, id: LocalDefId, } } +/// Parse the value of `-Ctarget-feature`, also expanding implied features, +/// and call the closure for each (expanded) Rust feature. If the list contains +/// a syntactically invalid item (not starting with `+`/`-`), the error callback is invoked. +fn parse_rust_feature_flag<'a>( + sess: &'a Session, + err_callback: impl Fn(&'a str), + mut callback: impl FnMut( + /* base_feature */ &'a str, + /* with_implied */ FxHashSet<&'a str>, + /* enable */ bool, + ), +) { + // A cache for the backwards implication map. + let mut inverse_implied_features: Option>> = None; + + for feature in sess.opts.cg.target_feature.split(',') { + if let Some(base_feature) = feature.strip_prefix('+') { + // Skip features that are not target features, but rustc features. + if RUSTC_SPECIFIC_FEATURES.contains(&base_feature) { + return; + } + + callback(base_feature, sess.target.implied_target_features(base_feature), true) + } else if let Some(base_feature) = feature.strip_prefix('-') { + // Skip features that are not target features, but rustc features. 
+ if RUSTC_SPECIFIC_FEATURES.contains(&base_feature) { + return; + } + + // If `f1` implies `f2`, then `!f2` implies `!f1` -- this is standard logical + // contraposition. So we have to find all the reverse implications of `base_feature` and + // disable them, too. + + let inverse_implied_features = inverse_implied_features.get_or_insert_with(|| { + let mut set: FxHashMap<&str, FxHashSet<&str>> = FxHashMap::default(); + for (f, _, is) in sess.target.rust_target_features() { + for i in is.iter() { + set.entry(i).or_default().insert(f); + } + } + set + }); + + // Inverse implied target features have their own inverse implied target features, so we + // traverse the map until there are no more features to add. + let mut features = FxHashSet::default(); + let mut new_features = vec![base_feature]; + while let Some(new_feature) = new_features.pop() { + if features.insert(new_feature) { + if let Some(implied_features) = inverse_implied_features.get(&new_feature) { + new_features.extend(implied_features) + } + } + } + + callback(base_feature, features, false) + } else if !feature.is_empty() { + err_callback(feature) + } + } +} + +/// Utility function for a codegen backend to compute `cfg(target_feature)`, or more specifically, +/// to populate `sess.unstable_target_features` and `sess.target_features` (these are the first and +/// 2nd component of the return value, respectively). +/// +/// `target_base_has_feature` should check whether the given feature (a Rust feature name!) is +/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`. +/// +/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled elsewhere. +pub fn cfg_target_feature( + sess: &Session, + mut target_base_has_feature: impl FnMut(&str) -> bool, +) -> (Vec, Vec) { + // Compute which of the known target features are enabled in the 'base' target machine. We only + // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now. 
+ let mut features: UnordSet = sess + .target + .rust_target_features() + .iter() + .filter(|(feature, _, _)| target_base_has_feature(feature)) + .map(|(feature, _, _)| Symbol::intern(feature)) + .collect(); + + // Add enabled and remove disabled features. + parse_rust_feature_flag( + sess, + /* err_callback */ + |_| { + // Errors are already emitted in `flag_to_backend_features`; avoid duplicates. + }, + |_base_feature, new_features, enabled| { + // Iteration order is irrelevant since this only influences an `UnordSet`. + #[allow(rustc::potential_query_instability)] + if enabled { + features.extend(new_features.into_iter().map(|f| Symbol::intern(f))); + } else { + // Remove `new_features` from `features`. + for new in new_features { + features.remove(&Symbol::intern(new)); + } + } + }, + ); + + // Filter enabled features based on feature gates. + let f = |allow_unstable| { + sess.target + .rust_target_features() + .iter() + .filter_map(|(feature, gate, _)| { + // The `allow_unstable` set is used by rustc internally to determine which target + // features are truly available, so we want to return even perma-unstable + // "forbidden" features. + if allow_unstable + || (gate.in_cfg() + && (sess.is_nightly_build() || gate.requires_nightly().is_none())) + { + Some(Symbol::intern(feature)) + } else { + None + } + }) + .filter(|feature| features.contains(&feature)) + .collect() + }; + + (f(true), f(false)) +} + +/// Given a map from target_features to whether they are enabled or disabled, ensure only valid +/// combinations are allowed. 
+pub fn check_tied_features( + sess: &Session, + features: &FxHashMap<&str, bool>, +) -> Option<&'static [&'static str]> { + if !features.is_empty() { + for tied in sess.target.tied_target_features() { + // Tied features must be set to the same value, or not set at all + let mut tied_iter = tied.iter(); + let enabled = features.get(tied_iter.next().unwrap()); + if tied_iter.any(|f| enabled != features.get(f)) { + return Some(tied); + } + } + } + None +} + +/// Translates the `-Ctarget-feature` flag into a backend target feature list. +/// +/// `to_backend_features` converts a Rust feature name into a list of backend feature names; this is +/// used for diagnostic purposes only. +/// +/// `extend_backend_features` extends the set of backend features (assumed to be in mutable state +/// accessible by that closure) to enable/disable the given Rust feature name. +pub fn flag_to_backend_features<'a, const N: usize>( + sess: &'a Session, + diagnostics: bool, + to_backend_features: impl Fn(&'a str) -> SmallVec<[&'a str; N]>, + mut extend_backend_features: impl FnMut(&'a str, /* enable */ bool), +) { + let known_features = sess.target.rust_target_features(); + + // Compute implied features + let mut rust_features = vec![]; + parse_rust_feature_flag( + sess, + /* err_callback */ + |feature| { + if diagnostics { + sess.dcx().emit_warn(errors::UnknownCTargetFeaturePrefix { feature }); + } + }, + |base_feature, new_features, enable| { + rust_features.extend( + UnordSet::from(new_features).to_sorted_stable_ord().iter().map(|&&s| (enable, s)), + ); + // Check feature validity. + if diagnostics { + let feature_state = known_features.iter().find(|&&(v, _, _)| v == base_feature); + match feature_state { + None => { + // This is definitely not a valid Rust feature name. Maybe it is a backend + // feature name? If so, give a better error message. 
+ let rust_feature = + known_features.iter().find_map(|&(rust_feature, _, _)| { + let backend_features = to_backend_features(rust_feature); + if backend_features.contains(&base_feature) + && !backend_features.contains(&rust_feature) + { + Some(rust_feature) + } else { + None + } + }); + let unknown_feature = if let Some(rust_feature) = rust_feature { + errors::UnknownCTargetFeature { + feature: base_feature, + rust_feature: errors::PossibleFeature::Some { rust_feature }, + } + } else { + errors::UnknownCTargetFeature { + feature: base_feature, + rust_feature: errors::PossibleFeature::None, + } + }; + sess.dcx().emit_warn(unknown_feature); + } + Some((_, stability, _)) => { + if let Err(reason) = stability.toggle_allowed() { + sess.dcx().emit_warn(errors::ForbiddenCTargetFeature { + feature: base_feature, + enabled: if enable { "enabled" } else { "disabled" }, + reason, + }); + } else if stability.requires_nightly().is_some() { + // An unstable feature. Warn about using it. It makes little sense + // to hard-error here since we just warn about fully unknown + // features above. + sess.dcx().emit_warn(errors::UnstableCTargetFeature { + feature: base_feature, + }); + } + } + } + } + }, + ); + + if diagnostics { + // FIXME(nagisa): figure out how to not allocate a full hashmap here. + if let Some(f) = check_tied_features( + sess, + &FxHashMap::from_iter(rust_features.iter().map(|&(enable, feature)| (feature, enable))), + ) { + sess.dcx().emit_err(errors::TargetFeatureDisableOrEnable { + features: f, + span: None, + missing_features: None, + }); + } + } + + // Add this to the backend features. + for (enable, feature) in rust_features { + extend_backend_features(feature, enable); + } +} + +/// Computes the backend target features to be added to account for retpoline flags. +/// Used by both LLVM and GCC since their target features are, conveniently, the same. 
+pub fn retpoline_features_by_flags(sess: &Session, features: &mut Vec) { + // -Zretpoline without -Zretpoline-external-thunk enables + // retpoline-indirect-branches and retpoline-indirect-calls target features + let unstable_opts = &sess.opts.unstable_opts; + if unstable_opts.retpoline && !unstable_opts.retpoline_external_thunk { + features.push("+retpoline-indirect-branches".into()); + features.push("+retpoline-indirect-calls".into()); + } + // -Zretpoline-external-thunk (maybe, with -Zretpoline too) enables + // retpoline-external-thunk, retpoline-indirect-branches and + // retpoline-indirect-calls target features + if unstable_opts.retpoline_external_thunk { + features.push("+retpoline-external-thunk".into()); + features.push("+retpoline-indirect-branches".into()); + features.push("+retpoline-indirect-calls".into()); + } +} + pub(crate) fn provide(providers: &mut Providers) { *providers = Providers { rust_target_features: |tcx, cnum| { @@ -182,7 +453,8 @@ pub(crate) fn provide(providers: &mut Providers) { Stability::Unstable { .. } | Stability::Forbidden { .. }, ) | (Stability::Forbidden { .. }, Stability::Forbidden { .. }) => { - // The stability in the entry is at least as good as the new one, just keep it. + // The stability in the entry is at least as good as the new + // one, just keep it. } _ => { // Overwrite stabilite. 
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs index f35f551d5906..d19de6f5d267 100644 --- a/compiler/rustc_codegen_ssa/src/traits/builder.rs +++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs @@ -516,7 +516,7 @@ pub trait BuilderMethods<'a, 'tcx>: // These are used by everyone except msvc fn cleanup_landing_pad(&mut self, pers_fn: Self::Function) -> (Self::Value, Self::Value); - fn filter_landing_pad(&mut self, pers_fn: Self::Function) -> (Self::Value, Self::Value); + fn filter_landing_pad(&mut self, pers_fn: Self::Function); fn resume(&mut self, exn0: Self::Value, exn1: Self::Value); // These are used only by msvc diff --git a/compiler/rustc_const_eval/messages.ftl b/compiler/rustc_const_eval/messages.ftl index 7f9abe8aa8e7..2a2c3e6aee2a 100644 --- a/compiler/rustc_const_eval/messages.ftl +++ b/compiler/rustc_const_eval/messages.ftl @@ -124,16 +124,12 @@ const_eval_incompatible_return_types = const_eval_incompatible_types = calling a function with argument of type {$callee_ty} passing data of type {$caller_ty} -const_eval_interior_mutable_ref_escaping = - {const_eval_const_context}s cannot refer to interior mutable data - .label = this borrow of an interior mutable value may end up in the final value - .help = to fix this, the value can be extracted to a separate `static` item and then referenced - .teach_note = - References that escape into the final value of a constant or static must be immutable. - This is to avoid accidentally creating shared mutable state. - - - If you really want global mutable state, try using an interior mutable `static` or a `static mut`. 
+const_eval_interior_mutable_borrow_escaping = + interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed + .label = this borrow of an interior mutable value refers to such a temporary + .note = Temporaries in constants and statics can have their lifetime extended until the end of the program + .note2 = To avoid accidentally creating global mutable state, such temporaries must be immutable + .help = If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` const_eval_intern_kind = {$kind -> [static] static @@ -207,34 +203,21 @@ const_eval_long_running = .label = the const evaluator is currently interpreting this expression .help = the constant being evaluated -const_eval_max_num_nodes_in_const = maximum number of nodes exceeded in constant {$global_const_id} - const_eval_memory_exhausted = tried to allocate more memory than available to compiler const_eval_modified_global = modifying a static's initial value from another static's initializer +const_eval_mutable_borrow_escaping = + mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed + .label = this mutable borrow refers to such a temporary + .note = Temporaries in constants and statics can have their lifetime extended until the end of the program + .note2 = To avoid accidentally creating global mutable state, such temporaries must be immutable + .help = If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` + const_eval_mutable_ptr_in_final = encountered mutable pointer in final value of {const_eval_intern_kind} -const_eval_mutable_raw_escaping = - raw mutable pointers are not allowed in the final value of {const_eval_const_context}s - .teach_note = - Pointers that escape into the final value of a constant or static must be immutable. 
- This is to avoid accidentally creating shared mutable state. - - - If you really want global mutable state, try using an interior mutable `static` or a `static mut`. - -const_eval_mutable_ref_escaping = - mutable references are not allowed in the final value of {const_eval_const_context}s - .teach_note = - References that escape into the final value of a constant or static must be immutable. - This is to avoid accidentally creating shared mutable state. - - - If you really want global mutable state, try using an interior mutable `static` or a `static mut`. - const_eval_nested_static_in_thread_local = #[thread_local] does not support implicit nested statics, please create explicit static items and refer to them instead const_eval_non_const_await = @@ -361,7 +344,7 @@ const_eval_realloc_or_alloc_with_offset = *[other] {""} } {$ptr} which does not point to the beginning of an object -const_eval_recursive_static = encountered static that tried to initialize itself with itself +const_eval_recursive_static = encountered static that tried to access itself during initialization const_eval_remainder_by_zero = calculating the remainder with a divisor of zero @@ -437,9 +420,6 @@ const_eval_unwind_past_top = ## (We'd love to sort this differently to make that more clear but tidy won't let us...) 
const_eval_validation_box_to_uninhabited = {$front_matter}: encountered a box pointing to uninhabited type {$ty} -const_eval_validation_const_ref_to_extern = {$front_matter}: encountered reference to `extern` static in `const` -const_eval_validation_const_ref_to_mutable = {$front_matter}: encountered reference to mutable memory in `const` - const_eval_validation_dangling_box_no_provenance = {$front_matter}: encountered a dangling box ({$pointer} has no provenance) const_eval_validation_dangling_box_out_of_bounds = {$front_matter}: encountered a dangling box (going beyond the bounds of its allocation) const_eval_validation_dangling_box_use_after_free = {$front_matter}: encountered a dangling box (use-after-free) @@ -479,6 +459,7 @@ const_eval_validation_invalid_ref_meta = {$front_matter}: encountered invalid re const_eval_validation_invalid_ref_slice_meta = {$front_matter}: encountered invalid reference metadata: slice is bigger than largest supported object const_eval_validation_invalid_vtable_ptr = {$front_matter}: encountered {$value}, but expected a vtable pointer const_eval_validation_invalid_vtable_trait = {$front_matter}: wrong trait in wide pointer vtable: expected `{$expected_dyn_type}`, but encountered `{$vtable_dyn_type}` +const_eval_validation_mutable_ref_in_const = {$front_matter}: encountered mutable reference in `const` value const_eval_validation_mutable_ref_to_immutable = {$front_matter}: encountered mutable reference or box pointing to read-only memory const_eval_validation_never_val = {$front_matter}: encountered a value of the never type `!` const_eval_validation_null_box = {$front_matter}: encountered a null box diff --git a/compiler/rustc_const_eval/src/check_consts/check.rs b/compiler/rustc_const_eval/src/check_consts/check.rs index 4f252f3ccd48..c1d91f98957e 100644 --- a/compiler/rustc_const_eval/src/check_consts/check.rs +++ b/compiler/rustc_const_eval/src/check_consts/check.rs @@ -421,7 +421,7 @@ impl<'mir, 'tcx> Checker<'mir, 'tcx> { 
Some(ConstConditionsHold::Yes) } else { tcx.dcx() - .span_delayed_bug(call_span, "this should have reported a ~const error in HIR"); + .span_delayed_bug(call_span, "this should have reported a [const] error in HIR"); Some(ConstConditionsHold::No) } } @@ -463,12 +463,6 @@ impl<'mir, 'tcx> Checker<'mir, 'tcx> { ); } - fn crate_inject_span(&self) -> Option { - self.tcx.hir_crate_items(()).definitions().next().and_then(|id| { - self.tcx.crate_level_attribute_injection_span(self.tcx.local_def_id_to_hir_id(id)) - }) - } - /// Check the const stability of the given item (fn or trait). fn check_callee_stability(&mut self, def_id: DefId) { match self.tcx.lookup_const_stability(def_id) { @@ -543,7 +537,6 @@ impl<'mir, 'tcx> Checker<'mir, 'tcx> { feature, feature_enabled, safe_to_expose_on_stable: callee_safe_to_expose_on_stable, - suggestion_span: self.crate_inject_span(), is_function_call: self.tcx.def_kind(def_id) != DefKind::Trait, }); } @@ -602,11 +595,7 @@ impl<'tcx> Visitor<'tcx> for Checker<'_, 'tcx> { self.const_kind() == hir::ConstContext::Static(hir::Mutability::Mut); if !is_allowed && self.place_may_escape(place) { - self.check_op(ops::EscapingMutBorrow(if matches!(rvalue, Rvalue::Ref(..)) { - hir::BorrowKind::Ref - } else { - hir::BorrowKind::Raw - })); + self.check_op(ops::EscapingMutBorrow); } } @@ -919,7 +908,6 @@ impl<'tcx> Visitor<'tcx> for Checker<'_, 'tcx> { name: intrinsic.name, feature, const_stable_indirect: is_const_stable, - suggestion: self.crate_inject_span(), }); } Some(attrs::ConstStability { diff --git a/compiler/rustc_const_eval/src/check_consts/ops.rs b/compiler/rustc_const_eval/src/check_consts/ops.rs index 9c30dbff99eb..b2e0577cc824 100644 --- a/compiler/rustc_const_eval/src/check_consts/ops.rs +++ b/compiler/rustc_const_eval/src/check_consts/ops.rs @@ -1,8 +1,8 @@ //! Concrete error types for all operations which may be invalid in a certain const context. 
use hir::{ConstContext, LangItem}; +use rustc_errors::Diag; use rustc_errors::codes::*; -use rustc_errors::{Applicability, Diag}; use rustc_hir as hir; use rustc_hir::def_id::DefId; use rustc_infer::infer::TyCtxtInferExt; @@ -149,7 +149,7 @@ impl<'tcx> NonConstOp<'tcx> for FnCallNonConst<'tcx> { debug!(?param_ty); if let Some(generics) = tcx.hir_node_by_def_id(caller).generics() { let constraint = with_no_trimmed_paths!(format!( - "~const {}", + "[const] {}", trait_ref.print_trait_sugared(), )); suggest_constraining_type_param( @@ -384,7 +384,6 @@ pub(crate) struct CallUnstable { /// expose on stable. pub feature_enabled: bool, pub safe_to_expose_on_stable: bool, - pub suggestion_span: Option, /// true if `def_id` is the function we are calling, false if `def_id` is an unstable trait. pub is_function_call: bool, } @@ -412,20 +411,7 @@ impl<'tcx> NonConstOp<'tcx> for CallUnstable { def_path: ccx.tcx.def_path_str(self.def_id), }) }; - // FIXME: make this translatable - let msg = format!("add `#![feature({})]` to the crate attributes to enable", self.feature); - #[allow(rustc::untranslatable_diagnostic)] - if let Some(span) = self.suggestion_span { - err.span_suggestion_verbose( - span, - msg, - format!("#![feature({})]\n", self.feature), - Applicability::MachineApplicable, - ); - } else { - err.help(msg); - } - + ccx.tcx.disabled_nightly_features(&mut err, [(String::new(), self.feature)]); err } } @@ -452,7 +438,6 @@ pub(crate) struct IntrinsicUnstable { pub name: Symbol, pub feature: Symbol, pub const_stable_indirect: bool, - pub suggestion: Option, } impl<'tcx> NonConstOp<'tcx> for IntrinsicUnstable { @@ -472,8 +457,7 @@ impl<'tcx> NonConstOp<'tcx> for IntrinsicUnstable { span, name: self.name, feature: self.feature, - suggestion: self.suggestion, - help: self.suggestion.is_none(), + suggestion: ccx.tcx.crate_level_attribute_injection_span(), }) } } @@ -583,12 +567,7 @@ impl<'tcx> NonConstOp<'tcx> for EscapingCellBorrow { DiagImportance::Secondary } fn 
build_error(&self, ccx: &ConstCx<'_, 'tcx>, span: Span) -> Diag<'tcx> { - ccx.dcx().create_err(errors::InteriorMutableRefEscaping { - span, - opt_help: matches!(ccx.const_kind(), hir::ConstContext::Static(_)), - kind: ccx.const_kind(), - teach: ccx.tcx.sess.teach(E0492), - }) + ccx.dcx().create_err(errors::InteriorMutableBorrowEscaping { span, kind: ccx.const_kind() }) } } @@ -596,7 +575,7 @@ impl<'tcx> NonConstOp<'tcx> for EscapingCellBorrow { /// This op is for `&mut` borrows in the trailing expression of a constant /// which uses the "enclosing scopes rule" to leak its locals into anonymous /// static or const items. -pub(crate) struct EscapingMutBorrow(pub hir::BorrowKind); +pub(crate) struct EscapingMutBorrow; impl<'tcx> NonConstOp<'tcx> for EscapingMutBorrow { fn status_in_item(&self, _ccx: &ConstCx<'_, 'tcx>) -> Status { @@ -610,18 +589,7 @@ impl<'tcx> NonConstOp<'tcx> for EscapingMutBorrow { } fn build_error(&self, ccx: &ConstCx<'_, 'tcx>, span: Span) -> Diag<'tcx> { - match self.0 { - hir::BorrowKind::Raw => ccx.tcx.dcx().create_err(errors::MutableRawEscaping { - span, - kind: ccx.const_kind(), - teach: ccx.tcx.sess.teach(E0764), - }), - hir::BorrowKind::Ref => ccx.dcx().create_err(errors::MutableRefEscaping { - span, - kind: ccx.const_kind(), - teach: ccx.tcx.sess.teach(E0764), - }), - } + ccx.dcx().create_err(errors::MutableBorrowEscaping { span, kind: ccx.const_kind() }) } } diff --git a/compiler/rustc_const_eval/src/check_consts/qualifs.rs b/compiler/rustc_const_eval/src/check_consts/qualifs.rs index c1a37ab6a83f..166491b47a1f 100644 --- a/compiler/rustc_const_eval/src/check_consts/qualifs.rs +++ b/compiler/rustc_const_eval/src/check_consts/qualifs.rs @@ -170,14 +170,14 @@ impl Qualif for NeedsNonConstDrop { #[instrument(level = "trace", skip(cx), ret)] fn in_any_value_of_ty<'tcx>(cx: &ConstCx<'_, 'tcx>, ty: Ty<'tcx>) -> bool { - // If this doesn't need drop at all, then don't select `~const Destruct`. 
+ // If this doesn't need drop at all, then don't select `[const] Destruct`. if !ty.needs_drop(cx.tcx, cx.typing_env) { return false; } - // We check that the type is `~const Destruct` since that will verify that - // the type is both `~const Drop` (if a drop impl exists for the adt), *and* - // that the components of this type are also `~const Destruct`. This + // We check that the type is `[const] Destruct` since that will verify that + // the type is both `[const] Drop` (if a drop impl exists for the adt), *and* + // that the components of this type are also `[const] Destruct`. This // amounts to verifying that there are no values in this ADT that may have // a non-const drop. let destruct_def_id = cx.tcx.require_lang_item(LangItem::Destruct, cx.body.span); @@ -203,9 +203,9 @@ impl Qualif for NeedsNonConstDrop { fn is_structural_in_adt_value<'tcx>(cx: &ConstCx<'_, 'tcx>, adt: AdtDef<'tcx>) -> bool { // As soon as an ADT has a destructor, then the drop becomes non-structural // in its value since: - // 1. The destructor may have `~const` bounds which are not present on the type. + // 1. The destructor may have `[const]` bounds which are not present on the type. // Someone needs to check that those are satisfied. - // While this could be instead satisfied by checking that the `~const Drop` + // While this could be instead satisfied by checking that the `[const] Drop` // impl holds (i.e. replicating part of the `in_any_value_of_ty` logic above), // even in this case, we have another problem, which is, // 2. 
The destructor may *modify* the operand being dropped, so even if we diff --git a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs index be8401915471..569a07c3a011 100644 --- a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs +++ b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs @@ -7,7 +7,7 @@ use rustc_hir::def::DefKind; use rustc_middle::mir::interpret::{AllocId, ErrorHandled, InterpErrorInfo, ReportedErrorInfo}; use rustc_middle::mir::{self, ConstAlloc, ConstValue}; use rustc_middle::query::TyCtxtAt; -use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf}; +use rustc_middle::ty::layout::HasTypingEnv; use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, throw_inval}; diff --git a/compiler/rustc_const_eval/src/const_eval/machine.rs b/compiler/rustc_const_eval/src/const_eval/machine.rs index a68dcf299886..2ec3f8432c77 100644 --- a/compiler/rustc_const_eval/src/const_eval/machine.rs +++ b/compiler/rustc_const_eval/src/const_eval/machine.rs @@ -62,7 +62,7 @@ pub struct CompileTimeMachine<'tcx> { /// If `Some`, we are evaluating the initializer of the static with the given `LocalDefId`, /// storing the result in the given `AllocId`. - /// Used to prevent reads from a static's base allocation, as that may allow for self-initialization loops. + /// Used to prevent accesses to a static's base allocation, as that may allow for self-initialization loops. pub(crate) static_root_ids: Option<(AllocId, LocalDefId)>, /// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes). 
@@ -705,19 +705,27 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> { interp_ok(()) } - fn before_alloc_read(ecx: &InterpCx<'tcx, Self>, alloc_id: AllocId) -> InterpResult<'tcx> { + fn before_alloc_access( + tcx: TyCtxtAt<'tcx>, + machine: &Self, + alloc_id: AllocId, + ) -> InterpResult<'tcx> { + if machine.stack.is_empty() { + // Get out of the way for the final copy. + return interp_ok(()); + } // Check if this is the currently evaluated static. - if Some(alloc_id) == ecx.machine.static_root_ids.map(|(id, _)| id) { + if Some(alloc_id) == machine.static_root_ids.map(|(id, _)| id) { return Err(ConstEvalErrKind::RecursiveStatic).into(); } // If this is another static, make sure we fire off the query to detect cycles. // But only do that when checks for static recursion are enabled. - if ecx.machine.static_root_ids.is_some() { - if let Some(GlobalAlloc::Static(def_id)) = ecx.tcx.try_get_global_alloc(alloc_id) { - if ecx.tcx.is_foreign_item(def_id) { + if machine.static_root_ids.is_some() { + if let Some(GlobalAlloc::Static(def_id)) = tcx.try_get_global_alloc(alloc_id) { + if tcx.is_foreign_item(def_id) { throw_unsup!(ExternStatic(def_id)); } - ecx.ctfe_query(|tcx| tcx.eval_static_initializer(def_id))?; + tcx.eval_static_initializer(def_id)?; } } interp_ok(()) diff --git a/compiler/rustc_const_eval/src/const_eval/mod.rs b/compiler/rustc_const_eval/src/const_eval/mod.rs index 6fd0b9d26e39..d95d552d7d54 100644 --- a/compiler/rustc_const_eval/src/const_eval/mod.rs +++ b/compiler/rustc_const_eval/src/const_eval/mod.rs @@ -2,7 +2,6 @@ use rustc_abi::{FieldIdx, VariantIdx}; use rustc_middle::query::Key; -use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, mir}; use tracing::instrument; @@ -26,13 +25,6 @@ pub(crate) use self::valtrees::{eval_to_valtree, valtree_to_const_value}; // We forbid type-level constants that contain more than `VALTREE_MAX_NODES` nodes. 
const VALTREE_MAX_NODES: usize = 100000; -pub(crate) enum ValTreeCreationError<'tcx> { - NodesOverflow, - /// Values of this type, or this particular value, are not supported as valtrees. - NonSupportedType(Ty<'tcx>), -} -pub(crate) type ValTreeCreationResult<'tcx> = Result, ValTreeCreationError<'tcx>>; - #[instrument(skip(tcx), level = "debug")] pub(crate) fn try_destructure_mir_constant_for_user_output<'tcx>( tcx: TyCtxt<'tcx>, @@ -74,18 +66,13 @@ pub(crate) fn try_destructure_mir_constant_for_user_output<'tcx>( #[instrument(skip(tcx), level = "debug")] pub fn tag_for_variant_provider<'tcx>( tcx: TyCtxt<'tcx>, - (ty, variant_index): (Ty<'tcx>, VariantIdx), + key: ty::PseudoCanonicalInput<'tcx, (Ty<'tcx>, VariantIdx)>, ) -> Option { + let (ty, variant_index) = key.value; assert!(ty.is_enum()); - // FIXME: This uses an empty `TypingEnv` even though - // it may be used by a generic CTFE. - let ecx = InterpCx::new( - tcx, - ty.default_span(tcx), - ty::TypingEnv::fully_monomorphized(), - crate::const_eval::DummyMachine, - ); + let ecx = + InterpCx::new(tcx, ty.default_span(tcx), key.typing_env, crate::const_eval::DummyMachine); let layout = ecx.layout_of(ty).unwrap(); ecx.tag_for_variant(layout, variant_index).unwrap().map(|(tag, _tag_field)| tag) diff --git a/compiler/rustc_const_eval/src/const_eval/valtrees.rs b/compiler/rustc_const_eval/src/const_eval/valtrees.rs index 58d230af683e..5ab72c853c4f 100644 --- a/compiler/rustc_const_eval/src/const_eval/valtrees.rs +++ b/compiler/rustc_const_eval/src/const_eval/valtrees.rs @@ -1,17 +1,16 @@ use rustc_abi::{BackendRepr, FieldIdx, VariantIdx}; use rustc_data_structures::stack::ensure_sufficient_stack; -use rustc_middle::mir::interpret::{EvalToValTreeResult, GlobalId, ReportedErrorInfo}; -use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout}; +use rustc_middle::mir::interpret::{EvalToValTreeResult, GlobalId, ValTreeCreationError}; +use rustc_middle::ty::layout::{LayoutCx, TyAndLayout}; use 
rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, mir}; use rustc_span::DUMMY_SP; use tracing::{debug, instrument, trace}; +use super::VALTREE_MAX_NODES; use super::eval_queries::{mk_eval_cx_to_read_const_val, op_to_const}; use super::machine::CompileTimeInterpCx; -use super::{VALTREE_MAX_NODES, ValTreeCreationError, ValTreeCreationResult}; use crate::const_eval::CanAccessMutGlobal; -use crate::errors::MaxNumNodesInConstErr; use crate::interpret::{ ImmTy, Immediate, InternKind, MPlaceTy, MemPlaceMeta, MemoryKind, PlaceTy, Projectable, Scalar, intern_const_alloc_recursive, @@ -24,7 +23,7 @@ fn branches<'tcx>( field_count: usize, variant: Option, num_nodes: &mut usize, -) -> ValTreeCreationResult<'tcx> { +) -> EvalToValTreeResult<'tcx> { let place = match variant { Some(variant) => ecx.project_downcast(place, variant).unwrap(), None => place.clone(), @@ -58,7 +57,7 @@ fn slice_branches<'tcx>( ecx: &CompileTimeInterpCx<'tcx>, place: &MPlaceTy<'tcx>, num_nodes: &mut usize, -) -> ValTreeCreationResult<'tcx> { +) -> EvalToValTreeResult<'tcx> { let n = place.len(ecx).unwrap_or_else(|_| panic!("expected to use len of place {place:?}")); let mut elems = Vec::with_capacity(n as usize); @@ -76,7 +75,7 @@ fn const_to_valtree_inner<'tcx>( ecx: &CompileTimeInterpCx<'tcx>, place: &MPlaceTy<'tcx>, num_nodes: &mut usize, -) -> ValTreeCreationResult<'tcx> { +) -> EvalToValTreeResult<'tcx> { let tcx = *ecx.tcx; let ty = place.layout.ty; debug!("ty kind: {:?}", ty.kind()); @@ -91,7 +90,7 @@ fn const_to_valtree_inner<'tcx>( Ok(ty::ValTree::zst(tcx)) } ty::Bool | ty::Int(_) | ty::Uint(_) | ty::Float(_) | ty::Char => { - let val = ecx.read_immediate(place).unwrap(); + let val = ecx.read_immediate(place).report_err()?; let val = val.to_scalar_int().unwrap(); *num_nodes += 1; @@ -113,7 +112,7 @@ fn const_to_valtree_inner<'tcx>( // equality at compile-time (see `ptr_guaranteed_cmp`). // However we allow those that are just integers in disguise. // First, get the pointer. 
Remember it might be wide! - let val = ecx.read_immediate(place).unwrap(); + let val = ecx.read_immediate(place).report_err()?; // We could allow wide raw pointers where both sides are integers in the future, // but for now we reject them. if matches!(val.layout.backend_repr, BackendRepr::ScalarPair(..)) { @@ -134,7 +133,7 @@ fn const_to_valtree_inner<'tcx>( ty::FnPtr(..) => Err(ValTreeCreationError::NonSupportedType(ty)), ty::Ref(_, _, _) => { - let derefd_place = ecx.deref_pointer(place).unwrap(); + let derefd_place = ecx.deref_pointer(place).report_err()?; const_to_valtree_inner(ecx, &derefd_place, num_nodes) } @@ -158,7 +157,7 @@ fn const_to_valtree_inner<'tcx>( bug!("uninhabited types should have errored and never gotten converted to valtree") } - let variant = ecx.read_discriminant(place).unwrap(); + let variant = ecx.read_discriminant(place).report_err()?; branches(ecx, place, def.variant(variant).fields.len(), def.is_enum().then_some(variant), num_nodes) } @@ -249,24 +248,7 @@ pub(crate) fn eval_to_valtree<'tcx>( debug!(?place); let mut num_nodes = 0; - let valtree_result = const_to_valtree_inner(&ecx, &place, &mut num_nodes); - - match valtree_result { - Ok(valtree) => Ok(Ok(valtree)), - Err(err) => { - let did = cid.instance.def_id(); - let global_const_id = cid.display(tcx); - let span = tcx.hir_span_if_local(did); - match err { - ValTreeCreationError::NodesOverflow => { - let handled = - tcx.dcx().emit_err(MaxNumNodesInConstErr { span, global_const_id }); - Err(ReportedErrorInfo::allowed_in_infallible(handled).into()) - } - ValTreeCreationError::NonSupportedType(ty) => Ok(Err(ty)), - } - } - } + const_to_valtree_inner(&ecx, &place, &mut num_nodes) } /// Converts a `ValTree` to a `ConstValue`, which is needed after mir diff --git a/compiler/rustc_const_eval/src/errors.rs b/compiler/rustc_const_eval/src/errors.rs index 037cbf777e70..14abdd8c98c1 100644 --- a/compiler/rustc_const_eval/src/errors.rs +++ b/compiler/rustc_const_eval/src/errors.rs @@ -92,14 
+92,6 @@ pub(crate) struct PanicNonStrErr { pub span: Span, } -#[derive(Diagnostic)] -#[diag(const_eval_max_num_nodes_in_const)] -pub(crate) struct MaxNumNodesInConstErr { - #[primary_span] - pub span: Option, - pub global_const_id: String, -} - #[derive(Diagnostic)] #[diag(const_eval_unallowed_fn_pointer_call)] pub(crate) struct UnallowedFnPointerCall { @@ -136,9 +128,7 @@ pub(crate) struct UnstableIntrinsic { code = "#![feature({feature})]\n", applicability = "machine-applicable" )] - pub suggestion: Option, - #[help(const_eval_unstable_intrinsic_suggestion)] - pub help: bool, + pub suggestion: Span, } #[derive(Diagnostic)] @@ -160,24 +150,17 @@ pub(crate) struct UnmarkedIntrinsicExposed { } #[derive(Diagnostic)] -#[diag(const_eval_mutable_ref_escaping, code = E0764)] -pub(crate) struct MutableRefEscaping { +#[diag(const_eval_mutable_borrow_escaping, code = E0764)] +#[note] +#[note(const_eval_note2)] +#[help] +pub(crate) struct MutableBorrowEscaping { #[primary_span] + #[label] pub span: Span, pub kind: ConstContext, - #[note(const_eval_teach_note)] - pub teach: bool, } -#[derive(Diagnostic)] -#[diag(const_eval_mutable_raw_escaping, code = E0764)] -pub(crate) struct MutableRawEscaping { - #[primary_span] - pub span: Span, - pub kind: ConstContext, - #[note(const_eval_teach_note)] - pub teach: bool, -} #[derive(Diagnostic)] #[diag(const_eval_non_const_fmt_macro_call, code = E0015)] pub(crate) struct NonConstFmtMacroCall { @@ -235,16 +218,15 @@ pub(crate) struct UnallowedInlineAsm { } #[derive(Diagnostic)] -#[diag(const_eval_interior_mutable_ref_escaping, code = E0492)] -pub(crate) struct InteriorMutableRefEscaping { +#[diag(const_eval_interior_mutable_borrow_escaping, code = E0492)] +#[note] +#[note(const_eval_note2)] +#[help] +pub(crate) struct InteriorMutableBorrowEscaping { #[primary_span] #[label] pub span: Span, - #[help] - pub opt_help: bool, pub kind: ConstContext, - #[note(const_eval_teach_note)] - pub teach: bool, } #[derive(LintDiagnostic)] @@ -293,6 
+275,9 @@ impl Subdiagnostic for FrameNote { span.push_span_label(self.span, fluent::const_eval_frame_note_last); } let msg = diag.eagerly_translate(fluent::const_eval_frame_note); + diag.remove_arg("times"); + diag.remove_arg("where_"); + diag.remove_arg("instance"); diag.span_note(span, msg); } } @@ -654,9 +639,8 @@ impl<'tcx> ReportErrorExt for ValidationErrorInfo<'tcx> { PointerAsInt { .. } => const_eval_validation_pointer_as_int, PartialPointer => const_eval_validation_partial_pointer, - ConstRefToMutable => const_eval_validation_const_ref_to_mutable, - ConstRefToExtern => const_eval_validation_const_ref_to_extern, MutableRefToImmutable => const_eval_validation_mutable_ref_to_immutable, + MutableRefInConst => const_eval_validation_mutable_ref_in_const, NullFnPtr => const_eval_validation_null_fn_ptr, NeverVal => const_eval_validation_never_val, NullablePtrOutOfRange { .. } => const_eval_validation_nullable_ptr_out_of_range, @@ -814,9 +798,8 @@ impl<'tcx> ReportErrorExt for ValidationErrorInfo<'tcx> { err.arg("expected_dyn_type", expected_dyn_type.to_string()); } NullPtr { .. 
} - | ConstRefToMutable - | ConstRefToExtern | MutableRefToImmutable + | MutableRefInConst | NullFnPtr | NeverVal | UnsafeCellInImmutable diff --git a/compiler/rustc_const_eval/src/interpret/call.rs b/compiler/rustc_const_eval/src/interpret/call.rs index 37677f9e0483..79c14b204e36 100644 --- a/compiler/rustc_const_eval/src/interpret/call.rs +++ b/compiler/rustc_const_eval/src/interpret/call.rs @@ -6,7 +6,7 @@ use std::borrow::Cow; use either::{Left, Right}; use rustc_abi::{self as abi, ExternAbi, FieldIdx, Integer, VariantIdx}; use rustc_hir::def_id::DefId; -use rustc_middle::ty::layout::{FnAbiOf, IntegerExt, LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::{FnAbiOf, IntegerExt, TyAndLayout}; use rustc_middle::ty::{self, AdtDef, Instance, Ty, VariantDef}; use rustc_middle::{bug, mir, span_bug}; use rustc_span::sym; diff --git a/compiler/rustc_const_eval/src/interpret/cast.rs b/compiler/rustc_const_eval/src/interpret/cast.rs index 9e15f4572d7b..1036935bb106 100644 --- a/compiler/rustc_const_eval/src/interpret/cast.rs +++ b/compiler/rustc_const_eval/src/interpret/cast.rs @@ -6,7 +6,7 @@ use rustc_apfloat::{Float, FloatConvert}; use rustc_middle::mir::CastKind; use rustc_middle::mir::interpret::{InterpResult, PointerArithmetic, Scalar}; use rustc_middle::ty::adjustment::PointerCoercion; -use rustc_middle::ty::layout::{IntegerExt, LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::{IntegerExt, TyAndLayout}; use rustc_middle::ty::{self, FloatTy, Ty}; use rustc_middle::{bug, span_bug}; use tracing::trace; diff --git a/compiler/rustc_const_eval/src/interpret/discriminant.rs b/compiler/rustc_const_eval/src/interpret/discriminant.rs index 6c4b000e16b5..b7e7f65c95c7 100644 --- a/compiler/rustc_const_eval/src/interpret/discriminant.rs +++ b/compiler/rustc_const_eval/src/interpret/discriminant.rs @@ -1,7 +1,7 @@ //! Functions for reading and writing discriminants of multi-variant layouts (enums and coroutines). 
use rustc_abi::{self as abi, FieldIdx, TagEncoding, VariantIdx, Variants}; -use rustc_middle::ty::layout::{LayoutOf, PrimitiveExt, TyAndLayout}; +use rustc_middle::ty::layout::{PrimitiveExt, TyAndLayout}; use rustc_middle::ty::{self, CoroutineArgsExt, ScalarInt, Ty}; use rustc_middle::{mir, span_bug}; use tracing::{instrument, trace}; diff --git a/compiler/rustc_const_eval/src/interpret/eval_context.rs b/compiler/rustc_const_eval/src/interpret/eval_context.rs index b69bc0918be8..46c784b41c66 100644 --- a/compiler/rustc_const_eval/src/interpret/eval_context.rs +++ b/compiler/rustc_const_eval/src/interpret/eval_context.rs @@ -7,7 +7,8 @@ use rustc_hir::def_id::DefId; use rustc_middle::mir::interpret::{ErrorHandled, InvalidMetaKind, ReportedErrorInfo}; use rustc_middle::query::TyCtxtAt; use rustc_middle::ty::layout::{ - self, FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOfHelpers, TyAndLayout, + self, FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOf, LayoutOfHelpers, + TyAndLayout, }; use rustc_middle::ty::{self, GenericArgsRef, Ty, TyCtxt, TypeFoldable, TypingEnv, Variance}; use rustc_middle::{mir, span_bug}; @@ -21,7 +22,7 @@ use super::{ MemPlaceMeta, Memory, OpTy, Place, PlaceTy, PointerArithmetic, Projectable, Provenance, err_inval, interp_ok, throw_inval, throw_ub, throw_ub_custom, }; -use crate::{ReportErrorExt, fluent_generated as fluent, util}; +use crate::{ReportErrorExt, enter_trace_span, fluent_generated as fluent, util}; pub struct InterpCx<'tcx, M: Machine<'tcx>> { /// Stores the `Machine` instance. @@ -91,6 +92,20 @@ impl<'tcx, M: Machine<'tcx>> LayoutOfHelpers<'tcx> for InterpCx<'tcx, M> { } } +impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { + /// This inherent method takes priority over the trait method with the same name in LayoutOf, + /// and allows wrapping the actual [LayoutOf::layout_of] with a tracing span. + /// See [LayoutOf::layout_of] for the original documentation. 
+ #[inline] + pub fn layout_of( + &self, + ty: Ty<'tcx>, + ) -> as LayoutOfHelpers<'tcx>>::LayoutOfResult { + let _span = enter_trace_span!(M, "InterpCx::layout_of", "ty = {:?}", ty.kind()); + LayoutOf::layout_of(self, ty) + } +} + impl<'tcx, M: Machine<'tcx>> FnAbiOfHelpers<'tcx> for InterpCx<'tcx, M> { type FnAbiOfResult = Result<&'tcx FnAbi<'tcx, Ty<'tcx>>, InterpErrorKind<'tcx>>; @@ -284,6 +299,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { frame: &Frame<'tcx, M::Provenance, M::FrameExtra>, value: T, ) -> Result { + let _span = enter_trace_span!( + M, + "instantiate_from_frame_and_normalize_erasing_regions", + "{}", + frame.instance + ); frame .instance .try_instantiate_mir_and_normalize_erasing_regions( @@ -362,7 +383,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// Returns the actual dynamic size and alignment of the place at the given type. /// Only the "meta" (metadata) part of the place matters. /// This can fail to provide an answer for extern types. - pub(super) fn size_and_align_of( + pub(super) fn size_and_align_from_meta( &self, metadata: &MemPlaceMeta, layout: &TyAndLayout<'tcx>, @@ -388,7 +409,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { // adjust alignment and size for them? let field = layout.field(self, layout.fields.count() - 1); let Some((unsized_size, mut unsized_align)) = - self.size_and_align_of(metadata, &field)? + self.size_and_align_from_meta(metadata, &field)? else { // A field with an extern type. We don't know the actual dynamic size // or the alignment. @@ -450,11 +471,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { } } #[inline] - pub fn size_and_align_of_mplace( + pub fn size_and_align_of_val( &self, - mplace: &MPlaceTy<'tcx, M::Provenance>, + val: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, Option<(Size, Align)>> { - self.size_and_align_of(&mplace.meta(), &mplace.layout) + self.size_and_align_from_meta(&val.meta(), &val.layout()) } /// Jump to the given block. 
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs index 96c39c7bb32b..b29c5c7c7d7b 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs @@ -8,7 +8,7 @@ use rustc_abi::Size; use rustc_apfloat::ieee::{Double, Half, Quad, Single}; use rustc_hir::def_id::DefId; use rustc_middle::mir::{self, BinOp, ConstValue, NonDivergingIntrinsic}; -use rustc_middle::ty::layout::{LayoutOf as _, TyAndLayout, ValidityRequirement}; +use rustc_middle::ty::layout::{TyAndLayout, ValidityRequirement}; use rustc_middle::ty::{GenericArgsRef, Ty, TyCtxt}; use rustc_middle::{bug, ty}; use rustc_span::{Symbol, sym}; @@ -125,7 +125,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { // dereferenceable! let place = self.ref_to_mplace(&self.read_immediate(&args[0])?)?; let (size, align) = self - .size_and_align_of_mplace(&place)? + .size_and_align_of_val(&place)? .ok_or_else(|| err_unsup_format!("`extern type` does not have known layout"))?; let result = match intrinsic_name { diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs index b9e022c96043..d6d230fbd177 100644 --- a/compiler/rustc_const_eval/src/interpret/machine.rs +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -443,7 +443,11 @@ pub trait Machine<'tcx>: Sized { /// /// Used to prevent statics from self-initializing by reading from their own memory /// as it is being initialized. 
- fn before_alloc_read(_ecx: &InterpCx<'tcx, Self>, _alloc_id: AllocId) -> InterpResult<'tcx> { + fn before_alloc_access( + _tcx: TyCtxtAt<'tcx>, + _machine: &Self, + _alloc_id: AllocId, + ) -> InterpResult<'tcx> { interp_ok(()) } diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 99a4bc1b7d6e..69fceb02ff93 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -720,7 +720,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { // do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked. if !self.memory.validation_in_progress.get() { if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr, size_i64) { - M::before_alloc_read(self, alloc_id)?; + M::before_alloc_access(self.tcx, &self.machine, alloc_id)?; } } @@ -821,6 +821,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { if let Some((alloc_id, offset, prov, alloc, machine)) = ptr_and_alloc { let range = alloc_range(offset, size); if !validation_in_progress { + // For writes, it's okay to only call those when there actually is a non-zero + // amount of bytes to be written: a zero-sized write doesn't manifest anything. + M::before_alloc_access(tcx, machine, alloc_id)?; M::before_memory_write( tcx, machine, @@ -877,12 +880,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { if let Some(fn_val) = self.get_fn_alloc(id) { let align = match fn_val { FnVal::Instance(instance) => { - // Function alignment can be set globally with the `-Zmin-function-alignment=` flag; - // the alignment from a `#[repr(align())]` is used if it specifies a higher alignment. 
- let fn_align = self.tcx.codegen_fn_attrs(instance.def_id()).alignment; - let global_align = self.tcx.sess.opts.unstable_opts.min_function_alignment; - - Ord::max(global_align, fn_align).unwrap_or(Align::ONE) + self.tcx.codegen_fn_attrs(instance.def_id()).alignment.unwrap_or(Align::ONE) } // Machine-specific extra functions currently do not support alignment restrictions. FnVal::Other(_) => Align::ONE, @@ -1401,6 +1399,14 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let src_parts = self.get_ptr_access(src, size)?; let dest_parts = self.get_ptr_access(dest, size * num_copies)?; // `Size` multiplication + // Similar to `get_ptr_alloc`, we need to call `before_alloc_access` even for zero-sized + // reads. However, just like in `get_ptr_alloc_mut`, the write part is okay to skip for + // zero-sized writes. + if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(src, size.bytes().try_into().unwrap()) + { + M::before_alloc_access(tcx, &self.machine, alloc_id)?; + } + // FIXME: we look up both allocations twice here, once before for the `check_ptr_access` // and once below to get the underlying `&[mut] Allocation`. @@ -1412,6 +1418,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let src_alloc = self.get_alloc_raw(src_alloc_id)?; let src_range = alloc_range(src_offset, size); assert!(!self.memory.validation_in_progress.get(), "we can't be copying during validation"); + + // Trigger read hook. // For the overlapping case, it is crucial that we trigger the read hook // before the write hook -- the aliasing model cares about the order. M::before_memory_read( @@ -1438,16 +1446,18 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let provenance = src_alloc .provenance() .prepare_copy(src_range, dest_offset, num_copies, self) - .map_err(|e| e.to_interp_error(dest_alloc_id))?; + .map_err(|e| e.to_interp_error(src_alloc_id))?; // Prepare a copy of the initialization mask. 
let init = src_alloc.init_mask().prepare_copy(src_range); - // Destination alloc preparations and access hooks. - let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?; + // Destination alloc preparations... + let (dest_alloc, machine) = self.get_alloc_raw_mut(dest_alloc_id)?; let dest_range = alloc_range(dest_offset, size * num_copies); + // ...and access hooks. + M::before_alloc_access(tcx, machine, dest_alloc_id)?; M::before_memory_write( tcx, - extra, + machine, &mut dest_alloc.extra, dest, (dest_alloc_id, dest_prov), diff --git a/compiler/rustc_const_eval/src/interpret/operand.rs b/compiler/rustc_const_eval/src/interpret/operand.rs index 77667ba823a7..62cbbae24a8f 100644 --- a/compiler/rustc_const_eval/src/interpret/operand.rs +++ b/compiler/rustc_const_eval/src/interpret/operand.rs @@ -8,7 +8,7 @@ use rustc_abi as abi; use rustc_abi::{BackendRepr, HasDataLayout, Size}; use rustc_hir::def::Namespace; use rustc_middle::mir::interpret::ScalarSizeMismatch; -use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, TyAndLayout}; use rustc_middle::ty::print::{FmtPrinter, PrettyPrinter}; use rustc_middle::ty::{ConstInt, ScalarInt, Ty, TyCtxt}; use rustc_middle::{bug, mir, span_bug, ty}; @@ -878,9 +878,9 @@ mod size_asserts { use super::*; // tidy-alphabetical-start - static_assert_size!(Immediate, 48); static_assert_size!(ImmTy<'_>, 64); - static_assert_size!(Operand, 56); + static_assert_size!(Immediate, 48); static_assert_size!(OpTy<'_>, 72); + static_assert_size!(Operand, 56); // tidy-alphabetical-end } diff --git a/compiler/rustc_const_eval/src/interpret/operator.rs b/compiler/rustc_const_eval/src/interpret/operator.rs index 899670aeb62d..74f8a0a7b093 100644 --- a/compiler/rustc_const_eval/src/interpret/operator.rs +++ b/compiler/rustc_const_eval/src/interpret/operator.rs @@ -3,7 +3,7 @@ use rustc_abi::Size; use rustc_apfloat::{Float, FloatConvert}; use 
rustc_middle::mir::NullOp; use rustc_middle::mir::interpret::{InterpResult, PointerArithmetic, Scalar}; -use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::TyAndLayout; use rustc_middle::ty::{self, FloatTy, ScalarInt, Ty}; use rustc_middle::{bug, mir, span_bug}; use rustc_span::sym; diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs index f5d3de7b1b27..3028568dd8f0 100644 --- a/compiler/rustc_const_eval/src/interpret/place.rs +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -7,7 +7,7 @@ use std::assert_matches::assert_matches; use either::{Either, Left, Right}; use rustc_abi::{BackendRepr, HasDataLayout, Size}; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::TyAndLayout; use rustc_middle::{bug, mir, span_bug}; use tracing::{instrument, trace}; @@ -470,7 +470,7 @@ where ) -> InterpResult<'tcx, Option>> { let (size, _align) = self - .size_and_align_of_mplace(mplace)? + .size_and_align_of_val(mplace)? .unwrap_or((mplace.layout.size, mplace.layout.align.abi)); // We check alignment separately, and *after* checking everything else. // If an access is both OOB and misaligned, we want to see the bounds error. @@ -486,7 +486,7 @@ where ) -> InterpResult<'tcx, Option>> { let (size, _align) = self - .size_and_align_of_mplace(mplace)? + .size_and_align_of_val(mplace)? .unwrap_or((mplace.layout.size, mplace.layout.align.abi)); // We check alignment separately, and raise that error *after* checking everything else. // If an access is both OOB and misaligned, we want to see the bounds error. @@ -888,11 +888,11 @@ where trace!("copy_op: {:?} <- {:?}: {}", *dest, src, dest.layout().ty); let dest = dest.force_mplace(self)?; - let Some((dest_size, _)) = self.size_and_align_of_mplace(&dest)? else { + let Some((dest_size, _)) = self.size_and_align_of_val(&dest)? 
else { span_bug!(self.cur_span(), "copy_op needs (dynamically) sized values") }; if cfg!(debug_assertions) { - let src_size = self.size_and_align_of_mplace(&src)?.unwrap().0; + let src_size = self.size_and_align_of_val(&src)?.unwrap().0; assert_eq!(src_size, dest_size, "Cannot copy differently-sized data"); } else { // As a cheap approximation, we compare the fixed parts of the size. @@ -980,7 +980,7 @@ where kind: MemoryKind, meta: MemPlaceMeta, ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { - let Some((size, align)) = self.size_and_align_of(&meta, &layout)? else { + let Some((size, align)) = self.size_and_align_from_meta(&meta, &layout)? else { span_bug!(self.cur_span(), "cannot allocate space for `extern` type, size is not known") }; let ptr = self.allocate_ptr(size, align, kind, AllocInit::Uninit)?; @@ -1056,9 +1056,9 @@ mod size_asserts { use super::*; // tidy-alphabetical-start + static_assert_size!(MPlaceTy<'_>, 64); static_assert_size!(MemPlace, 48); static_assert_size!(MemPlaceMeta, 24); - static_assert_size!(MPlaceTy<'_>, 64); static_assert_size!(Place, 48); static_assert_size!(PlaceTy<'_>, 64); // tidy-alphabetical-end diff --git a/compiler/rustc_const_eval/src/interpret/projection.rs b/compiler/rustc_const_eval/src/interpret/projection.rs index ad47a19a14d5..306697d4ec99 100644 --- a/compiler/rustc_const_eval/src/interpret/projection.rs +++ b/compiler/rustc_const_eval/src/interpret/projection.rs @@ -12,7 +12,7 @@ use std::ops::Range; use rustc_abi::{self as abi, FieldIdx, Size, VariantIdx}; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::TyAndLayout; use rustc_middle::{bug, mir, span_bug, ty}; use tracing::{debug, instrument}; @@ -168,7 +168,7 @@ where // Re-use parent metadata to determine dynamic field layout. // With custom DSTS, this *will* execute user-defined code, but the same // happens at run-time so that's okay. - match self.size_and_align_of(&base_meta, &field_layout)? 
{ + match self.size_and_align_from_meta(&base_meta, &field_layout)? { Some((_, align)) => { // For packed types, we need to cap alignment. let align = if let ty::Adt(def, _) = base.layout().ty.kind() diff --git a/compiler/rustc_const_eval/src/interpret/stack.rs b/compiler/rustc_const_eval/src/interpret/stack.rs index 2a2d1bb27547..3361a586b8ee 100644 --- a/compiler/rustc_const_eval/src/interpret/stack.rs +++ b/compiler/rustc_const_eval/src/interpret/stack.rs @@ -7,7 +7,7 @@ use either::{Either, Left, Right}; use rustc_hir as hir; use rustc_hir::definitions::DefPathData; use rustc_index::IndexVec; -use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::TyAndLayout; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, mir}; use rustc_mir_dataflow::impls::always_storage_live_locals; diff --git a/compiler/rustc_const_eval/src/interpret/traits.rs b/compiler/rustc_const_eval/src/interpret/traits.rs index 7249ef23bf62..8b634955bb79 100644 --- a/compiler/rustc_const_eval/src/interpret/traits.rs +++ b/compiler/rustc_const_eval/src/interpret/traits.rs @@ -1,6 +1,5 @@ use rustc_abi::{Align, FieldIdx, Size}; use rustc_middle::mir::interpret::{InterpResult, Pointer}; -use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, ExistentialPredicateStableCmpExt, Ty, TyCtxt, VtblEntry}; use tracing::trace; diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs index 7d76d925ef23..998ef3729eaf 100644 --- a/compiler/rustc_const_eval/src/interpret/validity.rs +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -24,7 +24,7 @@ use rustc_middle::mir::interpret::{ ExpectedKind, InterpErrorKind, InvalidMetaKind, Misalignment, PointerKind, Provenance, UnsupportedOpInfo, ValidationErrorInfo, alloc_range, interp_ok, }; -use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout}; +use rustc_middle::ty::layout::{LayoutCx, TyAndLayout}; use rustc_middle::ty::{self, 
Ty}; use rustc_span::{Symbol, sym}; use tracing::trace; @@ -35,6 +35,7 @@ use super::{ Machine, MemPlaceMeta, PlaceTy, Pointer, Projectable, Scalar, ValueVisitor, err_ub, format_interp_error, }; +use crate::enter_trace_span; // for the validation errors #[rustfmt::skip] @@ -493,7 +494,7 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { } // Make sure this is dereferenceable and all. let size_and_align = try_validation!( - self.ecx.size_and_align_of_mplace(&place), + self.ecx.size_and_align_of_val(&place), self.path, Ub(InvalidMeta(msg)) => match msg { InvalidMetaKind::SliceTooBig => InvalidMetaSliceTooLarge { ptr_kind }, @@ -570,6 +571,8 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { }; let (size, _align) = global_alloc.size_and_align(*self.ecx.tcx, self.ecx.typing_env); + let alloc_actual_mutbl = + global_alloc.mutability(*self.ecx.tcx, self.ecx.typing_env); if let GlobalAlloc::Static(did) = global_alloc { let DefKind::Static { nested, .. } = self.ecx.tcx.def_kind(did) else { @@ -597,9 +600,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { skip_recursive_check = !nested; } CtfeValidationMode::Const { .. } => { - // We can't recursively validate `extern static`, so we better reject them. - if self.ecx.tcx.is_foreign_item(did) { - throw_validation_failure!(self.path, ConstRefToExtern); + // If this is mutable memory or an `extern static`, there's no point in checking it -- we'd + // just get errors trying to read the value. + if alloc_actual_mutbl.is_mut() || self.ecx.tcx.is_foreign_item(did) + { + skip_recursive_check = true; } } } @@ -618,9 +623,6 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { mutbl } }; - // Determine what it actually points to. - let alloc_actual_mutbl = - global_alloc.mutability(*self.ecx.tcx, self.ecx.typing_env); // Mutable pointer to immutable memory is no good. 
if ptr_expected_mutbl == Mutability::Mut && alloc_actual_mutbl == Mutability::Not @@ -628,12 +630,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { // This can actually occur with transmutes. throw_validation_failure!(self.path, MutableRefToImmutable); } - // In a const, everything must be completely immutable. + // In a const, any kind of mutable reference is not good. if matches!(self.ctfe_mode, Some(CtfeValidationMode::Const { .. })) { - if ptr_expected_mutbl == Mutability::Mut - || alloc_actual_mutbl == Mutability::Mut - { - throw_validation_failure!(self.path, ConstRefToMutable); + if ptr_expected_mutbl == Mutability::Mut { + throw_validation_failure!(self.path, MutableRefInConst); } } } @@ -906,7 +906,7 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { let (_prov, start_offset) = mplace.ptr().into_parts(); let (size, _align) = self .ecx - .size_and_align_of_mplace(&mplace)? + .size_and_align_of_val(&mplace)? .unwrap_or((mplace.layout.size, mplace.layout.align.abi)); // If there is no padding at all, we can skip the rest: check for // a single data range covering the entire value. @@ -1086,8 +1086,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, ) -> InterpResult<'tcx> { // Special check for CTFE validation, preventing `UnsafeCell` inside unions in immutable memory. if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) { - if !val.layout.is_zst() && !val.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.typing_env) - { + // Unsized unions are currently not a thing, but let's keep this code consistent with + // the check in `visit_value`. 
+ let zst = self.ecx.size_and_align_of_val(val)?.is_some_and(|(s, _a)| s.bytes() == 0); + if !zst && !val.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.typing_env) { if !self.in_mutable_memory(val) { throw_validation_failure!(self.path, UnsafeCellInImmutable); } @@ -1131,7 +1133,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, // Special check preventing `UnsafeCell` in the inner part of constants if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) { - if !val.layout.is_zst() + // Exclude ZST values. We need to compute the dynamic size/align to properly + // handle slices and trait objects. + let zst = self.ecx.size_and_align_of_val(val)?.is_some_and(|(s, _a)| s.bytes() == 0); + if !zst && let Some(def) = val.layout.ty.ty_adt_def() && def.is_unsafe_cell() { @@ -1364,8 +1369,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { }) } - /// This function checks the data at `op` to be const-valid. - /// `op` is assumed to cover valid memory if it is an indirect operand. + /// This function checks the data at `val` to be const-valid. + /// `val` is assumed to cover valid memory if it is an indirect operand. /// It will error if the bits at the destination do not match the ones described by the layout. /// /// `ref_tracking` is used to record references that we encounter so that they @@ -1391,8 +1396,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { ) } - /// This function checks the data at `op` to be runtime-valid. - /// `op` is assumed to cover valid memory if it is an indirect operand. + /// This function checks the data at `val` to be runtime-valid. + /// `val` is assumed to cover valid memory if it is an indirect operand. /// It will error if the bits at the destination do not match the ones described by the layout. 
#[inline(always)] pub fn validate_operand( @@ -1401,6 +1406,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { recursive: bool, reset_provenance_and_padding: bool, ) -> InterpResult<'tcx> { + let _span = enter_trace_span!( + M, + "validate_operand", + "recursive={recursive}, reset_provenance_and_padding={reset_provenance_and_padding}, val={val:?}" + ); + // Note that we *could* actually be in CTFE here with `-Zextra-const-ub-checks`, but it's // still correct to not use `ctfe_mode`: that mode is for validation of the final constant // value, it rules out things like `UnsafeCell` in awkward places. diff --git a/compiler/rustc_const_eval/src/interpret/visitor.rs b/compiler/rustc_const_eval/src/interpret/visitor.rs index 5aea91233bda..d5970b69baf7 100644 --- a/compiler/rustc_const_eval/src/interpret/visitor.rs +++ b/compiler/rustc_const_eval/src/interpret/visitor.rs @@ -6,7 +6,6 @@ use std::num::NonZero; use rustc_abi::{FieldIdx, FieldsShape, VariantIdx, Variants}; use rustc_index::IndexVec; use rustc_middle::mir::interpret::InterpResult; -use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, Ty}; use tracing::trace; diff --git a/compiler/rustc_const_eval/src/util/caller_location.rs b/compiler/rustc_const_eval/src/util/caller_location.rs index 671214002a0d..ab2de279ed8d 100644 --- a/compiler/rustc_const_eval/src/util/caller_location.rs +++ b/compiler/rustc_const_eval/src/util/caller_location.rs @@ -1,6 +1,5 @@ use rustc_abi::FieldIdx; use rustc_hir::LangItem; -use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, TyCtxt}; use rustc_middle::{bug, mir}; use rustc_span::Symbol; diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml index f6a020116185..17204883fb03 100644 --- a/compiler/rustc_data_structures/Cargo.toml +++ b/compiler/rustc_data_structures/Cargo.toml @@ -14,7 +14,6 @@ indexmap = "2.4.0" jobserver_crate = { version = "0.1.28", package = "jobserver" } measureme = "12.0.1" 
rustc-hash = "2.0.0" -rustc-rayon-core = { version = "0.5.0" } rustc-stable-hash = { version = "0.1.0", features = ["nightly"] } rustc_arena = { path = "../rustc_arena" } rustc_graphviz = { path = "../rustc_graphviz" } @@ -22,6 +21,7 @@ rustc_hashes = { path = "../rustc_hashes" } rustc_index = { path = "../rustc_index", package = "rustc_index" } rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } +rustc_thread_pool = { path = "../rustc_thread_pool" } smallvec = { version = "1.8.1", features = ["const_generics", "union", "may_dangle"] } stacker = "0.1.17" tempfile = "3.2" diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs index b28c333d860c..3881f3c2aa84 100644 --- a/compiler/rustc_data_structures/src/sync.rs +++ b/compiler/rustc_data_structures/src/sync.rs @@ -22,8 +22,6 @@ //! | | | `parking_lot::Mutex` | //! | `RwLock` | `RefCell` | `parking_lot::RwLock` | //! | `MTLock` [^1] | `T` | `Lock` | -//! | | | | -//! | `ParallelIterator` | `Iterator` | `rayon::iter::ParallelIterator` | //! //! [^1]: `MTLock` is similar to `Lock`, but the serial version avoids the cost //! of a `RefCell`. This is appropriate when interior mutability is not diff --git a/compiler/rustc_data_structures/src/sync/lock.rs b/compiler/rustc_data_structures/src/sync/lock.rs index 2ccf06ccd4f0..a8161c515115 100644 --- a/compiler/rustc_data_structures/src/sync/lock.rs +++ b/compiler/rustc_data_structures/src/sync/lock.rs @@ -1,8 +1,6 @@ //! This module implements a lock which only uses synchronization if `might_be_dyn_thread_safe` is true. //! It implements `DynSend` and `DynSync` instead of the typical `Send` and `Sync` traits. 
-#![allow(dead_code)] - use std::fmt; #[derive(Clone, Copy, PartialEq)] diff --git a/compiler/rustc_data_structures/src/sync/parallel.rs b/compiler/rustc_data_structures/src/sync/parallel.rs index ab65c7f3a6b5..b515c0bee8a6 100644 --- a/compiler/rustc_data_structures/src/sync/parallel.rs +++ b/compiler/rustc_data_structures/src/sync/parallel.rs @@ -1,8 +1,6 @@ //! This module defines parallel operations that are implemented in //! one way for the serial compiler, and another way the parallel compiler. -#![allow(dead_code)] - use std::any::Any; use std::panic::{AssertUnwindSafe, catch_unwind, resume_unwind}; @@ -96,7 +94,7 @@ macro_rules! parallel { pub fn spawn(func: impl FnOnce() + DynSend + 'static) { if mode::is_dyn_thread_safe() { let func = FromDyn::from(func); - rayon_core::spawn(|| { + rustc_thread_pool::spawn(|| { (func.into_inner())(); }); } else { @@ -107,11 +105,11 @@ pub fn spawn(func: impl FnOnce() + DynSend + 'static) { // This function only works when `mode::is_dyn_thread_safe()`. 
pub fn scope<'scope, OP, R>(op: OP) -> R where - OP: FnOnce(&rayon_core::Scope<'scope>) -> R + DynSend, + OP: FnOnce(&rustc_thread_pool::Scope<'scope>) -> R + DynSend, R: DynSend, { let op = FromDyn::from(op); - rayon_core::scope(|s| FromDyn::from(op.into_inner()(s))).into_inner() + rustc_thread_pool::scope(|s| FromDyn::from(op.into_inner()(s))).into_inner() } #[inline] @@ -124,7 +122,7 @@ where let oper_a = FromDyn::from(oper_a); let oper_b = FromDyn::from(oper_b); let (a, b) = parallel_guard(|guard| { - rayon_core::join( + rustc_thread_pool::join( move || guard.run(move || FromDyn::from(oper_a.into_inner()())), move || guard.run(move || FromDyn::from(oper_b.into_inner()())), ) @@ -158,7 +156,7 @@ fn par_slice( let (left, right) = items.split_at_mut(items.len() / 2); let mut left = state.for_each.derive(left); let mut right = state.for_each.derive(right); - rayon_core::join(move || par_rec(*left, state), move || par_rec(*right, state)); + rustc_thread_pool::join(move || par_rec(*left, state), move || par_rec(*right, state)); } } @@ -241,7 +239,7 @@ pub fn par_map, R: DynSend, C: FromIterato pub fn broadcast(op: impl Fn(usize) -> R + DynSync) -> Vec { if mode::is_dyn_thread_safe() { let op = FromDyn::from(op); - let results = rayon_core::broadcast(|context| op.derive(op(context.index()))); + let results = rustc_thread_pool::broadcast(|context| op.derive(op(context.index()))); results.into_iter().map(|r| r.into_inner()).collect() } else { vec![op(0)] diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index d53126d04143..4855fc58d034 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -38,6 +38,7 @@ use rustc_data_structures::profiling::{ }; use rustc_errors::emitter::stderr_destination; use rustc_errors::registry::Registry; +use rustc_errors::translation::Translator; use rustc_errors::{ColorConfig, DiagCtxt, ErrCode, FatalError, PResult, markdown}; use 
rustc_feature::find_gated_cfg; // This avoids a false positive with `-Wunused_crate_dependencies`. @@ -52,13 +53,13 @@ use rustc_metadata::locator; use rustc_middle::ty::TyCtxt; use rustc_parse::{new_parser_from_file, new_parser_from_source_str, unwrap_or_emit_fatal}; use rustc_session::config::{ - CG_OPTIONS, CrateType, ErrorOutputType, Input, OptionDesc, OutFileName, OutputType, + CG_OPTIONS, CrateType, ErrorOutputType, Input, OptionDesc, OutFileName, OutputType, Sysroot, UnstableOptions, Z_OPTIONS, nightly_options, parse_target_triple, }; use rustc_session::getopts::{self, Matches}; use rustc_session::lint::{Lint, LintId}; use rustc_session::output::{CRATE_TYPES, collect_crate_types, invalid_output_for_target}; -use rustc_session::{EarlyDiagCtxt, Session, config, filesearch}; +use rustc_session::{EarlyDiagCtxt, Session, config}; use rustc_span::FileName; use rustc_span::def_id::LOCAL_CRATE; use rustc_target::json::ToJson; @@ -109,6 +110,10 @@ use crate::session_diagnostics::{ rustc_fluent_macro::fluent_messages! 
{ "../messages.ftl" } +pub fn default_translator() -> Translator { + Translator::with_fallback_bundle(DEFAULT_LOCALE_RESOURCES.to_vec(), false) +} + pub static DEFAULT_LOCALE_RESOURCES: &[&str] = &[ // tidy-alphabetical-start crate::DEFAULT_LOCALE_RESOURCE, @@ -657,7 +662,7 @@ fn print_crate_info( println_info!("{}", targets.join("\n")); } HostTuple => println_info!("{}", rustc_session::config::host_tuple()), - Sysroot => println_info!("{}", sess.sysroot.display()), + Sysroot => println_info!("{}", sess.opts.sysroot.path().display()), TargetLibdir => println_info!("{}", sess.target_tlib_path.dir.display()), TargetSpecJson => { println_info!("{}", serde_json::to_string_pretty(&sess.target.to_json()).unwrap()); @@ -1109,8 +1114,8 @@ fn get_backend_from_raw_matches( let debug_flags = matches.opt_strs("Z"); let backend_name = debug_flags.iter().find_map(|x| x.strip_prefix("codegen-backend=")); let target = parse_target_triple(early_dcx, matches); - let sysroot = filesearch::materialize_sysroot(matches.opt_str("sysroot").map(PathBuf::from)); - let target = config::build_target_config(early_dcx, &target, &sysroot); + let sysroot = Sysroot::new(matches.opt_str("sysroot").map(PathBuf::from)); + let target = config::build_target_config(early_dcx, &target, sysroot.path()); get_codegen_backend(early_dcx, &sysroot, backend_name, &target) } @@ -1413,11 +1418,10 @@ fn report_ice( extra_info: fn(&DiagCtxt), using_internal_features: &AtomicBool, ) { - let fallback_bundle = - rustc_errors::fallback_fluent_bundle(crate::DEFAULT_LOCALE_RESOURCES.to_vec(), false); + let translator = default_translator(); let emitter = Box::new(rustc_errors::emitter::HumanEmitter::new( stderr_destination(rustc_errors::ColorConfig::Auto), - fallback_bundle, + translator, )); let dcx = rustc_errors::DiagCtxt::new(emitter); let dcx = dcx.handle(); diff --git a/compiler/rustc_driver_impl/src/pretty.rs b/compiler/rustc_driver_impl/src/pretty.rs index ec77043cd128..688307a941f3 100644 --- 
a/compiler/rustc_driver_impl/src/pretty.rs +++ b/compiler/rustc_driver_impl/src/pretty.rs @@ -292,7 +292,11 @@ pub fn print<'tcx>(sess: &Session, ppm: PpMode, ex: PrintExtra<'tcx>) { } HirTree => { debug!("pretty printing HIR tree"); - format!("{:#?}", ex.tcx().hir_crate(())) + ex.tcx() + .hir_crate_items(()) + .owners() + .map(|owner| format!("{:#?} => {:#?}\n", owner, ex.tcx().hir_owner_nodes(owner))) + .collect() } Mir => { let mut out = Vec::new(); diff --git a/compiler/rustc_error_codes/src/error_codes/E0722.md b/compiler/rustc_error_codes/src/error_codes/E0722.md index 570717a92bd7..1799458d46cb 100644 --- a/compiler/rustc_error_codes/src/error_codes/E0722.md +++ b/compiler/rustc_error_codes/src/error_codes/E0722.md @@ -1,8 +1,14 @@ +#### Note: this error code is no longer emitted by the compiler + +This is because it was too specific to the `optimize` attribute. +Similar diagnostics occur for other attributes too. +The example here will now emit `E0539` + The `optimize` attribute was malformed. 
Erroneous code example: -```compile_fail,E0722 +```compile_fail,E0539 #![feature(optimize_attribute)] #[optimize(something)] // error: invalid argument diff --git a/compiler/rustc_error_codes/src/error_codes/E0775.md b/compiler/rustc_error_codes/src/error_codes/E0775.md index efbd51e89ea3..9fcd3a6eef74 100644 --- a/compiler/rustc_error_codes/src/error_codes/E0775.md +++ b/compiler/rustc_error_codes/src/error_codes/E0775.md @@ -8,7 +8,7 @@ Erroneous code example: ```ignore (no longer emitted) #![feature(cmse_nonsecure_entry)] -pub extern "C-cmse-nonsecure-entry" fn entry_function() {} +pub extern "cmse-nonsecure-entry" fn entry_function() {} ``` To fix this error, compile your code for a Rust target that supports the diff --git a/compiler/rustc_error_codes/src/error_codes/E0781.md b/compiler/rustc_error_codes/src/error_codes/E0781.md index 7641acfb5249..22abe0e3cb15 100644 --- a/compiler/rustc_error_codes/src/error_codes/E0781.md +++ b/compiler/rustc_error_codes/src/error_codes/E0781.md @@ -1,12 +1,12 @@ -The `C-cmse-nonsecure-call` ABI can only be used with function pointers. +The `cmse-nonsecure-call` ABI can only be used with function pointers. Erroneous code example: ```compile_fail,E0781 -#![feature(abi_c_cmse_nonsecure_call)] +#![feature(abi_cmse_nonsecure_call)] -pub extern "C-cmse-nonsecure-call" fn test() {} +pub extern "cmse-nonsecure-call" fn test() {} ``` -The `C-cmse-nonsecure-call` ABI should be used by casting function pointers to +The `cmse-nonsecure-call` ABI should be used by casting function pointers to specific addresses. 
diff --git a/compiler/rustc_error_codes/src/error_codes/E0798.md b/compiler/rustc_error_codes/src/error_codes/E0798.md index da08cde30100..e5f356ef4d50 100644 --- a/compiler/rustc_error_codes/src/error_codes/E0798.md +++ b/compiler/rustc_error_codes/src/error_codes/E0798.md @@ -1,4 +1,4 @@ -Functions marked as `C-cmse-nonsecure-call` place restrictions on their +Functions marked as `cmse-nonsecure-call` place restrictions on their inputs and outputs. - inputs must fit in the 4 available 32-bit argument registers. Alignment @@ -12,12 +12,12 @@ see [arm's aapcs32](https://github.com/ARM-software/abi-aa/releases). Erroneous code example: -```ignore (only fails on supported targets) -#![feature(abi_c_cmse_nonsecure_call)] +```ignore (host errors will not match for target) +#![feature(abi_cmse_nonsecure_call)] #[no_mangle] pub fn test( - f: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u32, u32) -> u32, + f: extern "cmse-nonsecure-call" fn(u32, u32, u32, u32, u32) -> u32, ) -> u32 { f(1, 2, 3, 4, 5) } @@ -27,12 +27,12 @@ Arguments' alignment is respected. In the example below, padding is inserted so that the `u64` argument is passed in registers r2 and r3. 
There is then no room left for the final `f32` argument -```ignore (only fails on supported targets) -#![feature(abi_c_cmse_nonsecure_call)] +```ignore (host errors will not match for target) +#![feature(abi_cmse_nonsecure_call)] #[no_mangle] pub fn test( - f: extern "C-cmse-nonsecure-call" fn(u32, u64, f32) -> u32, + f: extern "cmse-nonsecure-call" fn(u32, u64, f32) -> u32, ) -> u32 { f(1, 2, 3.0) } diff --git a/compiler/rustc_error_codes/src/lib.rs b/compiler/rustc_error_codes/src/lib.rs index 6f5e4829802e..0aff1c06e0a8 100644 --- a/compiler/rustc_error_codes/src/lib.rs +++ b/compiler/rustc_error_codes/src/lib.rs @@ -686,8 +686,9 @@ E0805: 0805, // E0707, // multiple elided lifetimes used in arguments of `async fn` // E0709, // multiple different lifetimes used in arguments of `async fn` // E0721, // `await` keyword +// E0722, // replaced with a generic attribute input check // E0723, // unstable feature in `const` context // E0738, // Removed; errored on `#[track_caller] fn`s in `extern "Rust" { ... }`. // E0744, // merged into E0728 -// E0776, // Removed; cmse_nonsecure_entry is now `C-cmse-nonsecure-entry` +// E0776, // Removed; `#[cmse_nonsecure_entry]` is now `extern "cmse-nonsecure-entry"` // E0796, // unused error code. We use `static_mut_refs` lint instead. 
diff --git a/compiler/rustc_error_messages/Cargo.toml b/compiler/rustc_error_messages/Cargo.toml index 5dc582b9c3a5..0951859fa531 100644 --- a/compiler/rustc_error_messages/Cargo.toml +++ b/compiler/rustc_error_messages/Cargo.toml @@ -16,7 +16,6 @@ rustc_data_structures = { path = "../rustc_data_structures" } rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } rustc_span = { path = "../rustc_span" } -smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } tracing = "0.1" unic-langid = { version = "0.9.0", features = ["macros"] } # tidy-alphabetical-end diff --git a/compiler/rustc_error_messages/src/lib.rs b/compiler/rustc_error_messages/src/lib.rs index 1d3b5b20751a..4e4345cfe0fd 100644 --- a/compiler/rustc_error_messages/src/lib.rs +++ b/compiler/rustc_error_messages/src/lib.rs @@ -8,7 +8,7 @@ use std::borrow::Cow; use std::error::Error; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::{Arc, LazyLock}; use std::{fmt, fs, io}; @@ -18,10 +18,9 @@ pub use fluent_bundle::{self, FluentArgs, FluentError, FluentValue}; use fluent_syntax::parser::ParserError; use icu_provider_adapters::fallback::{LocaleFallbackProvider, LocaleFallbacker}; use intl_memoizer::concurrent::IntlLangMemoizer; -use rustc_data_structures::sync::IntoDynSyncSend; +use rustc_data_structures::sync::{DynSend, IntoDynSyncSend}; use rustc_macros::{Decodable, Encodable}; use rustc_span::Span; -use smallvec::SmallVec; use tracing::{instrument, trace}; pub use unic_langid::{LanguageIdentifier, langid}; @@ -107,7 +106,7 @@ impl From> for TranslationBundleError { /// (overriding any conflicting messages). 
#[instrument(level = "trace")] pub fn fluent_bundle( - sysroot_candidates: SmallVec<[PathBuf; 2]>, + sysroot_candidates: &[&Path], requested_locale: Option, additional_ftl_path: Option<&Path>, with_directionality_markers: bool, @@ -141,7 +140,8 @@ pub fn fluent_bundle( // If the user requests the default locale then don't try to load anything. if let Some(requested_locale) = requested_locale { let mut found_resources = false; - for mut sysroot in sysroot_candidates { + for sysroot in sysroot_candidates { + let mut sysroot = sysroot.to_path_buf(); sysroot.push("share"); sysroot.push("locale"); sysroot.push(requested_locale.to_string()); @@ -204,16 +204,16 @@ fn register_functions(bundle: &mut FluentBundle) { /// Type alias for the result of `fallback_fluent_bundle` - a reference-counted pointer to a lazily /// evaluated fluent bundle. -pub type LazyFallbackBundle = Arc FluentBundle>>; +pub type LazyFallbackBundle = + Arc FluentBundle + DynSend>>>; /// Return the default `FluentBundle` with standard "en-US" diagnostic messages. #[instrument(level = "trace", skip(resources))] -#[define_opaque(LazyFallbackBundle)] pub fn fallback_fluent_bundle( resources: Vec<&'static str>, with_directionality_markers: bool, ) -> LazyFallbackBundle { - Arc::new(LazyLock::new(move || { + Arc::new(LazyLock::new(Box::new(move || { let mut fallback_bundle = new_bundle(vec![langid!("en-US")]); register_functions(&mut fallback_bundle); @@ -228,7 +228,7 @@ pub fn fallback_fluent_bundle( } fallback_bundle - })) + }))) } /// Identifier for the Fluent message/attribute corresponding to a diagnostic message. 
diff --git a/compiler/rustc_errors/src/annotate_snippet_emitter_writer.rs b/compiler/rustc_errors/src/annotate_snippet_emitter_writer.rs index f3aeb8d224b9..2eb3c23259ff 100644 --- a/compiler/rustc_errors/src/annotate_snippet_emitter_writer.rs +++ b/compiler/rustc_errors/src/annotate_snippet_emitter_writer.rs @@ -15,17 +15,15 @@ use rustc_span::source_map::SourceMap; use crate::emitter::FileWithAnnotatedLines; use crate::registry::Registry; use crate::snippet::Line; -use crate::translation::{Translate, to_fluent_args}; +use crate::translation::{Translator, to_fluent_args}; use crate::{ - CodeSuggestion, DiagInner, DiagMessage, Emitter, ErrCode, FluentBundle, LazyFallbackBundle, - Level, MultiSpan, Style, Subdiag, + CodeSuggestion, DiagInner, DiagMessage, Emitter, ErrCode, Level, MultiSpan, Style, Subdiag, }; /// Generates diagnostics using annotate-snippet pub struct AnnotateSnippetEmitter { source_map: Option>, - fluent_bundle: Option>, - fallback_bundle: LazyFallbackBundle, + translator: Translator, /// If true, hides the longer explanation text short_message: bool, @@ -35,16 +33,6 @@ pub struct AnnotateSnippetEmitter { macro_backtrace: bool, } -impl Translate for AnnotateSnippetEmitter { - fn fluent_bundle(&self) -> Option<&FluentBundle> { - self.fluent_bundle.as_deref() - } - - fn fallback_fluent_bundle(&self) -> &FluentBundle { - &self.fallback_bundle - } -} - impl Emitter for AnnotateSnippetEmitter { /// The entry point for the diagnostics generation fn emit_diagnostic(&mut self, mut diag: DiagInner, _registry: &Registry) { @@ -78,6 +66,10 @@ impl Emitter for AnnotateSnippetEmitter { fn should_show_explain(&self) -> bool { !self.short_message } + + fn translator(&self) -> &Translator { + &self.translator + } } /// Provides the source string for the given `line` of `file` @@ -104,19 +96,11 @@ fn annotation_level_for_level(level: Level) -> annotate_snippets::Level { impl AnnotateSnippetEmitter { pub fn new( source_map: Option>, - fluent_bundle: Option>, - 
fallback_bundle: LazyFallbackBundle, + translator: Translator, short_message: bool, macro_backtrace: bool, ) -> Self { - Self { - source_map, - fluent_bundle, - fallback_bundle, - short_message, - ui_testing: false, - macro_backtrace, - } + Self { source_map, translator, short_message, ui_testing: false, macro_backtrace } } /// Allows to modify `Self` to enable or disable the `ui_testing` flag. @@ -137,7 +121,7 @@ impl AnnotateSnippetEmitter { _children: &[Subdiag], _suggestions: &[CodeSuggestion], ) { - let message = self.translate_messages(messages, args); + let message = self.translator.translate_messages(messages, args); if let Some(source_map) = &self.source_map { // Make sure our primary file comes first let primary_lo = if let Some(primary_span) = msp.primary_span().as_ref() { diff --git a/compiler/rustc_errors/src/diagnostic.rs b/compiler/rustc_errors/src/diagnostic.rs index a11f81b55bb8..8da7cdd93588 100644 --- a/compiler/rustc_errors/src/diagnostic.rs +++ b/compiler/rustc_errors/src/diagnostic.rs @@ -289,6 +289,9 @@ pub struct DiagInner { pub suggestions: Suggestions, pub args: DiagArgMap, + // This is used to store args and restore them after a subdiagnostic is rendered. + pub reserved_args: DiagArgMap, + /// This is not used for highlighting or rendering any error message. Rather, it can be used /// as a sort key to sort a buffer of diagnostics. By default, it is the primary span of /// `span` if there is one. Otherwise, it is `DUMMY_SP`. 
@@ -319,6 +322,7 @@ impl DiagInner { children: vec![], suggestions: Suggestions::Enabled(vec![]), args: Default::default(), + reserved_args: Default::default(), sort_span: DUMMY_SP, is_lint: None, long_ty_path: None, @@ -390,7 +394,27 @@ impl DiagInner { } pub(crate) fn arg(&mut self, name: impl Into, arg: impl IntoDiagArg) { - self.args.insert(name.into(), arg.into_diag_arg(&mut self.long_ty_path)); + let name = name.into(); + let value = arg.into_diag_arg(&mut self.long_ty_path); + // This assertion is to avoid subdiagnostics overwriting an existing diagnostic arg. + debug_assert!( + !self.args.contains_key(&name) || self.args.get(&name) == Some(&value), + "arg {} already exists", + name + ); + self.args.insert(name, value); + } + + pub fn remove_arg(&mut self, name: &str) { + self.args.swap_remove(name); + } + + pub fn store_args(&mut self) { + self.reserved_args = self.args.clone(); + } + + pub fn restore_args(&mut self) { + self.args = std::mem::take(&mut self.reserved_args); } /// Fields used for Hash, and PartialEq trait. @@ -1423,6 +1447,12 @@ impl<'a, G: EmissionGuarantee> Diag<'a, G> { self.downgrade_to_delayed_bug(); self.emit() } + + pub fn remove_arg(&mut self, name: &str) { + if let Some(diag) = self.diag.as_mut() { + diag.remove_arg(name); + } + } } /// Destructor bomb: every `Diag` must be consumed (emitted, cancelled, etc.) 
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 6ab6f96079eb..e333de4b660b 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -35,10 +35,10 @@ use crate::snippet::{ }; use crate::styled_buffer::StyledBuffer; use crate::timings::TimingRecord; -use crate::translation::{Translate, to_fluent_args}; +use crate::translation::{Translator, to_fluent_args}; use crate::{ - CodeSuggestion, DiagInner, DiagMessage, ErrCode, FluentBundle, LazyFallbackBundle, Level, - MultiSpan, Subdiag, SubstitutionHighlight, SuggestionStyle, TerminalUrl, + CodeSuggestion, DiagInner, DiagMessage, ErrCode, Level, MultiSpan, Subdiag, + SubstitutionHighlight, SuggestionStyle, TerminalUrl, }; /// Default column width, used in tests and when terminal dimensions cannot be determined. @@ -175,7 +175,7 @@ const ANONYMIZED_LINE_NUM: &str = "LL"; pub type DynEmitter = dyn Emitter + DynSend; /// Emitter trait for emitting errors and other structured information. -pub trait Emitter: Translate { +pub trait Emitter { /// Emit a structured diagnostic. fn emit_diagnostic(&mut self, diag: DiagInner, registry: &Registry); @@ -212,6 +212,8 @@ pub trait Emitter: Translate { fn source_map(&self) -> Option<&SourceMap>; + fn translator(&self) -> &Translator; + /// Formats the substitutions of the primary_span /// /// There are a lot of conditions to this method, but in short: @@ -224,13 +226,17 @@ pub trait Emitter: Translate { /// * If the current `DiagInner` has multiple suggestions, /// we leave `primary_span` and the suggestions untouched. 
fn primary_span_formatted( - &mut self, + &self, primary_span: &mut MultiSpan, suggestions: &mut Vec, fluent_args: &FluentArgs<'_>, ) { if let Some((sugg, rest)) = suggestions.split_first() { - let msg = self.translate_message(&sugg.msg, fluent_args).map_err(Report::new).unwrap(); + let msg = self + .translator() + .translate_message(&sugg.msg, fluent_args) + .map_err(Report::new) + .unwrap(); if rest.is_empty() // ^ if there is only one suggestion // don't display multi-suggestions as labels @@ -491,16 +497,6 @@ pub trait Emitter: Translate { } } -impl Translate for HumanEmitter { - fn fluent_bundle(&self) -> Option<&FluentBundle> { - self.fluent_bundle.as_deref() - } - - fn fallback_fluent_bundle(&self) -> &FluentBundle { - &self.fallback_bundle - } -} - impl Emitter for HumanEmitter { fn source_map(&self) -> Option<&SourceMap> { self.sm.as_deref() @@ -538,25 +534,41 @@ impl Emitter for HumanEmitter { fn supports_color(&self) -> bool { self.dst.supports_color() } + + fn translator(&self) -> &Translator { + &self.translator + } } /// An emitter that does nothing when emitting a non-fatal diagnostic. /// Fatal diagnostics are forwarded to `fatal_emitter` to avoid silent /// failures of rustc, as witnessed e.g. in issue #89358. 
-pub struct SilentEmitter { +pub struct FatalOnlyEmitter { pub fatal_emitter: Box, pub fatal_note: Option, - pub emit_fatal_diagnostic: bool, } -impl Translate for SilentEmitter { - fn fluent_bundle(&self) -> Option<&FluentBundle> { +impl Emitter for FatalOnlyEmitter { + fn source_map(&self) -> Option<&SourceMap> { None } - fn fallback_fluent_bundle(&self) -> &FluentBundle { - self.fatal_emitter.fallback_fluent_bundle() + fn emit_diagnostic(&mut self, mut diag: DiagInner, registry: &Registry) { + if diag.level == Level::Fatal { + if let Some(fatal_note) = &self.fatal_note { + diag.sub(Level::Note, fatal_note.clone(), MultiSpan::new()); + } + self.fatal_emitter.emit_diagnostic(diag, registry); + } } + + fn translator(&self) -> &Translator { + self.fatal_emitter.translator() + } +} + +pub struct SilentEmitter { + pub translator: Translator, } impl Emitter for SilentEmitter { @@ -564,13 +576,10 @@ impl Emitter for SilentEmitter { None } - fn emit_diagnostic(&mut self, mut diag: DiagInner, registry: &Registry) { - if self.emit_fatal_diagnostic && diag.level == Level::Fatal { - if let Some(fatal_note) = &self.fatal_note { - diag.sub(Level::Note, fatal_note.clone(), MultiSpan::new()); - } - self.fatal_emitter.emit_diagnostic(diag, registry); - } + fn emit_diagnostic(&mut self, _diag: DiagInner, _registry: &Registry) {} + + fn translator(&self) -> &Translator { + &self.translator } } @@ -615,9 +624,8 @@ pub struct HumanEmitter { #[setters(skip)] dst: IntoDynSyncSend, sm: Option>, - fluent_bundle: Option>, #[setters(skip)] - fallback_bundle: LazyFallbackBundle, + translator: Translator, short_message: bool, ui_testing: bool, ignored_directories_in_source_blocks: Vec, @@ -637,12 +645,11 @@ pub(crate) struct FileWithAnnotatedLines { } impl HumanEmitter { - pub fn new(dst: Destination, fallback_bundle: LazyFallbackBundle) -> HumanEmitter { + pub fn new(dst: Destination, translator: Translator) -> HumanEmitter { HumanEmitter { dst: IntoDynSyncSend(dst), sm: None, - 
fluent_bundle: None, - fallback_bundle, + translator, short_message: false, ui_testing: false, ignored_directories_in_source_blocks: Vec::new(), @@ -1433,7 +1440,7 @@ impl HumanEmitter { // very *weird* formats // see? for (text, style) in msgs.iter() { - let text = self.translate_message(text, args).map_err(Report::new).unwrap(); + let text = self.translator.translate_message(text, args).map_err(Report::new).unwrap(); let text = &normalize_whitespace(&text); let lines = text.split('\n').collect::>(); if lines.len() > 1 { @@ -1528,7 +1535,8 @@ impl HumanEmitter { } let mut line = 0; for (text, style) in msgs.iter() { - let text = self.translate_message(text, args).map_err(Report::new).unwrap(); + let text = + self.translator.translate_message(text, args).map_err(Report::new).unwrap(); // Account for newlines to align output to its label. for text in normalize_whitespace(&text).lines() { buffer.append( @@ -1560,7 +1568,7 @@ impl HumanEmitter { .into_iter() .filter_map(|label| match label.label { Some(msg) if label.is_primary => { - let text = self.translate_message(&msg, args).ok()?; + let text = self.translator.translate_message(&msg, args).ok()?; if !text.trim().is_empty() { Some(text.to_string()) } else { None } } _ => None, @@ -3104,7 +3112,11 @@ impl FileWithAnnotatedLines { let label = label.as_ref().map(|m| { normalize_whitespace( - &emitter.translate_message(m, args).map_err(Report::new).unwrap(), + &emitter + .translator() + .translate_message(m, args) + .map_err(Report::new) + .unwrap(), ) }); diff --git a/compiler/rustc_errors/src/json.rs b/compiler/rustc_errors/src/json.rs index d67e2ba2d60e..4348610be0af 100644 --- a/compiler/rustc_errors/src/json.rs +++ b/compiler/rustc_errors/src/json.rs @@ -32,11 +32,8 @@ use crate::emitter::{ }; use crate::registry::Registry; use crate::timings::{TimingRecord, TimingSection}; -use crate::translation::{Translate, to_fluent_args}; -use crate::{ - CodeSuggestion, FluentBundle, LazyFallbackBundle, MultiSpan, SpanLabel, 
Subdiag, Suggestions, - TerminalUrl, -}; +use crate::translation::{Translator, to_fluent_args}; +use crate::{CodeSuggestion, MultiSpan, SpanLabel, Subdiag, Suggestions, TerminalUrl}; #[cfg(test)] mod tests; @@ -47,9 +44,8 @@ pub struct JsonEmitter { dst: IntoDynSyncSend>, #[setters(skip)] sm: Option>, - fluent_bundle: Option>, #[setters(skip)] - fallback_bundle: LazyFallbackBundle, + translator: Translator, #[setters(skip)] pretty: bool, ui_testing: bool, @@ -67,7 +63,7 @@ impl JsonEmitter { pub fn new( dst: Box, sm: Option>, - fallback_bundle: LazyFallbackBundle, + translator: Translator, pretty: bool, json_rendered: HumanReadableErrorType, color_config: ColorConfig, @@ -75,8 +71,7 @@ impl JsonEmitter { JsonEmitter { dst: IntoDynSyncSend(dst), sm, - fluent_bundle: None, - fallback_bundle, + translator, pretty, ui_testing: false, ignored_directories_in_source_blocks: Vec::new(), @@ -110,16 +105,6 @@ enum EmitTyped<'a> { UnusedExtern(UnusedExterns<'a>), } -impl Translate for JsonEmitter { - fn fluent_bundle(&self) -> Option<&FluentBundle> { - self.fluent_bundle.as_deref() - } - - fn fallback_fluent_bundle(&self) -> &FluentBundle { - &self.fallback_bundle - } -} - impl Emitter for JsonEmitter { fn emit_diagnostic(&mut self, diag: crate::DiagInner, registry: &Registry) { let data = Diagnostic::from_errors_diagnostic(diag, self, registry); @@ -144,6 +129,7 @@ impl Emitter for JsonEmitter { }; let name = match record.section { TimingSection::Linking => "link", + TimingSection::Codegen => "codegen", }; let data = SectionTimestamp { name, event, timestamp: record.timestamp }; let result = self.emit(EmitTyped::SectionTiming(data)); @@ -194,6 +180,10 @@ impl Emitter for JsonEmitter { fn should_show_explain(&self) -> bool { !self.json_rendered.short() } + + fn translator(&self) -> &Translator { + &self.translator + } } // The following data types are provided just for serialisation. 
@@ -324,7 +314,7 @@ impl Diagnostic { let args = to_fluent_args(diag.args.iter()); let sugg_to_diag = |sugg: &CodeSuggestion| { let translated_message = - je.translate_message(&sugg.msg, &args).map_err(Report::new).unwrap(); + je.translator.translate_message(&sugg.msg, &args).map_err(Report::new).unwrap(); Diagnostic { message: translated_message.to_string(), code: None, @@ -368,7 +358,7 @@ impl Diagnostic { } } - let translated_message = je.translate_messages(&diag.messages, &args); + let translated_message = je.translator.translate_messages(&diag.messages, &args); let code = if let Some(code) = diag.code { Some(DiagnosticCode { @@ -396,10 +386,9 @@ impl Diagnostic { ColorConfig::Always | ColorConfig::Auto => dst = Box::new(termcolor::Ansi::new(dst)), ColorConfig::Never => {} } - HumanEmitter::new(dst, Arc::clone(&je.fallback_bundle)) + HumanEmitter::new(dst, je.translator.clone()) .short_message(short) .sm(je.sm.clone()) - .fluent_bundle(je.fluent_bundle.clone()) .diagnostic_width(je.diagnostic_width) .macro_backtrace(je.macro_backtrace) .track_diagnostics(je.track_diagnostics) @@ -430,7 +419,7 @@ impl Diagnostic { args: &FluentArgs<'_>, je: &JsonEmitter, ) -> Diagnostic { - let translated_message = je.translate_messages(&subdiag.messages, args); + let translated_message = je.translator.translate_messages(&subdiag.messages, args); Diagnostic { message: translated_message.to_string(), code: None, @@ -454,7 +443,7 @@ impl DiagnosticSpan { span.is_primary, span.label .as_ref() - .map(|m| je.translate_message(m, args).unwrap()) + .map(|m| je.translator.translate_message(m, args).unwrap()) .map(|m| m.to_string()), suggestion, je, diff --git a/compiler/rustc_errors/src/json/tests.rs b/compiler/rustc_errors/src/json/tests.rs index 40973e8e5d8a..8cf81f467d84 100644 --- a/compiler/rustc_errors/src/json/tests.rs +++ b/compiler/rustc_errors/src/json/tests.rs @@ -41,14 +41,14 @@ fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) { 
rustc_span::create_default_session_globals_then(|| { let sm = Arc::new(SourceMap::new(FilePathMapping::empty())); sm.new_source_file(Path::new("test.rs").to_owned().into(), code.to_owned()); - let fallback_bundle = - crate::fallback_fluent_bundle(vec![crate::DEFAULT_LOCALE_RESOURCE], false); + let translator = + Translator::with_fallback_bundle(vec![crate::DEFAULT_LOCALE_RESOURCE], false); let output = Arc::new(Mutex::new(Vec::new())); let je = JsonEmitter::new( Box::new(Shared { data: output.clone() }), Some(sm), - fallback_bundle, + translator, true, // pretty HumanReadableErrorType::Short, ColorConfig::Never, diff --git a/compiler/rustc_errors/src/lib.rs b/compiler/rustc_errors/src/lib.rs index 0bd259366def..207aed8c7554 100644 --- a/compiler/rustc_errors/src/lib.rs +++ b/compiler/rustc_errors/src/lib.rs @@ -748,40 +748,10 @@ impl DiagCtxt { Self { inner: Lock::new(DiagCtxtInner::new(emitter)) } } - pub fn make_silent(&self, fatal_note: Option, emit_fatal_diagnostic: bool) { - // An empty type that implements `Emitter` to temporarily swap in place of the real one, - // which will be used in constructing its replacement. 
- struct FalseEmitter; - - impl Emitter for FalseEmitter { - fn emit_diagnostic(&mut self, _: DiagInner, _: &Registry) { - unimplemented!("false emitter must only used during `make_silent`") - } - - fn source_map(&self) -> Option<&SourceMap> { - unimplemented!("false emitter must only used during `make_silent`") - } - } - - impl translation::Translate for FalseEmitter { - fn fluent_bundle(&self) -> Option<&FluentBundle> { - unimplemented!("false emitter must only used during `make_silent`") - } - - fn fallback_fluent_bundle(&self) -> &FluentBundle { - unimplemented!("false emitter must only used during `make_silent`") - } - } - + pub fn make_silent(&self) { let mut inner = self.inner.borrow_mut(); - let mut prev_emitter = Box::new(FalseEmitter) as Box; - std::mem::swap(&mut inner.emitter, &mut prev_emitter); - let new_emitter = Box::new(emitter::SilentEmitter { - fatal_emitter: prev_emitter, - fatal_note, - emit_fatal_diagnostic, - }); - inner.emitter = new_emitter; + let translator = inner.emitter.translator().clone(); + inner.emitter = Box::new(emitter::SilentEmitter { translator }); } pub fn set_emitter(&self, emitter: Box) { @@ -1771,7 +1741,12 @@ impl DiagCtxtInner { args: impl Iterator>, ) -> String { let args = crate::translation::to_fluent_args(args); - self.emitter.translate_message(&message, &args).map_err(Report::new).unwrap().to_string() + self.emitter + .translator() + .translate_message(&message, &args) + .map_err(Report::new) + .unwrap() + .to_string() } fn eagerly_translate_for_subdiag( diff --git a/compiler/rustc_errors/src/tests.rs b/compiler/rustc_errors/src/tests.rs index 376fd24d57ba..34ebac0fde11 100644 --- a/compiler/rustc_errors/src/tests.rs +++ b/compiler/rustc_errors/src/tests.rs @@ -1,3 +1,5 @@ +use std::sync::{Arc, LazyLock}; + use rustc_data_structures::sync::IntoDynSyncSend; use rustc_error_messages::fluent_bundle::resolver::errors::{ReferenceKind, ResolverError}; use rustc_error_messages::{DiagMessage, langid}; @@ -5,23 +7,9 @@ use 
rustc_error_messages::{DiagMessage, langid}; use crate::FluentBundle; use crate::error::{TranslateError, TranslateErrorKind}; use crate::fluent_bundle::*; -use crate::translation::Translate; +use crate::translation::Translator; -struct Dummy { - bundle: FluentBundle, -} - -impl Translate for Dummy { - fn fluent_bundle(&self) -> Option<&FluentBundle> { - None - } - - fn fallback_fluent_bundle(&self) -> &FluentBundle { - &self.bundle - } -} - -fn make_dummy(ftl: &'static str) -> Dummy { +fn make_translator(ftl: &'static str) -> Translator { let resource = FluentResource::try_new(ftl.into()).expect("Failed to parse an FTL string."); let langid_en = langid!("en-US"); @@ -33,12 +21,15 @@ fn make_dummy(ftl: &'static str) -> Dummy { bundle.add_resource(resource).expect("Failed to add FTL resources to the bundle."); - Dummy { bundle } + Translator { + fluent_bundle: None, + fallback_fluent_bundle: Arc::new(LazyLock::new(Box::new(|| bundle))), + } } #[test] fn wellformed_fluent() { - let dummy = make_dummy("mir_build_borrow_of_moved_value = borrow of moved value + let translator = make_translator("mir_build_borrow_of_moved_value = borrow of moved value .label = value moved into `{$name}` here .occurs_because_label = move occurs because `{$name}` has type `{$ty}` which does not implement the `Copy` trait .value_borrowed_label = value borrowed here after move @@ -54,7 +45,7 @@ fn wellformed_fluent() { ); assert_eq!( - dummy.translate_message(&message, &args).unwrap(), + translator.translate_message(&message, &args).unwrap(), "borrow this binding in the pattern to avoid moving the value" ); } @@ -66,7 +57,7 @@ fn wellformed_fluent() { ); assert_eq!( - dummy.translate_message(&message, &args).unwrap(), + translator.translate_message(&message, &args).unwrap(), "value borrowed here after move" ); } @@ -78,7 +69,7 @@ fn wellformed_fluent() { ); assert_eq!( - dummy.translate_message(&message, &args).unwrap(), + translator.translate_message(&message, &args).unwrap(), "move occurs 
because `\u{2068}Foo\u{2069}` has type `\u{2068}std::string::String\u{2069}` which does not implement the `Copy` trait" ); @@ -89,7 +80,7 @@ fn wellformed_fluent() { ); assert_eq!( - dummy.translate_message(&message, &args).unwrap(), + translator.translate_message(&message, &args).unwrap(), "value moved into `\u{2068}Foo\u{2069}` here" ); } @@ -98,7 +89,7 @@ fn wellformed_fluent() { #[test] fn misformed_fluent() { - let dummy = make_dummy("mir_build_borrow_of_moved_value = borrow of moved value + let translator = make_translator("mir_build_borrow_of_moved_value = borrow of moved value .label = value moved into `{name}` here .occurs_because_label = move occurs because `{$oops}` has type `{$ty}` which does not implement the `Copy` trait .suggestion = borrow this binding in the pattern to avoid moving the value"); @@ -112,7 +103,7 @@ fn misformed_fluent() { Some("value_borrowed_label".into()), ); - let err = dummy.translate_message(&message, &args).unwrap_err(); + let err = translator.translate_message(&message, &args).unwrap_err(); assert!( matches!( &err, @@ -141,7 +132,7 @@ fn misformed_fluent() { Some("label".into()), ); - let err = dummy.translate_message(&message, &args).unwrap_err(); + let err = translator.translate_message(&message, &args).unwrap_err(); if let TranslateError::Two { primary: box TranslateError::One { kind: TranslateErrorKind::PrimaryBundleMissing, .. }, fallback: box TranslateError::One { kind: TranslateErrorKind::Fluent { errs }, .. }, @@ -168,7 +159,7 @@ fn misformed_fluent() { Some("occurs_because_label".into()), ); - let err = dummy.translate_message(&message, &args).unwrap_err(); + let err = translator.translate_message(&message, &args).unwrap_err(); if let TranslateError::Two { primary: box TranslateError::One { kind: TranslateErrorKind::PrimaryBundleMissing, .. }, fallback: box TranslateError::One { kind: TranslateErrorKind::Fluent { errs }, .. 
}, diff --git a/compiler/rustc_errors/src/timings.rs b/compiler/rustc_errors/src/timings.rs index 27fc9df8d796..0d82f3e8db8b 100644 --- a/compiler/rustc_errors/src/timings.rs +++ b/compiler/rustc_errors/src/timings.rs @@ -1,10 +1,15 @@ use std::time::Instant; +use rustc_data_structures::fx::FxHashSet; +use rustc_data_structures::sync::Lock; + use crate::DiagCtxtHandle; /// A high-level section of the compilation process. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum TimingSection { + /// Time spent doing codegen. + Codegen, /// Time spent linking. Linking, } @@ -36,23 +41,59 @@ pub struct TimingSectionHandler { /// Time when the compilation session started. /// If `None`, timing is disabled. origin: Option, + /// Sanity check to ensure that we open and close sections correctly. + opened_sections: Lock>, } impl TimingSectionHandler { pub fn new(enabled: bool) -> Self { let origin = if enabled { Some(Instant::now()) } else { None }; - Self { origin } + Self { origin, opened_sections: Lock::new(FxHashSet::default()) } } /// Returns a RAII guard that will immediately emit a start the provided section, and then emit /// its end when it is dropped. - pub fn start_section<'a>( + pub fn section_guard<'a>( &self, diag_ctxt: DiagCtxtHandle<'a>, section: TimingSection, ) -> TimingSectionGuard<'a> { + if self.is_enabled() && self.opened_sections.borrow().contains(§ion) { + diag_ctxt + .bug(format!("Section `{section:?}` was started again before it was finished")); + } + TimingSectionGuard::create(diag_ctxt, section, self.origin) } + + /// Start the provided section. 
+ pub fn start_section(&self, diag_ctxt: DiagCtxtHandle<'_>, section: TimingSection) { + if let Some(origin) = self.origin { + let mut opened = self.opened_sections.borrow_mut(); + if !opened.insert(section) { + diag_ctxt + .bug(format!("Section `{section:?}` was started again before it was finished")); + } + + diag_ctxt.emit_timing_section_start(TimingRecord::from_origin(origin, section)); + } + } + + /// End the provided section. + pub fn end_section(&self, diag_ctxt: DiagCtxtHandle<'_>, section: TimingSection) { + if let Some(origin) = self.origin { + let mut opened = self.opened_sections.borrow_mut(); + if !opened.remove(§ion) { + diag_ctxt.bug(format!("Section `{section:?}` was ended before being started")); + } + + diag_ctxt.emit_timing_section_end(TimingRecord::from_origin(origin, section)); + } + } + + fn is_enabled(&self) -> bool { + self.origin.is_some() + } } /// RAII wrapper for starting and ending section timings. diff --git a/compiler/rustc_errors/src/translation.rs b/compiler/rustc_errors/src/translation.rs index 156f5e5d26e6..c0bcec093c7e 100644 --- a/compiler/rustc_errors/src/translation.rs +++ b/compiler/rustc_errors/src/translation.rs @@ -1,8 +1,9 @@ use std::borrow::Cow; use std::env; use std::error::Report; +use std::sync::Arc; -pub use rustc_error_messages::FluentArgs; +pub use rustc_error_messages::{FluentArgs, LazyFallbackBundle}; use tracing::{debug, trace}; use crate::error::{TranslateError, TranslateErrorKind}; @@ -28,19 +29,33 @@ pub fn to_fluent_args<'iter>(iter: impl Iterator>) -> Flue args } -pub trait Translate { - /// Return `FluentBundle` with localized diagnostics for the locale requested by the user. If no - /// language was requested by the user then this will be `None` and `fallback_fluent_bundle` - /// should be used. - fn fluent_bundle(&self) -> Option<&FluentBundle>; - +#[derive(Clone)] +pub struct Translator { + /// Localized diagnostics for the locale requested by the user. 
If no language was requested by + /// the user then this will be `None` and `fallback_fluent_bundle` should be used. + pub fluent_bundle: Option>, /// Return `FluentBundle` with localized diagnostics for the default locale of the compiler. /// Used when the user has not requested a specific language or when a localized diagnostic is /// unavailable for the requested locale. - fn fallback_fluent_bundle(&self) -> &FluentBundle; + pub fallback_fluent_bundle: LazyFallbackBundle, +} + +impl Translator { + pub fn with_fallback_bundle( + resources: Vec<&'static str>, + with_directionality_markers: bool, + ) -> Translator { + Translator { + fluent_bundle: None, + fallback_fluent_bundle: crate::fallback_fluent_bundle( + resources, + with_directionality_markers, + ), + } + } /// Convert `DiagMessage`s to a string, performing translation if necessary. - fn translate_messages( + pub fn translate_messages( &self, messages: &[(DiagMessage, Style)], args: &FluentArgs<'_>, @@ -54,7 +69,7 @@ pub trait Translate { } /// Convert a `DiagMessage` to a string, performing translation if necessary. - fn translate_message<'a>( + pub fn translate_message<'a>( &'a self, message: &'a DiagMessage, args: &'a FluentArgs<'_>, @@ -91,7 +106,7 @@ pub trait Translate { }; try { - match self.fluent_bundle().map(|b| translate_with_bundle(b)) { + match self.fluent_bundle.as_ref().map(|b| translate_with_bundle(b)) { // The primary bundle was present and translation succeeded Some(Ok(t)) => t, @@ -102,7 +117,7 @@ pub trait Translate { primary @ TranslateError::One { kind: TranslateErrorKind::MessageMissing, .. }, - )) => translate_with_bundle(self.fallback_fluent_bundle()) + )) => translate_with_bundle(&self.fallback_fluent_bundle) .map_err(|fallback| primary.and(fallback))?, // Always yeet out for errors on debug (unless @@ -118,11 +133,11 @@ pub trait Translate { // ..otherwise, for end users, an error about this wouldn't be useful or actionable, so // just hide it and try with the fallback bundle. 
- Some(Err(primary)) => translate_with_bundle(self.fallback_fluent_bundle()) + Some(Err(primary)) => translate_with_bundle(&self.fallback_fluent_bundle) .map_err(|fallback| primary.and(fallback))?, // The primary bundle is missing, proceed to the fallback bundle - None => translate_with_bundle(self.fallback_fluent_bundle()) + None => translate_with_bundle(&self.fallback_fluent_bundle) .map_err(|fallback| TranslateError::primary(identifier, args).and(fallback))?, } } diff --git a/compiler/rustc_expand/messages.ftl b/compiler/rustc_expand/messages.ftl index 8b7c47dad991..b7555bba28ec 100644 --- a/compiler/rustc_expand/messages.ftl +++ b/compiler/rustc_expand/messages.ftl @@ -62,7 +62,7 @@ expand_feature_not_allowed = expand_feature_removed = feature has been removed .label = feature has been removed - .note = removed in {$removed_rustc_version} (you are using {$current_rustc_version}){$pull_note} + .note = removed in {$removed_rustc_version}{$pull_note} .reason = {$reason} expand_glob_delegation_outside_impls = diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index 7a29f8c9fbde..fe76d9e0b64e 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1118,6 +1118,10 @@ pub trait ResolverExpand { trait_def_id: DefId, impl_def_id: LocalDefId, ) -> Result)>, Indeterminate>; + + /// Record the name of an opaque `Ty::ImplTrait` pre-expansion so that it can be used + /// to generate an item name later that does not reference placeholder macros. 
+ fn insert_impl_trait_name(&mut self, id: NodeId, name: Symbol); } pub trait LintStoreExpand { diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 9a359e9b0310..170ac39d1ec3 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -92,7 +92,6 @@ pub fn features(sess: &Session, krate_attrs: &[Attribute], crate_name: Symbol) - span: mi.span(), reason: f.reason.map(|reason| FeatureRemovedReason { reason }), removed_rustc_version: f.feature.since, - current_rustc_version: sess.cfg_version, pull_note, }); continue; diff --git a/compiler/rustc_expand/src/errors.rs b/compiler/rustc_expand/src/errors.rs index b697f2049bd8..fe4d2af97a0d 100644 --- a/compiler/rustc_expand/src/errors.rs +++ b/compiler/rustc_expand/src/errors.rs @@ -162,7 +162,6 @@ pub(crate) struct FeatureRemoved<'a> { #[subdiagnostic] pub reason: Option>, pub removed_rustc_version: &'a str, - pub current_rustc_version: &'a str, pub pull_note: String, } @@ -183,12 +182,12 @@ pub(crate) struct FeatureNotAllowed { #[derive(Diagnostic)] #[diag(expand_recursion_limit_reached)] #[help] -pub(crate) struct RecursionLimitReached<'a> { +pub(crate) struct RecursionLimitReached { #[primary_span] pub span: Span, pub descr: String, pub suggested_limit: Limit, - pub crate_name: &'a str, + pub crate_name: Symbol, } #[derive(Diagnostic)] diff --git a/compiler/rustc_expand/src/expand.rs b/compiler/rustc_expand/src/expand.rs index 9fd524ef45cd..2de09aa1a280 100644 --- a/compiler/rustc_expand/src/expand.rs +++ b/compiler/rustc_expand/src/expand.rs @@ -26,7 +26,7 @@ use rustc_session::lint::builtin::{UNUSED_ATTRIBUTES, UNUSED_DOC_COMMENTS}; use rustc_session::parse::feature_err; use rustc_session::{Limit, Session}; use rustc_span::hygiene::SyntaxContext; -use rustc_span::{ErrorGuaranteed, FileName, Ident, LocalExpnId, Span, sym}; +use rustc_span::{ErrorGuaranteed, FileName, Ident, LocalExpnId, Span, Symbol, sym}; use smallvec::SmallVec; use 
crate::base::*; @@ -86,7 +86,7 @@ macro_rules! ast_fragments { } } - fn make_from<'a>(self, result: Box) -> Option { + fn make_from(self, result: Box) -> Option { match self { AstFragmentKind::OptExpr => result.make_expr().map(Some).map(AstFragment::OptExpr), @@ -136,7 +136,7 @@ macro_rules! ast_fragments { T::fragment_to_output(self) } - pub(crate) fn mut_visit_with(&mut self, vis: &mut F) { + pub(crate) fn mut_visit_with(&mut self, vis: &mut impl MutVisitor) { match self { AstFragment::OptExpr(opt_expr) => { if let Some(expr) = opt_expr.take() { @@ -316,9 +316,9 @@ impl AstFragmentKind { } } - pub(crate) fn expect_from_annotatables>( + pub(crate) fn expect_from_annotatables( self, - items: I, + items: impl IntoIterator, ) -> AstFragment { let mut items = items.into_iter(); match self { @@ -473,7 +473,7 @@ impl<'a, 'b> MacroExpander<'a, 'b> { let dir_path = file_path.parent().unwrap_or(&file_path).to_owned(); self.cx.root_path = dir_path.clone(); self.cx.current_expansion.module = Rc::new(ModuleData { - mod_path: vec![Ident::from_str(&self.cx.ecfg.crate_name)], + mod_path: vec![Ident::with_dummy_span(self.cx.ecfg.crate_name)], file_path_stack: vec![file_path], dir_path, }); @@ -689,7 +689,7 @@ impl<'a, 'b> MacroExpander<'a, 'b> { span: expn_data.call_site, descr: expn_data.kind.descr(), suggested_limit, - crate_name: &self.cx.ecfg.crate_name, + crate_name: self.cx.ecfg.crate_name, }); self.cx.trace_macros_diag(); @@ -1218,10 +1218,10 @@ trait InvocationCollectorNode: HasAttrs + HasNodeId + Sized { fn descr() -> &'static str { unreachable!() } - fn walk_flat_map(self, _visitor: &mut V) -> Self::OutputTy { + fn walk_flat_map(self, _collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { unreachable!() } - fn walk(&mut self, _visitor: &mut V) { + fn walk(&mut self, _collector: &mut InvocationCollector<'_, '_>) { unreachable!() } fn is_mac_call(&self) -> bool { @@ -1276,8 +1276,8 @@ impl InvocationCollectorNode for P { fn fragment_to_output(fragment: 
AstFragment) -> Self::OutputTy { fragment.make_items() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_item(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_item(collector, self) } fn is_mac_call(&self) -> bool { matches!(self.kind, ItemKind::MacCall(..)) @@ -1431,8 +1431,8 @@ impl InvocationCollectorNode for AstNodeWrapper, TraitItemTag> fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_trait_items() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_assoc_item(visitor, self.wrapped, AssocCtxt::Trait) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_assoc_item(collector, self.wrapped, AssocCtxt::Trait) } fn is_mac_call(&self) -> bool { matches!(self.wrapped.kind, AssocItemKind::MacCall(..)) @@ -1472,8 +1472,8 @@ impl InvocationCollectorNode for AstNodeWrapper, ImplItemTag> fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_impl_items() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_assoc_item(visitor, self.wrapped, AssocCtxt::Impl { of_trait: false }) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_assoc_item(collector, self.wrapped, AssocCtxt::Impl { of_trait: false }) } fn is_mac_call(&self) -> bool { matches!(self.wrapped.kind, AssocItemKind::MacCall(..)) @@ -1513,8 +1513,8 @@ impl InvocationCollectorNode for AstNodeWrapper, TraitImplItem fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_trait_impl_items() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_assoc_item(visitor, self.wrapped, AssocCtxt::Impl { of_trait: true }) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_assoc_item(collector, self.wrapped, 
AssocCtxt::Impl { of_trait: true }) } fn is_mac_call(&self) -> bool { matches!(self.wrapped.kind, AssocItemKind::MacCall(..)) @@ -1551,8 +1551,8 @@ impl InvocationCollectorNode for P { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_foreign_items() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_foreign_item(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_foreign_item(collector, self) } fn is_mac_call(&self) -> bool { matches!(self.kind, ForeignItemKind::MacCall(..)) @@ -1573,8 +1573,8 @@ impl InvocationCollectorNode for ast::Variant { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_variants() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_variant(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_variant(collector, self) } } @@ -1586,8 +1586,8 @@ impl InvocationCollectorNode for ast::WherePredicate { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_where_predicates() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_where_predicate(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_where_predicate(collector, self) } } @@ -1599,8 +1599,8 @@ impl InvocationCollectorNode for ast::FieldDef { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_field_defs() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_field_def(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_field_def(collector, self) } } @@ -1612,8 +1612,8 @@ impl InvocationCollectorNode for ast::PatField { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { 
fragment.make_pat_fields() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_pat_field(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_pat_field(collector, self) } } @@ -1625,8 +1625,8 @@ impl InvocationCollectorNode for ast::ExprField { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_expr_fields() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_expr_field(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_expr_field(collector, self) } } @@ -1638,8 +1638,8 @@ impl InvocationCollectorNode for ast::Param { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_params() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_param(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_param(collector, self) } } @@ -1651,8 +1651,8 @@ impl InvocationCollectorNode for ast::GenericParam { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_generic_params() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_generic_param(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_generic_param(collector, self) } } @@ -1664,8 +1664,8 @@ impl InvocationCollectorNode for ast::Arm { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_arms() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_arm(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_arm(collector, self) } } @@ -1677,8 +1677,8 @@ impl InvocationCollectorNode for ast::Stmt { fn fragment_to_output(fragment: AstFragment) -> 
Self::OutputTy { fragment.make_stmts() } - fn walk_flat_map(self, visitor: &mut V) -> Self::OutputTy { - walk_flat_map_stmt(visitor, self) + fn walk_flat_map(self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_flat_map_stmt(collector, self) } fn is_mac_call(&self) -> bool { match &self.kind { @@ -1751,8 +1751,8 @@ impl InvocationCollectorNode for ast::Crate { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_crate() } - fn walk(&mut self, visitor: &mut V) { - walk_crate(visitor, self) + fn walk(&mut self, collector: &mut InvocationCollector<'_, '_>) { + walk_crate(collector, self) } fn expand_cfg_false( &mut self, @@ -1777,8 +1777,18 @@ impl InvocationCollectorNode for ast::Ty { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_ty() } - fn walk(&mut self, visitor: &mut V) { - walk_ty(visitor, self) + fn walk(&mut self, collector: &mut InvocationCollector<'_, '_>) { + // Save the pre-expanded name of this `ImplTrait`, so that later when defining + // an APIT we use a name that doesn't have any placeholder fragments in it. + if let ast::TyKind::ImplTrait(..) = self.kind { + // HACK: pprust breaks strings with newlines when the type + // gets too long. We don't want these to show up in compiler + // output or built artifacts, so replace them here... + // Perhaps we should instead format APITs more robustly. 
+ let name = Symbol::intern(&pprust::ty_to_string(self).replace('\n', " ")); + collector.cx.resolver.insert_impl_trait_name(self.id, name); + } + walk_ty(collector, self) } fn is_mac_call(&self) -> bool { matches!(self.kind, ast::TyKind::MacCall(..)) @@ -1800,8 +1810,8 @@ impl InvocationCollectorNode for ast::Pat { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_pat() } - fn walk(&mut self, visitor: &mut V) { - walk_pat(visitor, self) + fn walk(&mut self, collector: &mut InvocationCollector<'_, '_>) { + walk_pat(collector, self) } fn is_mac_call(&self) -> bool { matches!(self.kind, PatKind::MacCall(..)) @@ -1826,8 +1836,8 @@ impl InvocationCollectorNode for ast::Expr { fn descr() -> &'static str { "an expression" } - fn walk(&mut self, visitor: &mut V) { - walk_expr(visitor, self) + fn walk(&mut self, collector: &mut InvocationCollector<'_, '_>) { + walk_expr(collector, self) } fn is_mac_call(&self) -> bool { matches!(self.kind, ExprKind::MacCall(..)) @@ -1850,8 +1860,8 @@ impl InvocationCollectorNode for AstNodeWrapper, OptExprTag> { fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { fragment.make_opt_expr() } - fn walk_flat_map(mut self, visitor: &mut V) -> Self::OutputTy { - walk_expr(visitor, &mut self.wrapped); + fn walk_flat_map(mut self, collector: &mut InvocationCollector<'_, '_>) -> Self::OutputTy { + walk_expr(collector, &mut self.wrapped); Some(self.wrapped) } fn is_mac_call(&self) -> bool { @@ -1873,20 +1883,20 @@ impl InvocationCollectorNode for AstNodeWrapper, OptExprTag> { /// It can be removed once that feature is stabilized. 
struct MethodReceiverTag; -impl InvocationCollectorNode for AstNodeWrapper, MethodReceiverTag> { - type OutputTy = Self; +impl InvocationCollectorNode for AstNodeWrapper { + type OutputTy = AstNodeWrapper, MethodReceiverTag>; const KIND: AstFragmentKind = AstFragmentKind::MethodReceiverExpr; fn descr() -> &'static str { "an expression" } fn to_annotatable(self) -> Annotatable { - Annotatable::Expr(self.wrapped) + Annotatable::Expr(P(self.wrapped)) } fn fragment_to_output(fragment: AstFragment) -> Self::OutputTy { AstNodeWrapper::new(fragment.make_method_receiver_expr(), MethodReceiverTag) } - fn walk(&mut self, visitor: &mut V) { - walk_expr(visitor, &mut self.wrapped) + fn walk(&mut self, collector: &mut InvocationCollector<'_, '_>) { + walk_expr(collector, &mut self.wrapped) } fn is_mac_call(&self) -> bool { matches!(self.wrapped.kind, ast::ExprKind::MacCall(..)) @@ -1983,9 +1993,9 @@ impl DummyAstNode for ast::Expr { } } -impl DummyAstNode for AstNodeWrapper, MethodReceiverTag> { +impl DummyAstNode for AstNodeWrapper { fn dummy() -> Self { - AstNodeWrapper::new(P(ast::Expr::dummy()), MethodReceiverTag) + AstNodeWrapper::new(ast::Expr::dummy(), MethodReceiverTag) } } @@ -2431,7 +2441,7 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> { self.visit_node(node) } - fn visit_method_receiver_expr(&mut self, node: &mut P) { + fn visit_method_receiver_expr(&mut self, node: &mut ast::Expr) { self.visit_node(AstNodeWrapper::from_mut(node, MethodReceiverTag)) } @@ -2458,7 +2468,7 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> { } pub struct ExpansionConfig<'feat> { - pub crate_name: String, + pub crate_name: Symbol, pub features: &'feat Features, pub recursion_limit: Limit, pub trace_mac: bool, @@ -2471,7 +2481,7 @@ pub struct ExpansionConfig<'feat> { } impl ExpansionConfig<'_> { - pub fn default(crate_name: String, features: &Features) -> ExpansionConfig<'_> { + pub fn default(crate_name: Symbol, features: &Features) -> ExpansionConfig<'_> { 
ExpansionConfig { crate_name, features, diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index c9168ed4ff25..a8c4a9e4b1b4 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -9,7 +9,7 @@ use rustc_data_structures::fx::FxHashMap; use rustc_errors::{Diag, DiagCtxtHandle, PResult, pluralize}; use rustc_parse::lexer::nfc_normalize; use rustc_parse::parser::ParseNtResult; -use rustc_session::parse::{ParseSess, SymbolGallery}; +use rustc_session::parse::ParseSess; use rustc_span::hygiene::{LocalExpnId, Transparency}; use rustc_span::{ Ident, MacroRulesNormalizedIdent, Span, Symbol, SyntaxContext, sym, with_metavar_spans, @@ -25,20 +25,77 @@ use crate::mbe::macro_parser::NamedMatch::*; use crate::mbe::metavar_expr::{MetaVarExprConcatElem, RAW_IDENT_ERR}; use crate::mbe::{self, KleeneOp, MetaVarExpr}; -// A Marker adds the given mark to the syntax context. -struct Marker(LocalExpnId, Transparency, FxHashMap); +/// Context needed to perform transcription of metavariable expressions. +struct TranscrCtx<'psess, 'itp> { + psess: &'psess ParseSess, + + /// Map from metavars to matched tokens + interp: &'itp FxHashMap, + + /// Allow marking spans. + marker: Marker, + + /// The stack of things yet to be completely expanded. + /// + /// We descend into the RHS (`src`), expanding things as we go. This stack contains the things + /// we have yet to expand/are still expanding. We start the stack off with the whole RHS. The + /// choice of spacing values doesn't matter. + stack: SmallVec<[Frame<'itp>; 1]>, + + /// A stack of where we are in the repeat expansion. + /// + /// As we descend in the RHS, we will need to be able to match nested sequences of matchers. + /// `repeats` keeps track of where we are in matching at each level, with the last element + /// being the most deeply nested sequence. This is used as a stack. 
+ repeats: Vec<(usize, usize)>, + + /// The resulting token stream from the `TokenTree` we just finished processing. + /// + /// At the end, this will contain the full result of transcription, but at arbitrary points + /// during `transcribe`, `result` will contain subsets of the final result. + /// + /// Specifically, as we descend into each TokenTree, we will push the existing results onto the + /// `result_stack` and clear `results`. We will then produce the results of transcribing the + /// TokenTree into `results`. Then, as we unwind back out of the `TokenTree`, we will pop the + /// `result_stack` and append `results` too it to produce the new `results` up to that point. + /// + /// Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level + /// again, and we are done transcribing. + result: Vec, + + /// The in-progress `result` lives at the top of this stack. Each entered `TokenTree` adds a + /// new entry. + result_stack: Vec>, +} + +impl<'psess> TranscrCtx<'psess, '_> { + /// Span marked with the correct expansion and transparency. + fn visited_dspan(&mut self, dspan: DelimSpan) -> Span { + let mut span = dspan.entire(); + self.marker.mark_span(&mut span); + span + } +} + +/// A Marker adds the given mark to the syntax context. +struct Marker { + expand_id: LocalExpnId, + transparency: Transparency, + cache: FxHashMap, +} impl Marker { + /// Mark a span with the stored expansion ID and transparency. fn mark_span(&mut self, span: &mut Span) { // `apply_mark` is a relatively expensive operation, both due to taking hygiene lock, and // by itself. All tokens in a macro body typically have the same syntactic context, unless // it's some advanced case with macro-generated macros. So if we cache the marked version // of that context once, we'll typically have a 100% cache hit rate after that. 
- let Marker(expn_id, transparency, ref mut cache) = *self; *span = span.map_ctxt(|ctxt| { - *cache + *self + .cache .entry(ctxt) - .or_insert_with(|| ctxt.apply_mark(expn_id.to_expn_id(), transparency)) + .or_insert_with(|| ctxt.apply_mark(self.expand_id.to_expn_id(), self.transparency)) }); } } @@ -116,52 +173,36 @@ pub(super) fn transcribe<'a>( return Ok(TokenStream::default()); } - // We descend into the RHS (`src`), expanding things as we go. This stack contains the things - // we have yet to expand/are still expanding. We start the stack off with the whole RHS. The - // choice of spacing values doesn't matter. - let mut stack: SmallVec<[Frame<'_>; 1]> = smallvec![Frame::new_delimited( - src, - src_span, - DelimSpacing::new(Spacing::Alone, Spacing::Alone) - )]; + let mut tscx = TranscrCtx { + psess, + interp, + marker: Marker { expand_id, transparency, cache: Default::default() }, + repeats: Vec::new(), + stack: smallvec![Frame::new_delimited( + src, + src_span, + DelimSpacing::new(Spacing::Alone, Spacing::Alone) + )], + result: Vec::new(), + result_stack: Vec::new(), + }; - // As we descend in the RHS, we will need to be able to match nested sequences of matchers. - // `repeats` keeps track of where we are in matching at each level, with the last element being - // the most deeply nested sequence. This is used as a stack. - let mut repeats: Vec<(usize, usize)> = Vec::new(); - - // `result` contains resulting token stream from the TokenTree we just finished processing. At - // the end, this will contain the full result of transcription, but at arbitrary points during - // `transcribe`, `result` will contain subsets of the final result. - // - // Specifically, as we descend into each TokenTree, we will push the existing results onto the - // `result_stack` and clear `results`. We will then produce the results of transcribing the - // TokenTree into `results`. 
Then, as we unwind back out of the `TokenTree`, we will pop the - // `result_stack` and append `results` too it to produce the new `results` up to that point. - // - // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level - // again, and we are done transcribing. - let mut result: Vec = Vec::new(); - let mut result_stack = Vec::new(); - let mut marker = Marker(expand_id, transparency, Default::default()); - - let dcx = psess.dcx(); loop { // Look at the last frame on the stack. // If it still has a TokenTree we have not looked at yet, use that tree. - let Some(tree) = stack.last_mut().unwrap().next() else { + let Some(tree) = tscx.stack.last_mut().unwrap().next() else { // This else-case never produces a value for `tree` (it `continue`s or `return`s). // Otherwise, if we have just reached the end of a sequence and we can keep repeating, // go back to the beginning of the sequence. - let frame = stack.last_mut().unwrap(); + let frame = tscx.stack.last_mut().unwrap(); if let FrameKind::Sequence { sep, .. } = &frame.kind { - let (repeat_idx, repeat_len) = repeats.last_mut().unwrap(); + let (repeat_idx, repeat_len) = tscx.repeats.last_mut().unwrap(); *repeat_idx += 1; if repeat_idx < repeat_len { frame.idx = 0; if let Some(sep) = sep { - result.push(TokenTree::Token(*sep, Spacing::Alone)); + tscx.result.push(TokenTree::Token(*sep, Spacing::Alone)); } continue; } @@ -170,10 +211,10 @@ pub(super) fn transcribe<'a>( // We are done with the top of the stack. Pop it. Depending on what it was, we do // different things. Note that the outermost item must be the delimited, wrapped RHS // that was passed in originally to `transcribe`. - match stack.pop().unwrap().kind { + match tscx.stack.pop().unwrap().kind { // Done with a sequence. Pop from repeats. FrameKind::Sequence { .. } => { - repeats.pop(); + tscx.repeats.pop(); } // We are done processing a Delimited. 
If this is the top-level delimited, we are @@ -185,15 +226,16 @@ pub(super) fn transcribe<'a>( if delim == Delimiter::Bracket { spacing.close = Spacing::Alone; } - if result_stack.is_empty() { + if tscx.result_stack.is_empty() { // No results left to compute! We are back at the top-level. - return Ok(TokenStream::new(result)); + return Ok(TokenStream::new(tscx.result)); } // Step back into the parent Delimited. - let tree = TokenTree::Delimited(span, spacing, delim, TokenStream::new(result)); - result = result_stack.pop().unwrap(); - result.push(tree); + let tree = + TokenTree::Delimited(span, spacing, delim, TokenStream::new(tscx.result)); + tscx.result = tscx.result_stack.pop().unwrap(); + tscx.result.push(tree); } } continue; @@ -202,223 +244,19 @@ pub(super) fn transcribe<'a>( // At this point, we know we are in the middle of a TokenTree (the last one on `stack`). // `tree` contains the next `TokenTree` to be processed. match tree { - // We are descending into a sequence. We first make sure that the matchers in the RHS - // and the matches in `interp` have the same shape. Otherwise, either the caller or the - // macro writer has made a mistake. + // Replace the sequence with its expansion. seq @ mbe::TokenTree::Sequence(_, seq_rep) => { - match lockstep_iter_size(seq, interp, &repeats) { - LockstepIterSize::Unconstrained => { - return Err(dcx.create_err(NoSyntaxVarsExprRepeat { span: seq.span() })); - } - - LockstepIterSize::Contradiction(msg) => { - // FIXME: this really ought to be caught at macro definition time... It - // happens when two meta-variables are used in the same repetition in a - // sequence, but they come from different sequence matchers and repeat - // different amounts. - return Err( - dcx.create_err(MetaVarsDifSeqMatchers { span: seq.span(), msg }) - ); - } - - LockstepIterSize::Constraint(len, _) => { - // We do this to avoid an extra clone above. We know that this is a - // sequence already. 
- let mbe::TokenTree::Sequence(sp, seq) = seq else { unreachable!() }; - - // Is the repetition empty? - if len == 0 { - if seq.kleene.op == KleeneOp::OneOrMore { - // FIXME: this really ought to be caught at macro definition - // time... It happens when the Kleene operator in the matcher and - // the body for the same meta-variable do not match. - return Err(dcx.create_err(MustRepeatOnce { span: sp.entire() })); - } - } else { - // 0 is the initial counter (we have done 0 repetitions so far). `len` - // is the total number of repetitions we should generate. - repeats.push((0, len)); - - // The first time we encounter the sequence we push it to the stack. It - // then gets reused (see the beginning of the loop) until we are done - // repeating. - stack.push(Frame::new_sequence( - seq_rep, - seq.separator.clone(), - seq.kleene.op, - )); - } - } - } + transcribe_sequence(&mut tscx, seq, seq_rep)?; } // Replace the meta-var with the matched token tree from the invocation. - &mbe::TokenTree::MetaVar(mut sp, mut original_ident) => { - // Find the matched nonterminal from the macro invocation, and use it to replace - // the meta-var. - // - // We use `Spacing::Alone` everywhere here, because that's the conservative choice - // and spacing of declarative macros is tricky. E.g. in this macro: - // ``` - // macro_rules! idents { - // ($($a:ident,)*) => { stringify!($($a)*) } - // } - // ``` - // `$a` has no whitespace after it and will be marked `JointHidden`. If you then - // call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So - // if you choose to use `$x`'s spacing or the identifier's spacing, you'll end up - // producing "xyz", which is bad because it effectively merges tokens. - // `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid - // some of the unnecessary whitespace. 
- let ident = MacroRulesNormalizedIdent::new(original_ident); - if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { - // We wrap the tokens in invisible delimiters, unless they are already wrapped - // in invisible delimiters with the same `MetaVarKind`. Because some proc - // macros can't handle multiple layers of invisible delimiters of the same - // `MetaVarKind`. This loses some span info, though it hopefully won't matter. - let mut mk_delimited = |mk_span, mv_kind, mut stream: TokenStream| { - if stream.len() == 1 { - let tree = stream.iter().next().unwrap(); - if let TokenTree::Delimited(_, _, delim, inner) = tree - && let Delimiter::Invisible(InvisibleOrigin::MetaVar(mvk)) = delim - && mv_kind == *mvk - { - stream = inner.clone(); - } - } - - // Emit as a token stream within `Delimiter::Invisible` to maintain - // parsing priorities. - marker.mark_span(&mut sp); - with_metavar_spans(|mspans| mspans.insert(mk_span, sp)); - // Both the open delim and close delim get the same span, which covers the - // `$foo` in the decl macro RHS. - TokenTree::Delimited( - DelimSpan::from_single(sp), - DelimSpacing::new(Spacing::Alone, Spacing::Alone), - Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind)), - stream, - ) - }; - let tt = match cur_matched { - MatchedSingle(ParseNtResult::Tt(tt)) => { - // `tt`s are emitted into the output stream directly as "raw tokens", - // without wrapping them into groups. Other variables are emitted into - // the output stream as groups with `Delimiter::Invisible` to maintain - // parsing priorities. 
- maybe_use_metavar_location(psess, &stack, sp, tt, &mut marker) - } - MatchedSingle(ParseNtResult::Ident(ident, is_raw)) => { - marker.mark_span(&mut sp); - with_metavar_spans(|mspans| mspans.insert(ident.span, sp)); - let kind = token::NtIdent(*ident, *is_raw); - TokenTree::token_alone(kind, sp) - } - MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => { - marker.mark_span(&mut sp); - with_metavar_spans(|mspans| mspans.insert(ident.span, sp)); - let kind = token::NtLifetime(*ident, *is_raw); - TokenTree::token_alone(kind, sp) - } - MatchedSingle(ParseNtResult::Item(item)) => { - mk_delimited(item.span, MetaVarKind::Item, TokenStream::from_ast(item)) - } - MatchedSingle(ParseNtResult::Block(block)) => mk_delimited( - block.span, - MetaVarKind::Block, - TokenStream::from_ast(block), - ), - MatchedSingle(ParseNtResult::Stmt(stmt)) => { - let stream = if let StmtKind::Empty = stmt.kind { - // FIXME: Properly collect tokens for empty statements. - TokenStream::token_alone(token::Semi, stmt.span) - } else { - TokenStream::from_ast(stmt) - }; - mk_delimited(stmt.span, MetaVarKind::Stmt, stream) - } - MatchedSingle(ParseNtResult::Pat(pat, pat_kind)) => mk_delimited( - pat.span, - MetaVarKind::Pat(*pat_kind), - TokenStream::from_ast(pat), - ), - MatchedSingle(ParseNtResult::Expr(expr, kind)) => { - let (can_begin_literal_maybe_minus, can_begin_string_literal) = - match &expr.kind { - ExprKind::Lit(_) => (true, true), - ExprKind::Unary(UnOp::Neg, e) - if matches!(&e.kind, ExprKind::Lit(_)) => - { - (true, false) - } - _ => (false, false), - }; - mk_delimited( - expr.span, - MetaVarKind::Expr { - kind: *kind, - can_begin_literal_maybe_minus, - can_begin_string_literal, - }, - TokenStream::from_ast(expr), - ) - } - MatchedSingle(ParseNtResult::Literal(lit)) => { - mk_delimited(lit.span, MetaVarKind::Literal, TokenStream::from_ast(lit)) - } - MatchedSingle(ParseNtResult::Ty(ty)) => { - let is_path = matches!(&ty.kind, TyKind::Path(None, _path)); - mk_delimited( - 
ty.span, - MetaVarKind::Ty { is_path }, - TokenStream::from_ast(ty), - ) - } - MatchedSingle(ParseNtResult::Meta(attr_item)) => { - let has_meta_form = attr_item.meta_kind().is_some(); - mk_delimited( - attr_item.span(), - MetaVarKind::Meta { has_meta_form }, - TokenStream::from_ast(attr_item), - ) - } - MatchedSingle(ParseNtResult::Path(path)) => { - mk_delimited(path.span, MetaVarKind::Path, TokenStream::from_ast(path)) - } - MatchedSingle(ParseNtResult::Vis(vis)) => { - mk_delimited(vis.span, MetaVarKind::Vis, TokenStream::from_ast(vis)) - } - MatchedSeq(..) => { - // We were unable to descend far enough. This is an error. - return Err(dcx.create_err(VarStillRepeating { span: sp, ident })); - } - }; - result.push(tt) - } else { - // If we aren't able to match the meta-var, we push it back into the result but - // with modified syntax context. (I believe this supports nested macros). - marker.mark_span(&mut sp); - marker.mark_span(&mut original_ident.span); - result.push(TokenTree::token_joint_hidden(token::Dollar, sp)); - result.push(TokenTree::Token( - Token::from_ast_ident(original_ident), - Spacing::Alone, - )); - } + &mbe::TokenTree::MetaVar(sp, original_ident) => { + transcribe_metavar(&mut tscx, sp, original_ident)?; } // Replace meta-variable expressions with the result of their expansion. - mbe::TokenTree::MetaVarExpr(sp, expr) => { - transcribe_metavar_expr( - dcx, - expr, - interp, - &mut marker, - &repeats, - &mut result, - sp, - &psess.symbol_gallery, - )?; + mbe::TokenTree::MetaVarExpr(dspan, expr) => { + transcribe_metavar_expr(&mut tscx, *dspan, expr)?; } // If we are entering a new delimiter, we push its contents to the `stack` to be @@ -427,21 +265,21 @@ pub(super) fn transcribe<'a>( // jump back out of the Delimited, pop the result_stack and add the new results back to // the previous results (from outside the Delimited). 
&mbe::TokenTree::Delimited(mut span, ref spacing, ref delimited) => { - marker.mark_span(&mut span.open); - marker.mark_span(&mut span.close); - stack.push(Frame::new_delimited(delimited, span, *spacing)); - result_stack.push(mem::take(&mut result)); + tscx.marker.mark_span(&mut span.open); + tscx.marker.mark_span(&mut span.close); + tscx.stack.push(Frame::new_delimited(delimited, span, *spacing)); + tscx.result_stack.push(mem::take(&mut tscx.result)); } // Nothing much to do here. Just push the token to the result, being careful to // preserve syntax context. &mbe::TokenTree::Token(mut token) => { - marker.mark_span(&mut token.span); + tscx.marker.mark_span(&mut token.span); if let token::NtIdent(ident, _) | token::NtLifetime(ident, _) = &mut token.kind { - marker.mark_span(&mut ident.span); + tscx.marker.mark_span(&mut ident.span); } let tt = TokenTree::Token(token, Spacing::Alone); - result.push(tt); + tscx.result.push(tt); } // There should be no meta-var declarations in the invocation of a macro. @@ -450,6 +288,305 @@ pub(super) fn transcribe<'a>( } } +/// Turn `$(...)*` sequences into tokens. +fn transcribe_sequence<'tx, 'itp>( + tscx: &mut TranscrCtx<'tx, 'itp>, + seq: &mbe::TokenTree, + seq_rep: &'itp mbe::SequenceRepetition, +) -> PResult<'tx, ()> { + let dcx = tscx.psess.dcx(); + + // We are descending into a sequence. We first make sure that the matchers in the RHS + // and the matches in `interp` have the same shape. Otherwise, either the caller or the + // macro writer has made a mistake. + match lockstep_iter_size(seq, tscx.interp, &tscx.repeats) { + LockstepIterSize::Unconstrained => { + return Err(dcx.create_err(NoSyntaxVarsExprRepeat { span: seq.span() })); + } + + LockstepIterSize::Contradiction(msg) => { + // FIXME: this really ought to be caught at macro definition time... It + // happens when two meta-variables are used in the same repetition in a + // sequence, but they come from different sequence matchers and repeat + // different amounts. 
+ return Err(dcx.create_err(MetaVarsDifSeqMatchers { span: seq.span(), msg })); + } + + LockstepIterSize::Constraint(len, _) => { + // We do this to avoid an extra clone above. We know that this is a + // sequence already. + let mbe::TokenTree::Sequence(sp, seq) = seq else { unreachable!() }; + + // Is the repetition empty? + if len == 0 { + if seq.kleene.op == KleeneOp::OneOrMore { + // FIXME: this really ought to be caught at macro definition + // time... It happens when the Kleene operator in the matcher and + // the body for the same meta-variable do not match. + return Err(dcx.create_err(MustRepeatOnce { span: sp.entire() })); + } + } else { + // 0 is the initial counter (we have done 0 repetitions so far). `len` + // is the total number of repetitions we should generate. + tscx.repeats.push((0, len)); + + // The first time we encounter the sequence we push it to the stack. It + // then gets reused (see the beginning of the loop) until we are done + // repeating. + tscx.stack.push(Frame::new_sequence(seq_rep, seq.separator.clone(), seq.kleene.op)); + } + } + } + + Ok(()) +} + +/// Find the matched nonterminal from the macro invocation, and use it to replace +/// the meta-var. +/// +/// We use `Spacing::Alone` everywhere here, because that's the conservative choice +/// and spacing of declarative macros is tricky. E.g. in this macro: +/// ``` +/// macro_rules! idents { +/// ($($a:ident,)*) => { stringify!($($a)*) } +/// } +/// ``` +/// `$a` has no whitespace after it and will be marked `JointHidden`. If you then +/// call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So +/// if you choose to use `$x`'s spacing or the identifier's spacing, you'll end up +/// producing "xyz", which is bad because it effectively merges tokens. +/// `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid +/// some of the unnecessary whitespace. 
+fn transcribe_metavar<'tx>( + tscx: &mut TranscrCtx<'tx, '_>, + mut sp: Span, + mut original_ident: Ident, +) -> PResult<'tx, ()> { + let dcx = tscx.psess.dcx(); + + let ident = MacroRulesNormalizedIdent::new(original_ident); + let Some(cur_matched) = lookup_cur_matched(ident, tscx.interp, &tscx.repeats) else { + // If we aren't able to match the meta-var, we push it back into the result but + // with modified syntax context. (I believe this supports nested macros). + tscx.marker.mark_span(&mut sp); + tscx.marker.mark_span(&mut original_ident.span); + tscx.result.push(TokenTree::token_joint_hidden(token::Dollar, sp)); + tscx.result.push(TokenTree::Token(Token::from_ast_ident(original_ident), Spacing::Alone)); + return Ok(()); + }; + + // We wrap the tokens in invisible delimiters, unless they are already wrapped + // in invisible delimiters with the same `MetaVarKind`. Because some proc + // macros can't handle multiple layers of invisible delimiters of the same + // `MetaVarKind`. This loses some span info, though it hopefully won't matter. + let mut mk_delimited = |mk_span, mv_kind, mut stream: TokenStream| { + if stream.len() == 1 { + let tree = stream.iter().next().unwrap(); + if let TokenTree::Delimited(_, _, delim, inner) = tree + && let Delimiter::Invisible(InvisibleOrigin::MetaVar(mvk)) = delim + && mv_kind == *mvk + { + stream = inner.clone(); + } + } + + // Emit as a token stream within `Delimiter::Invisible` to maintain + // parsing priorities. + tscx.marker.mark_span(&mut sp); + with_metavar_spans(|mspans| mspans.insert(mk_span, sp)); + // Both the open delim and close delim get the same span, which covers the + // `$foo` in the decl macro RHS. 
+ TokenTree::Delimited( + DelimSpan::from_single(sp), + DelimSpacing::new(Spacing::Alone, Spacing::Alone), + Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind)), + stream, + ) + }; + + let tt = match cur_matched { + MatchedSingle(ParseNtResult::Tt(tt)) => { + // `tt`s are emitted into the output stream directly as "raw tokens", + // without wrapping them into groups. Other variables are emitted into + // the output stream as groups with `Delimiter::Invisible` to maintain + // parsing priorities. + maybe_use_metavar_location(tscx.psess, &tscx.stack, sp, tt, &mut tscx.marker) + } + MatchedSingle(ParseNtResult::Ident(ident, is_raw)) => { + tscx.marker.mark_span(&mut sp); + with_metavar_spans(|mspans| mspans.insert(ident.span, sp)); + let kind = token::NtIdent(*ident, *is_raw); + TokenTree::token_alone(kind, sp) + } + MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => { + tscx.marker.mark_span(&mut sp); + with_metavar_spans(|mspans| mspans.insert(ident.span, sp)); + let kind = token::NtLifetime(*ident, *is_raw); + TokenTree::token_alone(kind, sp) + } + MatchedSingle(ParseNtResult::Item(item)) => { + mk_delimited(item.span, MetaVarKind::Item, TokenStream::from_ast(item)) + } + MatchedSingle(ParseNtResult::Block(block)) => { + mk_delimited(block.span, MetaVarKind::Block, TokenStream::from_ast(block)) + } + MatchedSingle(ParseNtResult::Stmt(stmt)) => { + let stream = if let StmtKind::Empty = stmt.kind { + // FIXME: Properly collect tokens for empty statements. 
+                TokenStream::token_alone(token::Semi, stmt.span)
+            } else {
+                TokenStream::from_ast(stmt)
+            };
+            mk_delimited(stmt.span, MetaVarKind::Stmt, stream)
+        }
+        MatchedSingle(ParseNtResult::Pat(pat, pat_kind)) => {
+            mk_delimited(pat.span, MetaVarKind::Pat(*pat_kind), TokenStream::from_ast(pat))
+        }
+        MatchedSingle(ParseNtResult::Expr(expr, kind)) => {
+            let (can_begin_literal_maybe_minus, can_begin_string_literal) = match &expr.kind {
+                ExprKind::Lit(_) => (true, true),
+                ExprKind::Unary(UnOp::Neg, e) if matches!(&e.kind, ExprKind::Lit(_)) => {
+                    (true, false)
+                }
+                _ => (false, false),
+            };
+            mk_delimited(
+                expr.span,
+                MetaVarKind::Expr {
+                    kind: *kind,
+                    can_begin_literal_maybe_minus,
+                    can_begin_string_literal,
+                },
+                TokenStream::from_ast(expr),
+            )
+        }
+        MatchedSingle(ParseNtResult::Literal(lit)) => {
+            mk_delimited(lit.span, MetaVarKind::Literal, TokenStream::from_ast(lit))
+        }
+        MatchedSingle(ParseNtResult::Ty(ty)) => {
+            let is_path = matches!(&ty.kind, TyKind::Path(None, _path));
+            mk_delimited(ty.span, MetaVarKind::Ty { is_path }, TokenStream::from_ast(ty))
+        }
+        MatchedSingle(ParseNtResult::Meta(attr_item)) => {
+            let has_meta_form = attr_item.meta_kind().is_some();
+            mk_delimited(
+                attr_item.span(),
+                MetaVarKind::Meta { has_meta_form },
+                TokenStream::from_ast(attr_item),
+            )
+        }
+        MatchedSingle(ParseNtResult::Path(path)) => {
+            mk_delimited(path.span, MetaVarKind::Path, TokenStream::from_ast(path))
+        }
+        MatchedSingle(ParseNtResult::Vis(vis)) => {
+            mk_delimited(vis.span, MetaVarKind::Vis, TokenStream::from_ast(vis))
+        }
+        MatchedSeq(..) => {
+            // We were unable to descend far enough. This is an error.
+            return Err(dcx.create_err(VarStillRepeating { span: sp, ident }));
+        }
+    };
+
+    tscx.result.push(tt);
+    Ok(())
+}
+
+/// Turn `${expr(...)}` metavariable expressions into tokens.
+fn transcribe_metavar_expr<'tx>( + tscx: &mut TranscrCtx<'tx, '_>, + dspan: DelimSpan, + expr: &MetaVarExpr, +) -> PResult<'tx, ()> { + let dcx = tscx.psess.dcx(); + let tt = match *expr { + MetaVarExpr::Concat(ref elements) => metavar_expr_concat(tscx, dspan, elements)?, + MetaVarExpr::Count(original_ident, depth) => { + let matched = matched_from_ident(dcx, original_ident, tscx.interp)?; + let count = count_repetitions(dcx, depth, matched, &tscx.repeats, &dspan)?; + TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(count), None), + tscx.visited_dspan(dspan), + ) + } + MetaVarExpr::Ignore(original_ident) => { + // Used to ensure that `original_ident` is present in the LHS + let _ = matched_from_ident(dcx, original_ident, tscx.interp)?; + return Ok(()); + } + MetaVarExpr::Index(depth) => match tscx.repeats.iter().nth_back(depth) { + Some((index, _)) => TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(*index), None), + tscx.visited_dspan(dspan), + ), + None => { + return Err(out_of_bounds_err(dcx, tscx.repeats.len(), dspan.entire(), "index")); + } + }, + MetaVarExpr::Len(depth) => match tscx.repeats.iter().nth_back(depth) { + Some((_, length)) => TokenTree::token_alone( + TokenKind::lit(token::Integer, sym::integer(*length), None), + tscx.visited_dspan(dspan), + ), + None => { + return Err(out_of_bounds_err(dcx, tscx.repeats.len(), dspan.entire(), "len")); + } + }, + }; + tscx.result.push(tt); + Ok(()) +} + +/// Handle the `${concat(...)}` metavariable expression. 
+fn metavar_expr_concat<'tx>( + tscx: &mut TranscrCtx<'tx, '_>, + dspan: DelimSpan, + elements: &[MetaVarExprConcatElem], +) -> PResult<'tx, TokenTree> { + let dcx = tscx.psess.dcx(); + let mut concatenated = String::new(); + for element in elements.into_iter() { + let symbol = match element { + MetaVarExprConcatElem::Ident(elem) => elem.name, + MetaVarExprConcatElem::Literal(elem) => *elem, + MetaVarExprConcatElem::Var(ident) => { + match matched_from_ident(dcx, *ident, tscx.interp)? { + NamedMatch::MatchedSeq(named_matches) => { + let Some((curr_idx, _)) = tscx.repeats.last() else { + return Err(dcx.struct_span_err(dspan.entire(), "invalid syntax")); + }; + match &named_matches[*curr_idx] { + // FIXME(c410-f3r) Nested repetitions are unimplemented + MatchedSeq(_) => unimplemented!(), + MatchedSingle(pnr) => extract_symbol_from_pnr(dcx, pnr, ident.span)?, + } + } + NamedMatch::MatchedSingle(pnr) => { + extract_symbol_from_pnr(dcx, pnr, ident.span)? + } + } + } + }; + concatenated.push_str(symbol.as_str()); + } + let symbol = nfc_normalize(&concatenated); + let concatenated_span = tscx.visited_dspan(dspan); + if !rustc_lexer::is_ident(symbol.as_str()) { + return Err(dcx.struct_span_err( + concatenated_span, + "`${concat(..)}` is not generating a valid identifier", + )); + } + tscx.psess.symbol_gallery.insert(symbol, concatenated_span); + + // The current implementation marks the span as coming from the macro regardless of + // contexts of the concatenated identifiers but this behavior may change in the + // future. + Ok(TokenTree::Token( + Token::from_ast_ident(Ident::new(symbol, concatenated_span)), + Spacing::Alone, + )) +} + /// Store the metavariable span for this original span into a side table. /// FIXME: Try to put the metavariable span into `SpanData` instead of a side table (#118517). /// An optimal encoding for inlined spans will need to be selected to minimize regressions. 
@@ -671,13 +808,13 @@ fn lockstep_iter_size( /// * `[ $( ${count(foo, 0)} ),* ]` will be the same as `[ $( ${count(foo)} ),* ]` /// * `[ $( ${count(foo, 1)} ),* ]` will return an error because `${count(foo, 1)}` is /// declared inside a single repetition and the index `1` implies two nested repetitions. -fn count_repetitions<'a>( - dcx: DiagCtxtHandle<'a>, +fn count_repetitions<'dx>( + dcx: DiagCtxtHandle<'dx>, depth_user: usize, mut matched: &NamedMatch, repeats: &[(usize, usize)], sp: &DelimSpan, -) -> PResult<'a, usize> { +) -> PResult<'dx, usize> { // Recursively count the number of matches in `matched` at given depth // (or at the top-level of `matched` if no depth is given). fn count<'a>(depth_curr: usize, depth_max: usize, matched: &NamedMatch) -> PResult<'a, usize> { @@ -762,102 +899,6 @@ fn out_of_bounds_err<'a>(dcx: DiagCtxtHandle<'a>, max: usize, span: Span, ty: &s dcx.struct_span_err(span, msg) } -fn transcribe_metavar_expr<'a>( - dcx: DiagCtxtHandle<'a>, - expr: &MetaVarExpr, - interp: &FxHashMap, - marker: &mut Marker, - repeats: &[(usize, usize)], - result: &mut Vec, - sp: &DelimSpan, - symbol_gallery: &SymbolGallery, -) -> PResult<'a, ()> { - let mut visited_span = || { - let mut span = sp.entire(); - marker.mark_span(&mut span); - span - }; - match *expr { - MetaVarExpr::Concat(ref elements) => { - let mut concatenated = String::new(); - for element in elements.into_iter() { - let symbol = match element { - MetaVarExprConcatElem::Ident(elem) => elem.name, - MetaVarExprConcatElem::Literal(elem) => *elem, - MetaVarExprConcatElem::Var(ident) => { - match matched_from_ident(dcx, *ident, interp)? 
{ - NamedMatch::MatchedSeq(named_matches) => { - let Some((curr_idx, _)) = repeats.last() else { - return Err(dcx.struct_span_err(sp.entire(), "invalid syntax")); - }; - match &named_matches[*curr_idx] { - // FIXME(c410-f3r) Nested repetitions are unimplemented - MatchedSeq(_) => unimplemented!(), - MatchedSingle(pnr) => { - extract_symbol_from_pnr(dcx, pnr, ident.span)? - } - } - } - NamedMatch::MatchedSingle(pnr) => { - extract_symbol_from_pnr(dcx, pnr, ident.span)? - } - } - } - }; - concatenated.push_str(symbol.as_str()); - } - let symbol = nfc_normalize(&concatenated); - let concatenated_span = visited_span(); - if !rustc_lexer::is_ident(symbol.as_str()) { - return Err(dcx.struct_span_err( - concatenated_span, - "`${concat(..)}` is not generating a valid identifier", - )); - } - symbol_gallery.insert(symbol, concatenated_span); - // The current implementation marks the span as coming from the macro regardless of - // contexts of the concatenated identifiers but this behavior may change in the - // future. 
- result.push(TokenTree::Token( - Token::from_ast_ident(Ident::new(symbol, concatenated_span)), - Spacing::Alone, - )); - } - MetaVarExpr::Count(original_ident, depth) => { - let matched = matched_from_ident(dcx, original_ident, interp)?; - let count = count_repetitions(dcx, depth, matched, repeats, sp)?; - let tt = TokenTree::token_alone( - TokenKind::lit(token::Integer, sym::integer(count), None), - visited_span(), - ); - result.push(tt); - } - MetaVarExpr::Ignore(original_ident) => { - // Used to ensure that `original_ident` is present in the LHS - let _ = matched_from_ident(dcx, original_ident, interp)?; - } - MetaVarExpr::Index(depth) => match repeats.iter().nth_back(depth) { - Some((index, _)) => { - result.push(TokenTree::token_alone( - TokenKind::lit(token::Integer, sym::integer(*index), None), - visited_span(), - )); - } - None => return Err(out_of_bounds_err(dcx, repeats.len(), sp.entire(), "index")), - }, - MetaVarExpr::Len(depth) => match repeats.iter().nth_back(depth) { - Some((_, length)) => { - result.push(TokenTree::token_alone( - TokenKind::lit(token::Integer, sym::integer(*length), None), - visited_span(), - )); - } - None => return Err(out_of_bounds_err(dcx, repeats.len(), sp.entire(), "len")), - }, - } - Ok(()) -} - /// Extracts an metavariable symbol that can be an identifier, a token tree or a literal. 
fn extract_symbol_from_pnr<'a>( dcx: DiagCtxtHandle<'a>, diff --git a/compiler/rustc_expand/src/placeholders.rs b/compiler/rustc_expand/src/placeholders.rs index 2c486a02bdf1..6e1c6df4bcb4 100644 --- a/compiler/rustc_expand/src/placeholders.rs +++ b/compiler/rustc_expand/src/placeholders.rs @@ -339,9 +339,9 @@ impl MutVisitor for PlaceholderExpander { } } - fn visit_method_receiver_expr(&mut self, expr: &mut P) { + fn visit_method_receiver_expr(&mut self, expr: &mut ast::Expr) { match expr.kind { - ast::ExprKind::MacCall(_) => *expr = self.remove(expr.id).make_method_receiver_expr(), + ast::ExprKind::MacCall(_) => *expr = *self.remove(expr.id).make_method_receiver_expr(), _ => walk_expr(self, expr), } } diff --git a/compiler/rustc_expand/src/stats.rs b/compiler/rustc_expand/src/stats.rs index 6b2ad30dffd0..b4c4eac028fe 100644 --- a/compiler/rustc_expand/src/stats.rs +++ b/compiler/rustc_expand/src/stats.rs @@ -15,15 +15,11 @@ pub struct MacroStat { /// Number of uses of the macro. pub uses: usize, - /// Net increase in number of lines of code (when pretty-printed), i.e. - /// `lines(output) - lines(invocation)`. Can be negative because a macro - /// output may be smaller than the invocation. - pub lines: isize, + /// Number of lines of code (when pretty-printed). + pub lines: usize, - /// Net increase in number of lines of code (when pretty-printed), i.e. - /// `bytes(output) - bytes(invocation)`. Can be negative because a macro - /// output may be smaller than the invocation. - pub bytes: isize, + /// Number of bytes of code (when pretty-printed). + pub bytes: usize, } pub(crate) fn elems_to_string(elems: &SmallVec<[T; 1]>, f: impl Fn(&T) -> String) -> String { @@ -131,16 +127,12 @@ pub(crate) fn update_macro_stats( input: &str, fragment: &AstFragment, ) { - fn lines_and_bytes(s: &str) -> (usize, usize) { - (s.trim_end().split('\n').count(), s.len()) - } - // Measure the size of the output by pretty-printing it and counting // the lines and bytes. 
let name = Symbol::intern(&pprust::path_to_string(path)); let output = fragment.to_string(); - let (in_l, in_b) = lines_and_bytes(input); - let (out_l, out_b) = lines_and_bytes(&output); + let num_lines = output.trim_end().split('\n').count(); + let num_bytes = output.len(); // This code is useful for debugging `-Zmacro-stats`. For every // invocation it prints the full input and output. @@ -157,7 +149,7 @@ pub(crate) fn update_macro_stats( {name}: [{crate_name}] ({fragment_kind:?}) {span}\n\ -------------------------------\n\ {input}\n\ - -- ({in_l} lines, {in_b} bytes) --> ({out_l} lines, {out_b} bytes) --\n\ + -- {num_lines} lines, {num_bytes} bytes --\n\ {output}\n\ " ); @@ -166,6 +158,6 @@ pub(crate) fn update_macro_stats( // The recorded size is the difference between the input and the output. let entry = ecx.macro_stats.entry((name, macro_kind)).or_insert(MacroStat::default()); entry.uses += 1; - entry.lines += out_l as isize - in_l as isize; - entry.bytes += out_b as isize - in_b as isize; + entry.lines += num_lines; + entry.bytes += num_bytes; } diff --git a/compiler/rustc_feature/Cargo.toml b/compiler/rustc_feature/Cargo.toml index a5ae06473cbe..78d7b698b720 100644 --- a/compiler/rustc_feature/Cargo.toml +++ b/compiler/rustc_feature/Cargo.toml @@ -5,8 +5,9 @@ edition = "2024" [dependencies] # tidy-alphabetical-start +rustc_attr_data_structures = { path = "../rustc_attr_data_structures" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_span = { path = "../rustc_span" } -serde = { version = "1.0.125", features = [ "derive" ] } +serde = { version = "1.0.125", features = ["derive"] } serde_json = "1.0.59" # tidy-alphabetical-end diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs index 5b1f1684d54c..7d9915d7f68b 100644 --- a/compiler/rustc_feature/src/builtin_attrs.rs +++ b/compiler/rustc_feature/src/builtin_attrs.rs @@ -5,6 +5,7 @@ use std::sync::LazyLock; use AttributeDuplicates::*; use 
AttributeGate::*; use AttributeType::*; +use rustc_attr_data_structures::EncodeCrossCrate; use rustc_data_structures::fx::FxHashMap; use rustc_span::edition::Edition; use rustc_span::{Symbol, sym}; @@ -368,12 +369,6 @@ macro_rules! experimental { }; } -#[derive(PartialEq)] -pub enum EncodeCrossCrate { - Yes, - No, -} - pub struct BuiltinAttribute { pub name: Symbol, /// Whether this attribute is encode cross crate. @@ -657,6 +652,19 @@ pub static BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ EncodeCrossCrate::Yes, min_generic_const_args, experimental!(type_const), ), + // The `#[loop_match]` and `#[const_continue]` attributes are part of the + // lang experiment for RFC 3720 tracked in: + // + // - https://github.com/rust-lang/rust/issues/132306 + gated!( + const_continue, Normal, template!(Word), ErrorFollowing, + EncodeCrossCrate::No, loop_match, experimental!(const_continue) + ), + gated!( + loop_match, Normal, template!(Word), ErrorFollowing, + EncodeCrossCrate::No, loop_match, experimental!(loop_match) + ), + // ========================================================================== // Internal attributes: Stability, deprecation, and unsafe: // ========================================================================== @@ -710,7 +718,7 @@ pub static BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ ), rustc_attr!( rustc_pub_transparent, Normal, template!(Word), - WarnFollowing, EncodeCrossCrate::Yes, + ErrorFollowing, EncodeCrossCrate::Yes, "used internally to mark types with a `transparent` representation when it is guaranteed by the documentation", ), @@ -1083,7 +1091,7 @@ pub static BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ "the `#[rustc_main]` attribute is used internally to specify test entry point function", ), rustc_attr!( - rustc_skip_during_method_dispatch, Normal, template!(List: "array, boxed_slice"), WarnFollowing, + rustc_skip_during_method_dispatch, Normal, template!(List: "array, boxed_slice"), ErrorFollowing, EncodeCrossCrate::No, "the 
`#[rustc_skip_during_method_dispatch]` attribute is used to exclude a trait \ from method dispatch when the receiver is of the following type, for compatibility in \ @@ -1131,6 +1139,10 @@ pub static BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ TEST, rustc_insignificant_dtor, Normal, template!(Word), WarnFollowing, EncodeCrossCrate::Yes ), + rustc_attr!( + TEST, rustc_no_implicit_bounds, CrateLevel, template!(Word), + WarnFollowing, EncodeCrossCrate::No + ), rustc_attr!( TEST, rustc_strict_coherence, Normal, template!(Word), WarnFollowing, EncodeCrossCrate::Yes diff --git a/compiler/rustc_feature/src/removed.rs b/compiler/rustc_feature/src/removed.rs index 0cd090b25a4f..a855e4c1b0ef 100644 --- a/compiler/rustc_feature/src/removed.rs +++ b/compiler/rustc_feature/src/removed.rs @@ -54,6 +54,7 @@ declare_features! ( /// Allows using the `amdgpu-kernel` ABI. (removed, abi_amdgpu_kernel, "1.77.0", Some(51575), None, 120495), + (removed, abi_c_cmse_nonsecure_call, "CURRENT_RUSTC_VERSION", Some(81391), Some("renamed to abi_cmse_nonsecure_call"), 142146), (removed, advanced_slice_patterns, "1.42.0", Some(62254), Some("merged into `#![feature(slice_patterns)]`"), 67712), (removed, allocator, "1.0.0", None, None), @@ -86,7 +87,7 @@ declare_features! ( Some("at compile-time, pointers do not have an integer value, so these casts cannot be properly supported"), 87020), /// Allows `T: ?const Trait` syntax in bounds. (removed, const_trait_bound_opt_out, "1.56.0", Some(67794), - Some("Removed in favor of `~const` bound in #![feature(const_trait_impl)]"), 88328), + Some("Removed in favor of `[const]` bound in #![feature(const_trait_impl)]"), 88328), /// Allows using `crate` as visibility modifier, synonymous with `pub(crate)`. (removed, crate_visibility_modifier, "1.63.0", Some(53120), Some("removed in favor of `pub(crate)`"), 97254), /// Allows using custom attributes (RFC 572). @@ -122,7 +123,7 @@ declare_features! ( /// [^1]: Formerly known as "object safe". 
(removed, dyn_compatible_for_dispatch, "1.87.0", Some(43561), Some("removed, not used heavily and represented additional complexity in dyn compatibility"), 136522), - /// Uses generic effect parameters for ~const bounds + /// Uses generic effect parameters for [const] bounds (removed, effects, "1.84.0", Some(102090), Some("removed, redundant with `#![feature(const_trait_impl)]`"), 132479), /// Allows defining `existential type`s. @@ -285,4 +286,18 @@ declare_features! ( // ------------------------------------------------------------------------- // feature-group-end: removed features // ------------------------------------------------------------------------- + + + // ------------------------------------------------------------------------- + // feature-group-start: removed library features + // ------------------------------------------------------------------------- + // + // FIXME(#141617): we should have a better way to track removed library features, but we reuse + // the infrastructure here so users still get hints. The symbols used here can be remove from + // `symbol.rs` when that happens. + (removed, concat_idents, "CURRENT_RUSTC_VERSION", Some(29599), + Some("use the `${concat(..)}` metavariable expression instead"), 142704), + // ------------------------------------------------------------------------- + // feature-group-end: removed library features + // ------------------------------------------------------------------------- ); diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index a7bc6207149f..719ba597da19 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -353,8 +353,8 @@ declare_features! ( /// Allows `extern "avr-interrupt" fn()` and `extern "avr-non-blocking-interrupt" fn()`. (unstable, abi_avr_interrupt, "1.45.0", Some(69664)), - /// Allows `extern "C-cmse-nonsecure-call" fn()`. 
- (unstable, abi_c_cmse_nonsecure_call, "1.51.0", Some(81391)), + /// Allows `extern "cmse-nonsecure-call" fn()`. + (unstable, abi_cmse_nonsecure_call, "CURRENT_RUSTC_VERSION", Some(81391)), /// Allows `extern "custom" fn()`. (unstable, abi_custom, "CURRENT_RUSTC_VERSION", Some(140829)), /// Allows `extern "gpu-kernel" fn()`. @@ -431,13 +431,13 @@ declare_features! ( (unstable, closure_lifetime_binder, "1.64.0", Some(97362)), /// Allows `#[track_caller]` on closures and coroutines. (unstable, closure_track_caller, "1.57.0", Some(87417)), - /// Allows `extern "C-cmse-nonsecure-entry" fn()`. + /// Allows `extern "cmse-nonsecure-entry" fn()`. (unstable, cmse_nonsecure_entry, "1.48.0", Some(75835)), /// Allows `async {}` expressions in const contexts. (unstable, const_async_blocks, "1.53.0", Some(85368)), /// Allows `const || {}` closures in const contexts. (incomplete, const_closures, "1.68.0", Some(106003)), - /// Allows using `~const Destruct` bounds and calling drop impls in const contexts. + /// Allows using `[const] Destruct` bounds and calling drop impls in const contexts. (unstable, const_destruct, "1.85.0", Some(133214)), /// Allows `for _ in _` loops in const contexts. (unstable, const_for, "1.56.0", Some(87575)), @@ -510,7 +510,7 @@ declare_features! ( (unstable, ffi_pure, "1.45.0", Some(58329)), /// Controlling the behavior of fmt::Debug (unstable, fmt_debug, "1.82.0", Some(129709)), - /// Allows using `#[repr(align(...))]` on function items + /// Allows using `#[align(...)]` on function items (unstable, fn_align, "1.53.0", Some(82232)), /// Support delegating implementation of functions to other already implemented functions. (incomplete, fn_delegation, "1.76.0", Some(118212)), @@ -557,6 +557,8 @@ declare_features! ( /// Allows using `#[link(kind = "link-arg", name = "...")]` /// to pass custom arguments to the linker. (unstable, link_arg_attribute, "1.76.0", Some(99427)), + /// Allows fused `loop`/`match` for direct intraprocedural jumps. 
+ (incomplete, loop_match, "CURRENT_RUSTC_VERSION", Some(132306)), /// Give access to additional metadata about declarative macro meta-variables. (unstable, macro_metavar_expr, "1.61.0", Some(83527)), /// Provides a way to concatenate identifiers using metavariable expressions. diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 556f50a85af7..75dff588669a 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -1302,6 +1302,7 @@ impl AttributeExt for Attribute { // FIXME: should not be needed anymore when all attrs are parsed Attribute::Parsed(AttributeKind::Deprecation { span, .. }) => *span, Attribute::Parsed(AttributeKind::DocComment { span, .. }) => *span, + Attribute::Parsed(AttributeKind::MayDangle(span)) => *span, a => panic!("can't get the span of an arbitrary parsed attribute: {a:?}"), } } @@ -1345,12 +1346,13 @@ impl AttributeExt for Attribute { } } - #[inline] - fn style(&self) -> AttrStyle { - match &self { - Attribute::Unparsed(u) => u.style, - Attribute::Parsed(AttributeKind::DocComment { style, .. }) => *style, - _ => panic!(), + fn doc_resolution_scope(&self) -> Option { + match self { + Attribute::Parsed(AttributeKind::DocComment { style, .. }) => Some(*style), + Attribute::Unparsed(attr) if self.has_name(sym::doc) && self.value_str().is_some() => { + Some(attr.style) + } + _ => None, } } } @@ -1441,11 +1443,6 @@ impl Attribute { pub fn doc_str_and_comment_kind(&self) -> Option<(Symbol, CommentKind)> { AttributeExt::doc_str_and_comment_kind(self) } - - #[inline] - pub fn style(&self) -> AttrStyle { - AttributeExt::style(self) - } } /// Attributes owned by a HIR owner. 
@@ -2285,16 +2282,9 @@ pub struct Expr<'hir> { } impl Expr<'_> { - pub fn precedence( - &self, - for_each_attr: &dyn Fn(HirId, &mut dyn FnMut(&Attribute)), - ) -> ExprPrecedence { + pub fn precedence(&self, has_attr: &dyn Fn(HirId) -> bool) -> ExprPrecedence { let prefix_attrs_precedence = || -> ExprPrecedence { - let mut has_outer_attr = false; - for_each_attr(self.hir_id, &mut |attr: &Attribute| { - has_outer_attr |= matches!(attr.style(), AttrStyle::Outer) - }); - if has_outer_attr { ExprPrecedence::Prefix } else { ExprPrecedence::Unambiguous } + if has_attr(self.hir_id) { ExprPrecedence::Prefix } else { ExprPrecedence::Unambiguous } }; match &self.kind { @@ -2350,7 +2340,7 @@ impl Expr<'_> { | ExprKind::Use(..) | ExprKind::Err(_) => prefix_attrs_precedence(), - ExprKind::DropTemps(expr, ..) => expr.precedence(for_each_attr), + ExprKind::DropTemps(expr, ..) => expr.precedence(has_attr), } } @@ -3151,6 +3141,15 @@ pub enum TraitItemKind<'hir> { /// type. Type(GenericBounds<'hir>, Option<&'hir Ty<'hir>>), } +impl TraitItemKind<'_> { + pub fn descr(&self) -> &'static str { + match self { + TraitItemKind::Const(..) => "associated constant", + TraitItemKind::Fn(..) => "function", + TraitItemKind::Type(..) => "associated type", + } + } +} // The bodies for items are stored "out of line", in a separate // hashmap in the `Crate`. Here we just record the hir-id of the item @@ -3212,6 +3211,15 @@ pub enum ImplItemKind<'hir> { /// An associated type. Type(&'hir Ty<'hir>), } +impl ImplItemKind<'_> { + pub fn descr(&self) -> &'static str { + match self { + ImplItemKind::Const(..) => "associated constant", + ImplItemKind::Fn(..) => "function", + ImplItemKind::Type(..) => "associated type", + } + } +} /// A constraint on an associated item. /// @@ -4537,6 +4545,16 @@ pub enum ForeignItemKind<'hir> { Type, } +impl ForeignItemKind<'_> { + pub fn descr(&self) -> &'static str { + match self { + ForeignItemKind::Fn(..) => "function", + ForeignItemKind::Static(..) 
=> "static variable", + ForeignItemKind::Type => "type", + } + } +} + /// A variable captured by a closure. #[derive(Debug, Copy, Clone, HashStable_Generic)] pub struct Upvar { @@ -5002,9 +5020,9 @@ mod size_asserts { static_assert_size!(LetStmt<'_>, 72); static_assert_size!(Param<'_>, 32); static_assert_size!(Pat<'_>, 72); + static_assert_size!(PatKind<'_>, 48); static_assert_size!(Path<'_>, 40); static_assert_size!(PathSegment<'_>, 48); - static_assert_size!(PatKind<'_>, 48); static_assert_size!(QPath<'_>, 24); static_assert_size!(Res, 12); static_assert_size!(Stmt<'_>, 32); diff --git a/compiler/rustc_hir_analysis/messages.ftl b/compiler/rustc_hir_analysis/messages.ftl index bd2252c1bf8f..529d3578985a 100644 --- a/compiler/rustc_hir_analysis/messages.ftl +++ b/compiler/rustc_hir_analysis/messages.ftl @@ -1,5 +1,5 @@ hir_analysis_abi_custom_clothed_function = - items with the `"custom"` ABI can only be declared externally or defined via naked functions + items with the "custom" ABI can only be declared externally or defined via naked functions .suggestion = convert this to an `#[unsafe(naked)]` function hir_analysis_ambiguous_assoc_item = ambiguous associated {$assoc_kind} `{$assoc_ident}` in bounds of `{$qself}` @@ -73,10 +73,10 @@ hir_analysis_closure_implicit_hrtb = implicit types in closure signatures are fo .label = `for<...>` is here hir_analysis_cmse_call_generic = - function pointers with the `"C-cmse-nonsecure-call"` ABI cannot contain generics in their type + function pointers with the `"cmse-nonsecure-call"` ABI cannot contain generics in their type hir_analysis_cmse_entry_generic = - functions with the `"C-cmse-nonsecure-entry"` ABI cannot contain generics in their type + functions with the `"cmse-nonsecure-entry"` ABI cannot contain generics in their type hir_analysis_cmse_inputs_stack_spill = arguments for `{$abi}` function too large to pass via registers @@ -447,6 +447,9 @@ hir_analysis_parenthesized_fn_trait_expansion = 
hir_analysis_placeholder_not_allowed_item_signatures = the placeholder `_` is not allowed within types on item signatures for {$kind} .label = not allowed in type signatures + +hir_analysis_pointee_sized_trait_object = `PointeeSized` cannot be used with trait objects + hir_analysis_precise_capture_self_alias = `Self` can't be captured in `use<...>` precise captures list, since it is an alias .label = `Self` is not a generic argument, but an alias to the type of the {$what} diff --git a/compiler/rustc_hir_analysis/src/check/check.rs b/compiler/rustc_hir_analysis/src/check/check.rs index 32fec0604c0f..a361679e8ad6 100644 --- a/compiler/rustc_hir_analysis/src/check/check.rs +++ b/compiler/rustc_hir_analysis/src/check/check.rs @@ -1,8 +1,9 @@ use std::cell::LazyCell; use std::ops::ControlFlow; -use rustc_abi::FieldIdx; +use rustc_abi::{ExternAbi, FieldIdx}; use rustc_attr_data_structures::ReprAttr::ReprPacked; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_data_structures::unord::{UnordMap, UnordSet}; use rustc_errors::codes::*; use rustc_errors::{EmissionGuarantee, MultiSpan}; @@ -12,7 +13,6 @@ use rustc_infer::infer::{RegionVariableOrigin, TyCtxtInferExt}; use rustc_infer::traits::{Obligation, ObligationCauseCode}; use rustc_lint_defs::builtin::{ REPR_TRANSPARENT_EXTERNAL_PRIVATE_FIELDS, UNSUPPORTED_CALLING_CONVENTIONS, - UNSUPPORTED_FN_PTR_CALLING_CONVENTIONS, }; use rustc_middle::hir::nested_filter; use rustc_middle::middle::resolve_bound_vars::ResolvedArg; @@ -52,49 +52,22 @@ fn add_abi_diag_help(abi: ExternAbi, diag: &mut Diag<'_, T } pub fn check_abi(tcx: TyCtxt<'_>, hir_id: hir::HirId, span: Span, abi: ExternAbi) { - // FIXME: this should be checked earlier, e.g. in `rustc_ast_lowering`, to fix - // things like #86232. + // FIXME: This should be checked earlier, e.g. in `rustc_ast_lowering`, as this + // currently only guards function imports, function definitions, and function pointer types. 
+ // Functions in trait declarations can still use "deprecated" ABIs without any warning. match AbiMap::from_target(&tcx.sess.target).canonize_abi(abi, false) { AbiMapping::Direct(..) => (), + // already erred in rustc_ast_lowering AbiMapping::Invalid => { - let mut err = struct_span_code_err!( - tcx.dcx(), - span, - E0570, - "`{abi}` is not a supported ABI for the current target", - ); - add_abi_diag_help(abi, &mut err); - err.emit(); + tcx.dcx().span_delayed_bug(span, format!("{abi} should be rejected in ast_lowering")); } AbiMapping::Deprecated(..) => { tcx.node_span_lint(UNSUPPORTED_CALLING_CONVENTIONS, hir_id, span, |lint| { - lint.primary_message("use of calling convention not supported on this target"); - add_abi_diag_help(abi, lint); - }); - } - } -} - -pub fn check_abi_fn_ptr(tcx: TyCtxt<'_>, hir_id: hir::HirId, span: Span, abi: ExternAbi) { - // This is always an FCW, even for `AbiMapping::Invalid`, since we started linting later than - // in `check_abi` above. - match AbiMap::from_target(&tcx.sess.target).canonize_abi(abi, false) { - AbiMapping::Direct(..) => (), - // This is not a redundant match arm: these ABIs started linting after introducing - // UNSUPPORTED_FN_PTR_CALLING_CONVENTIONS already existed and we want to - // avoid expanding the scope of that lint so it can move to a hard error sooner. - AbiMapping::Deprecated(..) 
=> { - tcx.node_span_lint(UNSUPPORTED_CALLING_CONVENTIONS, hir_id, span, |lint| { - lint.primary_message("use of calling convention not supported on this target"); - add_abi_diag_help(abi, lint); - }); - } - AbiMapping::Invalid => { - tcx.node_span_lint(UNSUPPORTED_FN_PTR_CALLING_CONVENTIONS, hir_id, span, |lint| { lint.primary_message(format!( - "the calling convention {abi} is not supported on this target" + "{abi} is not a supported ABI for the current target" )); + add_abi_diag_help(abi, lint); }); } } @@ -103,7 +76,7 @@ pub fn check_abi_fn_ptr(tcx: TyCtxt<'_>, hir_id: hir::HirId, span: Span, abi: Ex pub fn check_custom_abi(tcx: TyCtxt<'_>, def_id: LocalDefId, fn_sig: FnSig<'_>, fn_sig_span: Span) { if fn_sig.abi == ExternAbi::Custom { // Function definitions that use `extern "custom"` must be naked functions. - if !tcx.has_attr(def_id, sym::naked) { + if !find_attr!(tcx.get_all_attrs(def_id), AttributeKind::Naked(_)) { tcx.dcx().emit_err(crate::errors::AbiCustomClothedFunction { span: fn_sig_span, naked_span: tcx.def_span(def_id).shrink_to_lo(), @@ -339,7 +312,7 @@ fn check_opaque_meets_bounds<'tcx>( // here rather than using ReErased. let hidden_ty = tcx.type_of(def_id.to_def_id()).instantiate(tcx, args); let hidden_ty = fold_regions(tcx, hidden_ty, |re, _dbi| match re.kind() { - ty::ReErased => infcx.next_region_var(RegionVariableOrigin::MiscVariable(span)), + ty::ReErased => infcx.next_region_var(RegionVariableOrigin::Misc(span)), _ => re, }); @@ -371,7 +344,7 @@ fn check_opaque_meets_bounds<'tcx>( let misc_cause = ObligationCause::misc(span, def_id); // FIXME: We should just register the item bounds here, rather than equating. // FIXME(const_trait_impl): When we do that, please make sure to also register - // the `~const` bounds. + // the `[const]` bounds. 
match ocx.eq(&misc_cause, param_env, opaque_ty, hidden_ty) { Ok(()) => {} Err(ty_err) => { @@ -867,6 +840,7 @@ pub(crate) fn check_item_type(tcx: TyCtxt<'_>, def_id: LocalDefId) { let hir::ItemKind::ForeignMod { abi, items } = it.kind else { return; }; + check_abi(tcx, it.hir_id(), it.span, abi); for item in items { @@ -1384,7 +1358,11 @@ pub(super) fn check_transparent<'tcx>(tcx: TyCtxt<'tcx>, adt: ty::AdtDef<'tcx>) ty::Tuple(list) => list.iter().try_for_each(|t| check_non_exhaustive(tcx, t)), ty::Array(ty, _) => check_non_exhaustive(tcx, *ty), ty::Adt(def, args) => { - if !def.did().is_local() && !tcx.has_attr(def.did(), sym::rustc_pub_transparent) + if !def.did().is_local() + && !attrs::find_attr!( + tcx.get_all_attrs(def.did()), + AttributeKind::PubTransparent(_) + ) { let non_exhaustive = def.is_variant_list_non_exhaustive() || def diff --git a/compiler/rustc_hir_analysis/src/check/compare_impl_item.rs b/compiler/rustc_hir_analysis/src/check/compare_impl_item.rs index 47681a78ecca..abbe497858bb 100644 --- a/compiler/rustc_hir_analysis/src/check/compare_impl_item.rs +++ b/compiler/rustc_hir_analysis/src/check/compare_impl_item.rs @@ -9,7 +9,7 @@ use rustc_errors::{Applicability, ErrorGuaranteed, MultiSpan, pluralize, struct_ use rustc_hir::def::{DefKind, Res}; use rustc_hir::intravisit::VisitorExt; use rustc_hir::{self as hir, AmbigArg, GenericParamKind, ImplItemKind, intravisit}; -use rustc_infer::infer::{self, InferCtxt, TyCtxtInferExt}; +use rustc_infer::infer::{self, BoundRegionConversionTime, InferCtxt, TyCtxtInferExt}; use rustc_infer::traits::util; use rustc_middle::ty::error::{ExpectedFound, TypeError}; use rustc_middle::ty::{ @@ -264,9 +264,9 @@ fn compare_method_predicate_entailment<'tcx>( } // If we're within a const implementation, we need to make sure that the method - // does not assume stronger `~const` bounds than the trait definition. + // does not assume stronger `[const]` bounds than the trait definition. 
// - // This registers the `~const` bounds of the impl method, which we will prove + // This registers the `[const]` bounds of the impl method, which we will prove // using the hybrid param-env that we earlier augmented with the const conditions // from the impl header and trait method declaration. if is_conditionally_const { @@ -311,7 +311,7 @@ fn compare_method_predicate_entailment<'tcx>( let unnormalized_impl_sig = infcx.instantiate_binder_with_fresh_vars( impl_m_span, - infer::HigherRankedType, + BoundRegionConversionTime::HigherRankedType, tcx.fn_sig(impl_m.def_id).instantiate_identity(), ); @@ -518,7 +518,7 @@ pub(super) fn collect_return_position_impl_trait_in_trait_tys<'tcx>( param_env, infcx.instantiate_binder_with_fresh_vars( return_span, - infer::HigherRankedType, + BoundRegionConversionTime::HigherRankedType, tcx.fn_sig(impl_m.def_id).instantiate_identity(), ), ); @@ -2335,7 +2335,7 @@ pub(super) fn check_type_bounds<'tcx>( ) .collect(); - // Only in a const implementation do we need to check that the `~const` item bounds hold. + // Only in a const implementation do we need to check that the `[const]` item bounds hold. 
if tcx.is_conditionally_const(impl_ty_def_id) { obligations.extend(util::elaborate( tcx, diff --git a/compiler/rustc_hir_analysis/src/check/entry.rs b/compiler/rustc_hir_analysis/src/check/entry.rs index 3bad36da9990..b556683e80a5 100644 --- a/compiler/rustc_hir_analysis/src/check/entry.rs +++ b/compiler/rustc_hir_analysis/src/check/entry.rs @@ -1,14 +1,15 @@ use std::ops::Not; use rustc_abi::ExternAbi; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_hir as hir; use rustc_hir::Node; use rustc_infer::infer::TyCtxtInferExt; use rustc_middle::span_bug; use rustc_middle::ty::{self, TyCtxt, TypingMode}; use rustc_session::config::EntryFnType; +use rustc_span::Span; use rustc_span::def_id::{CRATE_DEF_ID, DefId, LocalDefId}; -use rustc_span::{Span, sym}; use rustc_trait_selection::error_reporting::InferCtxtErrorExt; use rustc_trait_selection::traits::{self, ObligationCause, ObligationCauseCode}; @@ -98,8 +99,10 @@ fn check_main_fn_ty(tcx: TyCtxt<'_>, main_def_id: DefId) { error = true; } - for attr in tcx.get_attrs(main_def_id, sym::track_caller) { - tcx.dcx().emit_err(errors::TrackCallerOnMain { span: attr.span(), annotated: main_span }); + if let Some(attr_span) = + find_attr!(tcx.get_all_attrs(main_def_id), AttributeKind::TrackCaller(span) => *span) + { + tcx.dcx().emit_err(errors::TrackCallerOnMain { span: attr_span, annotated: main_span }); error = true; } diff --git a/compiler/rustc_hir_analysis/src/check/mod.rs b/compiler/rustc_hir_analysis/src/check/mod.rs index ca24d5a6424d..5cec3331bb19 100644 --- a/compiler/rustc_hir_analysis/src/check/mod.rs +++ b/compiler/rustc_hir_analysis/src/check/mod.rs @@ -72,8 +72,8 @@ pub mod wfcheck; use std::num::NonZero; -pub use check::{check_abi, check_abi_fn_ptr, check_custom_abi}; -use rustc_abi::{ExternAbi, VariantIdx}; +pub use check::{check_abi, check_custom_abi}; +use rustc_abi::VariantIdx; use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; use rustc_errors::{Diag, ErrorGuaranteed, pluralize, 
struct_span_code_err}; use rustc_hir::LangItem; @@ -311,9 +311,7 @@ fn default_body_is_unstable( reason: reason_str, }); - let inject_span = item_did - .as_local() - .and_then(|id| tcx.crate_level_attribute_injection_span(tcx.local_def_id_to_hir_id(id))); + let inject_span = item_did.is_local().then(|| tcx.crate_level_attribute_injection_span()); rustc_session::parse::add_feature_diagnostics_for_issue( &mut err, &tcx.sess, diff --git a/compiler/rustc_hir_analysis/src/check/wfcheck.rs b/compiler/rustc_hir_analysis/src/check/wfcheck.rs index 20d0e87b7a76..b9124ea0e5ee 100644 --- a/compiler/rustc_hir_analysis/src/check/wfcheck.rs +++ b/compiler/rustc_hir_analysis/src/check/wfcheck.rs @@ -11,7 +11,7 @@ use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir::lang_items::LangItem; use rustc_hir::{AmbigArg, ItemKind}; use rustc_infer::infer::outlives::env::OutlivesEnvironment; -use rustc_infer::infer::{self, InferCtxt, TyCtxtInferExt}; +use rustc_infer::infer::{self, InferCtxt, SubregionOrigin, TyCtxtInferExt}; use rustc_lint_defs::builtin::SUPERTRAIT_ITEM_SHADOWING_DEFINITION; use rustc_macros::LintDiagnostic; use rustc_middle::mir::interpret::ErrorHandled; @@ -231,7 +231,6 @@ fn check_item<'tcx>(tcx: TyCtxt<'tcx>, item: &'tcx hir::Item<'tcx>) -> Result<() item.name = ? tcx.def_path_str(def_id) ); crate::collect::lower_item(tcx, item.item_id()); - crate::collect::reject_placeholder_type_signatures_in_item(tcx, item); let res = match item.kind { // Right now we check that every default trait implementation @@ -382,8 +381,6 @@ fn check_trait_item<'tcx>( _ => (None, trait_item.span), }; - check_dyn_incompatible_self_trait_by_name(tcx, trait_item); - // Check that an item definition in a subtrait is shadowing a supertrait item. 
lint_item_shadowing_supertrait_item(tcx, def_id); @@ -741,7 +738,7 @@ fn ty_known_to_outlive<'tcx>( infcx.register_type_outlives_constraint_inner(infer::TypeOutlivesConstraint { sub_region: region, sup_type: ty, - origin: infer::RelateParamBound(DUMMY_SP, ty, None), + origin: SubregionOrigin::RelateParamBound(DUMMY_SP, ty, None), }); }) } @@ -757,7 +754,11 @@ fn region_known_to_outlive<'tcx>( region_b: ty::Region<'tcx>, ) -> bool { test_region_obligations(tcx, id, param_env, wf_tys, |infcx| { - infcx.sub_regions(infer::RelateRegionParamBound(DUMMY_SP, None), region_b, region_a); + infcx.sub_regions( + SubregionOrigin::RelateRegionParamBound(DUMMY_SP, None), + region_b, + region_a, + ); }) } @@ -832,70 +833,6 @@ impl<'tcx> TypeVisitor> for GATArgsCollector<'tcx> { } } -fn could_be_self(trait_def_id: LocalDefId, ty: &hir::Ty<'_>) -> bool { - match ty.kind { - hir::TyKind::TraitObject([trait_ref], ..) => match trait_ref.trait_ref.path.segments { - [s] => s.res.opt_def_id() == Some(trait_def_id.to_def_id()), - _ => false, - }, - _ => false, - } -} - -/// Detect when a dyn-incompatible trait is referring to itself in one of its associated items. -/// -/// In such cases, suggest using `Self` instead. -fn check_dyn_incompatible_self_trait_by_name(tcx: TyCtxt<'_>, item: &hir::TraitItem<'_>) { - let (trait_ident, trait_def_id) = - match tcx.hir_node_by_def_id(tcx.hir_get_parent_item(item.hir_id()).def_id) { - hir::Node::Item(item) => match item.kind { - hir::ItemKind::Trait(_, _, ident, ..) 
=> (ident, item.owner_id), - _ => return, - }, - _ => return, - }; - let mut trait_should_be_self = vec![]; - match &item.kind { - hir::TraitItemKind::Const(ty, _) | hir::TraitItemKind::Type(_, Some(ty)) - if could_be_self(trait_def_id.def_id, ty) => - { - trait_should_be_self.push(ty.span) - } - hir::TraitItemKind::Fn(sig, _) => { - for ty in sig.decl.inputs { - if could_be_self(trait_def_id.def_id, ty) { - trait_should_be_self.push(ty.span); - } - } - match sig.decl.output { - hir::FnRetTy::Return(ty) if could_be_self(trait_def_id.def_id, ty) => { - trait_should_be_self.push(ty.span); - } - _ => {} - } - } - _ => {} - } - if !trait_should_be_self.is_empty() { - if tcx.is_dyn_compatible(trait_def_id) { - return; - } - let sugg = trait_should_be_self.iter().map(|span| (*span, "Self".to_string())).collect(); - tcx.dcx() - .struct_span_err( - trait_should_be_self, - "associated item referring to unboxed trait object for its own trait", - ) - .with_span_label(trait_ident.span, "in this trait") - .with_multipart_suggestion( - "you might have meant to use `Self` to refer to the implementing type", - sugg, - Applicability::MachineApplicable, - ) - .emit(); - } -} - fn lint_item_shadowing_supertrait_item<'tcx>(tcx: TyCtxt<'tcx>, trait_item_def_id: LocalDefId) { let item_name = tcx.item_name(trait_item_def_id.to_def_id()); let trait_def_id = tcx.local_parent(trait_item_def_id); @@ -1064,7 +1001,7 @@ fn check_param_wf(tcx: TyCtxt<'_>, param: &hir::GenericParam<'_>) -> Result<(), Ok(..) => Some(vec![(adt_const_params_feature_string, sym::adt_const_params)]), }; if let Some(features) = may_suggest_feature { - tcx.disabled_nightly_features(&mut diag, Some(param.hir_id), features); + tcx.disabled_nightly_features(&mut diag, features); } Err(diag.emit()) @@ -1465,7 +1402,7 @@ fn check_impl<'tcx>( } } - // Ensure that the `~const` where clauses of the trait hold for the impl. + // Ensure that the `[const]` where clauses of the trait hold for the impl. 
if tcx.is_conditionally_const(item.owner_id.def_id) { for (bound, _) in tcx.const_conditions(trait_ref.def_id).instantiate(tcx, trait_ref.args) @@ -1557,7 +1494,9 @@ fn check_where_clauses<'tcx>(wfcx: &WfCheckingCtxt<'_, 'tcx>, span: Span, def_id ty::ConstKind::Unevaluated(uv) => { infcx.tcx.type_of(uv.def).instantiate(infcx.tcx, uv.args) } - ty::ConstKind::Param(param_ct) => param_ct.find_ty_from_env(wfcx.param_env), + ty::ConstKind::Param(param_ct) => { + param_ct.find_const_ty_from_env(wfcx.param_env) + } }; let param_ty = tcx.type_of(param.def_id).instantiate_identity(); diff --git a/compiler/rustc_hir_analysis/src/coherence/builtin.rs b/compiler/rustc_hir_analysis/src/coherence/builtin.rs index 4779f4fb702b..734c9c58c08a 100644 --- a/compiler/rustc_hir_analysis/src/coherence/builtin.rs +++ b/compiler/rustc_hir_analysis/src/coherence/builtin.rs @@ -10,7 +10,7 @@ use rustc_hir as hir; use rustc_hir::ItemKind; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir::lang_items::LangItem; -use rustc_infer::infer::{self, RegionResolutionError, TyCtxtInferExt}; +use rustc_infer::infer::{self, RegionResolutionError, SubregionOrigin, TyCtxtInferExt}; use rustc_infer::traits::Obligation; use rustc_middle::ty::adjustment::CoerceUnsizedInfo; use rustc_middle::ty::print::PrintTraitRefExt as _; @@ -415,7 +415,7 @@ pub(crate) fn coerce_unsized_info<'tcx>( }; let (source, target, trait_def_id, kind, field_span) = match (source.kind(), target.kind()) { (&ty::Ref(r_a, ty_a, mutbl_a), &ty::Ref(r_b, ty_b, mutbl_b)) => { - infcx.sub_regions(infer::RelateObjectBound(span), r_b, r_a); + infcx.sub_regions(SubregionOrigin::RelateObjectBound(span), r_b, r_a); let mt_a = ty::TypeAndMut { ty: ty_a, mutbl: mutbl_a }; let mt_b = ty::TypeAndMut { ty: ty_b, mutbl: mutbl_b }; check_mutbl(mt_a, mt_b, &|ty| Ty::new_imm_ref(tcx, r_b, ty)) diff --git a/compiler/rustc_hir_analysis/src/collect.rs b/compiler/rustc_hir_analysis/src/collect.rs index 176d955bf032..d7568554669e 100644 --- 
a/compiler/rustc_hir_analysis/src/collect.rs +++ b/compiler/rustc_hir_analysis/src/collect.rs @@ -21,6 +21,7 @@ use std::ops::Bound; use rustc_abi::ExternAbi; use rustc_ast::Recovered; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; use rustc_data_structures::unord::UnordMap; use rustc_errors::{ @@ -28,7 +29,7 @@ use rustc_errors::{ }; use rustc_hir::def::DefKind; use rustc_hir::def_id::{DefId, LocalDefId}; -use rustc_hir::intravisit::{InferKind, Visitor, VisitorExt, walk_generics}; +use rustc_hir::intravisit::{InferKind, Visitor, VisitorExt}; use rustc_hir::{self as hir, GenericParamKind, HirId, Node, PreciseCapturingArgKind}; use rustc_infer::infer::{InferCtxt, TyCtxtInferExt}; use rustc_infer::traits::{DynCompatibilityViolation, ObligationCause}; @@ -153,26 +154,7 @@ impl<'v> Visitor<'v> for HirPlaceholderCollector { } } -/// If there are any placeholder types (`_`), emit an error explaining that this is not allowed -/// and suggest adding type parameters in the appropriate place, taking into consideration any and -/// all already existing generic type parameters to avoid suggesting a name that is already in use. 
-pub(crate) fn placeholder_type_error<'tcx>( - cx: &dyn HirTyLowerer<'tcx>, - generics: Option<&hir::Generics<'_>>, - placeholder_types: Vec, - suggest: bool, - hir_ty: Option<&hir::Ty<'_>>, - kind: &'static str, -) { - if placeholder_types.is_empty() { - return; - } - - placeholder_type_error_diag(cx, generics, placeholder_types, vec![], suggest, hir_ty, kind) - .emit(); -} - -pub(crate) fn placeholder_type_error_diag<'cx, 'tcx>( +fn placeholder_type_error_diag<'cx, 'tcx>( cx: &'cx dyn HirTyLowerer<'tcx>, generics: Option<&hir::Generics<'_>>, placeholder_types: Vec, @@ -244,37 +226,6 @@ pub(crate) fn placeholder_type_error_diag<'cx, 'tcx>( err } -pub(super) fn reject_placeholder_type_signatures_in_item<'tcx>( - tcx: TyCtxt<'tcx>, - item: &'tcx hir::Item<'tcx>, -) { - let (generics, suggest) = match &item.kind { - hir::ItemKind::Union(_, generics, _) - | hir::ItemKind::Enum(_, generics, _) - | hir::ItemKind::TraitAlias(_, generics, _) - | hir::ItemKind::Trait(_, _, _, generics, ..) - | hir::ItemKind::Impl(hir::Impl { generics, .. }) - | hir::ItemKind::Struct(_, generics, _) => (generics, true), - hir::ItemKind::TyAlias(_, generics, _) => (generics, false), - // `static`, `fn` and `const` are handled elsewhere to suggest appropriate type. - _ => return, - }; - - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_item(item); - - let icx = ItemCtxt::new(tcx, item.owner_id.def_id); - - placeholder_type_error( - icx.lowerer(), - Some(generics), - visitor.spans, - suggest && !visitor.may_contain_const_infer, - None, - item.kind.descr(), - ); -} - /////////////////////////////////////////////////////////////////////////// // Utility types and common code for the above passes. 
@@ -312,6 +263,54 @@ impl<'tcx> ItemCtxt<'tcx> { None => Ok(()), } } + + fn report_placeholder_type_error( + &self, + placeholder_types: Vec, + infer_replacements: Vec<(Span, String)>, + ) -> ErrorGuaranteed { + let node = self.tcx.hir_node_by_def_id(self.item_def_id); + let generics = node.generics(); + let kind_id = match node { + Node::GenericParam(_) | Node::WherePredicate(_) | Node::Field(_) => { + self.tcx.local_parent(self.item_def_id) + } + _ => self.item_def_id, + }; + // FIXME: just invoke `tcx.def_descr` instead of going through the HIR + // Can also remove most `descr` methods then. + let kind = match self.tcx.hir_node_by_def_id(kind_id) { + Node::Item(it) => it.kind.descr(), + Node::ImplItem(it) => it.kind.descr(), + Node::TraitItem(it) => it.kind.descr(), + Node::ForeignItem(it) => it.kind.descr(), + Node::OpaqueTy(_) => "opaque type", + Node::Synthetic => self.tcx.def_descr(kind_id.into()), + node => todo!("{node:#?}"), + }; + let mut diag = placeholder_type_error_diag( + self, + generics, + placeholder_types, + infer_replacements.iter().map(|&(span, _)| span).collect(), + false, + None, + kind, + ); + if !infer_replacements.is_empty() { + diag.multipart_suggestion( + format!( + "try replacing `_` with the type{} in the corresponding trait method \ + signature", + rustc_errors::pluralize!(infer_replacements.len()), + ), + infer_replacements, + Applicability::MachineApplicable, + ); + } + + diag.emit() + } } impl<'tcx> HirTyLowerer<'tcx> for ItemCtxt<'tcx> { @@ -345,10 +344,14 @@ impl<'tcx> HirTyLowerer<'tcx> for ItemCtxt<'tcx> { } fn ty_infer(&self, _: Option<&ty::GenericParamDef>, span: Span) -> Ty<'tcx> { + if !self.tcx.dcx().has_stashed_diagnostic(span, StashKey::ItemNoType) { + self.report_placeholder_type_error(vec![span], vec![]); + } Ty::new_error_with_message(self.tcx(), span, "bad placeholder type") } fn ct_infer(&self, _: Option<&ty::GenericParamDef>, span: Span) -> Const<'tcx> { + self.report_placeholder_type_error(vec![span], vec![]); 
ty::Const::new_error_with_message(self.tcx(), span, "bad placeholder constant") } @@ -523,18 +526,13 @@ impl<'tcx> HirTyLowerer<'tcx> for ItemCtxt<'tcx> { fn lower_fn_sig( &self, decl: &hir::FnDecl<'tcx>, - generics: Option<&hir::Generics<'_>>, + _generics: Option<&hir::Generics<'_>>, hir_id: rustc_hir::HirId, - hir_ty: Option<&hir::Ty<'_>>, + _hir_ty: Option<&hir::Ty<'_>>, ) -> (Vec>, Ty<'tcx>) { let tcx = self.tcx(); - // We proactively collect all the inferred type params to emit a single error per fn def. - let mut visitor = HirPlaceholderCollector::default(); - let mut infer_replacements = vec![]; - if let Some(generics) = generics { - walk_generics(&mut visitor, generics); - } + let mut infer_replacements = vec![]; let input_tys = decl .inputs @@ -550,8 +548,6 @@ impl<'tcx> HirTyLowerer<'tcx> for ItemCtxt<'tcx> { } } - // Only visit the type looking for `_` if we didn't fix the type above - visitor.visit_ty_unambig(a); self.lowerer().lower_ty(a) }) .collect(); @@ -565,42 +561,15 @@ impl<'tcx> HirTyLowerer<'tcx> for ItemCtxt<'tcx> { infer_replacements.push((output.span, suggested_ty.to_string())); Ty::new_error_with_message(tcx, output.span, suggested_ty.to_string()) } else { - visitor.visit_ty_unambig(output); self.lower_ty(output) } } hir::FnRetTy::DefaultReturn(..) => tcx.types.unit, }; - if !(visitor.spans.is_empty() && infer_replacements.is_empty()) { - // We check for the presence of - // `ident_span` to not emit an error twice when we have `fn foo(_: fn() -> _)`. 
- - let mut diag = crate::collect::placeholder_type_error_diag( - self, - generics, - visitor.spans, - infer_replacements.iter().map(|(s, _)| *s).collect(), - !visitor.may_contain_const_infer, - hir_ty, - "function", - ); - - if !infer_replacements.is_empty() { - diag.multipart_suggestion( - format!( - "try replacing `_` with the type{} in the corresponding trait method \ - signature", - rustc_errors::pluralize!(infer_replacements.len()), - ), - infer_replacements, - Applicability::MachineApplicable, - ); - } - - diag.emit(); + if !infer_replacements.is_empty() { + self.report_placeholder_type_error(vec![], infer_replacements); } - (input_tys, output_ty) } @@ -651,7 +620,6 @@ pub(super) fn lower_item(tcx: TyCtxt<'_>, item_id: hir::ItemId) { let it = tcx.hir_item(item_id); debug!(item = ?it.kind.ident(), id = %it.hir_id()); let def_id = item_id.owner_id.def_id; - let icx = ItemCtxt::new(tcx, def_id); match &it.kind { // These don't define types. @@ -677,16 +645,6 @@ pub(super) fn lower_item(tcx: TyCtxt<'_>, item_id: hir::ItemId) { } hir::ForeignItemKind::Static(..) => { tcx.ensure_ok().codegen_fn_attrs(item.owner_id); - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_foreign_item(item); - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - "static variable", - ); } _ => (), } @@ -740,22 +698,10 @@ pub(super) fn lower_item(tcx: TyCtxt<'_>, item_id: hir::ItemId) { tcx.ensure_ok().predicates_of(def_id); } - hir::ItemKind::Static(_, _, ty, _) | hir::ItemKind::Const(_, _, ty, _) => { + hir::ItemKind::Static(..) | hir::ItemKind::Const(..) => { tcx.ensure_ok().generics_of(def_id); tcx.ensure_ok().type_of(def_id); tcx.ensure_ok().predicates_of(def_id); - if !ty.is_suggestable_infer_ty() { - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_item(it); - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - it.kind.descr(), - ); - } } hir::ItemKind::Fn { .. 
} => { @@ -772,7 +718,6 @@ pub(crate) fn lower_trait_item(tcx: TyCtxt<'_>, trait_item_id: hir::TraitItemId) let trait_item = tcx.hir_trait_item(trait_item_id); let def_id = trait_item_id.owner_id; tcx.ensure_ok().generics_of(def_id); - let icx = ItemCtxt::new(tcx, def_id.def_id); match trait_item.kind { hir::TraitItemKind::Fn(..) => { @@ -781,58 +726,19 @@ pub(crate) fn lower_trait_item(tcx: TyCtxt<'_>, trait_item_id: hir::TraitItemId) tcx.ensure_ok().fn_sig(def_id); } - hir::TraitItemKind::Const(ty, body_id) => { + hir::TraitItemKind::Const(..) => { tcx.ensure_ok().type_of(def_id); - if !tcx.dcx().has_stashed_diagnostic(ty.span, StashKey::ItemNoType) - && !(ty.is_suggestable_infer_ty() && body_id.is_some()) - { - // Account for `const C: _;`. - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_trait_item(trait_item); - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - "associated constant", - ); - } } hir::TraitItemKind::Type(_, Some(_)) => { tcx.ensure_ok().item_bounds(def_id); tcx.ensure_ok().item_self_bounds(def_id); tcx.ensure_ok().type_of(def_id); - // Account for `type T = _;`. - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_trait_item(trait_item); - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - "associated type", - ); } hir::TraitItemKind::Type(_, None) => { tcx.ensure_ok().item_bounds(def_id); tcx.ensure_ok().item_self_bounds(def_id); - // #74612: Visit and try to find bad placeholders - // even if there is no concrete type. 
- let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_trait_item(trait_item); - - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - "associated type", - ); } }; @@ -845,41 +751,13 @@ pub(super) fn lower_impl_item(tcx: TyCtxt<'_>, impl_item_id: hir::ImplItemId) { tcx.ensure_ok().type_of(def_id); tcx.ensure_ok().predicates_of(def_id); let impl_item = tcx.hir_impl_item(impl_item_id); - let icx = ItemCtxt::new(tcx, def_id.def_id); match impl_item.kind { hir::ImplItemKind::Fn(..) => { tcx.ensure_ok().codegen_fn_attrs(def_id); tcx.ensure_ok().fn_sig(def_id); } - hir::ImplItemKind::Type(_) => { - // Account for `type T = _;` - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_impl_item(impl_item); - - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - "associated type", - ); - } - hir::ImplItemKind::Const(ty, _) => { - // Account for `const T: _ = ..;` - if !ty.is_suggestable_infer_ty() { - let mut visitor = HirPlaceholderCollector::default(); - visitor.visit_impl_item(impl_item); - placeholder_type_error( - icx.lowerer(), - None, - visitor.spans, - false, - None, - "associated constant", - ); - } - } + hir::ImplItemKind::Type(_) => {} + hir::ImplItemKind::Const(..) => {} } } @@ -1151,22 +1029,11 @@ fn trait_def(tcx: TyCtxt<'_>, def_id: LocalDefId) -> ty::TraitDef { let rustc_coinductive = tcx.has_attr(def_id, sym::rustc_coinductive); let is_fundamental = tcx.has_attr(def_id, sym::fundamental); - // FIXME: We could probably do way better attribute validation here. 
- let mut skip_array_during_method_dispatch = false; - let mut skip_boxed_slice_during_method_dispatch = false; - for attr in tcx.get_attrs(def_id, sym::rustc_skip_during_method_dispatch) { - if let Some(lst) = attr.meta_item_list() { - for item in lst { - if let Some(ident) = item.ident() { - match ident.as_str() { - "array" => skip_array_during_method_dispatch = true, - "boxed_slice" => skip_boxed_slice_during_method_dispatch = true, - _ => (), - } - } - } - } - } + let [skip_array_during_method_dispatch, skip_boxed_slice_during_method_dispatch] = find_attr!( + tcx.get_all_attrs(def_id), + AttributeKind::SkipDuringMethodDispatch { array, boxed_slice, span:_ } => [*array, *boxed_slice] + ) + .unwrap_or([false; 2]); let specialization_kind = if tcx.has_attr(def_id, sym::rustc_unsafe_specialization_marker) { ty::trait_def::TraitSpecializationKind::Marker diff --git a/compiler/rustc_hir_analysis/src/collect/item_bounds.rs b/compiler/rustc_hir_analysis/src/collect/item_bounds.rs index 53c44cdc4115..e51ef46afb72 100644 --- a/compiler/rustc_hir_analysis/src/collect/item_bounds.rs +++ b/compiler/rustc_hir_analysis/src/collect/item_bounds.rs @@ -54,7 +54,7 @@ fn associated_type_bounds<'tcx>( ); icx.lowerer().add_default_traits(&mut bounds, item_ty, hir_bounds, None, span); } - // `ConstIfConst` is only interested in `~const` bounds. + // `ConstIfConst` is only interested in `[const]` bounds. PredicateFilter::ConstIfConst | PredicateFilter::SelfConstIfConst => {} } @@ -351,7 +351,7 @@ fn opaque_type_bounds<'tcx>( ); icx.lowerer().add_default_traits(&mut bounds, item_ty, hir_bounds, None, span); } - //`ConstIfConst` is only interested in `~const` bounds. + //`ConstIfConst` is only interested in `[const]` bounds. 
PredicateFilter::ConstIfConst | PredicateFilter::SelfConstIfConst => {} } debug!(?bounds); diff --git a/compiler/rustc_hir_analysis/src/collect/predicates_of.rs b/compiler/rustc_hir_analysis/src/collect/predicates_of.rs index c337765c5fec..a93e58b101fe 100644 --- a/compiler/rustc_hir_analysis/src/collect/predicates_of.rs +++ b/compiler/rustc_hir_analysis/src/collect/predicates_of.rs @@ -421,7 +421,9 @@ fn const_evaluatable_predicates_of<'tcx>( impl<'tcx> TypeVisitor> for ConstCollector<'tcx> { fn visit_const(&mut self, c: ty::Const<'tcx>) { if let ty::ConstKind::Unevaluated(uv) = c.kind() { - if is_const_param_default(self.tcx, uv.def.expect_local()) { + if let Some(local) = uv.def.as_local() + && is_const_param_default(self.tcx, local) + { // Do not look into const param defaults, // these get checked when they are actually instantiated. // @@ -666,7 +668,7 @@ pub(super) fn implied_predicates_with_filter<'tcx>( item.span, ); } - //`ConstIfConst` is only interested in `~const` bounds. + //`ConstIfConst` is only interested in `[const]` bounds. PredicateFilter::ConstIfConst | PredicateFilter::SelfConstIfConst => {} } @@ -821,7 +823,7 @@ pub(super) fn assert_only_contains_predicates_from<'tcx>( assert_eq!( pred.constness, ty::BoundConstness::Maybe, - "expected `~const` predicate when computing `{filter:?}` \ + "expected `[const]` predicate when computing `{filter:?}` \ implied bounds: {clause:?}", ); assert_eq!( @@ -1009,7 +1011,7 @@ pub(super) fn const_conditions<'tcx>( } _ => bug!("const_conditions called on wrong item: {def_id:?}"), }, - // While associated types are not really const, we do allow them to have `~const` + // While associated types are not really const, we do allow them to have `[const]` // bounds and where clauses. `const_conditions` is responsible for gathering // these up so we can check them in `compare_type_predicate_entailment`, and // in `HostEffect` goal computation. 
diff --git a/compiler/rustc_hir_analysis/src/collect/resolve_bound_vars.rs b/compiler/rustc_hir_analysis/src/collect/resolve_bound_vars.rs index d45f0475e991..95743f9a63eb 100644 --- a/compiler/rustc_hir_analysis/src/collect/resolve_bound_vars.rs +++ b/compiler/rustc_hir_analysis/src/collect/resolve_bound_vars.rs @@ -2177,84 +2177,80 @@ impl<'a, 'tcx> BoundVarContext<'a, 'tcx> { /// Walk the generics of the item for a trait bound whose self type /// corresponds to the expected res, and return the trait def id. fn for_each_trait_bound_on_res(&self, expected_res: Res) -> impl Iterator { - std::iter::from_coroutine( - #[coroutine] - move || { - let mut scope = self.scope; - loop { - let hir_id = match *scope { - Scope::Binder { hir_id, .. } => Some(hir_id), - Scope::Root { opt_parent_item: Some(parent_def_id) } => { - Some(self.tcx.local_def_id_to_hir_id(parent_def_id)) - } - Scope::Body { .. } - | Scope::ObjectLifetimeDefault { .. } - | Scope::Supertrait { .. } - | Scope::TraitRefBoundary { .. } - | Scope::LateBoundary { .. } - | Scope::Opaque { .. } - | Scope::Root { opt_parent_item: None } => None, - }; + gen move { + let mut scope = self.scope; + loop { + let hir_id = match *scope { + Scope::Binder { hir_id, .. } => Some(hir_id), + Scope::Root { opt_parent_item: Some(parent_def_id) } => { + Some(self.tcx.local_def_id_to_hir_id(parent_def_id)) + } + Scope::Body { .. } + | Scope::ObjectLifetimeDefault { .. } + | Scope::Supertrait { .. } + | Scope::TraitRefBoundary { .. } + | Scope::LateBoundary { .. } + | Scope::Opaque { .. } + | Scope::Root { opt_parent_item: None } => None, + }; - if let Some(hir_id) = hir_id { - let node = self.tcx.hir_node(hir_id); - // If this is a `Self` bound in a trait, yield the trait itself. - // Specifically, we don't need to look at any supertraits since - // we already do that in `BoundVarContext::supertrait_hrtb_vars`. 
- if let Res::SelfTyParam { trait_: _ } = expected_res - && let hir::Node::Item(item) = node - && let hir::ItemKind::Trait(..) = item.kind - { - // Yield the trait's def id. Supertraits will be - // elaborated from that. - yield item.owner_id.def_id.to_def_id(); - } else if let Some(generics) = node.generics() { - for pred in generics.predicates { - let hir::WherePredicateKind::BoundPredicate(pred) = pred.kind - else { - continue; - }; - let hir::TyKind::Path(hir::QPath::Resolved(None, bounded_path)) = - pred.bounded_ty.kind - else { - continue; - }; - // Match the expected res. - if bounded_path.res != expected_res { - continue; - } - for pred in pred.bounds { - match pred { - hir::GenericBound::Trait(poly_trait_ref) => { - if let Some(def_id) = - poly_trait_ref.trait_ref.trait_def_id() - { - yield def_id; - } + if let Some(hir_id) = hir_id { + let node = self.tcx.hir_node(hir_id); + // If this is a `Self` bound in a trait, yield the trait itself. + // Specifically, we don't need to look at any supertraits since + // we already do that in `BoundVarContext::supertrait_hrtb_vars`. + if let Res::SelfTyParam { trait_: _ } = expected_res + && let hir::Node::Item(item) = node + && let hir::ItemKind::Trait(..) = item.kind + { + // Yield the trait's def id. Supertraits will be + // elaborated from that. + yield item.owner_id.def_id.to_def_id(); + } else if let Some(generics) = node.generics() { + for pred in generics.predicates { + let hir::WherePredicateKind::BoundPredicate(pred) = pred.kind else { + continue; + }; + let hir::TyKind::Path(hir::QPath::Resolved(None, bounded_path)) = + pred.bounded_ty.kind + else { + continue; + }; + // Match the expected res. 
+ if bounded_path.res != expected_res { + continue; + } + for pred in pred.bounds { + match pred { + hir::GenericBound::Trait(poly_trait_ref) => { + if let Some(def_id) = + poly_trait_ref.trait_ref.trait_def_id() + { + yield def_id; } - hir::GenericBound::Outlives(_) - | hir::GenericBound::Use(_, _) => {} } + hir::GenericBound::Outlives(_) + | hir::GenericBound::Use(_, _) => {} } } } } - - match *scope { - Scope::Binder { s, .. } - | Scope::Body { s, .. } - | Scope::ObjectLifetimeDefault { s, .. } - | Scope::Supertrait { s, .. } - | Scope::TraitRefBoundary { s } - | Scope::LateBoundary { s, .. } - | Scope::Opaque { s, .. } => { - scope = s; - } - Scope::Root { .. } => break, - } } - }, - ) + + match *scope { + Scope::Binder { s, .. } + | Scope::Body { s, .. } + | Scope::ObjectLifetimeDefault { s, .. } + | Scope::Supertrait { s, .. } + | Scope::TraitRefBoundary { s } + | Scope::LateBoundary { s, .. } + | Scope::Opaque { s, .. } => { + scope = s; + } + Scope::Root { .. } => break, + } + } + } } } diff --git a/compiler/rustc_hir_analysis/src/errors.rs b/compiler/rustc_hir_analysis/src/errors.rs index 318aaab50f4d..c1c828392126 100644 --- a/compiler/rustc_hir_analysis/src/errors.rs +++ b/compiler/rustc_hir_analysis/src/errors.rs @@ -127,6 +127,7 @@ pub(crate) enum AssocItemNotFoundSugg<'a> { SimilarInOtherTrait { #[primary_span] span: Span, + trait_name: &'a str, assoc_kind: &'static str, suggested_name: Symbol, }, @@ -317,6 +318,13 @@ pub(crate) struct TraitObjectDeclaredWithNoTraits { pub trait_alias_span: Option, } +#[derive(Diagnostic)] +#[diag(hir_analysis_pointee_sized_trait_object)] +pub(crate) struct PointeeSizedTraitObject { + #[primary_span] + pub span: Span, +} + #[derive(Diagnostic)] #[diag(hir_analysis_ambiguous_lifetime_bound, code = E0227)] pub(crate) struct AmbiguousLifetimeBound { diff --git a/compiler/rustc_hir_analysis/src/errors/wrong_number_of_generic_args.rs b/compiler/rustc_hir_analysis/src/errors/wrong_number_of_generic_args.rs index 
a3c8ce620b36..ef789743e06f 100644 --- a/compiler/rustc_hir_analysis/src/errors/wrong_number_of_generic_args.rs +++ b/compiler/rustc_hir_analysis/src/errors/wrong_number_of_generic_args.rs @@ -635,7 +635,7 @@ impl<'a, 'tcx> WrongNumberOfGenericArgs<'a, 'tcx> { self.suggest_adding_type_and_const_args(err); } ExcessTypesOrConsts { .. } => { - // this can happen with `~const T` where T isn't a const_trait. + // this can happen with `[const] T` where T isn't a const_trait. } _ => unreachable!(), } diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/bounds.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/bounds.rs index ea1dfdfd8061..d17986d45d2f 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/bounds.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/bounds.rs @@ -5,14 +5,14 @@ use rustc_errors::codes::*; use rustc_errors::struct_span_code_err; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; -use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_hir::def_id::{CRATE_DEF_ID, DefId, LocalDefId}; use rustc_hir::{AmbigArg, LangItem, PolyTraitRef}; use rustc_middle::bug; use rustc_middle::ty::{ self as ty, IsSuggestable, Ty, TyCtxt, TypeSuperVisitable, TypeVisitable, TypeVisitableExt, TypeVisitor, Upcast, }; -use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw}; +use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; use rustc_trait_selection::traits; use smallvec::SmallVec; use tracing::{debug, instrument}; @@ -188,6 +188,11 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { ) { let tcx = self.tcx(); + // Skip adding any default bounds if `#![rustc_no_implicit_bounds]` + if tcx.has_attr(CRATE_DEF_ID, sym::rustc_no_implicit_bounds) { + return; + } + let meta_sized_did = tcx.require_lang_item(LangItem::MetaSized, span); let pointee_sized_did = tcx.require_lang_item(LangItem::PointeeSized, span); @@ -408,24 +413,21 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { let tcx = self.tcx(); let trait_id = tcx.lang_items().get(trait_); if 
let Some(trait_id) = trait_id - && self.do_not_provide_default_trait_bound( - trait_id, - hir_bounds, - self_ty_where_predicates, - ) + && self.should_add_default_traits(trait_id, hir_bounds, self_ty_where_predicates) { add_trait_bound(tcx, bounds, self_ty, trait_id, span); } } - fn do_not_provide_default_trait_bound<'a>( + /// Returns `true` if default trait bound should be added. + fn should_add_default_traits<'a>( &self, trait_def_id: DefId, hir_bounds: &'a [hir::GenericBound<'tcx>], self_ty_where_predicates: Option<(LocalDefId, &'tcx [hir::WherePredicate<'tcx>])>, ) -> bool { let collected = collect_bounds(hir_bounds, self_ty_where_predicates, trait_def_id); - !collected.any() + !self.tcx().has_attr(CRATE_DEF_ID, sym::rustc_no_implicit_bounds) && !collected.any() } /// Lower HIR bounds into `bounds` given the self type `param_ty` and the overarching late-bound vars if any. @@ -492,7 +494,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { ); } hir::GenericBound::Outlives(lifetime) => { - // `ConstIfConst` is only interested in `~const` bounds. + // `ConstIfConst` is only interested in `[const]` bounds. if matches!( predicate_filter, PredicateFilter::ConstIfConst | PredicateFilter::SelfConstIfConst @@ -708,7 +710,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { } // SelfTraitThatDefines is only interested in trait predicates. PredicateFilter::SelfTraitThatDefines(_) => {} - // `ConstIfConst` is only interested in `~const` bounds. + // `ConstIfConst` is only interested in `[const]` bounds. 
PredicateFilter::ConstIfConst | PredicateFilter::SelfConstIfConst => {} } } diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/cmse.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/cmse.rs index ebeb3b58208e..82e5f65476ff 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/cmse.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/cmse.rs @@ -18,7 +18,7 @@ pub(crate) fn validate_cmse_abi<'tcx>( fn_sig: ty::PolyFnSig<'tcx>, ) { match abi { - ExternAbi::CCmseNonSecureCall => { + ExternAbi::CmseNonSecureCall => { let hir_node = tcx.hir_node(hir_id); let hir::Node::Ty(hir::Ty { span: bare_fn_span, @@ -38,7 +38,7 @@ pub(crate) fn validate_cmse_abi<'tcx>( dcx, span, E0781, - "the `\"C-cmse-nonsecure-call\"` ABI is only allowed on function pointers" + "the `\"cmse-nonsecure-call\"` ABI is only allowed on function pointers" ) .emit(); return; @@ -78,7 +78,7 @@ pub(crate) fn validate_cmse_abi<'tcx>( } }; } - ExternAbi::CCmseNonSecureEntry => { + ExternAbi::CmseNonSecureEntry => { let hir_node = tcx.hir_node(hir_id); let Some(hir::FnSig { decl, span: fn_sig_span, .. }) = hir_node.fn_sig() else { // might happen when this ABI is used incorrectly. 
That will be handled elsewhere @@ -203,11 +203,11 @@ fn should_emit_generic_error<'tcx>(abi: ExternAbi, layout_err: &'tcx LayoutError match layout_err { TooGeneric(ty) => { match abi { - ExternAbi::CCmseNonSecureCall => { + ExternAbi::CmseNonSecureCall => { // prevent double reporting of this error !ty.is_impl_trait() } - ExternAbi::CCmseNonSecureEntry => true, + ExternAbi::CmseNonSecureEntry => true, _ => bug!("invalid ABI: {abi}"), } } diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/dyn_compatibility.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/dyn_compatibility.rs index 05465b47a26a..cb106962be18 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/dyn_compatibility.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/dyn_compatibility.rs @@ -2,6 +2,7 @@ use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; use rustc_errors::codes::*; use rustc_errors::struct_span_code_err; use rustc_hir as hir; +use rustc_hir::LangItem; use rustc_hir::def::{DefKind, Res}; use rustc_lint_defs::builtin::UNUSED_ASSOCIATED_TYPE_BOUNDS; use rustc_middle::ty::elaborate::ClauseWithSupertraitSpan; @@ -69,7 +70,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { .into_iter() .partition(|(trait_ref, _)| !tcx.trait_is_auto(trait_ref.def_id())); - // We don't support empty trait objects. + // We don't support empty trait objects. 
if regular_traits.is_empty() && auto_traits.is_empty() { let guar = self.report_trait_object_with_no_traits(span, user_written_bounds.iter().copied()); @@ -80,6 +81,13 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { let guar = self.report_trait_object_addition_traits(®ular_traits); return Ty::new_error(tcx, guar); } + // We don't support `PointeeSized` principals + let pointee_sized_did = tcx.require_lang_item(LangItem::PointeeSized, span); + if regular_traits.iter().any(|(pred, _)| pred.def_id() == pointee_sized_did) { + let guar = self.report_pointee_sized_trait_object(span); + return Ty::new_error(tcx, guar); + } + // Don't create a dyn trait if we have errors in the principal. if let Err(guar) = regular_traits.error_reported() { return Ty::new_error(tcx, guar); diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/errors.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/errors.rs index 0e79a8918b05..5d85a3f8455e 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/errors.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/errors.rs @@ -29,7 +29,7 @@ use tracing::debug; use super::InherentAssocCandidate; use crate::errors::{ self, AssocItemConstraintsNotAllowedHere, ManualImplementation, MissingTypeParams, - ParenthesizedFnTraitExpansion, TraitObjectDeclaredWithNoTraits, + ParenthesizedFnTraitExpansion, PointeeSizedTraitObject, TraitObjectDeclaredWithNoTraits, }; use crate::fluent_generated as fluent; use crate::hir_ty_lowering::{AssocItemQSelf, HirTyLowerer}; @@ -309,6 +309,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { // change the associated item. 
err.sugg = Some(errors::AssocItemNotFoundSugg::SimilarInOtherTrait { span: assoc_ident.span, + trait_name: &trait_name, assoc_kind: assoc_kind_str, suggested_name, }); @@ -1409,6 +1410,10 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { self.dcx().emit_err(TraitObjectDeclaredWithNoTraits { span, trait_alias_span }) } + + pub(super) fn report_pointee_sized_trait_object(&self, span: Span) -> ErrorGuaranteed { + self.dcx().emit_err(PointeeSizedTraitObject { span }) + } } /// Emit an error for the given associated item constraint. diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs index b99f7b44661e..74739355e1fc 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs @@ -51,7 +51,7 @@ use rustc_trait_selection::traits::wf::object_region_bounds; use rustc_trait_selection::traits::{self, FulfillmentError}; use tracing::{debug, instrument}; -use crate::check::check_abi_fn_ptr; +use crate::check::check_abi; use crate::errors::{AmbiguousLifetimeBound, BadReturnTypeNotation}; use crate::hir_ty_lowering::errors::{GenericsArgsErrExtend, prohibit_assoc_item_constraint}; use crate::hir_ty_lowering::generics::{check_generic_arg_count, lower_generic_args}; @@ -80,10 +80,10 @@ pub enum PredicateFilter { /// and `::A: B`. SelfAndAssociatedTypeBounds, - /// Filter only the `~const` bounds, which are lowered into `HostEffect` clauses. + /// Filter only the `[const]` bounds, which are lowered into `HostEffect` clauses. ConstIfConst, - /// Filter only the `~const` bounds which are *also* in the supertrait position. + /// Filter only the `[const]` bounds which are *also* in the supertrait position. SelfConstIfConst, } @@ -885,7 +885,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { } } // On the flip side, when filtering `ConstIfConst` bounds, we only need to convert - // `~const` bounds. All other predicates are handled in their respective queries. 
+ // `[const]` bounds. All other predicates are handled in their respective queries. // // Note that like `PredicateFilter::SelfOnly`, we don't need to do any filtering // here because we only call this on self bounds, and deal with the recursive case @@ -2660,7 +2660,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { if let hir::Node::Ty(hir::Ty { kind: hir::TyKind::BareFn(bare_fn_ty), span, .. }) = tcx.hir_node(hir_id) { - check_abi_fn_ptr(tcx, hir_id, *span, bare_fn_ty.abi); + check_abi(tcx, hir_id, *span, bare_fn_ty.abi); } // reject function types that violate cmse ABI requirements diff --git a/compiler/rustc_hir_analysis/src/hir_wf_check.rs b/compiler/rustc_hir_analysis/src/hir_wf_check.rs index 4633f3951a78..fef0dbf2ece9 100644 --- a/compiler/rustc_hir_analysis/src/hir_wf_check.rs +++ b/compiler/rustc_hir_analysis/src/hir_wf_check.rs @@ -1,7 +1,8 @@ +use rustc_hir::def::DefKind; use rustc_hir::intravisit::{self, Visitor, VisitorExt}; use rustc_hir::{self as hir, AmbigArg, ForeignItem, ForeignItemKind}; use rustc_infer::infer::TyCtxtInferExt; -use rustc_infer::traits::{ObligationCause, WellFormedLoc}; +use rustc_infer::traits::{ObligationCause, ObligationCauseCode, WellFormedLoc}; use rustc_middle::bug; use rustc_middle::query::Providers; use rustc_middle::ty::{self, TyCtxt, TypeVisitableExt, TypingMode, fold_regions}; @@ -107,6 +108,17 @@ fn diagnostic_hir_wf_check<'tcx>( // over less-specific types (e.g. `Option>`) if self.depth >= self.cause_depth { self.cause = Some(error.obligation.cause); + if let hir::TyKind::TraitObject(..) 
= ty.kind { + if let DefKind::AssocTy | DefKind::AssocConst | DefKind::AssocFn = + self.tcx.def_kind(self.def_id) + { + self.cause = Some(ObligationCause::new( + ty.span, + self.def_id, + ObligationCauseCode::DynCompatible(ty.span), + )); + } + } self.cause_depth = self.depth } } diff --git a/compiler/rustc_hir_analysis/src/impl_wf_check/min_specialization.rs b/compiler/rustc_hir_analysis/src/impl_wf_check/min_specialization.rs index 309221f9a127..574d19a5aa5a 100644 --- a/compiler/rustc_hir_analysis/src/impl_wf_check/min_specialization.rs +++ b/compiler/rustc_hir_analysis/src/impl_wf_check/min_specialization.rs @@ -402,22 +402,22 @@ fn check_predicates<'tcx>( /// as some predicate on the base impl (`predicate2`). /// /// This basically just checks syntactic equivalence, but is a little more -/// forgiving since we want to equate `T: Tr` with `T: ~const Tr` so this can work: +/// forgiving since we want to equate `T: Tr` with `T: [const] Tr` so this can work: /// /// ```ignore (illustrative) /// #[rustc_specialization_trait] /// trait Specialize { } /// /// impl Tr for T { } -/// impl const Tr for T { } +/// impl const Tr for T { } /// ``` /// /// However, we *don't* want to allow the reverse, i.e., when the bound on the /// specializing impl is not as const as the bound on the base impl: /// /// ```ignore (illustrative) -/// impl const Tr for T { } -/// impl const Tr for T { } // should be T: ~const Bound +/// impl const Tr for T { } +/// impl const Tr for T { } // should be T: [const] Bound /// ``` /// /// So we make that check in this function and try to raise a helpful error message. diff --git a/compiler/rustc_hir_analysis/src/lib.rs b/compiler/rustc_hir_analysis/src/lib.rs index 7c8c9425a03d..76ab2e57a1b5 100644 --- a/compiler/rustc_hir_analysis/src/lib.rs +++ b/compiler/rustc_hir_analysis/src/lib.rs @@ -62,8 +62,8 @@ This API is completely unstable and subject to change. 
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] #![doc(rust_logo)] #![feature(assert_matches)] -#![feature(coroutines)] #![feature(debug_closure_helpers)] +#![feature(gen_blocks)] #![feature(if_let_guard)] #![feature(iter_from_coroutine)] #![feature(iter_intersperse)] diff --git a/compiler/rustc_hir_pretty/src/lib.rs b/compiler/rustc_hir_pretty/src/lib.rs index fc507285860e..087167dfd90b 100644 --- a/compiler/rustc_hir_pretty/src/lib.rs +++ b/compiler/rustc_hir_pretty/src/lib.rs @@ -10,7 +10,7 @@ use std::vec; use rustc_abi::ExternAbi; use rustc_ast::util::parser::{self, ExprPrecedence, Fixity}; -use rustc_ast::{AttrStyle, DUMMY_NODE_ID, DelimArgs}; +use rustc_ast::{DUMMY_NODE_ID, DelimArgs}; use rustc_ast_pretty::pp::Breaks::{Consistent, Inconsistent}; use rustc_ast_pretty::pp::{self, BoxMarker, Breaks}; use rustc_ast_pretty::pprust::state::MacHeader; @@ -22,7 +22,7 @@ use rustc_hir::{ TyPatKind, }; use rustc_span::source_map::SourceMap; -use rustc_span::{FileName, Ident, Span, Symbol, kw}; +use rustc_span::{FileName, Ident, Span, Symbol, kw, sym}; use {rustc_ast as ast, rustc_hir as hir}; pub fn id_to_string(cx: &dyn rustc_hir::intravisit::HirTyCtxt<'_>, hir_id: HirId) -> String { @@ -81,32 +81,24 @@ impl<'a> State<'a> { } fn precedence(&self, expr: &hir::Expr<'_>) -> ExprPrecedence { - let for_each_attr = |id: HirId, callback: &mut dyn FnMut(&hir::Attribute)| { - self.attrs(id).iter().for_each(callback); - }; - expr.precedence(&for_each_attr) + let has_attr = |id: HirId| !self.attrs(id).is_empty(); + expr.precedence(&has_attr) } - fn print_attrs_as_inner(&mut self, attrs: &[hir::Attribute]) { - self.print_either_attributes(attrs, ast::AttrStyle::Inner) - } - - fn print_attrs_as_outer(&mut self, attrs: &[hir::Attribute]) { - self.print_either_attributes(attrs, ast::AttrStyle::Outer) - } - - fn print_either_attributes(&mut self, attrs: &[hir::Attribute], style: ast::AttrStyle) { + fn print_attrs(&mut self, attrs: &[hir::Attribute]) { if 
attrs.is_empty() { return; } for attr in attrs { - self.print_attribute_inline(attr, style); + self.print_attribute_as_style(attr, ast::AttrStyle::Outer); } self.hardbreak_if_not_bol(); } - fn print_attribute_inline(&mut self, attr: &hir::Attribute, style: AttrStyle) { + /// Print a single attribute as if it has style `style`, disregarding the + /// actual style of the attribute. + fn print_attribute_as_style(&mut self, attr: &hir::Attribute, style: ast::AttrStyle) { match &attr { hir::Attribute::Unparsed(unparsed) => { self.maybe_print_comment(unparsed.span.lo()); @@ -118,14 +110,17 @@ impl<'a> State<'a> { self.word("]"); self.hardbreak() } - hir::Attribute::Parsed(AttributeKind::DocComment { style, kind, comment, .. }) => { + hir::Attribute::Parsed(AttributeKind::DocComment { kind, comment, .. }) => { self.word(rustc_ast_pretty::pprust::state::doc_comment_to_string( - *kind, *style, *comment, + *kind, style, *comment, )); self.hardbreak() } hir::Attribute::Parsed(pa) => { - self.word("#[attr = "); + match style { + ast::AttrStyle::Inner => self.word("#![attr = "), + ast::AttrStyle::Outer => self.word("#[attr = "), + } pa.print_attribute(self); self.word("]"); self.hardbreak() @@ -281,10 +276,17 @@ pub fn print_crate<'a>( ann, }; + // Print all attributes, regardless of actual style, as inner attributes + // since this is the crate root with nothing above it to print outer + // attributes. + for attr in s.attrs(hir::CRATE_HIR_ID) { + s.print_attribute_as_style(attr, ast::AttrStyle::Inner); + } + // When printing the AST, we sometimes need to inject `#[no_std]` here. // Since you can't compile the HIR, it's not necessary. 
- s.print_mod(krate, (*attrs)(hir::CRATE_HIR_ID)); + s.print_mod(krate); s.print_remaining_comments(); s.s.eof() } @@ -299,7 +301,7 @@ where } pub fn attribute_to_string(ann: &dyn PpAnn, attr: &hir::Attribute) -> String { - to_string(ann, |s| s.print_attribute_inline(attr, AttrStyle::Outer)) + to_string(ann, |s| s.print_attribute_as_style(attr, ast::AttrStyle::Outer)) } pub fn ty_to_string(ann: &dyn PpAnn, ty: &hir::Ty<'_>) -> String { @@ -361,8 +363,7 @@ impl<'a> State<'a> { self.commasep_cmnt(b, exprs, |s, e| s.print_expr(e), |e| e.span); } - fn print_mod(&mut self, _mod: &hir::Mod<'_>, attrs: &[hir::Attribute]) { - self.print_attrs_as_inner(attrs); + fn print_mod(&mut self, _mod: &hir::Mod<'_>) { for &item_id in _mod.item_ids { self.ann.nested(self, Nested::Item(item_id)); } @@ -479,7 +480,7 @@ impl<'a> State<'a> { fn print_foreign_item(&mut self, item: &hir::ForeignItem<'_>) { self.hardbreak_if_not_bol(); self.maybe_print_comment(item.span.lo()); - self.print_attrs_as_outer(self.attrs(item.hir_id())); + self.print_attrs(self.attrs(item.hir_id())); match item.kind { hir::ForeignItemKind::Fn(sig, arg_idents, generics) => { let (cb, ib) = self.head(""); @@ -565,7 +566,7 @@ impl<'a> State<'a> { self.hardbreak_if_not_bol(); self.maybe_print_comment(item.span.lo()); let attrs = self.attrs(item.hir_id()); - self.print_attrs_as_outer(attrs); + self.print_attrs(attrs); self.ann.pre(self, AnnNode::Item(item)); match item.kind { hir::ItemKind::ExternCrate(orig_name, ident) => { @@ -647,14 +648,13 @@ impl<'a> State<'a> { self.print_ident(ident); self.nbsp(); self.bopen(ib); - self.print_mod(mod_, attrs); + self.print_mod(mod_); self.bclose(item.span, cb); } hir::ItemKind::ForeignMod { abi, items } => { let (cb, ib) = self.head("extern"); self.word_nbsp(abi.to_string()); self.bopen(ib); - self.print_attrs_as_inner(self.attrs(item.hir_id())); for item in items { self.ann.nested(self, Nested::ForeignItem(item.id)); } @@ -731,7 +731,6 @@ impl<'a> State<'a> { self.space(); 
self.bopen(ib); - self.print_attrs_as_inner(attrs); for impl_item in items { self.ann.nested(self, Nested::ImplItem(impl_item.id)); } @@ -785,7 +784,7 @@ impl<'a> State<'a> { match constness { hir::BoundConstness::Never => {} hir::BoundConstness::Always(_) => self.word("const"), - hir::BoundConstness::Maybe(_) => self.word("~const"), + hir::BoundConstness::Maybe(_) => self.word("[const]"), } match polarity { hir::BoundPolarity::Positive => {} @@ -822,7 +821,7 @@ impl<'a> State<'a> { for v in variants { self.space_if_not_bol(); self.maybe_print_comment(v.span.lo()); - self.print_attrs_as_outer(self.attrs(v.hir_id)); + self.print_attrs(self.attrs(v.hir_id)); let ib = self.ibox(INDENT_UNIT); self.print_variant(v); self.word(","); @@ -857,7 +856,7 @@ impl<'a> State<'a> { self.popen(); self.commasep(Inconsistent, struct_def.fields(), |s, field| { s.maybe_print_comment(field.span.lo()); - s.print_attrs_as_outer(s.attrs(field.hir_id)); + s.print_attrs(s.attrs(field.hir_id)); s.print_type(field.ty); }); self.pclose(); @@ -878,7 +877,7 @@ impl<'a> State<'a> { for field in struct_def.fields() { self.hardbreak_if_not_bol(); self.maybe_print_comment(field.span.lo()); - self.print_attrs_as_outer(self.attrs(field.hir_id)); + self.print_attrs(self.attrs(field.hir_id)); self.print_ident(field.ident); self.word_nbsp(":"); self.print_type(field.ty); @@ -916,7 +915,7 @@ impl<'a> State<'a> { self.ann.pre(self, AnnNode::SubItem(ti.hir_id())); self.hardbreak_if_not_bol(); self.maybe_print_comment(ti.span.lo()); - self.print_attrs_as_outer(self.attrs(ti.hir_id())); + self.print_attrs(self.attrs(ti.hir_id())); match ti.kind { hir::TraitItemKind::Const(ty, default) => { self.print_associated_const(ti.ident, ti.generics, ty, default); @@ -944,7 +943,7 @@ impl<'a> State<'a> { self.ann.pre(self, AnnNode::SubItem(ii.hir_id())); self.hardbreak_if_not_bol(); self.maybe_print_comment(ii.span.lo()); - self.print_attrs_as_outer(self.attrs(ii.hir_id())); + self.print_attrs(self.attrs(ii.hir_id())); 
match ii.kind { hir::ImplItemKind::Const(ty, expr) => { @@ -1028,27 +1027,16 @@ impl<'a> State<'a> { } fn print_block(&mut self, blk: &hir::Block<'_>, cb: BoxMarker, ib: BoxMarker) { - self.print_block_with_attrs(blk, &[], cb, ib) + self.print_block_maybe_unclosed(blk, Some(cb), ib) } fn print_block_unclosed(&mut self, blk: &hir::Block<'_>, ib: BoxMarker) { - self.print_block_maybe_unclosed(blk, &[], None, ib) - } - - fn print_block_with_attrs( - &mut self, - blk: &hir::Block<'_>, - attrs: &[hir::Attribute], - cb: BoxMarker, - ib: BoxMarker, - ) { - self.print_block_maybe_unclosed(blk, attrs, Some(cb), ib) + self.print_block_maybe_unclosed(blk, None, ib) } fn print_block_maybe_unclosed( &mut self, blk: &hir::Block<'_>, - attrs: &[hir::Attribute], cb: Option, ib: BoxMarker, ) { @@ -1060,8 +1048,6 @@ impl<'a> State<'a> { self.ann.pre(self, AnnNode::Block(blk)); self.bopen(ib); - self.print_attrs_as_inner(attrs); - for st in blk.stmts { self.print_stmt(st); } @@ -1251,7 +1237,7 @@ impl<'a> State<'a> { fn print_expr_field(&mut self, field: &hir::ExprField<'_>) { let cb = self.cbox(INDENT_UNIT); - self.print_attrs_as_outer(self.attrs(field.hir_id)); + self.print_attrs(self.attrs(field.hir_id)); if !field.is_shorthand { self.print_ident(field.ident); self.word_space(":"); @@ -1349,6 +1335,10 @@ impl<'a> State<'a> { self.word_nbsp("raw"); self.print_mutability(mutability, true); } + hir::BorrowKind::Pin => { + self.word_nbsp("pin"); + self.print_mutability(mutability, true); + } } self.print_expr_cond_paren(expr, self.precedence(expr) < ExprPrecedence::Prefix); } @@ -1451,7 +1441,7 @@ impl<'a> State<'a> { fn print_expr(&mut self, expr: &hir::Expr<'_>) { self.maybe_print_comment(expr.span.lo()); - self.print_attrs_as_outer(self.attrs(expr.hir_id)); + self.print_attrs(self.attrs(expr.hir_id)); let ib = self.ibox(INDENT_UNIT); self.ann.pre(self, AnnNode::Expr(expr)); match expr.kind { @@ -1517,7 +1507,7 @@ impl<'a> State<'a> { self.bopen(ib); // Print `let _t = $init;`: - 
let temp = Ident::from_str("_t"); + let temp = Ident::with_dummy_span(sym::_t); self.print_local(false, Some(init), None, |this| this.print_ident(temp)); self.word(";"); @@ -2076,7 +2066,7 @@ impl<'a> State<'a> { self.space(); } let cb = self.cbox(INDENT_UNIT); - self.print_attrs_as_outer(self.attrs(field.hir_id)); + self.print_attrs(self.attrs(field.hir_id)); if !field.is_shorthand { self.print_ident(field.ident); self.word_nbsp(":"); @@ -2086,7 +2076,7 @@ impl<'a> State<'a> { } fn print_param(&mut self, arg: &hir::Param<'_>) { - self.print_attrs_as_outer(self.attrs(arg.hir_id)); + self.print_attrs(self.attrs(arg.hir_id)); self.print_pat(arg.pat); } @@ -2121,7 +2111,7 @@ impl<'a> State<'a> { let cb = self.cbox(INDENT_UNIT); self.ann.pre(self, AnnNode::Arm(arm)); let ib = self.ibox(0); - self.print_attrs_as_outer(self.attrs(arm.hir_id)); + self.print_attrs(self.attrs(arm.hir_id)); self.print_pat(arm.pat); self.space(); if let Some(ref g) = arm.guard { @@ -2409,7 +2399,7 @@ impl<'a> State<'a> { } fn print_where_predicate(&mut self, predicate: &hir::WherePredicate<'_>) { - self.print_attrs_as_outer(self.attrs(predicate.hir_id)); + self.print_attrs(self.attrs(predicate.hir_id)); match *predicate.kind { hir::WherePredicateKind::BoundPredicate(hir::WhereBoundPredicate { bound_generic_params, diff --git a/compiler/rustc_hir_typeck/messages.ftl b/compiler/rustc_hir_typeck/messages.ftl index ac7ff65528d9..c21b16c9f9f0 100644 --- a/compiler/rustc_hir_typeck/messages.ftl +++ b/compiler/rustc_hir_typeck/messages.ftl @@ -1,6 +1,6 @@ -hir_typeck_abi_custom_call = - functions with the `"custom"` ABI cannot be called - .note = an `extern "custom"` function can only be called from within inline assembly +hir_typeck_abi_cannot_be_called = + functions with the {$abi} ABI cannot be called + .note = an `extern {$abi}` function can only be called using inline assembly hir_typeck_add_missing_parentheses_in_range = you must surround the range in parentheses to call its `{$func_name}` 
function @@ -79,6 +79,9 @@ hir_typeck_cast_unknown_pointer = cannot cast {$to -> .note = the type information given here is insufficient to check whether the pointer cast is valid .label_from = the type information given here is insufficient to check whether the pointer cast is valid +hir_typeck_const_continue_bad_label = + `#[const_continue]` must break to a labeled block that participates in a `#[loop_match]` + hir_typeck_const_select_must_be_const = this argument must be a `const fn` .help = consult the documentation on `const_eval_select` for more information diff --git a/compiler/rustc_hir_typeck/src/_match.rs b/compiler/rustc_hir_typeck/src/_match.rs index 4ac260cb15f4..6467adb54dab 100644 --- a/compiler/rustc_hir_typeck/src/_match.rs +++ b/compiler/rustc_hir_typeck/src/_match.rs @@ -1,12 +1,12 @@ use rustc_errors::{Applicability, Diag}; use rustc_hir::def::{CtorOf, DefKind, Res}; use rustc_hir::def_id::LocalDefId; -use rustc_hir::{self as hir, ExprKind, PatKind}; +use rustc_hir::{self as hir, ExprKind, HirId, PatKind}; use rustc_hir_pretty::ty_to_string; use rustc_middle::ty::{self, Ty}; use rustc_span::Span; use rustc_trait_selection::traits::{ - IfExpressionCause, MatchExpressionArmCause, ObligationCause, ObligationCauseCode, + MatchExpressionArmCause, ObligationCause, ObligationCauseCode, }; use tracing::{debug, instrument}; @@ -414,105 +414,16 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { pub(crate) fn if_cause( &self, - span: Span, - cond_span: Span, - then_expr: &'tcx hir::Expr<'tcx>, + expr_id: HirId, else_expr: &'tcx hir::Expr<'tcx>, - then_ty: Ty<'tcx>, - else_ty: Ty<'tcx>, tail_defines_return_position_impl_trait: Option, ) -> ObligationCause<'tcx> { - let mut outer_span = if self.tcx.sess.source_map().is_multiline(span) { - // The `if`/`else` isn't in one line in the output, include some context to make it - // clear it is an if/else expression: - // ``` - // LL | let x = if true { - // | _____________- - // LL || 10i32 - // || ----- expected because of 
this - // LL || } else { - // LL || 10u32 - // || ^^^^^ expected `i32`, found `u32` - // LL || }; - // ||_____- `if` and `else` have incompatible types - // ``` - Some(span) - } else { - // The entire expression is in one line, only point at the arms - // ``` - // LL | let x = if true { 10i32 } else { 10u32 }; - // | ----- ^^^^^ expected `i32`, found `u32` - // | | - // | expected because of this - // ``` - None - }; - - let (error_sp, else_id) = if let ExprKind::Block(block, _) = &else_expr.kind { - let block = block.innermost_block(); - - // Avoid overlapping spans that aren't as readable: - // ``` - // 2 | let x = if true { - // | _____________- - // 3 | | 3 - // | | - expected because of this - // 4 | | } else { - // | |____________^ - // 5 | || - // 6 | || }; - // | || ^ - // | ||_____| - // | |______if and else have incompatible types - // | expected integer, found `()` - // ``` - // by not pointing at the entire expression: - // ``` - // 2 | let x = if true { - // | ------- `if` and `else` have incompatible types - // 3 | 3 - // | - expected because of this - // 4 | } else { - // | ____________^ - // 5 | | - // 6 | | }; - // | |_____^ expected integer, found `()` - // ``` - if block.expr.is_none() - && block.stmts.is_empty() - && let Some(outer_span) = &mut outer_span - && let Some(cond_span) = cond_span.find_ancestor_inside(*outer_span) - { - *outer_span = outer_span.with_hi(cond_span.hi()) - } - - (self.find_block_span(block), block.hir_id) - } else { - (else_expr.span, else_expr.hir_id) - }; - - let then_id = if let ExprKind::Block(block, _) = &then_expr.kind { - let block = block.innermost_block(); - // Exclude overlapping spans - if block.expr.is_none() && block.stmts.is_empty() { - outer_span = None; - } - block.hir_id - } else { - then_expr.hir_id - }; + let error_sp = self.find_block_span_from_hir_id(else_expr.hir_id); // Finally construct the cause: self.cause( error_sp, - ObligationCauseCode::IfExpression(Box::new(IfExpressionCause { - else_id, - 
then_id, - then_ty, - else_ty, - outer_span, - tail_defines_return_position_impl_trait, - })), + ObligationCauseCode::IfExpression { expr_id, tail_defines_return_position_impl_trait }, ) } diff --git a/compiler/rustc_hir_typeck/src/callee.rs b/compiler/rustc_hir_typeck/src/callee.rs index 80bff09d0a43..8c1399aec147 100644 --- a/compiler/rustc_hir_typeck/src/callee.rs +++ b/compiler/rustc_hir_typeck/src/callee.rs @@ -1,13 +1,13 @@ use std::iter; -use rustc_abi::ExternAbi; +use rustc_abi::{CanonAbi, ExternAbi}; use rustc_ast::util::parser::ExprPrecedence; use rustc_errors::{Applicability, Diag, ErrorGuaranteed, StashKey}; use rustc_hir::def::{self, CtorKind, Namespace, Res}; use rustc_hir::def_id::DefId; use rustc_hir::{self as hir, HirId, LangItem}; use rustc_hir_analysis::autoderef::Autoderef; -use rustc_infer::infer; +use rustc_infer::infer::BoundRegionConversionTime; use rustc_infer::traits::{Obligation, ObligationCause, ObligationCauseCode}; use rustc_middle::ty::adjustment::{ Adjust, Adjustment, AllowTwoPhase, AutoBorrow, AutoBorrowMutability, @@ -16,6 +16,7 @@ use rustc_middle::ty::{self, GenericArgsRef, Ty, TyCtxt, TypeVisitableExt}; use rustc_middle::{bug, span_bug}; use rustc_span::def_id::LocalDefId; use rustc_span::{Span, sym}; +use rustc_target::spec::{AbiMap, AbiMapping}; use rustc_trait_selection::error_reporting::traits::DefIdOrName; use rustc_trait_selection::infer::InferCtxtExt as _; use rustc_trait_selection::traits::query::evaluate_obligation::InferCtxtExt as _; @@ -84,7 +85,18 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { while result.is_none() && autoderef.next().is_some() { result = self.try_overloaded_call_step(call_expr, callee_expr, arg_exprs, &autoderef); } - self.check_call_custom_abi(autoderef.final_ty(false), call_expr.span); + + match autoderef.final_ty(false).kind() { + ty::FnDef(def_id, _) => { + let abi = self.tcx.fn_sig(def_id).skip_binder().skip_binder().abi; + self.check_call_abi(abi, call_expr.span); + } + ty::FnPtr(_, header) => { + 
self.check_call_abi(header.abi, call_expr.span); + } + _ => { /* cannot have a non-rust abi */ } + } + self.register_predicates(autoderef.into_obligations()); let output = match result { @@ -137,19 +149,46 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { output } - /// Functions of type `extern "custom" fn(/* ... */)` cannot be called using `ExprKind::Call`. + /// Can a function with this ABI be called with a rust call expression? /// - /// These functions have a calling convention that is unknown to rust, hence it cannot generate - /// code for the call. The only way to execute such a function is via inline assembly. - fn check_call_custom_abi(&self, callee_ty: Ty<'tcx>, span: Span) { - let abi = match callee_ty.kind() { - ty::FnDef(def_id, _) => self.tcx.fn_sig(def_id).skip_binder().skip_binder().abi, - ty::FnPtr(_, header) => header.abi, - _ => return, + /// Some ABIs cannot be called from rust, either because rust does not know how to generate + /// code for the call, or because a call does not semantically make sense. + pub(crate) fn check_call_abi(&self, abi: ExternAbi, span: Span) { + let canon_abi = match AbiMap::from_target(&self.sess().target).canonize_abi(abi, false) { + AbiMapping::Direct(canon_abi) | AbiMapping::Deprecated(canon_abi) => canon_abi, + AbiMapping::Invalid => { + // This should be reported elsewhere, but we want to taint this body + // so that we don't try to evaluate calls to ABIs that are invalid. + let guar = self.dcx().span_delayed_bug( + span, + format!("invalid abi for platform should have reported an error: {abi}"), + ); + self.set_tainted_by_errors(guar); + return; + } }; - if let ExternAbi::Custom = abi { - self.tcx.dcx().emit_err(errors::AbiCustomCall { span }); + let valid = match canon_abi { + // Rust doesn't know how to call functions with this ABI. + CanonAbi::Custom => false, + + // These is an entry point for the host, and cannot be called on the GPU. 
+ CanonAbi::GpuKernel => false, + + // The interrupt ABIs should only be called by the CPU. They have complex + // pre- and postconditions, and can use non-standard instructions like `iret` on x86. + CanonAbi::Interrupt(_) => false, + + CanonAbi::C + | CanonAbi::Rust + | CanonAbi::RustCold + | CanonAbi::Arm(_) + | CanonAbi::X86(_) => true, + }; + + if !valid { + let err = crate::errors::AbiCannotBeCalled { span, abi }; + self.tcx.dcx().emit_err(err); } } @@ -180,7 +219,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { let closure_sig = args.as_closure().sig(); let closure_sig = self.instantiate_binder_with_fresh_vars( call_expr.span, - infer::FnCall, + BoundRegionConversionTime::FnCall, closure_sig, ); let adjustments = self.adjust_steps(autoderef); @@ -207,7 +246,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { let closure_args = args.as_coroutine_closure(); let coroutine_closure_sig = self.instantiate_binder_with_fresh_vars( call_expr.span, - infer::FnCall, + BoundRegionConversionTime::FnCall, closure_args.coroutine_closure_sig(), ); let tupled_upvars_ty = self.next_ty_var(callee_expr.span); @@ -506,7 +545,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // renormalize the associated types at this point, since they // previously appeared within a `Binder<>` and hence would not // have been normalized before. - let fn_sig = self.instantiate_binder_with_fresh_vars(call_expr.span, infer::FnCall, fn_sig); + let fn_sig = self.instantiate_binder_with_fresh_vars( + call_expr.span, + BoundRegionConversionTime::FnCall, + fn_sig, + ); let fn_sig = self.normalize(call_expr.span, fn_sig); self.check_argument_types( @@ -873,7 +916,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { return; } - // If we have `rustc_do_not_const_check`, do not check `~const` bounds. + // If we have `rustc_do_not_const_check`, do not check `[const]` bounds. 
if self.tcx.has_attr(self.body_id, sym::rustc_do_not_const_check) { return; } diff --git a/compiler/rustc_hir_typeck/src/check.rs b/compiler/rustc_hir_typeck/src/check.rs index ac42eebf08c0..612396858841 100644 --- a/compiler/rustc_hir_typeck/src/check.rs +++ b/compiler/rustc_hir_typeck/src/check.rs @@ -58,7 +58,7 @@ pub(super) fn check_fn<'a, 'tcx>( let maybe_va_list = fn_sig.c_variadic.then(|| { let span = body.params.last().unwrap().span; let va_list_did = tcx.require_lang_item(LangItem::VaList, span); - let region = fcx.next_region_var(RegionVariableOrigin::MiscVariable(span)); + let region = fcx.next_region_var(RegionVariableOrigin::Misc(span)); tcx.type_of(va_list_did).instantiate(tcx, &[region.into()]) }); diff --git a/compiler/rustc_hir_typeck/src/coercion.rs b/compiler/rustc_hir_typeck/src/coercion.rs index 24092c01125f..6fa473d177db 100644 --- a/compiler/rustc_hir_typeck/src/coercion.rs +++ b/compiler/rustc_hir_typeck/src/coercion.rs @@ -44,10 +44,9 @@ use rustc_hir as hir; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir_analysis::hir_ty_lowering::HirTyLowerer; use rustc_infer::infer::relate::RelateResult; -use rustc_infer::infer::{Coercion, DefineOpaqueTypes, InferOk, InferResult}; +use rustc_infer::infer::{DefineOpaqueTypes, InferOk, InferResult, RegionVariableOrigin}; use rustc_infer::traits::{ - IfExpressionCause, MatchExpressionArmCause, Obligation, PredicateObligation, - PredicateObligations, SelectionError, + MatchExpressionArmCause, Obligation, PredicateObligation, PredicateObligations, SelectionError, }; use rustc_middle::span_bug; use rustc_middle::ty::adjustment::{ @@ -59,7 +58,7 @@ use rustc_span::{BytePos, DUMMY_SP, DesugaringKind, Span}; use rustc_trait_selection::infer::InferCtxtExt as _; use rustc_trait_selection::traits::query::evaluate_obligation::InferCtxtExt; use rustc_trait_selection::traits::{ - self, NormalizeExt, ObligationCause, ObligationCauseCode, ObligationCtxt, + self, ImplSource, NormalizeExt, ObligationCause, 
ObligationCauseCode, ObligationCtxt, }; use smallvec::{SmallVec, smallvec}; use tracing::{debug, instrument}; @@ -431,7 +430,7 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> { } else { if r_borrow_var.is_none() { // create var lazily, at most once - let coercion = Coercion(span); + let coercion = RegionVariableOrigin::Coercion(span); let r = self.next_region_var(coercion); r_borrow_var = Some(r); // [4] above } @@ -549,7 +548,7 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> { (&ty::Ref(_, ty_a, mutbl_a), &ty::Ref(_, _, mutbl_b)) => { coerce_mutbls(mutbl_a, mutbl_b)?; - let coercion = Coercion(self.cause.span); + let coercion = RegionVariableOrigin::Coercion(self.cause.span); let r_borrow = self.next_region_var(coercion); // We don't allow two-phase borrows here, at least for initial @@ -672,7 +671,7 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> { return Err(TypeError::Mismatch); } } - Err(traits::Unimplemented) => { + Err(SelectionError::Unimplemented) => { debug!("coerce_unsized: early return - can't prove obligation"); return Err(TypeError::Mismatch); } @@ -704,6 +703,19 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> { // be silent, as it causes a type mismatch later. } + Ok(Some(ImplSource::UserDefined(impl_source))) => { + queue.extend(impl_source.nested); + // Certain incoherent `CoerceUnsized` implementations may cause ICEs, + // so check the impl's validity. Taint the body so that we don't try + // to evaluate these invalid coercions in CTFE. We only need to do this + // for local impls, since upstream impls should be valid. 
+ if impl_source.impl_def_id.is_local() + && let Err(guar) = + self.tcx.ensure_ok().coerce_unsized_info(impl_source.impl_def_id) + { + self.fcx.set_tainted_by_errors(guar); + } + } Ok(Some(impl_source)) => queue.extend(impl_source.nested_obligations()), } } @@ -1706,14 +1718,17 @@ impl<'tcx, 'exprs, E: AsCoercionSite> CoerceMany<'tcx, 'exprs, E> { ); } } - ObligationCauseCode::IfExpression(box IfExpressionCause { - then_id, - else_id, - then_ty, - else_ty, + ObligationCauseCode::IfExpression { + expr_id, tail_defines_return_position_impl_trait: Some(rpit_def_id), - .. - }) => { + } => { + let hir::Node::Expr(hir::Expr { + kind: hir::ExprKind::If(_, then_expr, Some(else_expr)), + .. + }) = fcx.tcx.hir_node(expr_id) + else { + unreachable!(); + }; err = fcx.err_ctxt().report_mismatched_types( cause, fcx.param_env, @@ -1721,24 +1736,12 @@ impl<'tcx, 'exprs, E: AsCoercionSite> CoerceMany<'tcx, 'exprs, E> { found, coercion_error, ); - let then_span = fcx.find_block_span_from_hir_id(then_id); - let else_span = fcx.find_block_span_from_hir_id(else_id); - // don't suggest wrapping either blocks in `if .. {} else {}` - let is_empty_arm = |id| { - let hir::Node::Block(blk) = fcx.tcx.hir_node(id) else { - return false; - }; - if blk.expr.is_some() || !blk.stmts.is_empty() { - return false; - } - let Some((_, hir::Node::Expr(expr))) = - fcx.tcx.hir_parent_iter(id).nth(1) - else { - return false; - }; - matches!(expr.kind, hir::ExprKind::If(..)) - }; - if !is_empty_arm(then_id) && !is_empty_arm(else_id) { + let then_span = fcx.find_block_span_from_hir_id(then_expr.hir_id); + let else_span = fcx.find_block_span_from_hir_id(else_expr.hir_id); + // Don't suggest wrapping whole block in `Box::new`. 
+ if then_span != then_expr.span && else_span != else_expr.span { + let then_ty = fcx.typeck_results.borrow().expr_ty(then_expr); + let else_ty = fcx.typeck_results.borrow().expr_ty(else_expr); self.suggest_boxing_tail_for_return_position_impl_trait( fcx, &mut err, diff --git a/compiler/rustc_hir_typeck/src/demand.rs b/compiler/rustc_hir_typeck/src/demand.rs index 5b55fbe91500..e5684f8cbe66 100644 --- a/compiler/rustc_hir_typeck/src/demand.rs +++ b/compiler/rustc_hir_typeck/src/demand.rs @@ -1110,27 +1110,26 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { } } - // Returns whether the given expression is a destruct assignment desugaring. - // For example, `(a, b) = (1, &2);` - // Here we try to find the pattern binding of the expression, - // `default_binding_modes` is false only for destruct assignment desugaring. + /// Returns whether the given expression is a destruct assignment desugaring. + /// For example, `(a, b) = (1, &2);` + /// Here we try to find the pattern binding of the expression, + /// `default_binding_modes` is false only for destruct assignment desugaring. pub(crate) fn is_destruct_assignment_desugaring(&self, expr: &hir::Expr<'_>) -> bool { if let hir::ExprKind::Path(hir::QPath::Resolved( _, hir::Path { res: hir::def::Res::Local(bind_hir_id), .. }, )) = expr.kind - { - let bind = self.tcx.hir_node(*bind_hir_id); - let parent = self.tcx.parent_hir_node(*bind_hir_id); - if let hir::Node::Pat(hir::Pat { + && let bind = self.tcx.hir_node(*bind_hir_id) + && let parent = self.tcx.parent_hir_node(*bind_hir_id) + && let hir::Node::Pat(hir::Pat { kind: hir::PatKind::Binding(_, _hir_id, _, _), .. }) = bind - && let hir::Node::Pat(hir::Pat { default_binding_modes: false, .. }) = parent - { - return true; - } + && let hir::Node::Pat(hir::Pat { default_binding_modes: false, .. 
}) = parent + { + true + } else { + false } - false } fn explain_self_literal( diff --git a/compiler/rustc_hir_typeck/src/errors.rs b/compiler/rustc_hir_typeck/src/errors.rs index abb8cdc1cdf3..3606c778fc40 100644 --- a/compiler/rustc_hir_typeck/src/errors.rs +++ b/compiler/rustc_hir_typeck/src/errors.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; +use rustc_abi::ExternAbi; use rustc_ast::Label; use rustc_errors::codes::*; use rustc_errors::{ @@ -30,8 +31,6 @@ pub(crate) struct BaseExpressionDoubleDot { )] pub default_field_values_suggestion: Option, #[subdiagnostic] - pub default_field_values_help: Option, - #[subdiagnostic] pub add_expr: Option, #[subdiagnostic] pub remove_dots: Option, @@ -61,10 +60,6 @@ pub(crate) struct BaseExpressionDoubleDotAddExpr { pub span: Span, } -#[derive(Subdiagnostic)] -#[help(hir_typeck_base_expression_double_dot_enable_default_field_values)] -pub(crate) struct BaseExpressionDoubleDotEnableDefaultFieldValues; - #[derive(Diagnostic)] #[diag(hir_typeck_field_multiply_specified_in_initializer, code = E0062)] pub(crate) struct FieldMultiplySpecifiedInInitializer { @@ -1165,8 +1160,17 @@ pub(crate) struct NakedFunctionsMustNakedAsm { } #[derive(Diagnostic)] -#[diag(hir_typeck_abi_custom_call)] -pub(crate) struct AbiCustomCall { +#[diag(hir_typeck_abi_cannot_be_called)] +pub(crate) struct AbiCannotBeCalled { + #[primary_span] + #[note] + pub span: Span, + pub abi: ExternAbi, +} + +#[derive(Diagnostic)] +#[diag(hir_typeck_const_continue_bad_label)] +pub(crate) struct ConstContinueBadLabel { #[primary_span] pub span: Span, } diff --git a/compiler/rustc_hir_typeck/src/expr.rs b/compiler/rustc_hir_typeck/src/expr.rs index 55c39d960e7c..067ee0f0eb0b 100644 --- a/compiler/rustc_hir_typeck/src/expr.rs +++ b/compiler/rustc_hir_typeck/src/expr.rs @@ -5,8 +5,9 @@ //! //! See [`rustc_hir_analysis::check`] for more context on type checking in general. 
-use rustc_abi::{ExternAbi, FIRST_VARIANT, FieldIdx}; +use rustc_abi::{FIRST_VARIANT, FieldIdx}; use rustc_ast::util::parser::ExprPrecedence; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_data_structures::unord::UnordMap; @@ -18,11 +19,10 @@ use rustc_errors::{ use rustc_hir::def::{CtorKind, DefKind, Res}; use rustc_hir::def_id::DefId; use rustc_hir::lang_items::LangItem; -use rustc_hir::{Attribute, ExprKind, HirId, QPath}; +use rustc_hir::{ExprKind, HirId, QPath}; use rustc_hir_analysis::NoVariantNamed; use rustc_hir_analysis::hir_ty_lowering::{FeedConstTy, HirTyLowerer as _}; -use rustc_infer::infer; -use rustc_infer::infer::{DefineOpaqueTypes, InferOk}; +use rustc_infer::infer::{self, DefineOpaqueTypes, InferOk, RegionVariableOrigin}; use rustc_infer::traits::query::NoSolution; use rustc_middle::ty::adjustment::{Adjust, Adjustment, AllowTwoPhase}; use rustc_middle::ty::error::{ExpectedFound, TypeError}; @@ -43,10 +43,9 @@ use crate::Expectation::{self, ExpectCastableToType, ExpectHasType, NoExpectatio use crate::coercion::{CoerceMany, DynamicCoerceMany}; use crate::errors::{ AddressOfTemporaryTaken, BaseExpressionDoubleDot, BaseExpressionDoubleDotAddExpr, - BaseExpressionDoubleDotEnableDefaultFieldValues, BaseExpressionDoubleDotRemove, - CantDereference, FieldMultiplySpecifiedInInitializer, FunctionalRecordUpdateOnNonStruct, - HelpUseLatestEdition, NakedAsmOutsideNakedFn, NoFieldOnType, NoFieldOnVariant, - ReturnLikeStatementKind, ReturnStmtOutsideOfFnBody, StructExprNonExhaustive, + BaseExpressionDoubleDotRemove, CantDereference, FieldMultiplySpecifiedInInitializer, + FunctionalRecordUpdateOnNonStruct, HelpUseLatestEdition, NakedAsmOutsideNakedFn, NoFieldOnType, + NoFieldOnVariant, ReturnLikeStatementKind, ReturnStmtOutsideOfFnBody, StructExprNonExhaustive, TypeMismatchFruTypo, YieldExprOutsideOfCoroutine, }; use crate::{ @@ 
-56,7 +55,7 @@ use crate::{ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { pub(crate) fn precedence(&self, expr: &hir::Expr<'_>) -> ExprPrecedence { - let for_each_attr = |id: HirId, callback: &mut dyn FnMut(&Attribute)| { + let has_attr = |id: HirId| -> bool { for attr in self.tcx.hir_attrs(id) { // For the purpose of rendering suggestions, disregard attributes // that originate from desugaring of any kind. For example, `x?` @@ -72,11 +71,12 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // let y: u32 = (x?).try_into().unwrap(); // + +++++++++++++++++++++ if attr.span().desugaring_kind().is_none() { - callback(attr); + return true; } } + false }; - expr.precedence(&for_each_attr) + expr.precedence(&has_attr) } /// Check an expr with an expectation type, and also demand that the expr's @@ -582,7 +582,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { ascribed_ty } ExprKind::If(cond, then_expr, opt_else_expr) => { - self.check_expr_if(cond, then_expr, opt_else_expr, expr.span, expected) + self.check_expr_if(expr.hir_id, cond, then_expr, opt_else_expr, expr.span, expected) } ExprKind::DropTemps(e) => self.check_expr_with_expectation(e, expected), ExprKind::Array(args) => self.check_expr_array(args, expected, expr), @@ -689,7 +689,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { self.check_named_place_expr(oprnd); Ty::new_ptr(self.tcx, ty, mutbl) } - hir::BorrowKind::Ref => { + hir::BorrowKind::Ref | hir::BorrowKind::Pin => { // Note: at this point, we cannot say what the best lifetime // is to use for resulting pointer. We want to use the // shortest lifetime possible so as to avoid spurious borrowck @@ -704,8 +704,12 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // this time with enough precision to check that the value // whose address was taken can actually be made to live as long // as it needs to live. 
- let region = self.next_region_var(infer::BorrowRegion(expr.span)); - Ty::new_ref(self.tcx, region, ty, mutbl) + let region = self.next_region_var(RegionVariableOrigin::BorrowRegion(expr.span)); + match kind { + hir::BorrowKind::Ref => Ty::new_ref(self.tcx, region, ty, mutbl), + hir::BorrowKind::Pin => Ty::new_pinned_ref(self.tcx, region, ty, mutbl), + _ => unreachable!(), + } } } } @@ -1338,6 +1342,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // or 'if-else' expression. fn check_expr_if( &self, + expr_id: HirId, cond_expr: &'tcx hir::Expr<'tcx>, then_expr: &'tcx hir::Expr<'tcx>, opt_else_expr: Option<&'tcx hir::Expr<'tcx>>, @@ -1377,15 +1382,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { let tail_defines_return_position_impl_trait = self.return_position_impl_trait_from_match_expectation(orig_expected); - let if_cause = self.if_cause( - sp, - cond_expr.span, - then_expr, - else_expr, - then_ty, - else_ty, - tail_defines_return_position_impl_trait, - ); + let if_cause = + self.if_cause(expr_id, else_expr, tail_defines_return_position_impl_trait); coerce.coerce(self, &if_cause, else_expr, else_ty); @@ -1651,13 +1649,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { Some(method.def_id), ); - // Functions of type `extern "custom" fn(/* ... */)` cannot be called using - // `ExprKind::MethodCall`. These functions have a calling convention that is - // unknown to rust, hence it cannot generate code for the call. The only way - // to execute such a function is via inline assembly. 
- if let ExternAbi::Custom = method.sig.abi { - self.tcx.dcx().emit_err(crate::errors::AbiCustomCall { span: expr.span }); - } + self.check_call_abi(method.sig.abi, expr.span); method.sig.output() } @@ -2158,7 +2150,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { } } if !self.tcx.features().default_field_values() { - let sugg = self.tcx.crate_level_attribute_injection_span(expr.hir_id); + let sugg = self.tcx.crate_level_attribute_injection_span(); self.dcx().emit_err(BaseExpressionDoubleDot { span: span.shrink_to_hi(), // We only mention enabling the feature if this is a nightly rustc *and* the @@ -2166,18 +2158,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { default_field_values_suggestion: if self.tcx.sess.is_nightly_build() && missing_mandatory_fields.is_empty() && !missing_optional_fields.is_empty() - && sugg.is_some() { - sugg - } else { - None - }, - default_field_values_help: if self.tcx.sess.is_nightly_build() - && missing_mandatory_fields.is_empty() - && !missing_optional_fields.is_empty() - && sugg.is_none() - { - Some(BaseExpressionDoubleDotEnableDefaultFieldValues) + Some(sugg) } else { None }, @@ -3795,7 +3777,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { fn check_expr_asm(&self, asm: &'tcx hir::InlineAsm<'tcx>, span: Span) -> Ty<'tcx> { if let rustc_ast::AsmMacro::NakedAsm = asm.asm_macro { - if !self.tcx.has_attr(self.body_id, sym::naked) { + if !find_attr!(self.tcx.get_all_attrs(self.body_id), AttributeKind::Naked(..)) { self.tcx.dcx().emit_err(NakedAsmOutsideNakedFn { span }); } } diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/_impl.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/_impl.rs index 2df19cb21d58..58751f232d03 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/_impl.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/_impl.rs @@ -281,7 +281,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { ); } Adjust::Deref(None) => { - // FIXME(const_trait_impl): We *could* enforce `&T: ~const Deref` here. + // FIXME(const_trait_impl): We *could* enforce `&T: [const] Deref` here. 
} Adjust::Pointer(_pointer_coercion) => { // FIXME(const_trait_impl): We should probably enforce these. diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index 95c7f251c884..c7b9cb470913 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -11,7 +11,7 @@ use rustc_hir::{ExprKind, HirId, LangItem, Node, QPath}; use rustc_hir_analysis::check::potentially_plural_count; use rustc_hir_analysis::hir_ty_lowering::{HirTyLowerer, PermitVariants}; use rustc_index::IndexVec; -use rustc_infer::infer::{DefineOpaqueTypes, InferOk, TypeTrace}; +use rustc_infer::infer::{BoundRegionConversionTime, DefineOpaqueTypes, InferOk, TypeTrace}; use rustc_middle::ty::adjustment::AllowTwoPhase; use rustc_middle::ty::error::TypeError; use rustc_middle::ty::{self, IsSuggestable, Ty, TyCtxt, TypeVisitableExt}; @@ -30,7 +30,6 @@ use crate::TupleArgumentsFlag::*; use crate::coercion::CoerceMany; use crate::errors::SuggestPtrNullMut; use crate::fn_ctxt::arg_matrix::{ArgMatrix, Compatibility, Error, ExpectedIdx, ProvidedIdx}; -use crate::fn_ctxt::infer::FnCall; use crate::gather_locals::Declaration; use crate::inline_asm::InlineAsmCtxt; use crate::method::probe::IsSuggestion; @@ -657,7 +656,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { let args = self.infcx.fresh_args_for_item(call_name.span, assoc.def_id); let fn_sig = tcx.fn_sig(assoc.def_id).instantiate(tcx, args); - self.instantiate_binder_with_fresh_vars(call_name.span, FnCall, fn_sig); + self.instantiate_binder_with_fresh_vars( + call_name.span, + BoundRegionConversionTime::FnCall, + fn_sig, + ); } None }; diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/mod.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/mod.rs index 8c18642e54a1..0c6226ce71e7 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/mod.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/mod.rs @@ -15,7 +15,7 @@ use rustc_hir::{self as hir, HirId, ItemLocalMap}; use 
rustc_hir_analysis::hir_ty_lowering::{ HirTyLowerer, InherentAssocCandidate, RegionInferReason, }; -use rustc_infer::infer; +use rustc_infer::infer::{self, RegionVariableOrigin}; use rustc_infer::traits::{DynCompatibilityViolation, Obligation}; use rustc_middle::ty::{self, Const, Ty, TyCtxt, TypeVisitableExt}; use rustc_session::Session; @@ -244,8 +244,10 @@ impl<'tcx> HirTyLowerer<'tcx> for FnCtxt<'_, 'tcx> { fn re_infer(&self, span: Span, reason: RegionInferReason<'_>) -> ty::Region<'tcx> { let v = match reason { - RegionInferReason::Param(def) => infer::RegionParameterDefinition(span, def.name), - _ => infer::MiscVariable(span), + RegionInferReason::Param(def) => { + RegionVariableOrigin::RegionParameterDefinition(span, def.name) + } + _ => RegionVariableOrigin::Misc(span), }; self.next_region_var(v) } diff --git a/compiler/rustc_hir_typeck/src/lib.rs b/compiler/rustc_hir_typeck/src/lib.rs index 043a687914b7..1cc618e2aeec 100644 --- a/compiler/rustc_hir_typeck/src/lib.rs +++ b/compiler/rustc_hir_typeck/src/lib.rs @@ -42,6 +42,7 @@ mod writeback; pub use coercion::can_coerce; use fn_ctxt::FnCtxt; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_data_structures::unord::UnordSet; use rustc_errors::codes::*; use rustc_errors::{Applicability, ErrorGuaranteed, pluralize, struct_span_code_err}; @@ -55,8 +56,8 @@ use rustc_middle::query::Providers; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, span_bug}; use rustc_session::config; +use rustc_span::Span; use rustc_span::def_id::LocalDefId; -use rustc_span::{Span, sym}; use tracing::{debug, instrument}; use typeck_root_ctxt::TypeckRootCtxt; @@ -173,7 +174,7 @@ fn typeck_with_inspect<'tcx>( .map(|(idx, ty)| fcx.normalize(arg_span(idx), ty)), ); - if tcx.has_attr(def_id, sym::naked) { + if find_attr!(tcx.get_all_attrs(def_id), AttributeKind::Naked(..)) { naked_functions::typeck_naked_fn(tcx, def_id, body); } diff --git a/compiler/rustc_hir_typeck/src/loops.rs 
b/compiler/rustc_hir_typeck/src/loops.rs index b06e0704b6ff..80eab578f134 100644 --- a/compiler/rustc_hir_typeck/src/loops.rs +++ b/compiler/rustc_hir_typeck/src/loops.rs @@ -2,6 +2,8 @@ use std::collections::BTreeMap; use std::fmt; use Context::*; +use rustc_ast::Label; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_hir as hir; use rustc_hir::def::DefKind; use rustc_hir::def_id::LocalDefId; @@ -14,8 +16,9 @@ use rustc_span::hygiene::DesugaringKind; use rustc_span::{BytePos, Span}; use crate::errors::{ - BreakInsideClosure, BreakInsideCoroutine, BreakNonLoop, ContinueLabeledBlock, OutsideLoop, - OutsideLoopSuggestion, UnlabeledCfInWhileCondition, UnlabeledInLabeledBlock, + BreakInsideClosure, BreakInsideCoroutine, BreakNonLoop, ConstContinueBadLabel, + ContinueLabeledBlock, OutsideLoop, OutsideLoopSuggestion, UnlabeledCfInWhileCondition, + UnlabeledInLabeledBlock, }; /// The context in which a block is encountered. @@ -37,6 +40,11 @@ enum Context { AnonConst, /// E.g. `const { ... }`. ConstBlock, + /// E.g. `#[loop_match] loop { state = 'label: { /* ... */ } }`. + LoopMatch { + /// The label of the labeled block (not of the loop itself). + labeled_block: Label, + }, } #[derive(Clone)] @@ -141,7 +149,12 @@ impl<'hir> Visitor<'hir> for CheckLoopVisitor<'hir> { } } hir::ExprKind::Loop(ref b, _, source, _) => { - self.with_context(Loop(source), |v| v.visit_block(b)); + let cx = match self.is_loop_match(e, b) { + Some(labeled_block) => LoopMatch { labeled_block }, + None => Loop(source), + }; + + self.with_context(cx, |v| v.visit_block(b)); } hir::ExprKind::Closure(&hir::Closure { ref fn_decl, body, fn_decl_span, kind, .. @@ -197,6 +210,23 @@ impl<'hir> Visitor<'hir> for CheckLoopVisitor<'hir> { Err(hir::LoopIdError::UnresolvedLabel) => None, }; + // A `#[const_continue]` must break to a block in a `#[loop_match]`. 
+ if find_attr!(self.tcx.hir_attrs(e.hir_id), AttributeKind::ConstContinue(_)) { + if let Some(break_label) = break_label.label { + let is_target_label = |cx: &Context| match cx { + Context::LoopMatch { labeled_block } => { + break_label.ident.name == labeled_block.ident.name + } + _ => false, + }; + + if !self.cx_stack.iter().rev().any(is_target_label) { + let span = break_label.ident.span; + self.tcx.dcx().emit_fatal(ConstContinueBadLabel { span }); + } + } + } + if let Some(Node::Block(_)) = loop_id.map(|id| self.tcx.hir_node(id)) { return; } @@ -299,7 +329,7 @@ impl<'hir> CheckLoopVisitor<'hir> { cx_pos: usize, ) { match self.cx_stack[cx_pos] { - LabeledBlock | Loop(_) => {} + LabeledBlock | Loop(_) | LoopMatch { .. } => {} Closure(closure_span) => { self.tcx.dcx().emit_err(BreakInsideClosure { span, @@ -380,4 +410,36 @@ impl<'hir> CheckLoopVisitor<'hir> { }); } } + + /// Is this a loop annotated with `#[loop_match]` that looks syntactically sound? + fn is_loop_match( + &self, + e: &'hir hir::Expr<'hir>, + body: &'hir hir::Block<'hir>, + ) -> Option(arg: A) +where + A: super::sealed::Dmb, +{ + arg.__dmb() +} + +/// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction. +/// +/// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has +/// additional properties. After a DSB instruction completes, all memory accesses of the specified +/// type issued before the DSB are guaranteed to have completed. +/// +/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type. +#[inline(always)] +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub unsafe fn __dsb(arg: A) +where + A: super::sealed::Dsb, +{ + arg.__dsb() +} + +/// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15 +/// instruction. 
+/// +/// This instruction flushes the processor pipeline fetch buffers, so that following instructions +/// are fetched from cache or memory. +/// +/// An ISB is needed after some system maintenance operations. An ISB is also needed before +/// transferring control to code that has been loaded or modified in memory, for example by an +/// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are +/// separate, privileged cache maintenance operations would be needed in order to unify the caches.) +/// +/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full +/// system) scope of the ISB instruction. +#[inline(always)] +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub unsafe fn __isb(arg: A) +where + A: super::sealed::Isb, +{ + arg.__isb() +} + +unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.dmb" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")] + fn dmb(_: i32); + + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.dsb" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")] + fn dsb(_: i32); + + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.isb" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")] + fn isb(_: i32); +} + +// we put these in a module to prevent weirdness with glob re-exports +mod arg { + // See Section 7.3 Memory barriers of ACLE + pub const SY: i32 = 15; + pub const ST: i32 = 14; + pub const LD: i32 = 13; + pub const ISH: i32 = 11; + pub const ISHST: i32 = 10; + pub const ISHLD: i32 = 9; + pub const NSH: i32 = 7; + pub const NSHST: i32 = 6; + pub const NSHLD: i32 = 5; + pub const OSH: i32 = 3; + pub const OSHST: i32 = 2; + pub const OSHLD: i32 = 1; +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/barrier/not_mclass.rs 
b/library/stdarch/crates/core_arch/src/arm_shared/barrier/not_mclass.rs new file mode 100644 index 000000000000..3b941b2715ef --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/barrier/not_mclass.rs @@ -0,0 +1,50 @@ +//! Access types available on v7 and v8 but not on v7(E)-M or v8-M + +/// Full system is the required shareability domain, writes are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct ST; + +dmb_dsb!(ST); + +/// Inner Shareable is the required shareability domain, reads and writes are +/// the required access types +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct ISH; + +dmb_dsb!(ISH); + +/// Inner Shareable is the required shareability domain, writes are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct ISHST; + +dmb_dsb!(ISHST); + +/// Non-shareable is the required shareability domain, reads and writes are the +/// required access types +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct NSH; + +dmb_dsb!(NSH); + +/// Non-shareable is the required shareability domain, writes are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct NSHST; + +dmb_dsb!(NSHST); + +/// Outer Shareable is the required shareability domain, reads and writes are +/// the required access types +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct OSH; + +dmb_dsb!(OSH); + +/// Outer Shareable is the required shareability domain, writes are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct OSHST; + +dmb_dsb!(OSHST); diff --git a/library/stdarch/crates/core_arch/src/arm_shared/barrier/v8.rs b/library/stdarch/crates/core_arch/src/arm_shared/barrier/v8.rs new file mode 100644 index 000000000000..5bf757f9f779 --- /dev/null +++ 
b/library/stdarch/crates/core_arch/src/arm_shared/barrier/v8.rs @@ -0,0 +1,27 @@ +/// Full system is the required shareability domain, reads are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct LD; + +dmb_dsb!(LD); + +/// Inner Shareable is the required shareability domain, reads are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct ISHLD; + +dmb_dsb!(ISHLD); + +/// Non-shareable is the required shareability domain, reads are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct NSHLD; + +dmb_dsb!(NSHLD); + +/// Outer Shareable is the required shareability domain, reads are the required +/// access type +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub struct OSHLD; + +dmb_dsb!(OSHLD); diff --git a/library/stdarch/crates/core_arch/src/arm_shared/hints.rs b/library/stdarch/crates/core_arch/src/arm_shared/hints.rs new file mode 100644 index 000000000000..54fd78270abd --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/hints.rs @@ -0,0 +1,125 @@ +// # References +// +// - Section 7.4 "Hints" of ACLE +// - Section 7.7 "NOP" of ACLE + +/// Generates a WFI (wait for interrupt) hint instruction, or nothing. +/// +/// The WFI instruction allows (but does not require) the processor to enter a +/// low-power state until one of a number of asynchronous events occurs. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any( + target_feature = "v6", + target_arch = "aarch64", + target_arch = "arm64ec", + doc +))] +#[inline(always)] +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub unsafe fn __wfi() { + hint(HINT_WFI); +} + +/// Generates a WFE (wait for event) hint instruction, or nothing. 
+/// +/// The WFE instruction allows (but does not require) the processor to enter a +/// low-power state until some event occurs such as a SEV being issued by +/// another processor. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any( + target_feature = "v6", + target_arch = "aarch64", + target_arch = "arm64ec", + doc +))] +#[inline(always)] +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub unsafe fn __wfe() { + hint(HINT_WFE); +} + +/// Generates a SEV (send a global event) hint instruction. +/// +/// This causes an event to be signaled to all processors in a multiprocessor +/// system. It is a NOP on a uniprocessor system. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M +// LLVM says "instruction requires: armv6k" +#[cfg(any( + target_feature = "v6", + target_arch = "aarch64", + target_arch = "arm64ec", + doc +))] +#[inline(always)] +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub unsafe fn __sev() { + hint(HINT_SEV); +} + +/// Generates a send a local event hint instruction. +/// +/// This causes an event to be signaled to only the processor executing this +/// instruction. In a multiprocessor system, it is not required to affect the +/// other processors. +// LLVM says "instruction requires: armv8" +#[cfg(any( + target_feature = "v8", // 32-bit ARMv8 + target_arch = "aarch64", // AArch64 + target_arch = "arm64ec", // Arm64EC + doc, +))] +#[inline(always)] +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub unsafe fn __sevl() { + hint(HINT_SEVL); +} + +/// Generates a YIELD hint instruction. +/// +/// This enables multithreading software to indicate to the hardware that it is +/// performing a task, for example a spin-lock, that could be swapped out to +/// improve overall system performance. 
+// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any( + target_feature = "v6", + target_arch = "aarch64", + target_arch = "arm64ec", + doc +))] +#[inline(always)] +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub unsafe fn __yield() { + hint(HINT_YIELD); +} + +/// Generates an unspecified no-op instruction. +/// +/// Note that not all architectures provide a distinguished NOP instruction. On +/// those that do, it is unspecified whether this intrinsic generates it or +/// another instruction. It is not guaranteed that inserting this instruction +/// will increase execution time. +#[inline(always)] +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub unsafe fn __nop() { + crate::arch::asm!("nop", options(nomem, nostack, preserves_flags)); +} + +unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.hint" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] + fn hint(_: i32); +} + +// from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td +const HINT_NOP: i32 = 0; +const HINT_YIELD: i32 = 1; +const HINT_WFE: i32 = 2; +const HINT_WFI: i32 = 3; +const HINT_SEV: i32 = 4; +const HINT_SEVL: i32 = 5; diff --git a/library/stdarch/crates/core_arch/src/arm_shared/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/mod.rs new file mode 100644 index 000000000000..527b53de99d9 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/mod.rs @@ -0,0 +1,117 @@ +//! ARM C Language Extensions (ACLE) +//! +//! # Developer notes +//! +//! Below is a list of built-in targets that are representative of the different ARM +//! architectures; the list includes the `target_feature`s they possess. +//! +//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t` +//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te` +//! 
- `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6` +//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass` +//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass` +//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass` +//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` +//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass` +//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` +//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass` +//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon` +//! +//! Section 10.1 of ACLE says: +//! +//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes +//! its predecessor instruction set." +//! +//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes +//! its predecessor instruction set." +//! +//! From that info and from looking at how LLVM features work (using custom targets) we can identify +//! features that are subsets of others: +//! +//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is +//! enabled as well. +//! +//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8` +//! - `v6 < v8m < v6t2` +//! - `v7 < v8m.main` +//! +//! *NOTE*: Section 5.4.7 of ACLE says: +//! +//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the +//! intrinsics defined in Saturating intrinsics are available." +//! +//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te +//! targets so we have to work around this difference. +//! +//! # References +//! +//! 
- [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) + +#![cfg_attr( + all(target_arch = "aarch64", target_abi = "softfloat"), + // Just allow the warning: anyone soundly using the intrinsics has to enable + // the target feature, and that will generate a warning for them. + allow(aarch64_softfloat_neon) +)] +// Only for 'neon' submodule +#![allow(non_camel_case_types)] + +// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported +// via CP15 instructions. See Section 10.1 of ACLE +mod barrier; +#[unstable(feature = "stdarch_arm_barrier", issue = "117219")] +pub use self::barrier::*; + +mod hints; +#[unstable(feature = "stdarch_arm_hints", issue = "117218")] +pub use self::hints::*; + +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm64ec", + target_feature = "v7", + doc +))] +pub(crate) mod neon; + +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm64ec", + target_feature = "v7", + doc +))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub use self::neon::*; + +#[cfg(test)] +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm64ec", + target_feature = "v7", + doc +))] +pub(crate) mod test_support; + +mod sealed { + #[unstable(feature = "stdarch_arm_barrier", issue = "117219")] + pub trait Dmb { + unsafe fn __dmb(&self); + } + + #[unstable(feature = "stdarch_arm_barrier", issue = "117219")] + pub trait Dsb { + unsafe fn __dsb(&self); + } + + #[unstable(feature = "stdarch_arm_barrier", issue = "117219")] + pub trait Isb { + unsafe fn __isb(&self); + } +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs new file mode 100644 index 000000000000..286f1868852a --- /dev/null +++ 
b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -0,0 +1,75440 @@ +// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen-arm/spec/` and run the following command to re-generate this file: +// +// ``` +// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec +// ``` +#![allow(improper_ctypes)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +use super::*; + +#[doc = "CRC32 single round checksum for bytes (8 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32b)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(crc32b))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_aarch64_crc32", since = "1.80.0") +)] +pub fn __crc32b(crc: u32, data: u8) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crc32b" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")] + fn ___crc32b(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32b(crc, data as u32) } +} +#[doc = "CRC32-C single round checksum for bytes (8 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cb)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(crc32cb))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_aarch64_crc32", since = "1.80.0") +)] +pub fn __crc32cb(crc: u32, data: u8) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.crc32cb" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cb")] + fn ___crc32cb(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32cb(crc, data as u32) } +} +#[doc = "CRC32-C single round checksum for quad words (64 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(crc32cw))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +pub fn __crc32cd(crc: u32, data: u64) -> u32 { + let b: u32 = (data & 0xFFFFFFFF) as u32; + let c: u32 = (data >> 32) as u32; + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] + fn ___crc32cw(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32cw(___crc32cw(crc, b), c) } +} +#[doc = "CRC32-C single round checksum for bytes (16 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32ch)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(crc32ch))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_aarch64_crc32", since = "1.80.0") +)] +pub fn __crc32ch(crc: u32, data: u16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crc32ch" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32ch")] + fn ___crc32ch(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32ch(crc, data as u32) } +} +#[doc = "CRC32-C single round checksum for bytes (32 bits)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cw)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(crc32cw))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_aarch64_crc32", since = "1.80.0") +)] +pub fn __crc32cw(crc: u32, data: u32) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crc32cw" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] + fn ___crc32cw(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32cw(crc, data) } +} +#[doc = "CRC32 single round checksum for quad words (64 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(crc32w))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +pub fn __crc32d(crc: u32, data: u64) -> u32 { + let b: u32 = (data & 0xFFFFFFFF) as u32; + let c: u32 = (data >> 32) as u32; + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] + fn ___crc32w(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32w(___crc32w(crc, b), c) } +} +#[doc = "CRC32 single round checksum for bytes (16 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32h)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(crc32h))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "stdarch_aarch64_crc32", since = "1.80.0") +)] +pub fn __crc32h(crc: u32, data: u16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crc32h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32h")] + fn ___crc32h(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32h(crc, data as u32) } +} +#[doc = "CRC32 single round checksum for bytes (32 bits)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32w)"] +#[inline] +#[target_feature(enable = "crc")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(crc32w))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_aarch32_crc32", issue = "125085") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_aarch64_crc32", since = "1.80.0") +)] +pub fn __crc32w(crc: u32, data: u32) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crc32w" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] + fn ___crc32w(crc: u32, data: u32) -> u32; + } + unsafe { ___crc32w(crc, data) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v4i16.v8i8")] + fn _priv_vpadal_s8(a: int16x4_t, b: 
int8x8_t) -> int16x4_t; + } + unsafe { _priv_vpadal_s8(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v8i16.v16i8")] + fn _priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t; + } + unsafe { _priv_vpadalq_s8(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v2i32.v4i16")] + fn _priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t; + } + unsafe { _priv_vpadal_s16(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vpadal.s16"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v4i32.v8i16")] + fn _priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t; + } + unsafe { _priv_vpadalq_s16(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v1i64.v2i32")] + fn _priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t; + } + unsafe { _priv_vpadal_s32(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v2i64.v4i32")] + fn _priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t; + } + unsafe { _priv_vpadalq_s32(a, b) } +} +#[doc 
= "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8")] + fn _priv_vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; + } + unsafe { _priv_vpadal_u8(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8")] + fn _priv_vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; + } + unsafe { _priv_vpadalq_u8(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16")] + fn _priv_vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; + } + unsafe { _priv_vpadal_u16(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16")] + fn _priv_vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; + } + unsafe { _priv_vpadalq_u16(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32")] + fn _priv_vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; + } + unsafe { _priv_vpadal_u32(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn priv_vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32")] + fn _priv_vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; + } + unsafe { _priv_vpadalq_u32(a, b) } +} +#[doc = "Absolute difference and accumulate (64-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaba_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaba_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_add(a, vabd_s16(b, c)) } +} +#[doc = "Absolute difference and accumulate (64-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaba_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saba) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaba_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_add(a, vabd_s32(b, c)) } +} +#[doc = "Absolute difference and accumulate (64-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaba_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaba_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_add(a, vabd_s8(b, c)) } +} +#[doc = "Absolute difference and accumulate (64-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaba_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaba_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_add(a, vabd_u16(b, c)) } +} +#[doc = "Absolute difference and accumulate (64-bit)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaba_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaba_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_add(a, vabd_u32(b, c)) } +} +#[doc = "Absolute difference and accumulate (64-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaba_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaba_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_add(a, vabd_u8(b, c)) } +} +#[doc = "Signed Absolute difference and Accumulate Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabal) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + let d: int8x8_t = vabd_s8(b, c); + unsafe { + let e: uint8x8_t = simd_cast(d); + simd_add(a, simd_cast(e)) + } +} +#[doc = "Signed Absolute difference and Accumulate Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + let d: int16x4_t = vabd_s16(b, c); + unsafe { + let e: uint16x4_t = simd_cast(d); + simd_add(a, simd_cast(e)) + } +} +#[doc = "Signed Absolute difference and Accumulate Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + let d: 
int32x2_t = vabd_s32(b, c); + unsafe { + let e: uint32x2_t = simd_cast(d); + simd_add(a, simd_cast(e)) + } +} +#[doc = "Unsigned Absolute difference and Accumulate Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + let d: uint8x8_t = vabd_u8(b, c); + unsafe { simd_add(a, simd_cast(d)) } +} +#[doc = "Unsigned Absolute difference and Accumulate Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + let d: uint16x4_t = vabd_u16(b, c); + unsafe { simd_add(a, simd_cast(d)) } +} +#[doc = "Unsigned Absolute difference and Accumulate Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u32)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + let d: uint32x2_t = vabd_u32(b, c); + unsafe { simd_add(a, simd_cast(d)) } +} +#[doc = "Absolute difference and accumulate (128-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabaq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_add(a, vabdq_s16(b, c)) } +} +#[doc = "Absolute difference and accumulate (128-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabaq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", 
+ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_add(a, vabdq_s32(b, c)) } +} +#[doc = "Absolute difference and accumulate (128-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabaq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe { simd_add(a, vabdq_s8(b, c)) } +} +#[doc = "Absolute difference and accumulate (128-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabaq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_add(a, vabdq_u16(b, c)) } +} +#[doc = "Absolute difference and accumulate (128-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabaq_u32)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_add(a, vabdq_u32(b, c)) } +} +#[doc = "Absolute difference and accumulate (128-bit)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabaq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaba) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_add(a, vabdq_u8(b, c)) } +} +#[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v4f16" + )] + fn _vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vabd_f16(a, b) } +} +#[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v8f16" + )] + fn _vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vabdq_f16(a, b) } +} +#[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { 
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v2f32" + )] + fn _vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vabd_f32(a, b) } +} +#[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v4f32" + )] + fn _vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vabdq_f32(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sabd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i8")] + fn _vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vabd_s8(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sabd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v16i8")] + fn _vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vabdq_s8(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sabd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i16")] + fn _vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vabd_s16(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sabd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i16")] + fn _vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vabdq_s16(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(sabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sabd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2i32")] + fn _vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vabd_s32(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sabd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i32")] + fn _vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vabdq_s32(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vabd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i8")] + fn _vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vabd_u8(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v16i8")] + fn _vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vabdq_u8(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u16)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i16")] + fn _vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vabd_u16(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i16")] + fn _vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vabdq_u16(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v2i32")] + fn _vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vabd_u32(a, b) } +} +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i32")] + fn _vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> 
uint32x4_t; + } + unsafe { _vabdq_u32(a, b) } +} +#[doc = "Signed Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe { + let c: uint8x8_t = simd_cast(vabd_s8(a, b)); + simd_cast(c) + } +} +#[doc = "Signed Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { + let c: uint16x4_t = simd_cast(vabd_s16(a, b)); + simd_cast(c) + } +} +#[doc = "Signed Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s32"))] +#[cfg_attr( 
+ all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { + let c: uint32x2_t = simd_cast(vabd_s32(a, b)); + simd_cast(c) + } +} +#[doc = "Unsigned Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { simd_cast(vabd_u8(a, b)) } +} +#[doc = "Unsigned Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { simd_cast(vabd_u16(a, b)) } +} +#[doc = "Unsigned 
Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { simd_cast(vabd_u32(a, b)) } +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vabs_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_fabs(a) } +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vabsq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_fabs(a) } +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabs_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_fabs(a) } +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabsq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_fabs(a) } +} +#[doc = "Absolute value (wrapping)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(abs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabs_s8(a: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.abs.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")] + fn _vabs_s8(a: int8x8_t) -> int8x8_t; + } + unsafe { _vabs_s8(a) } +} +#[doc = "Absolute value (wrapping)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(abs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabsq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.abs.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")] + fn _vabsq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vabsq_s8(a) } +} +#[doc = "Absolute value (wrapping)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(abs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabs_s16(a: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.abs.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")] + fn _vabs_s16(a: int16x4_t) -> int16x4_t; + } + unsafe { _vabs_s16(a) } +} +#[doc = "Absolute value (wrapping)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(abs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabsq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.abs.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")] + fn _vabsq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vabsq_s16(a) } +} +#[doc = "Absolute value (wrapping)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(abs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabs_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.abs.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")] + fn _vabs_s32(a: int32x2_t) -> int32x2_t; + } + unsafe { _vabs_s32(a) } +} +#[doc = "Absolute value (wrapping)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(abs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vabsq_s32(a: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.abs.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")] + fn _vabsq_s32(a: int32x4_t) -> int32x4_t; + } + unsafe { _vabsq_s32(a) } +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsh_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vabsh_f16(a: f16) -> f16 { + unsafe { simd_extract!(vabs_f16(vdup_n_f16(a)), 0) } +} 
+#[doc = "Floating-point Add (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_add(a, b) } +} +#[doc = "Floating-point Add (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) 
+)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_add(a, b) } +} +#[doc = "Vector add."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(add) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_add(a, b) } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p64)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddh_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vaddh_f16(a: f16, b: f16) -> f16 { + a + b +} +#[doc = "Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_high_s16)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { + unsafe { + let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t::splat(8))); + simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + } +} +#[doc = "Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { + unsafe { + let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t::splat(16))); + simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + } +} +#[doc = "Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { + unsafe { + let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t::splat(32))); + simd_shuffle!(r, x, [0, 1, 2, 3]) + } +} +#[doc = "Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t { + unsafe { + let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t::splat(8))); + simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + } +} +#[doc = "Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch 
= "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t { + unsafe { + let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t::splat(16))); + simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + } +} +#[doc = "Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t { + unsafe { + let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t::splat(32))); + simd_shuffle!(r, x, [0, 1, 2, 3]) + } +} +#[doc = "Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe { simd_cast(simd_shr(simd_add(a, b), int16x8_t::splat(8))) } +} +#[doc = "Add returning High 
Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe { simd_cast(simd_shr(simd_add(a, b), int32x4_t::splat(16))) } +} +#[doc = "Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe { simd_cast(simd_shr(simd_add(a, b), int64x2_t::splat(32))) } +} +#[doc = "Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { simd_cast(simd_shr(simd_add(a, b), uint16x8_t::splat(8))) } +} +#[doc = "Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { simd_cast(simd_shr(simd_add(a, b), uint32x4_t::splat(16))) } +} +#[doc = "Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddhn_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { simd_cast(simd_shr(simd_add(a, b), uint64x2_t::splat(32))) } +} +#[doc = "Signed Add Long (vector, high half)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddl2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let a: int32x4_t = simd_cast(a); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Signed Add Long (vector, high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddl2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); + let a: int64x2_t = simd_cast(a); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Signed Add Long (vector, high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_high_s8)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddl2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: int16x8_t = simd_cast(a); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Unsigned Add Long (vector, high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddl2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let a: uint32x4_t = simd_cast(a); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Unsigned Add Long (vector, high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddl2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); + let a: uint64x2_t = simd_cast(a); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Unsigned Add Long (vector, high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_high_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddl2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: uint16x8_t = simd_cast(a); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Long (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_cast(a); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Long (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_cast(a); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Long (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> 
int16x8_t { + unsafe { + let a: int16x8_t = simd_cast(a); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Long (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_cast(a); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Long (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_cast(a); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Long (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_cast(a); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p128)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddq_p128(a: p128, b: p128) -> p128 { + a ^ b +} +#[doc = "Add Wide (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddw2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + unsafe { + let b: int16x4_t = 
simd_shuffle!(b, b, [4, 5, 6, 7]); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddw2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + unsafe { + let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_high_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddw2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + unsafe { + let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddw2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + unsafe { + let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddw2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + unsafe { + let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_high_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddw2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + unsafe { + let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + unsafe { + let b: int32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + unsafe { + let b: int64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + unsafe { + let b: int16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "Add Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddw_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vaddw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) + } +} +#[doc = "AES single round decryption."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"] +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesd))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesd" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesd")] + fn _vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + } + unsafe { _vaesdq_u8(data, key) } +} +#[doc = "AES single round encryption."] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"] +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aese))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aese" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")] + fn _vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + } + unsafe { _vaeseq_u8(data, key) } +} +#[doc = "AES inverse mix columns."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"] +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesimc))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesimc" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesimc")] + fn _vaesimcq_u8(data: uint8x16_t) -> uint8x16_t; + } + unsafe { _vaesimcq_u8(data) } +} +#[doc = "AES mix columns."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"] +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesmc))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesmc" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesmc")] + fn _vaesmcq_u8(data: uint8x16_t) -> uint8x16_t; + } + unsafe { _vaesmcq_u8(data) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue 
= "111800") +)] +pub fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vand_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_and(a, b) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_s8)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_and(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbic_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbic_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_u16)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise bit clear."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbicq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bic) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_and(simd_xor(b, transmute(c)), a) } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + let not = int16x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + let not = int16x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + let not = int32x2_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vbsl_p16(a: uint16x4_t, b: poly16x4_t, c: poly16x4_t) -> poly16x4_t { + let not = int16x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t { + let not = int8x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + let not = int16x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + let not = int32x2_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t { + let not = int64x1_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + let not = int8x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_f32(a: uint32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + let not = int32x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8_t { + let not = int16x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_p8(a: uint8x16_t, b: poly8x16_t, c: poly8x16_t) -> poly8x16_t { + let not = int8x16_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_s16(a: uint16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + let not = int16x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, 
transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_s32(a: uint32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + let not = int32x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_s64(a: uint64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { + let not = int64x2_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_s8)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_s8(a: uint8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + let not = int8x16_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + let not = int16x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + let not = int32x2_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_t { + let not = int64x1_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbsl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + let not = 
int8x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + let not = int16x8_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + let not = int32x4_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_u64)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + let not = int64x2_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Bitwise Select."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbslq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + let not = int8x16_t::splat(-1); + unsafe { + transmute(simd_or( + simd_and(a, b), + simd_and(simd_xor(a, transmute(not)), c), + )) + } +} +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i16.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v4i16.v4f16" + )] + fn _vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t; + } + unsafe { _vcage_f16(a, b) } +} +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcageq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v8i16.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v8i16.v8f16" + )] + fn _vcageq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t; + } + unsafe { _vcageq_f16(a, b) } +} +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v2i32.v2f32" + )] + fn _vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; + } + unsafe { _vcage_f32(a, b) } +} +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v4i32.v4f32" + )] + fn _vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; + } + unsafe { _vcageq_f32(a, b) } +} +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i16.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v4i16.v4f16" + )] + fn _vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t; + } + unsafe { _vcagt_f16(a, b) } +} +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcagtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v8i16.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v8i16.v8f16" + )] + fn _vcagtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t; + } + unsafe { _vcagtq_f16(a, b) } +} +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32" + )] + fn _vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; + } + unsafe { _vcagt_f32(a, b) } +} +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v4i32.v4f32" + )] + fn _vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; + } + unsafe { _vcagtq_f32(a, b) } +} +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + vcage_f16(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcaleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + vcageq_f16(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcale_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcage_f32(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcageq_f32(b, a) +} +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + vcagt_f16(b, a) +} +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcaltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + vcagtq_f16(b, a) +} +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcagt_f32(b, a) +} +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcagtq_f32(b, a) +} +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f16)"] 
+#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vceqq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( 
+ target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vceq.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceq_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + unsafe { simd_eq(a, b) } +} +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgeq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare signed greater than 
or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_ge(a, b) } +} 
+#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_ge(a, b) } +} +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgez_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + unsafe { simd_ge(a, transmute(b)) } +} +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgezq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + unsafe { simd_ge(a, transmute(b)) } +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare signed greater than"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch 
= "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_gt(a, b) } +} +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgtz_f16(a: float16x4_t) -> 
uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + unsafe { simd_gt(a, transmute(b)) } +} +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + unsafe { simd_gt(a, transmute(b)) } +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe { simd_le(a, b) } +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + 
unsafe { simd_le(a, b) } +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe { simd_le(a, b) } +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s16)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_s32(a: int32x4_t, b: 
int32x4_t) -> uint32x4_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_le(a, b) } +} +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u32)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_le(a, b) } +} +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcle.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmle) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vclez_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + unsafe { simd_le(a, transmute(b)) } +} +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcle.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmle) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vclezq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + unsafe { simd_le(a, transmute(b)) } +} +#[doc = "Count leading sign bits"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcls_s8(a: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v8i8" + )] + fn _vcls_s8(a: int8x8_t) -> int8x8_t; + } + unsafe { _vcls_s8(a) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclsq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v16i8" + )] + fn _vclsq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vclsq_s8(a) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcls_s16(a: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v4i16" + )] + fn _vcls_s16(a: int16x4_t) -> int16x4_t; + } + unsafe { _vcls_s16(a) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclsq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v8i16" + )] + fn _vclsq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vclsq_s16(a) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcls_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v2i32" + )] + fn _vcls_s32(a: int32x2_t) -> int32x2_t; + } + unsafe { _vcls_s32(a) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclsq_s32(a: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v4i32" + )] + fn _vclsq_s32(a: int32x4_t) -> int32x4_t; + } + unsafe { _vclsq_s32(a) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcls_u8(a: uint8x8_t) -> int8x8_t { + unsafe { vcls_s8(transmute(a)) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclsq_u8(a: uint8x16_t) -> int8x16_t { + unsafe { vclsq_s8(transmute(a)) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcls_u16(a: uint16x4_t) -> int16x4_t { + unsafe { vcls_s16(transmute(a)) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclsq_u16(a: uint16x8_t) -> int16x8_t { + unsafe { vclsq_s16(transmute(a)) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcls_u32(a: uint32x2_t) -> int32x2_t { + unsafe { vcls_s32(transmute(a)) } +} +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclsq_u32(a: uint32x4_t) -> int32x4_t { + unsafe { vclsq_s32(transmute(a)) } +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) 
+)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s8)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + 
unsafe { simd_lt(a, b) } +} +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_lt(a, b) } +} +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_lt(a, b) } +} +#[doc = 
"Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmlt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcltz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + unsafe { simd_lt(a, transmute(b)) } +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmlt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcltzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + unsafe { simd_lt(a, transmute(b)) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_s8(a: int8x8_t) -> 
int8x8_t { + unsafe { simd_ctlz(a) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_ctlz(a) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_ctlz(a) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_ctlz(a) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_ctlz(a) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_ctlz(a) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { transmute(vclz_s16(transmute(a))) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(vclz_s16(transmute(a))); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { transmute(vclzq_s16(transmute(a))) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(vclzq_s16(transmute(a))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t { + unsafe { transmute(vclz_s32(transmute(a))) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(vclz_s32(transmute(a))); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe { transmute(vclzq_s32(transmute(a))) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(vclzq_s32(transmute(a))); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vclz_s8(transmute(a))) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vclz_s8(transmute(a))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { transmute(vclzq_s8(transmute(a))) } +} +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = 
transmute(vclzq_s8(transmute(a))); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcnt_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_ctpop(a) } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcntq_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_ctpop(a) } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vcnt_s8(transmute(a))) } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vcnt_s8(transmute(a))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcntq_u8(a: 
uint8x16_t) -> uint8x16_t { + unsafe { transmute(vcntq_s8(transmute(a))) } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(vcntq_s8(transmute(a))); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { transmute(vcnt_s8(transmute(a))) } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vcnt_s8(transmute(a))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { transmute(vcntq_s8(transmute(a))) } +} +#[doc = "Population count per byte."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vcnt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cnt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(vcntq_s8(transmute(a))); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 
1, 2, 3]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcreate_f16(a: u64) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcreate_f16(a: u64) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_f32(a: u64) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_f32(a: u64) -> float32x2_t { + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s8(a: u64) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s8(a: u64) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s16(a: u64) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s16(a: u64) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s32(a: u64) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s32(a: u64) -> int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_s64(a: u64) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u8(a: u64) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u8(a: u64) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u16(a: u64) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u16(a: u64) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u32(a: u64) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u32(a: u64) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_u64(a: u64) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_p8(a: u64) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_p8(a: u64) -> poly8x8_t { + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_p16(a: u64) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_p16(a: u64) -> poly16x4_t { + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcreate_p64(a: u64) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Floating-point convert to lower precision 
narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_f32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +# [cfg_attr (all (test , target_arch = "arm") , assert_instr (vcvt . f16 . f32))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_s16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f16_s16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_u16)"] +#[inline] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f16_u16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Floating-point convert to higher precision long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtl) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_f32_f16(a: float16x4_t) -> float32x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_u32)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f16_s16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_n_f16_s16(a: int16x4_t) -> float16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f16.v4i16" + )] + fn _vcvt_n_f16_s16(a: int16x4_t, n: i32) -> float16x4_t; + } + unsafe { _vcvt_n_f16_s16(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f16_s16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(scvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_n_f16_s16(a: int16x8_t) -> float16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v8f16.v8i16" + )] + fn _vcvtq_n_f16_s16(a: int16x8_t, n: i32) -> float16x8_t; + } + unsafe { _vcvtq_n_f16_s16(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f16_u16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_n_f16_u16(a: uint16x4_t) -> float16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f16.v4i16" + )] + fn _vcvt_n_f16_u16(a: uint16x4_t, n: i32) -> float16x4_t; + } + unsafe { _vcvt_n_f16_u16(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f16_u16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_n_f16_u16(a: uint16x8_t) -> float16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v8f16.v8i16" + )] + fn _vcvtq_n_f16_u16(a: uint16x8_t, n: i32) -> float16x8_t; + } + unsafe { _vcvtq_n_f16_u16(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t; + } + unsafe { _vcvt_n_f32_s32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = 
"llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t; + } + unsafe { _vcvtq_n_f32_s32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t; + } + unsafe { _vcvt_n_f32_s32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t; + } + unsafe { _vcvtq_n_f32_s32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_u32(a: uint32x2_t, n: i32) -> float32x2_t; + } + unsafe { _vcvt_n_f32_u32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_u32(a: uint32x4_t, n: i32) -> float32x4_t; + } + unsafe { _vcvtq_n_f32_u32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_u32(a: uint32x2_t, n: i32) -> float32x2_t; + } + unsafe { _vcvt_n_f32_u32(a, N) } +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_u32(a: uint32x4_t, n: i32) -> float32x4_t; + } + unsafe { _vcvtq_n_f32_u32(a, N) } +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_n_s16_f16(a: float16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i16.v4f16" + )] + fn _vcvt_n_s16_f16(a: float16x4_t, n: i32) -> int16x4_t; + } + unsafe { _vcvt_n_s16_f16(a, N) } +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_n_s16_f16(a: float16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v8i16.v8f16" + )] + fn _vcvtq_n_s16_f16(a: float16x8_t, n: i32) -> int16x8_t; + } + unsafe { _vcvtq_n_s16_f16(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32" + )] + fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t; + } + unsafe { _vcvt_n_s32_f32(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); 
+ unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32" + )] + fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t; + } + unsafe { _vcvtq_n_s32_f32(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32" + )] + fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t; + } + unsafe { _vcvt_n_s32_f32(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32" + )] + fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t; + } + unsafe { _vcvtq_n_s32_f32(a, N) } +} +#[doc = "Fixed-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u16_f16)"] +#[inline] +#[cfg_attr(target_arch 
= "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_n_u16_f16(a: float16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i16.v4f16" + )] + fn _vcvt_n_u16_f16(a: float16x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vcvt_n_u16_f16(a, N) } +} +#[doc = "Fixed-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_n_u16_f16(a: float16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v8i16.v8f16" + )] + fn _vcvtq_n_u16_f16(a: float16x8_t, n: i32) -> uint16x8_t; + } + unsafe { _vcvtq_n_u16_f16(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32" + )] + fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vcvt_n_u32_f32(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32" + )] + fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> uint32x4_t; + } + unsafe { _vcvtq_n_u32_f32(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32" + )] + fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vcvt_n_u32_f32(a, N) } +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32" + )] + fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> uint32x4_t; + } + unsafe { _vcvtq_n_u32_f32(a, N) } +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(fcvtzs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fptosi.sat.v2i32.v2f32" + )] + fn _vcvt_s32_f32(a: float32x2_t) -> int32x2_t; + } + unsafe { _vcvt_s32_f32(a) } +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvtq_s32_f32(a: float32x4_t) -> 
int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fptosi.sat.v4i32.v4f32" + )] + fn _vcvtq_s32_f32(a: float32x4_t) -> int32x4_t; + } + unsafe { _vcvtq_s32_f32(a) } +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u16_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vcvtq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fptoui.sat.v2i32.v2f32" + )] + fn _vcvt_u32_f32(a: float32x2_t) -> uint32x2_t; + } + unsafe { _vcvt_u32_f32(a) } +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u32_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fptoui.sat.v4i32.v4f32" + )] + fn _vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t; + } + unsafe { _vcvtq_u32_f32(a) } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = 
"neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
+#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vdot_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8" + )] + fn _vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t; + } + unsafe { _vdot_s32(a, b, c) } +} +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8" + )] + fn _vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vdotq_s32(a, b, c) } +} +#[doc = "Dot product 
arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.udot.v2i32.v8i8" + )] + fn _vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t; + } + unsafe { _vdot_u32(a, b, c) } +} +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.udot.v4i32.v16i8" + )] + fn _vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; + } + unsafe { _vdotq_u32(a, b, c) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(N, 1); 
+ unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature 
= "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a +} +#[doc = "Set all vector lanes to the same value"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { + 
static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( 
+ not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { + 
static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as 
u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + a, + [ + N as u32, 
N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) + } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { + static_assert_uimm_bits!(N, 1); + unsafe { transmute::(simd_extract!(a, N as u32)) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 1); + unsafe { transmute::(simd_extract!(a, N as u32)) } +} +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vdup_n_f16(a: f16) -> float16x4_t { + float16x4_t::splat(a) +} +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vdupq_n_f16(a: f16) -> float16x8_t { + float16x8_t::splat(a) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_f32(value: f32) -> float32x2_t { + float32x2_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_p16(value: p16) -> poly16x4_t { + poly16x4_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_p8(value: p8) -> poly8x8_t { + poly8x8_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_s16(value: i16) -> int16x4_t { + int16x4_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_s32(value: i32) -> int32x2_t { + int32x2_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmov) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_s64(value: i64) -> int64x1_t { + int64x1_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_s8(value: i8) -> int8x8_t { + int8x8_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_u16(value: u16) -> uint16x4_t { + uint16x4_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_u32(value: u32) -> uint32x2_t { + uint32x2_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmov) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_u64(value: u64) -> uint64x1_t { + uint64x1_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdup_n_u8(value: u8) -> uint8x8_t { + uint8x8_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_f32(value: f32) -> float32x4_t { + float32x4_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_p16(value: p16) -> poly16x8_t { + poly16x8_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_p8(value: p8) -> poly8x16_t { + poly8x16_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_s16(value: i16) -> int16x8_t { + int16x8_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_s32(value: i32) -> int32x4_t { + int32x4_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_s64(value: i64) -> int64x2_t { + int64x2_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_s8(value: i8) -> int8x16_t { + int8x16_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_u16(value: u16) -> uint16x8_t { + uint16x8_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_u32(value: u32) -> uint32x4_t { + uint32x4_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_u64(value: u64) -> uint64x2_t { + uint64x2_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_n_u8(value: u8) -> uint8x16_t { + uint8x16_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { + float32x2_t::splat(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { + float32x4_t::splat(value) +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { + static_assert!(N == 0); + unsafe { simd_shuffle!(a, a, 
[N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { + static_assert!(N == 0); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
+)] +pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_xor(a, b) } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch 
= "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => 
simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, 
b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] 
+#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] 
+#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + match N & 0b1111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ), + 2 => simd_shuffle!( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + ), + 3 => simd_shuffle!( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] + ), + 4 => simd_shuffle!( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ), + 5 => simd_shuffle!( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + ), + 6 => simd_shuffle!( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + ), + 7 => simd_shuffle!( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + ), + 8 => simd_shuffle!( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + ), + 9 => simd_shuffle!( + a, + b, + [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + ), + 10 => simd_shuffle!( + a, + b, + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ), + 11 => simd_shuffle!( + a, + b, + [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] + ), + 12 => simd_shuffle!( + a, + b, + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + ), + 13 => simd_shuffle!( + a, + b, + [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] + ), + 14 => simd_shuffle!( + a, + b, + [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + ), + 15 => simd_shuffle!( + a, + b, + [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + ), + _ => 
unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + match N & 0b1111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ), + 2 => simd_shuffle!( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + ), + 3 => simd_shuffle!( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] + ), + 4 => simd_shuffle!( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ), + 5 => simd_shuffle!( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + ), + 6 => simd_shuffle!( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + ), + 7 => simd_shuffle!( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + ), + 8 => simd_shuffle!( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + ), + 9 => simd_shuffle!( + a, + b, + [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + ), + 10 => simd_shuffle!( + a, + b, + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ), + 11 => simd_shuffle!( + a, + b, + [11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] + ), + 12 => simd_shuffle!( + a, + b, + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + ), + 13 => simd_shuffle!( + a, + b, + [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] + ), + 14 => simd_shuffle!( + a, + b, + [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + ), + 15 => simd_shuffle!( + a, + b, + [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + ), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + match N & 0b1111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ), + 2 => simd_shuffle!( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + ), + 3 => simd_shuffle!( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] + ), + 4 => simd_shuffle!( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ), + 5 => simd_shuffle!( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + ), + 6 => simd_shuffle!( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21] + ), + 7 => simd_shuffle!( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + ), + 8 => simd_shuffle!( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + ), + 9 => simd_shuffle!( + a, + b, + [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + ), + 10 => simd_shuffle!( + a, + b, + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ), + 11 => simd_shuffle!( + a, + b, + [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] + ), + 12 => simd_shuffle!( + a, + b, + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + ), + 13 => simd_shuffle!( + a, + b, + [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] + ), + 14 => simd_shuffle!( + a, + b, + [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + ), + 15 => simd_shuffle!( + a, + b, + [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + ), + _ => unreachable_unchecked(), + } + } +} +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe { simd_fma(b, c, a) } +} +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( 
+ all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe { simd_fma(b, c, a) } +} +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_fma(b, c, a) } +} +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_fma(b, c, a) } +} +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfma_f32(a, b, vdup_n_f32_vfp4(c)) +} +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) 
+)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe { + let b: float16x4_t = simd_neg(b); + vfma_f16(a, b, c) + } +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe { + let b: float16x8_t = simd_neg(b); + vfmaq_f16(a, b, c) + } +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { + let b: float32x2_t = simd_neg(b); + vfma_f32(a, b, c) + } +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { + let b: float32x4_t = simd_neg(b); + vfmaq_f32(a, b, c) + } +} +#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfms_f32(a, b, vdup_n_f32_vfp4(c)) +} +#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(fmls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) +} +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vget_high_f32(a: float32x4_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = 
"Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( 
+ not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { + unsafe { int64x1_t([simd_extract!(a, 1)]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { uint64x1_t([simd_extract!(a, 1)]) } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vget_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_extract!(a, LANE as u32) } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_extract!(a, LANE as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_f32(v: float32x2_t) -> f32 { + static_assert_uimm_bits!(IMM5, 1); + 
unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_p16(v: poly16x4_t) -> p16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_p8(v: poly8x8_t) -> p8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s16(v: int16x4_t) -> i16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s32(v: int32x2_t) -> i32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s8(v: int8x8_t) -> i8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u16(v: uint16x4_t) -> u16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u32(v: uint32x2_t) -> u32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u8(v: uint8x8_t) -> u8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { + 
static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, 
assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_p64(v: poly64x1_t) -> p64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s64(v: int64x1_t) -> i64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u64(v: uint64x1_t) -> u64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, 0) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { + unsafe { int64x1_t([simd_extract!(a, 0)]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { uint64x1_t([simd_extract!(a, 0)]) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] + fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vhadd_s8(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] + fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vhaddq_s8(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), 
+ stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] + fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vhadd_s16(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] + fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vhaddq_s16(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] + fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vhadd_s32(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] + fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vhaddq_s32(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] + fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vhadd_u8(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] + fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vhaddq_u8(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vhadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] + fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vhadd_u16(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] + fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vhaddq_u16(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] + fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vhadd_u32(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] + fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vhaddq_u32(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vhsub_s16(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vhsubq_s16(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vhsub_s32(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe 
{ _vhsubq_s32(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vhsub_s8(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] + fn 
_vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vhsubq_s8(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] + fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vhsub_u8(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v16i8" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] + fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vhsubq_u8(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] + fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vhsub_u16(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] + fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vhsubq_u16(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] + fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vhsub_u32(a, b) } +} +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vhsubq_u32(a: uint32x4_t, b: 
uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] + fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vhsubq_u32(a, b) } +} +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { + let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { + let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element 
structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { + let x = vld1_lane_f32::<0>(ptr, transmute(f32x2::splat(0.0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { + let x = vld1_lane_p16::<0>(ptr, transmute(u16x4::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { + let x = vld1_lane_p8::<0>(ptr, transmute(u8x8::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { + let x = vld1_lane_s16::<0>(ptr, transmute(i16x4::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * 
Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { + let x = vld1_lane_s32::<0>(ptr, transmute(i32x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { + let x = vld1_lane_s8::<0>(ptr, transmute(i8x8::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { + let x = vld1_lane_u16::<0>(ptr, transmute(u16x4::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { + let x = vld1_lane_u32::<0>(ptr, transmute(u32x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { + let x = vld1_lane_u8::<0>(ptr, transmute(u8x8::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { + let x = vld1q_lane_f32::<0>(ptr, transmute(f32x4::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { + let x = vld1q_lane_p16::<0>(ptr, transmute(u16x8::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { + let x = vld1q_lane_p8::<0>(ptr, transmute(u8x16::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { + let x = vld1q_lane_s16::<0>(ptr, transmute(i16x8::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { + let x = vld1q_lane_s32::<0>(ptr, transmute(i32x4::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { + let x = 
vld1q_lane_s64::<0>(ptr, transmute(i64x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { + let x = vld1q_lane_s8::<0>(ptr, transmute(i8x16::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { + let x = vld1q_lane_u16::<0>(ptr, transmute(u16x8::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} 
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { + let x = vld1q_lane_u32::<0>(ptr, transmute(u32x4::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { + let x = vld1q_lane_u64::<0>(ptr, transmute(u64x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { + let x = vld1q_lane_u8::<0>(ptr, transmute(u8x16::splat(0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { + let x: poly64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_p64(ptr); + } + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_p64(ptr); + }; + x +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one 
register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { + let x: int64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_s64(ptr); + } + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_s64(ptr); + }; + x +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { + let x: uint64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_u64(ptr); + } + #[cfg(target_arch = "arm")] + { + x = 
crate::core_arch::arm::vld1_u64(ptr); + }; + x +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + let ret_val: float16x4_t = transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + let ret_val: float16x8_t = transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v4f16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f16.p0")] + fn _vld1_f16_x2(a: *const f16) -> float16x4x2_t; + } + _vld1_f16_x2(a) +} +#[doc = "Load 
multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v4f16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f16.p0")] + fn _vld1_f16_x3(a: *const f16) -> float16x4x3_t; + } + _vld1_f16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v4f16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f16.p0")] + fn _vld1_f16_x4(a: *const f16) -> float16x4x4_t; + } + _vld1_f16_x4(a) +} +#[doc = "Load multiple single-element 
structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v8f16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8f16.p0")] + fn _vld1q_f16_x2(a: *const f16) -> float16x8x2_t; + } + _vld1q_f16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v8f16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8f16.p0")] + fn _vld1q_f16_x3(a: *const f16) -> float16x8x3_t; + } + _vld1q_f16_x3(a) +} +#[doc = "Load multiple single-element structures to one, 
two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v8f16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8f16.p0")] + fn _vld1q_f16_x4(a: *const f16) -> float16x8x4_t; + } + _vld1q_f16_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { + transmute(vld1_v2f32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { + let ret_val: float32x2_t = transmute(vld1_v2f32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { + transmute(vld1q_v4f32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { + let ret_val: float32x4_t = transmute(vld1q_v4f32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { + transmute(vld1_v8i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { + let ret_val: uint8x8_t = transmute(vld1_v8i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { + transmute(vld1q_v16i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { + let ret_val: uint8x16_t = transmute(vld1q_v16i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { + transmute(vld1_v4i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { + let ret_val: uint16x4_t = transmute(vld1_v4i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load 
multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { + transmute(vld1q_v8i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { + let ret_val: uint16x8_t = transmute(vld1q_v8i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { + transmute(vld1_v2i32( + ptr 
as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { + let ret_val: uint32x2_t = transmute(vld1_v2i32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { + transmute(vld1q_v4i32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_u32(ptr: *const 
u32) -> uint32x4_t { + let ret_val: uint32x4_t = transmute(vld1q_v4i32( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { + transmute(vld1_v1i64( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { + transmute(vld1q_v2i64( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe 
fn vld1q_u64(ptr: *const u64) -> uint64x2_t { + let ret_val: uint64x2_t = transmute(vld1q_v2i64( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { + transmute(vld1_v8i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { + let ret_val: poly8x8_t = transmute(vld1_v8i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { + transmute(vld1q_v16i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { + let ret_val: poly8x16_t = transmute(vld1q_v16i8( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { + transmute(vld1_v4i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { + let ret_val: poly16x4_t = transmute(vld1_v4i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { + transmute(vld1q_v8i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { + let ret_val: poly16x8_t = transmute(vld1q_v8i16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,aes")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { + transmute(vld1q_v2i64( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,aes")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { + let ret_val: poly64x2_t = transmute(vld1q_v2i64( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0")] + fn _vld1_f32_x2(a: *const f32) -> float32x2x2_t; + } + _vld1_f32_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0")] + fn _vld1_f32_x3(a: *const f32) -> float32x2x3_t; + } + _vld1_f32_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0")] + fn _vld1_f32_x4(a: *const f32) -> float32x2x4_t; + } + _vld1_f32_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0")] + fn _vld1q_f32_x2(a: *const f32) -> float32x4x2_t; + } + _vld1q_f32_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0")] + fn _vld1q_f32_x3(a: *const f32) -> float32x4x3_t; + } + _vld1q_f32_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0" 
+ )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0")] + fn _vld1q_f32_x4(a: *const f32) -> float32x4x4_t; + } + _vld1q_f32_x4(a) +} +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_lane_f16(ptr: *const f16, src: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr, LANE = 0) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr, LANE = 0) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, 
LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature 
= "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 15) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
+)] +pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 15) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> 
uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE 
as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 15) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr, LANE = 0) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one 
register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,aes")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { + let a: *const i8 = ptr as *const i8; + let b: i32 = crate::mem::align_of::() as i32; + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] + fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; + } + transmute(_vld1_v1i64(a, b)) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { + transmute(vld1_s64_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) 
+)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { + transmute(vld1q_s64_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { + let mut ret_val: poly64x2x2_t = transmute(vld1q_s64_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { + let mut ret_val: poly64x2x3_t = transmute(vld1q_s64_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { + let mut ret_val: poly64x2x4_t = transmute(vld1q_s64_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { + vld1_v8i8(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { + vld1q_v16i8(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { + vld1_v4i16(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { + vld1q_v8i16(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { + vld1_v2i32(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vld1.32"))] +pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { + vld1q_v4i32(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { + vld1_v1i64(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { + vld1q_v2i64(ptr as *const i8, crate::mem::align_of::() as i32) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0")] + fn _vld1_s8_x2(a: *const i8) -> int8x8x2_t; + } + _vld1_s8_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0")] + fn _vld1_s8_x3(a: *const i8) -> int8x8x3_t; + } + _vld1_s8_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0")] + fn _vld1_s8_x4(a: *const i8) -> int8x8x4_t; + } + _vld1_s8_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0")] + fn _vld1q_s8_x2(a: *const i8) -> int8x16x2_t; + } + _vld1q_s8_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0")] + fn _vld1q_s8_x3(a: *const i8) -> int8x16x3_t; + } + _vld1q_s8_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0")] + fn _vld1q_s8_x4(a: *const i8) -> int8x16x4_t; + } + _vld1q_s8_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0")] + fn _vld1_s16_x2(a: *const i16) -> int16x4x2_t; + } + _vld1_s16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0")] + fn _vld1_s16_x3(a: *const i16) -> int16x4x3_t; + } + _vld1_s16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0")] + fn _vld1_s16_x4(a: *const i16) -> int16x4x4_t; + } + _vld1_s16_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0")] + fn _vld1q_s16_x2(a: *const i16) -> int16x8x2_t; + } + _vld1q_s16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0")] + fn _vld1q_s16_x3(a: *const i16) -> int16x8x3_t; + } + _vld1q_s16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0")] + fn _vld1q_s16_x4(a: *const i16) -> int16x8x4_t; + } + _vld1q_s16_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0")] + fn _vld1_s32_x2(a: *const i32) -> int32x2x2_t; + } + _vld1_s32_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0")] + fn _vld1_s32_x3(a: *const i32) -> int32x2x3_t; + } + _vld1_s32_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0")] + fn _vld1_s32_x4(a: *const i32) -> int32x2x4_t; + } + _vld1_s32_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0")] + fn _vld1q_s32_x2(a: *const i32) -> int32x4x2_t; + } + _vld1q_s32_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0")] + fn _vld1q_s32_x3(a: *const i32) -> int32x4x3_t; + } + _vld1q_s32_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0")] + fn _vld1q_s32_x4(a: *const i32) -> int32x4x4_t; + } + _vld1q_s32_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0")] + fn _vld1_s64_x2(a: *const i64) -> int64x1x2_t; + } + _vld1_s64_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0")] + fn _vld1_s64_x3(a: *const i64) -> int64x1x3_t; + } + _vld1_s64_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0")] + fn _vld1_s64_x4(a: *const i64) -> int64x1x4_t; + } + _vld1_s64_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0")] + fn _vld1q_s64_x2(a: *const i64) -> int64x2x2_t; + } + _vld1q_s64_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0")] + fn _vld1q_s64_x3(a: *const i64) -> int64x2x3_t; + } + _vld1q_s64_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0")] + fn _vld1q_s64_x4(a: *const i64) -> int64x2x4_t; + } + _vld1q_s64_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + let mut ret_val: uint8x8x2_t = transmute(vld1_s8_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + let mut ret_val: uint8x8x3_t = transmute(vld1_s8_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + let mut ret_val: uint8x8x4_t = transmute(vld1_s8_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + let mut ret_val: uint8x16x2_t = transmute(vld1q_s8_x2(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) +} +#[doc = "Load 
multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + let mut ret_val: uint8x16x3_t = transmute(vld1q_s8_x3(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + let mut ret_val: uint8x16x4_t = transmute(vld1q_s8_x4(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.3 = unsafe { + simd_shuffle!( + ret_val.3, + ret_val.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + let mut ret_val: uint16x4x2_t = transmute(vld1_s16_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + let mut ret_val: uint16x4x3_t = transmute(vld1_s16_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + let mut ret_val: uint16x4x4_t = transmute(vld1_s16_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + let mut ret_val: uint16x8x2_t = transmute(vld1q_s16_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + let mut ret_val: uint16x8x3_t = transmute(vld1q_s16_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val 
+} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + let mut ret_val: uint16x8x4_t = transmute(vld1q_s16_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; 
+ ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + transmute(vld1_s32_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + let mut ret_val: uint32x2x2_t = 
transmute(vld1_s32_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + transmute(vld1_s32_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + let mut ret_val: uint32x2x3_t = 
transmute(vld1_s32_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + transmute(vld1_s32_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x4(a: *const u32) -> 
uint32x2x4_t { + let mut ret_val: uint32x2x4_t = transmute(vld1_s32_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + transmute(vld1q_s32_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + let mut ret_val: uint32x4x2_t = transmute(vld1q_s32_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + transmute(vld1q_s32_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + let mut ret_val: uint32x4x3_t = transmute(vld1q_s32_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + transmute(vld1q_s32_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + let mut ret_val: uint32x4x4_t = transmute(vld1q_s32_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + transmute(vld1_s64_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe 
fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + transmute(vld1q_s64_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + let mut ret_val: uint64x2x2_t = transmute(vld1q_s64_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x3(a: *const 
u64) -> uint64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + let mut ret_val: uint64x2x3_t = transmute(vld1q_s64_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + let mut ret_val: uint64x2x4_t = transmute(vld1q_s64_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + let mut ret_val: poly8x8x2_t = transmute(vld1_s8_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + let mut ret_val: poly8x8x3_t = transmute(vld1_s8_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + let mut ret_val: poly8x8x4_t = transmute(vld1_s8_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + let mut ret_val: poly8x16x2_t = transmute(vld1q_s8_x2(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + let mut ret_val: poly8x16x3_t = transmute(vld1q_s8_x3(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four 
registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + let mut ret_val: poly8x16x4_t = transmute(vld1q_s8_x4(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; 
+ ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.3 = unsafe { + simd_shuffle!( + ret_val.3, + ret_val.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x2(a: *const 
p16) -> poly16x4x2_t { + let mut ret_val: poly16x4x2_t = transmute(vld1_s16_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x3(a: *const p16) -> 
poly16x4x3_t { + let mut ret_val: poly16x4x3_t = transmute(vld1_s16_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + let mut ret_val: poly16x4x4_t = transmute(vld1_s16_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + let mut ret_val: poly16x8x2_t = transmute(vld1q_s16_x2(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { + let mut ret_val: poly16x8x3_t = transmute(vld1q_s16_x3(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { + let mut ret_val: poly16x8x4_t = transmute(vld1q_s16_x4(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v1i64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v1i64(a: *const i8, b: i32) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] + fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; + } + _vld1_v1i64(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v2f32(a: *const i8, b: i32) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1.v2f32")] + fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; + } + _vld1_v2f32(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2i32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v2i32(a: *const i8, b: i32) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] + fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; + } + _vld1_v2i32(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4i16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v4i16(a: *const i8, b: i32) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] + fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; + } + _vld1_v4i16(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v8i8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v8i8(a: *const i8, b: i32) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1.v8i8")] + fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; + } + _vld1_v8i8(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v16i8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] + fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; + } + _vld1q_v16i8(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v2i64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] + fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; + } + _vld1q_v2i64(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1.v4f32")] + fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; + } + _vld1q_v4f32(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4i32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] + fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; + } + _vld1q_v4i32(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8i16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] + fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; + } + _vld1q_v8i16(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +unsafe fn vld1_v4f16(a: *const i8, b: i32) -> 
float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] + fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; + } + _vld1_v4f16(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")] + fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t; + } + _vld1q_v8f16(a, b) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t { + let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")] + fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; + } + _vld2_dup_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")] + fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; + } + _vld2q_dup_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4f16.p0" + )] + fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; + } + _vld2_dup_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8f16.p0" + )] + fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; + } + _vld2q_dup_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")] + fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t; + } + _vld2_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")] + fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t; + } + _vld2q_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")] + fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t; + } + _vld2_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")] + fn 
_vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t; + } + _vld2q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")] + fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t; + } + _vld2_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")] + fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t; + } + _vld2q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, 
assert_instr(vld2))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")] + fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t; + } + _vld2_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")] + fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t; + } + _vld2q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2f32.p0" + )] + fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t; + } + _vld2_dup_f32(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4f32.p0" + )] + fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t; + } + _vld2q_dup_f32(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8i8.p0" + )] + fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t; + } + _vld2_dup_s8(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v16i8.p0" + )] + fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t; + } + _vld2q_dup_s8(a as _) +} 
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4i16.p0" + )] + fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t; + } + _vld2_dup_s16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8i16.p0" + )] + fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t; + } + _vld2q_dup_s16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + unsafe 
extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2i32.p0" + )] + fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t; + } + _vld2_dup_s32(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4i32.p0" + )] + fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t; + } + _vld2q_dup_s32(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")] + fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t; + } + _vld2_dup_s64(a as *const i8, 8) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v1i64.p0" + )] + fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t; + } + _vld2_dup_s64(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { + let mut ret_val: uint8x8x2_t = 
transmute(vld2_dup_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { + let mut ret_val: 
uint8x16x2_t = transmute(vld2q_dup_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue 
= "111800") +)] +pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { + let mut ret_val: uint16x4x2_t = transmute(vld2_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
+)] +pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { + let mut ret_val: uint16x8x2_t = transmute(vld2q_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_dup_s32(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { + let mut ret_val: uint32x2x2_t = transmute(vld2_dup_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_dup_s32(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { + let mut ret_val: uint32x4x2_t = transmute(vld2q_dup_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vld2_dup_p8(a: *const p8) -> poly8x8x2_t { + let mut ret_val: poly8x8x2_t = transmute(vld2_dup_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe 
fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { + let mut ret_val: poly8x16x2_t = transmute(vld2q_dup_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch 
= "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { + let mut ret_val: poly16x4x2_t = transmute(vld2_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { + let mut ret_val: poly16x8x2_t = transmute(vld2q_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")] + fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; + } + _vld2_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")] + fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; + } + _vld2q_f16(a as _, 2) +} +#[doc = "Load 
single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v4f16.p0" + )] + fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; + } + _vld2_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v8f16.p0" + )] + fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; + } + _vld2q_f16(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, 
assert_instr(vld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")] + fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t; + } + _vld2_f32(a as *const i8, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")] + fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t; + } + _vld2q_f32(a as *const i8, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")] + fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t; + } + _vld2_s8(a as *const i8, 1) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")] + fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t; + } + _vld2q_s8(a as *const i8, 1) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")] + fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t; + } + _vld2_s16(a as *const i8, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")] + fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t; + } + _vld2q_s16(a as *const i8, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = 
"neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")] + fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t; + } + _vld2_s32(a as *const i8, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")] + fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t; + } + _vld2q_s32(a as *const i8, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v2f32.p0" + )] + fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t; + } + _vld2_f32(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v4f32.p0" + )] + fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t; + } + _vld2q_f32(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v8i8.p0" + )] + fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t; + } + _vld2_s8(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v16i8.p0" + )] + fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t; + } + _vld2q_s8(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v4i16.p0" + )] + fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t; + } + _vld2_s16(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v8i16.p0" + )] + fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t; + } + _vld2q_s16(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v2i32.p0" + )] + fn _vld2_s32(ptr: *const int32x2_t) 
-> int32x2x2_t; + } + _vld2_s32(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v4i32.p0" + )] + fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t; + } + _vld2q_s32(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] + fn _vld2_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x2_t; + } + _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = 
"arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] + fn _vld2q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" + )] + fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) + -> float16x4x2_t; + } + _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] 
+#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" + )] + fn _vld2q_lane_f16( + a: float16x8_t, + b: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0" + )] + fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; + } + _vld2_lane_f32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> 
float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0" + )] + fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) + -> float32x4x2_t; + } + _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0" + )] + fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; + } + _vld2_lane_s8(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0" + )] + fn _vld2_lane_s16(a: int16x4_t, 
b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; + } + _vld2_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0" + )] + fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; + } + _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0" + )] + fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; + } + _vld2_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0" + )] + fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; + } + _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")] + fn _vld2_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x2_t; + } + _vld2_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] 
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")] + fn _vld2q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x2_t; + } + _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")] + fn _vld2q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x2_t; + } + _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + 
static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")] + fn _vld2q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x2_t; + } + _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")] + fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) + -> int8x8x2_t; + } + _vld2_lane_s8(a as _, b.0, b.1, LANE, 1) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")] + fn _vld2_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x2_t; + } + _vld2_lane_s16(a 
as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")] + fn _vld2_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x2_t; + } + _vld2_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld2_lane_s32::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2q_lane_s32::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_s64(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = 
"neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")] + fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t; + } + _vld2_s64(a as *const i8, 8) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v1i64.p0" + )] + fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t; + } + _vld2_s64(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_s64(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two 
registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_s8(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { + let mut ret_val: uint8x8x2_t = transmute(vld2_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_s8(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { + let mut ret_val: uint8x16x2_t = transmute(vld2q_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple 2-element structures to two 
registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_s16(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { + let mut ret_val: uint16x4x2_t = transmute(vld2_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_s16(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { + let mut ret_val: uint16x8x2_t = transmute(vld2q_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_s32(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { + let mut ret_val: uint32x2x2_t = transmute(vld2_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_s32(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { + let mut ret_val: uint32x4x2_t = transmute(vld2q_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_s8(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { + let mut ret_val: poly8x8x2_t = transmute(vld2_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_s8(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { + let mut ret_val: poly8x16x2_t = transmute(vld2q_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_s16(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { + let mut ret_val: poly16x4x2_t = transmute(vld2_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_s16(transmute(a))) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { + let mut ret_val: poly16x8x2_t = transmute(vld2q_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")] + fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_dup_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")] + fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_dup_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4f16.p0" + )] + fn _vld3_dup_f16(ptr: *const f16) -> 
float16x4x3_t; + } + _vld3_dup_f16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8f16.p0" + )] + fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; + } + _vld3q_dup_f16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v2f32.p0" + )] + fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t; + } + _vld3_dup_f32(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4f32.p0" + )] + fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t; + } + _vld3q_dup_f32(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8i8.p0" + )] + fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t; + } + _vld3_dup_s8(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v16i8.p0" + )] + fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t; + } + _vld3q_dup_s8(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4i16.p0" + )] + fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t; + } + _vld3_dup_s16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8i16.p0" + )] + fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t; + } + _vld3q_dup_s16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name 
= "llvm.aarch64.neon.ld3r.v2i32.p0" + )] + fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t; + } + _vld3_dup_s32(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4i32.p0" + )] + fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t; + } + _vld3q_dup_s32(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v1i64.p0" + )] + fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t; + } + _vld3_dup_s64(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")] + fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t; + } + _vld3_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")] + fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t; + } + _vld3q_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")] + fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t; + } + _vld3_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")] + fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t; + } + _vld3q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")] + fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t; + } + _vld3_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")] + fn 
_vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t; + } + _vld3q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")] + fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + } + _vld3_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")] + fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + } + _vld3q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")] + fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t; + } + _vld3_dup_s64(a as *const i8, 8) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { + 
transmute(vld3_dup_s64(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { + let mut ret_val: uint8x8x3_t = transmute(vld3_dup_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, 
[7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { + let mut ret_val: uint8x16x3_t = transmute(vld3q_dup_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + 
ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { + let mut ret_val: uint16x4x3_t = transmute(vld3_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { + let mut ret_val: uint16x8x3_t = transmute(vld3q_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_dup_s32(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { + let mut ret_val: uint32x2x3_t = transmute(vld3_dup_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_dup_s32(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { + let mut ret_val: uint32x4x3_t = transmute(vld3q_dup_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { + let mut ret_val: poly8x8x3_t = transmute(vld3_dup_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { + let mut ret_val: poly8x16x3_t = transmute(vld3q_dup_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) +} +#[doc = "Load single 3-element structure and replicate to all lanes of 
three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { + let mut ret_val: poly16x4x3_t = transmute(vld3_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) +} 
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { + let mut ret_val: poly16x8x3_t = transmute(vld3q_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")] + fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_f16(a as _, 2) +} 
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")] + fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4f16.p0" + )] + fn _vld3_f16(ptr: *const f16) -> float16x4x3_t; + } + _vld3_f16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(ld3) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8f16.p0" + )] + fn _vld3q_f16(ptr: *const f16) -> float16x8x3_t; + } + _vld3q_f16(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v2f32.p0" + )] + fn _vld3_f32(ptr: *const float32x2_t) -> float32x2x3_t; + } + _vld3_f32(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4f32.p0" + )] + fn _vld3q_f32(ptr: *const float32x4_t) -> float32x4x3_t; + } + _vld3q_f32(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8i8.p0" + )] + fn _vld3_s8(ptr: *const int8x8_t) -> int8x8x3_t; + } + _vld3_s8(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v16i8.p0" + )] + fn _vld3q_s8(ptr: *const int8x16_t) -> int8x16x3_t; + } + _vld3q_s8(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4i16.p0" + )] + fn _vld3_s16(ptr: *const int16x4_t) -> int16x4x3_t; + 
} + _vld3_s16(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8i16.p0" + )] + fn _vld3q_s16(ptr: *const int16x8_t) -> int16x8x3_t; + } + _vld3q_s16(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v2i32.p0" + )] + fn _vld3_s32(ptr: *const int32x2_t) -> int32x2x3_t; + } + _vld3_s32(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4i32.p0" + )] + fn _vld3q_s32(ptr: *const int32x4_t) -> int32x4x3_t; + } + _vld3q_s32(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")] + fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t; + } + _vld3_f32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")] + fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t; + } + _vld3q_f32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn 
vld3_s8(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")] + fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t; + } + _vld3_s8(a as *const i8, 1) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")] + fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t; + } + _vld3q_s8(a as *const i8, 1) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")] + fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t; + } + _vld3_s16(a as *const i8, 2) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")] + fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t; + } + _vld3q_s16(a as *const i8, 2) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")] + fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + } + _vld3_s32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")] + fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + } + _vld3q_s32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] 
+#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] + fn _vld3_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x3_t; + } + _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] + fn _vld3q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" + )] + fn _vld3_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x3_t; + } + _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" + )] + fn _vld3q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE 
= 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0" + )] + fn _vld3_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x3_t; + } + _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0" + )] + fn _vld3q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x3_t; + } + _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn 
vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")] + fn _vld3_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x3_t; + } + _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0" + )] + fn _vld3_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x3_t; + } + _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0" + )] + fn _vld3_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x3_t; + } + _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0" + )] + fn _vld3q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0" + )] + fn _vld3_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x3_t; 
+ } + _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0" + )] + fn _vld3q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")] + fn _vld3_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x3_t; + } + _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")] + fn _vld3_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x3_t; + } + _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")] + fn _vld3q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")] + fn _vld3_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x3_t; + } + _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")] + fn _vld3q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] 
+#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld3_lane_s32::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3q_lane_s32::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_s64(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v1i64.p0" + )] + fn _vld3_s64(ptr: *const int64x1_t) -> int64x1x3_t; + } + _vld3_s64(a as _) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")] + fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t; + } + _vld3_s64(a as *const i8, 8) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_s64(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_s8(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t 
{ + let mut ret_val: uint8x8x3_t = transmute(vld3_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_s8(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u8(a: 
*const u8) -> uint8x16x3_t { + let mut ret_val: uint8x16x3_t = transmute(vld3q_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_s16(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { + let mut ret_val: uint16x4x3_t = transmute(vld3_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_s16(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { + let mut ret_val: uint16x8x3_t = transmute(vld3q_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_s32(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { + let mut ret_val: uint32x2x3_t = transmute(vld3_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_s32(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { + let mut ret_val: uint32x4x3_t = transmute(vld3q_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_s8(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { + let mut ret_val: poly8x8x3_t = transmute(vld3_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_s8(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { + let mut ret_val: poly8x16x3_t = transmute(vld3q_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_s16(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { + let mut ret_val: poly16x4x3_t = transmute(vld3_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_s16(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { + let mut ret_val: poly16x8x3_t = transmute(vld3q_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")] + fn _vld3q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x3_t; + } + _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")] + fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_dup_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")] + fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_dup_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.ld4r.v4f16.p0" + )] + fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; + } + _vld4_dup_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8f16.p0" + )] + fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; + } + _vld4q_dup_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")] + fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + } + _vld4_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")] + fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + } + _vld4q_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")] + fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + } + _vld4_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")] + fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + } + _vld4q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")] + fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + } + _vld4_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")] + fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + } + _vld4q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")] + fn 
_vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")] + fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0" + )] + fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t; + } + _vld4_dup_f32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0" + )] + fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t; + } + _vld4q_dup_f32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0" + )] + fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t; + } + _vld4_dup_s8(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0" + )] + fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t; + } + _vld4q_dup_s8(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0" + )] + fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t; + } + _vld4_dup_s16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0" + )] + fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t; + } + _vld4q_dup_s16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0" + )] + fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0" + )] + fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0" + )] + fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t; + } + _vld4_dup_s64(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")] + fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t; + } + _vld4_dup_s64(a as *const i8, 8) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vld4_dup_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { + let mut ret_val: uint8x8x4_t = transmute(vld4_dup_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 
= unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_u8(a: *const u8) -> 
uint8x16x4_t { + let mut ret_val: uint8x16x4_t = transmute(vld4q_dup_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.3 = unsafe { + simd_shuffle!( + ret_val.3, + ret_val.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { + let mut ret_val: uint16x4x4_t = transmute(vld4_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { + let mut ret_val: uint16x8x4_t = transmute(vld4q_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_dup_s32(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { + let mut ret_val: uint32x2x4_t = transmute(vld4_dup_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { + 
transmute(vld4q_dup_s32(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { + let mut ret_val: uint32x4x4_t = transmute(vld4q_dup_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { + let mut ret_val: poly8x8x4_t = transmute(vld4_dup_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { + let mut ret_val: poly8x16x4_t = transmute(vld4q_dup_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.3 = unsafe { + simd_shuffle!( + ret_val.3, + ret_val.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { + let mut ret_val: poly16x4x4_t = transmute(vld4_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, 
ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { + let mut ret_val: poly16x8x4_t = transmute(vld4q_dup_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { 
simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")] + fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")] + fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4f16.p0" + )] + fn _vld4_f16(ptr: *const f16) -> float16x4x4_t; + } + _vld4_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8f16.p0" + )] + fn _vld4q_f16(ptr: *const f16) -> float16x8x4_t; + } + _vld4q_f16(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v2f32.p0" + )] + fn _vld4_f32(ptr: *const float32x2_t) -> float32x2x4_t; + } + _vld4_f32(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4f32.p0" + )] + fn _vld4q_f32(ptr: *const float32x4_t) -> float32x4x4_t; + } + _vld4q_f32(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8i8.p0" + )] + fn _vld4_s8(ptr: *const int8x8_t) -> int8x8x4_t; + } + _vld4_s8(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v16i8.p0" + )] + fn _vld4q_s8(ptr: *const int8x16_t) -> int8x16x4_t; + } + _vld4q_s8(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4i16.p0" + )] + fn _vld4_s16(ptr: *const int16x4_t) -> int16x4x4_t; + } + _vld4_s16(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8i16.p0" + )] + fn _vld4q_s16(ptr: *const int16x8_t) -> int16x8x4_t; + } + _vld4q_s16(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v2i32.p0" + )] + fn _vld4_s32(ptr: *const int32x2_t) -> int32x2x4_t; + } + _vld4_s32(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4i32.p0" + )] + fn _vld4q_s32(ptr: *const int32x4_t) -> int32x4x4_t; + } + _vld4q_s32(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")] + fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + } + _vld4_f32(a as *const i8, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")] + fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + } + _vld4q_f32(a as *const i8, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")] + fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + } + _vld4_s8(a as *const i8, 1) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")] + fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + } + _vld4q_s8(a as *const i8, 1) +} +#[doc = "Load 
multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")] + fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + } + _vld4_s16(a as *const i8, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")] + fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + } + _vld4q_s16(a as *const i8, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")] + fn _vld4_s32(ptr: *const i8, size: i32) 
-> int32x2x4_t; + } + _vld4_s32(a as *const i8, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")] + fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + } + _vld4q_s32(a as *const i8, 4) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] + fn _vld4_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x4_t; + } + _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] + fn _vld4q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + )] + fn _vld4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x4_t; + } + _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + )] + fn _vld4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0" + )] + fn _vld4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x4_t; + } + _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0" + )] + fn _vld4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x4_t; + } + _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0" + )] + fn _vld4_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x4_t; + } + _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] 
+#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0" + )] + fn _vld4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x4_t; + } + _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0" + )] + fn _vld4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x4_t; + } + _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s32(a: 
*const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0" + )] + fn _vld4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x4_t; + } + _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0" + )] + fn _vld4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x4_t; + } + _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" 
{ + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")] + fn _vld4_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x4_t; + } + _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")] + fn _vld4q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x4_t; + } + _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")] + fn _vld4_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + 
c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x4_t; + } + _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")] + fn _vld4_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x4_t; + } + _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")] + fn _vld4q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x4_t; + } + _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element 
structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")] + fn _vld4_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x4_t; + } + _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")] + fn _vld4q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x4_t; + } + _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"] +#[doc = "## Safety"] 
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Unsigned variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 7] for an 8-lane vector.
+pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Unsigned variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 3] for a 4-lane vector.
+pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Unsigned variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 7] for an 8-lane vector.
+pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Unsigned variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be 0 or 1 for a 2-lane vector.
+pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch =
"arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Unsigned variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 3] for a 4-lane vector.
+pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Polynomial variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 7] for an 8-lane vector.
+pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Polynomial variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 3] for a 4-lane vector.
+pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+// Polynomial variant: reinterpret as the signed intrinsic and transmute the result.
+// `LANE` must be in [0, 7] for an 8-lane vector.
+pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_s64(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v1i64.p0" + )] + fn _vld4_s64(ptr: *const int64x1_t) -> int64x1x4_t; + } + _vld4_s64(a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")] + fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t; + } + _vld4_s64(a as *const i8, 8) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"] +#[doc = "## Safety"] +#[doc = " 
* Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_s64(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_s8(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { + let mut ret_val: uint8x8x4_t = transmute(vld4_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_s8(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { + let mut ret_val: uint8x16x4_t = transmute(vld4q_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.3 = unsafe { + simd_shuffle!( + ret_val.3, + ret_val.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_s16(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { + let mut ret_val: uint16x4x4_t = transmute(vld4_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_s16(transmute(a))) +} +#[doc = "Load 
multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { + let mut ret_val: uint16x8x4_t = transmute(vld4q_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
+)] +pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_s32(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { + let mut ret_val: uint32x2x4_t = transmute(vld4_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_s32(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { + let mut ret_val: uint32x4x4_t = transmute(vld4q_s32(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_s8(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { + let mut ret_val: poly8x8x4_t = transmute(vld4_s8(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_s8(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { + let mut ret_val: poly8x16x4_t = transmute(vld4q_s8(transmute(a))); + ret_val.0 = unsafe { + simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.1 = unsafe { + simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.2 = unsafe { + simd_shuffle!( + ret_val.2, + ret_val.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val.3 = unsafe { + simd_shuffle!( + ret_val.3, + ret_val.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_s16(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { + let mut ret_val: poly16x4x4_t = transmute(vld4_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; + ret_val +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_s16(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { + let mut ret_val: poly16x8x4_t = transmute(vld4q_s16(transmute(a))); + ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 
6, 5, 4, 3, 2, 1, 0]) }; + ret_val +} +#[doc = "Store SIMD&FP register (immediate offset)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vldrq_p128(a: *const p128) -> p128 { + *a +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f16" + )] + fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmax_f16(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v8f16" + )] + fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vmaxq_f16(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v2f32" + )] + fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmax_f32(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f32" + )] + fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vmaxq_f32(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v8i8" + )] + fn _vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vmax_s8(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v16i8" + )] + fn _vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vmaxq_s8(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v4i16" + )] + fn _vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vmax_s16(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v8i16" + )] + fn _vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vmaxq_s16(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v2i32" + )] + fn _vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vmax_s32(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v4i32" + )] + fn _vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vmaxq_s32(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v8i8" + )] + fn _vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vmax_u8(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v16i8" + )] + fn _vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vmaxq_u8(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v4i16" + )] + fn _vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vmax_u16(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v8i16" + )] + fn _vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vmaxq_u16(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v2i32" + )] + fn _vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vmax_u32(a, b) } +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v4i32" + )] + fn _vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vmaxq_u32(a, b) } +} +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f16" + )] + fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmaxnm_f16(a, b) } +} +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v8f16" + )] + fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vmaxnmq_f16(a, b) } +} +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v2f32" + )] + fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmaxnm_f32(a, b) } +} +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f32" + )] + fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vmaxnmq_f32(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f16" + )] + fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmin_f16(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v8f16" + )] + fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vminq_f16(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v2f32" + )] + fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmin_f32(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f32" + )] + fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vminq_f32(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v8i8" + )] + fn _vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vmin_s8(a, b) } +} +#[doc = 
"Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v16i8" + )] + fn _vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vminq_s8(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v4i16" + )] + fn _vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe 
{ _vmin_s16(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v8i16" + )] + fn _vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vminq_s16(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v2i32" + )] + fn _vmin_s32(a: int32x2_t, b: 
int32x2_t) -> int32x2_t; + } + unsafe { _vmin_s32(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v4i32" + )] + fn _vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vminq_s32(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v8i8" + )] 
+ fn _vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vmin_u8(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v16i8" + )] + fn _vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vminq_u8(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.umin.v4i16" + )] + fn _vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vmin_u16(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v8i16" + )] + fn _vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vminq_u16(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v2i32" + )] + fn _vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vmin_u32(a, b) } +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v4i32" + )] + fn _vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vminq_u32(a, b) } +} +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f16" + )] + fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vminnm_f16(a, b) } +} +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v8f16" + )] + fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vminnmq_f16(a, b) } +} +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v2f32" + )] + fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vminnm_f32(a, b) } +} +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f32" + )] + fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vminnmq_f32(a, b) } +} +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> 
float32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + vmlaq_f32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlaq_f32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmla_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmla_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmla_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmla_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlaq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlaq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlaq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlaq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 
1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + vmlaq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + vmlaq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlaq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch 
= "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlaq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmla_f32(a, b, vdup_n_f32(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlaq_f32(a, b, vdupq_n_f32(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmla_s16(a, b, vdup_n_s16(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlaq_s16(a, b, vdupq_n_s16(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmla_u16(a, b, vdup_n_u16(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlaq_u16(a, b, vdupq_n_u16(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmla_s32(a, b, vdup_n_s32(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlaq_s32(a, b, vdupq_n_s32(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmla_u32(a, b, vdup_n_u32(c)) +} +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlaq_u32(a, b, vdupq_n_u32(c)) +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_add(a, simd_mul(b, 
c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal, LANE = 1) +)] 
+#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlal_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlal_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlal_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlal_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlal_s16(a, b, vdup_n_s16(c)) +} +#[doc = "Vector 
widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlal_s32(a, b, vdup_n_s32(c)) +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlal_u16(a, b, vdup_n_u16(c)) +} +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlal_u32(a, b, vdup_n_u32(c)) +} +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + unsafe { simd_add(a, vmull_s8(b, c)) } +} +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + unsafe { simd_add(a, vmull_s16(b, c)) } +} +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + unsafe { simd_add(a, vmull_s32(b, c)) } +} +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + unsafe { simd_add(a, vmull_u8(b, c)) } +} +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + unsafe { simd_add(a, vmull_u16(b, c)) } +} +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + unsafe { simd_add(a, vmull_u32(b, c)) } +} +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + vmlsq_f32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlsq_f32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmls_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE 
= 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmls_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmls_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmls_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlsq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlsq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlsq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlsq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE 
= 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + vmlsq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + vmlsq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlsq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> 
uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlsq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmls_f32(a, b, vdup_n_f32(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlsq_f32(a, b, vdupq_n_f32(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmls_s16(a, b, vdup_n_s16(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlsq_s16(a, b, vdupq_n_s16(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + 
vmls_u16(a, b, vdup_n_u16(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlsq_u16(a, b, vdupq_n_u16(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmls_s32(a, b, vdup_n_s32(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlsq_s32(a, b, vdupq_n_s32(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmls_u32(a, b, vdup_n_u32(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlsq_u32(a, b, vdupq_n_u32(c)) +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmlsl_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlsl_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] 
+pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + 
vmlsl_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmlsl_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as 
u32, LANE as u32])) } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlsl_s16(a, b, vdup_n_s16(c)) +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlsl_s32(a, b, vdup_n_s32(c)) +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlsl_u16(a, b, vdup_n_u16(c)) +} +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlsl_u32(a, b, vdup_n_u32(c)) +} +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + unsafe { simd_sub(a, vmull_s8(b, c)) } +} +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + unsafe { simd_sub(a, vmull_s16(b, c)) } +} +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + unsafe { simd_sub(a, vmull_s32(b, c)) } +} +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + unsafe { simd_sub(a, vmull_u8(b, c)) } +} +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + unsafe { simd_sub(a, vmull_u16(b, c)) } +} 
+#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + unsafe { simd_sub(a, vmull_u32(b, c)) } +} +#[doc = "8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")] + fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vmmlaq_s32(a, b, c) } +} +#[doc = "8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ummla) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")] + fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; + } + unsafe { _vmmlaq_u32(a, b, c) } +} +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmov_n_f16(a: f16) -> float16x4_t { + vdup_n_f16(a) +} +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmovq_n_f16(a: f16) -> float16x8_t { + vdupq_n_f16(a) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_f32(value: f32) -> float32x2_t { + vdup_n_f32(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_p16(value: p16) -> poly16x4_t { + vdup_n_p16(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_p8(value: p8) -> poly8x8_t { + vdup_n_p8(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_s16(value: i16) -> int16x4_t { + vdup_n_s16(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_s32(value: i32) -> int32x2_t { + vdup_n_s32(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmov) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_s64(value: i64) -> int64x1_t { + vdup_n_s64(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_s8(value: i8) -> int8x8_t { + vdup_n_s8(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_u16(value: u16) -> uint16x4_t { + vdup_n_u16(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_u32(value: u32) -> uint32x2_t { + vdup_n_u32(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmov) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_u64(value: u64) -> uint64x1_t { + vdup_n_u64(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmov_n_u8(value: u8) -> uint8x8_t { + vdup_n_u8(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_f32(value: f32) -> float32x4_t { + vdupq_n_f32(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_p16(value: p16) -> poly16x8_t { + vdupq_n_p16(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_p8(value: p8) -> poly8x16_t { + vdupq_n_p8(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_s16(value: i16) -> int16x8_t { + vdupq_n_s16(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", 
+ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_s32(value: i32) -> int32x4_t { + vdupq_n_s32(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_s64(value: i64) -> int64x2_t { + vdupq_n_s64(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_s8(value: i8) -> int8x16_t { + vdupq_n_s8(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_u16(value: u16) -> uint16x8_t { + vdupq_n_u16(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_u32(value: u32) -> uint32x4_t { + vdupq_n_u32(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_u64(value: u64) -> uint64x2_t { + vdupq_n_u64(value) +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovq_n_u8(value: u8) -> uint8x16_t { + vdupq_n_u8(value) +} +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sxtl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sxtl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue 
= "111800") +)] +pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sxtl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uxtl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uxtl) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uxtl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(xtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(xtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(xtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(xtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(xtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { + unsafe { simd_cast(a) } +} +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(xtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { + unsafe { simd_cast(a) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { 
simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!( + v, + v, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_lane_s16(a: 
int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, 
simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) + } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + 
simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + unsafe { simd_mul(a, vdup_n_f16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + unsafe { simd_mul(a, vdupq_n_f16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_n_f32(a: float32x2_t, b: 
f32) -> float32x2_t { + unsafe { simd_mul(a, vdup_n_f32(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { + unsafe { simd_mul(a, vdupq_n_f32(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + unsafe { simd_mul(a, vdup_n_s16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + unsafe { simd_mul(a, vdupq_n_s16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + unsafe { simd_mul(a, vdup_n_s32(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + unsafe { simd_mul(a, vdupq_n_s32(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { + unsafe { simd_mul(a, vdup_n_u16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { + unsafe { simd_mul(a, vdupq_n_u16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> 
uint32x2_t { + unsafe { simd_mul(a, vdup_n_u32(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { + unsafe { simd_mul(a, vdupq_n_u32(b)) } +} +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(pmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v8i8" + )] + fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; + } + unsafe { _vmul_p8(a, b) } +} +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(pmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v16i8" + )] + fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; + } + unsafe { _vmulq_p8(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_mul(a, b) } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(smull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmull_s16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmull_s16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_lane_u16(a: 
uint16x4_t, b: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + vmull_u16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + vmull_u16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) + } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { vmull_u32(a, simd_shuffle!(b, b, 
[LANE as u32, LANE as u32])) } +} +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +} +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vmull_s16(a, vdup_n_s16(b)) +} +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vmull_s32(a, vdup_n_s32(b)) +} +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { + vmull_u16(a, vdup_n_u16(b)) +} +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { + vmull_u32(a, vdup_n_u32(b)) +} +#[doc = "Polynomial multiply long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(pmull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmull.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")] + fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; + } + unsafe { _vmull_p8(a, b) } +} +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smull.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i32")] + fn _vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; + } + unsafe { 
_vmull_s16(a, b) } +} +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smull.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i64")] + fn _vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; + } + unsafe { _vmull_s32(a, b) } +} +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smull.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i16")] + fn 
_vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t; + } + unsafe { _vmull_s8(a, b) } +} +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umull.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i16")] + fn _vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; + } + unsafe { _vmull_u8(a, b) } +} +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umull.v4i32" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i32")] + fn _vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; + } + unsafe { _vmull_u16(a, b) } +} +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umull.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i64")] + fn _vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; + } + unsafe { _vmull_u32(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { + let b = poly8x8_t::splat(255); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { + let b = int16x4_t::splat(-1); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { + let b = int32x2_t::splat(-1); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { + let b = int8x8_t::splat(-1); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { + let b = uint16x4_t::splat(65_535); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { + let b = uint32x2_t::splat(4_294_967_295); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { + let b = uint8x8_t::splat(255); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { + let b = poly8x16_t::splat(255); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { + let b = int16x8_t::splat(-1); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { + let b = int32x4_t::splat(-1); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { + let b = int8x16_t::splat(-1); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { + let b = uint16x8_t::splat(65_535); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { + let b = uint32x4_t::splat(4_294_967_295); + unsafe { simd_xor(a, b) } +} +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mvn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { + let b = uint8x16_t::splat(255); + unsafe { simd_xor(a, b) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vneg.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vneg_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vneg_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(fneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(neg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vneg_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(neg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vneg.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(neg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vneg_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(neg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(neg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vneg_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_neg(a) } +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(neg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_neg(a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] 
+pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + let c = 
int64x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c = int8x8_t::splat(-1); + unsafe { 
simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_or(a, b) } +} +#[doc 
= "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_or(a, b) } +} 
+#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_or(a, b) } +} +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_or(a, b) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadal_s8(a: int16x4_t, b: 
int8x8_t) -> int16x4_t { + let x: int16x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s8(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s8(b), a); + }; + x +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let x: int16x8_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s8(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s8(b), a); + }; + x +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + let x: int32x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s16(a, b); + } + 
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s16(b), a); + }; + x +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let x: int32x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s16(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s16(b), a); + }; + x +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + let x: int64x1_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s32(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s32(b), a); + }; + 
x +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let x: int64x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s32(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s32(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + let x: uint16x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u8(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u8(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let x: uint16x8_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u8(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u8(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + let x: uint32x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u16(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u16(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let x: uint32x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u16(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u16(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + let x: uint64x1_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u32(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u32(b), a); + }; + x +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uadalp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let x: uint64x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u32(a, b); + } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u32(b), a); + }; + x +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(faddp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v4f16" + )] + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vpadd_f16(a, b) } +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(faddp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v2f32" + )] + fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vpadd_f32(a, b) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] + fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vpadd_s8(a, b) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] + fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vpadd_s16(a, b) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] + fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vpadd_s32(a, b) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vpadd_s8(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint16x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(vpadd_s16(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) } +} +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(addp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint32x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(vpadd_s32(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] + fn _vpaddl_s8(a: int8x8_t) -> int16x4_t; + } + unsafe { _vpaddl_s8(a) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] + fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t; + } + unsafe { _vpaddlq_s8(a) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] + fn _vpaddl_s16(a: int16x4_t) -> int32x2_t; + } + unsafe { _vpaddl_s16(a) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] + fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t; + } + unsafe { _vpaddlq_s16(a) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddl_s32(a: int32x2_t) 
-> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] + fn _vpaddl_s32(a: int32x2_t) -> int64x1_t; + } + unsafe { _vpaddl_s32(a) } +} +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(saddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] + fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t; + } + unsafe { _vpaddlq_s32(a) } +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] + fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t; + } + unsafe { _vpaddl_u8(a) } +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] + fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t; + } + unsafe { _vpaddlq_u8(a) } +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] + fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t; + } + unsafe { _vpaddl_u16(a) } +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] + fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t; + } + unsafe { _vpaddlq_u16(a) } +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(uaddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] + fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t; + } + unsafe { _vpaddl_u32(a) } +} +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uaddlp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] + fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t; + } + unsafe { _vpaddlq_u32(a) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v2f32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] + fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vpmax_f32(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vpmax_s8(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vpmax_s16(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vpmax_s32(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vpmax_u8(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { 
_vpmax_u16(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vpmax_u32(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v2f32" + )] + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vpmins.v2f32")] + fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vpmin_f32(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] + fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vpmin_s8(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.sminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] + fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vpmin_s16(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] + fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vpmin_s32(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vpmin_u8(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vpmin_u16(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] + fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vpmin_u32(a, b) } +} +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] + fn _vqabs_s8(a: int8x8_t) -> int8x8_t; + } + unsafe { _vqabs_s8(a) } +} +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] + fn _vqabsq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vqabsq_s8(a) } +} +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] + fn _vqabs_s16(a: int16x4_t) -> int16x4_t; + } + unsafe { _vqabs_s16(a) } +} +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch 
= "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] + fn _vqabsq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vqabsq_s16(a) } +} +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] + fn _vqabs_s32(a: int32x2_t) -> int32x2_t; + } + unsafe { _vqabs_s32(a) } +} +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] + fn _vqabsq_s32(a: int32x4_t) -> int32x4_t; + } + unsafe { _vqabsq_s32(a) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")] + fn _vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vqadd_s8(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")] + fn _vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqaddq_s8(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")] + fn _vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqadd_s16(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")] + fn _vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqaddq_s16(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")] + fn _vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqadd_s32(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")] + fn _vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqaddq_s32(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")] + fn _vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vqadd_s64(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")] + fn _vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqaddq_s64(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")] + fn _vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vqadd_u8(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vqadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")] + fn _vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vqaddq_u8(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")] + fn _vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vqadd_u16(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")] + fn _vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vqaddq_u16(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")] + fn _vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vqadd_u32(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")] + fn _vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vqaddq_u32(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")] + fn _vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; + } + unsafe { _vqadd_u64(a, b) } +} +#[doc = "Saturating add"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")] + fn _vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } + unsafe { _vqaddq_u64(a, b) } +} +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqaddq_s32(a, vqdmull_lane_s16::(b, c)) +} +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqaddq_s64(a, vqdmull_lane_s32::(b, c)) +} +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqaddq_s32(a, vqdmull_n_s16(b, c)) +} +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_n_s32(b, c)) +} +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqaddq_s32(a, vqdmull_s16(b, c)) +} +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqaddq_s64(a, vqdmull_s32(b, 
c)) +} +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqsubq_s32(a, vqdmull_lane_s16::(b, c)) +} +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqsubq_s64(a, vqdmull_lane_s32::(b, c)) +} +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_n_s16(b, c)) +} +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqsubq_s64(a, vqdmull_n_s32(b, c)) +} +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( 
+ not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqsubq_s32(a, vqdmull_s16(b, c)) +} +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqsubq_s64(a, vqdmull_s32(b, c)) +} +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { vqdmulh_s16(a, 
vdup_n_s16(simd_extract!(b, LANE as u32))) } +} +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) } +} +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) } +} +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) } +} +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + let b: int16x4_t = vdup_n_s16(b); + vqdmulh_s16(a, b) +} +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + let b: int16x8_t = vdupq_n_s16(b); + vqdmulhq_s16(a, b) +} +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + let b: int32x2_t = vdup_n_s32(b); + vqdmulh_s32(a, b) +} +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> 
int32x4_t { + let b: int32x4_t = vdupq_n_s32(b); + vqdmulhq_s32(a, b) +} +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i16" + )] + fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqdmulh_s16(a, b) } +} +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqdmulh.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v8i16" + )] + fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqdmulhq_s16(a, b) } +} +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v2i32" + )] + fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqdmulh_s32(a, b) } +} +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i32" + )] + fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqdmulhq_s32(a, b) } +} +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]); + vqdmull_s16(a, b) + } +} +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]); + vqdmull_s32(a, b) + } +} +#[doc = "Vector saturating doubling long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vqdmull_s16(a, vdup_n_s16(b)) +} +#[doc = "Vector saturating doubling long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vqdmull_s32(a, vdup_n_s32(b)) +} +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v4i32" + )] + fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; + } + unsafe { _vqdmull_s16(a, b) } +} +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v2i64" + )] + fn _vqdmull_s32(a: int32x2_t, b: 
int32x2_t) -> int64x2_t; + } + unsafe { _vqdmull_s32(a, b) } +} +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v8i8" + )] + fn _vqmovn_s16(a: int16x8_t) -> int8x8_t; + } + unsafe { _vqmovn_s16(a) } +} +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.sqxtn.v4i16" + )] + fn _vqmovn_s32(a: int32x4_t) -> int16x4_t; + } + unsafe { _vqmovn_s32(a) } +} +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v2i32" + )] + fn _vqmovn_s64(a: int64x2_t) -> int32x2_t; + } + unsafe { _vqmovn_s64(a) } +} +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v8i8" + )] + fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t; + } + unsafe { _vqmovn_u16(a) } +} +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v4i16" + )] + fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t; + } + unsafe { _vqmovn_u32(a) } +} +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v2i32" + )] + fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqmovn_u64(a) } +} +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtun) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v8i8" + )] + fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t; + } + unsafe { _vqmovun_s16(a) } +} +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtun) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v4i16" + )] + fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t; + } + unsafe { _vqmovun_s32(a) } +} +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtun) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v2i32" + )] + fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t; + } + unsafe { _vqmovun_s64(a) } +} +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqneg_s8(a: int8x8_t) -> 
int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + fn _vqneg_s8(a: int8x8_t) -> int8x8_t; + } + unsafe { _vqneg_s8(a) } +} +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] + fn _vqnegq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vqnegq_s8(a) } +} +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqneg_s16(a: 
int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + fn _vqneg_s16(a: int16x4_t) -> int16x4_t; + } + unsafe { _vqneg_s16(a) } +} +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + fn _vqnegq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vqnegq_s16(a) } +} +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
fn vqneg_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] + fn _vqneg_s32(a: int32x2_t) -> int32x2_t; + } + unsafe { _vqneg_s32(a) } +} +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] + fn _vqnegq_s32(a: int32x4_t) -> int32x4_t; + } + unsafe { _vqnegq_s32(a) } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let b: int16x4_t = + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulh_s16(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); + vqrdmulh_s32(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + let b: int16x4_t = + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulh_s16(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); + vqrdmulh_s32(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let b: int16x8_t = simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmulhq_s16(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let b: int32x4_t = + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulhq_s32(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { + let b: int16x8_t = simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmulhq_s16(a, b) + } +} +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let b: int32x4_t = + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulhq_s32(a, b) + } +} +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + vqrdmulh_s16(a, vdup_n_s16(b)) +} +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + vqrdmulhq_s16(a, vdupq_n_s16(b)) +} +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + vqrdmulh_s32(a, vdup_n_s32(b)) +} +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + vqrdmulhq_s32(a, vdupq_n_s32(b)) +} +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i16" + )] + fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqrdmulh_s16(a, b) } +} +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v8i16" + )] + fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqrdmulhq_s16(a, b) } +} +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v2i32" + )] + fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqrdmulh_s32(a, b) } +} 
+#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i32" + )] + fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqrdmulhq_s32(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), 
+ link_name = "llvm.aarch64.neon.sqrshl.v8i8" + )] + fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vqrshl_s8(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v16i8" + )] + fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqrshlq_s8(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqrshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i16" + )] + fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqrshl_s16(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i16" + )] + fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqrshlq_s16(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_s32(a: 
int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i32" + )] + fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqrshl_s32(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i32" + )] + fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqrshlq_s32(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v1i64" + )] + fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vqrshl_s64(a, b) } +} +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i64" + )] + fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqrshlq_s64(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i8" + )] + fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vqrshl_u8(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v16i8" + )] + fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vqrshlq_u8(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i16" + )] + fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vqrshl_u16(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i16" + )] + fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vqrshlq_u16(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i32" + )] + fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vqrshl_u32(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i32" + )] + fn _vqrshlq_u32(a: uint32x4_t, b: 
int32x4_t) -> uint32x4_t; + } + unsafe { _vqrshlq_u32(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v1i64" + )] + fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vqrshl_u64(a, b) } +} +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i64" + )] + fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vqrshlq_u64(a, b) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] + fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { + _vqrshrn_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) + } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] + fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { + _vqrshrn_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] + fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v8i8" + )] + fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vqrshrn_n_s16(a, N) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), 
+ link_name = "llvm.aarch64.neon.sqrshrn.v4i16" + )] + fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vqrshrn_n_s32(a, N) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v2i32" + )] + fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vqrshrn_n_s64(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] + fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } + unsafe { + _vqrshrn_n_u16( + a, + const { + uint16x8_t([ + -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, + -N as u16, + ]) + }, + ) + } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = 
"neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] + fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } + unsafe { + _vqrshrn_n_u32( + a, + const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }, + ) + } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] + fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v8i8" + )] + fn _vqrshrn_n_u16(a: 
uint16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqrshrn_n_u16(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v4i16" + )] + fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqrshrn_n_u32(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v2i32" + )] + fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqrshrn_n_u64(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub 
fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] + fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } + unsafe { + _vqrshrun_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) + } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] + fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } + unsafe { + _vqrshrun_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] + fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } + unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64, -N as 
i64]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v8i8" + )] + fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqrshrun_n_s16(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v4i16" + )] + fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqrshrun_n_s32(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N 
>= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v2i32" + )] + fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqrshrun_n_s64(a, N) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_s8(a, vdup_n_s8(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + vqshlq_s8(a, vdupq_n_s8(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + vqshl_s16(a, vdup_n_s16(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + vqshlq_s16(a, vdupq_n_s16(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + vqshl_s32(a, vdup_n_s32(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + vqshlq_s32(a, vdupq_n_s32(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { + 
static_assert_uimm_bits!(N, 6); + vqshl_s64(a, vdup_n_s64(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + vqshlq_s64(a, vdupq_n_s64(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_u8(a, vdup_n_s8(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + vqshlq_u8(a, vdupq_n_s8(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + vqshl_u16(a, vdup_n_s16(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + vqshlq_u16(a, vdupq_n_s16(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + vqshl_u32(a, vdup_n_s32(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + vqshlq_u32(a, vdupq_n_s32(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + vqshl_u64(a, vdup_n_s64(N as _)) +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + vqshlq_u64(a, vdupq_n_s64(N as _)) +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i8" + )] + fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vqshl_s8(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v16i8" + )] + fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqshlq_s8(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i16" + )] + fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqshl_s16(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i16" + )] + fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqshlq_s16(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v2i32" + )] + fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqshl_s32(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i32" + )] + fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqshlq_s32(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v1i64" + )] + fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vqshl_s64(a, b) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v2i64" + )] + fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqshlq_s64(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v8i8" + )] + fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vqshl_u8(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v16i8" + )] + fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { 
_vqshlq_u8(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v4i16" + )] + fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vqshl_u16(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.uqshl.v8i16" + )] + fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vqshlq_u16(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v2i32" + )] + fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vqshl_u32(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v4i32" + )] + fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vqshlq_u32(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v1i64" + )] + fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vqshl_u64(a, b) } +} +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) 
-> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v2i64" + )] + fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshlq_u64(a, b) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] + fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } + unsafe { + _vqshlu_n_s8( + a, + const { + int8x8_t([ + N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] + fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } + unsafe { + _vqshluq_n_s8( + a, + const { + int8x16_t([ + N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, + N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, 
N as i8, N as i8, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] + fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } + unsafe { + _vqshlu_n_s16( + a, + const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] + fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } + unsafe { + _vqshluq_n_s16( + a, + const { + int16x8_t([ + N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } + unsafe { + _vqshluq_n_s32( + a, + const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } + unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v8i8" + )] + fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } + unsafe { + _vqshlu_n_s8( + a, + const { + int8x8_t([ + N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v16i8" + )] + fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } + unsafe { + _vqshluq_n_s8( + a, + const { + int8x16_t([ + N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, + N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i16" + )] + fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } + unsafe { + _vqshlu_n_s16( + a, + const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v8i16" + )] + fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } + unsafe { + _vqshluq_n_s16( + a, + const { + int16x8_t([ + N as i16, N 
as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i32" + )] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i32" + )] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } + unsafe { + _vqshluq_n_s32( + a, + const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }, + ) + } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v1i64" + )] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } + unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i64" + )] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] + fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { + _vqshrn_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + 
-N as i16, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] + fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { + _vqshrn_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] + fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + 
static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v8i8" + )] + fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vqshrn_n_s16(a, N) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v4i16" + )] + fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vqshrn_n_s32(a, N) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v2i32" + )] + fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vqshrn_n_s64(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, 
assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] + fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } + unsafe { + _vqshrn_n_u16( + a, + const { + uint16x8_t([ + -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, + -N as u16, + ]) + }, + ) + } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] + fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } + unsafe { + _vqshrn_n_u32( + a, + const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }, + ) + } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] + fn _vqshrn_n_u64(a: uint64x2_t, n: 
uint64x2_t) -> uint32x2_t; + } + unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v8i8" + )] + fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqshrn_n_u16(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v4i16" + )] + fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqshrn_n_u32(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u64(a: uint64x2_t) -> 
uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v2i32" + )] + fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqshrn_n_u64(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] + fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } + unsafe { + _vqshrun_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) + } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] + fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } + unsafe { + _vqshrun_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + } +} +#[doc = "Signed saturating shift right unsigned narrow"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] + fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } + unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v8i8" + )] + fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqshrun_n_s16(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v4i16" + )] + fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqshrun_n_s32(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v2i32" + )] + fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqshrun_n_s64(a, N) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")] + fn _vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vqsub_s8(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")] + fn _vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqsubq_s8(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")] + fn _vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqsub_s16(a, b) } 
+} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")] + fn _vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqsubq_s16(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")] + fn _vqsub_s32(a: int32x2_t, b: 
int32x2_t) -> int32x2_t; + } + unsafe { _vqsub_s32(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")] + fn _vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqsubq_s32(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.ssub.sat.v1i64")] + fn _vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vqsub_s64(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")] + fn _vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqsubq_s64(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.uqsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")] + fn _vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vqsub_u8(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")] + fn _vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vqsubq_u8(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")] + fn _vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vqsub_u16(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")] + fn _vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vqsubq_u16(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_u32(a: uint32x2_t, b: 
uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")] + fn _vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vqsub_u32(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")] + fn _vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vqsubq_u32(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")] + fn _vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; + } + unsafe { _vqsub_u64(a, b) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")] + fn _vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } + unsafe { _vqsubq_u64(a, b) } +} +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let x = vraddhn_s16(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let x = vraddhn_s32(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + 
let x = vraddhn_s64(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +} +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + unsafe { + let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c))); + simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + } +} +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + unsafe { + let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c))); + simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) + } +} +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + unsafe { + let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c))); + simd_shuffle!(a, x, [0, 1, 2, 3]) + } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")] + fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + } + unsafe { _vraddhn_s16(a, b) } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")] + fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } + unsafe { _vraddhn_s32(a, b) } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")] + fn _vraddhn_s64(a: int64x2_t, b: 
int64x2_t) -> int32x2_t; + } + unsafe { _vraddhn_s64(a, b) } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Rounding 
Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) } +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f16" + )] + fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrecpe_f16(a) } +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v8f16" + )] + fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrecpeq_f16(a) } +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v2f32" + )] + fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrecpe_f32(a) } +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f32" + )] + fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrecpeq_f32(a) } +} +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v2i32" + )] + fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t; + } + unsafe { _vrecpe_u32(a) } +} +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v4i32" + )] + fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t; + } + unsafe { _vrecpeq_u32(a) } +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(frecps) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f16" + )] + fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vrecps_f16(a, b) } +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v8f16" + )] + fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vrecpsq_f16(a, b) } +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v2f32" + )] + fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vrecps_f32(a, b) } +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f32" + )] + fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vrecpsq_f32(a, b) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + unsafe { transmute(a) } +} 
+#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, 
a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[inline] +#[cfg(target_endian = "little")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + let a: 
float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + 
ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_f32(a: float32x2_t) -> 
float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", 
issue = "136306")] +pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + let a: int16x8_t = 
unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + unsafe { 
transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] 
+#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn 
vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[inline] +#[cfg(target_endian 
= "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { 
transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + unsafe { 
transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] +pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_f32(a: 
float32x2_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + let a: float32x2_t = unsafe { 
simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast 
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature 
= "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, 
[3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s8(a: 
int8x16_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: 
float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[inline] +#[cfg(target_endian = 
"little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 
0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x4_t = 
transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s32(a: int32x2_t) 
-> poly8x8_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: 
uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[inline] +#[cfg(target_endian = 
"little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + 
unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, 
[1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let 
ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { 
+ unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") 
+)] +pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + 
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] 
+#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret 
cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); 
+ simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast 
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + unsafe { 
transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u16(a: 
uint16x8_t) -> uint64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[inline] +#[cfg(target_endian = 
"big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { + let a: 
uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + 
unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable 
= "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { + let 
a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + 
simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} 
+#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) 
+)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, 
ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { + let a: poly16x8_t = unsafe { 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + unsafe { + let ret_val: int16x8_t = 
transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_p128(a: p128) -> 
uint64x2_t { + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( 
+ all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> 
poly64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = 
transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_p64(a: poly64x1_t) -> 
int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_p64(a: 
poly64x2_t) -> p128 { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( 
+ target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast 
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { + let a: poly64x2_t = unsafe { 
simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev16) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev16) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev16) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev16) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev16) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vrev16q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev16) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { + unsafe { 
simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +} +#[doc = 
"Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(rev32) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +} +#[doc = "Reversing vector elements (swap 
endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch 
= "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vrev64q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +} +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +} +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +} +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] + fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vrhadd_s8(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] + fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vrhaddq_s8(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vrhadd_s16(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vrhaddq_s16(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vrhadd_s32(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; 
+ } + unsafe { _vrhaddq_s32(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] + fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vrhadd_u8(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrhaddu.v16i8")] + fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vrhaddq_u8(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] + fn _vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vrhadd_u16(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), 
+ link_name = "llvm.aarch64.neon.urhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] + fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vrhaddq_u16(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] + fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vrhadd_u32(a, b) } +} +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + 
unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] + fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vrhaddq_u32(a, b) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrndn_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v4f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")] + fn _vrndn_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrndn_f16(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v8f16" + )] + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")] + fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrndnq_f16(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v2f32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")] + fn _vrndn_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrndn_f32(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v4f32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")] + fn _vrndnq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrndnq_f32(a) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i8" + )] + fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vrshl_s8(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> 
int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v16i8" + )] + fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vrshlq_s8(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v4i16" + )] + fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vrshl_s16(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i16" + )] + fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vrshlq_s16(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i32" + )] + fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vrshl_s32(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v4i32" + )] + fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vrshlq_s32(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v1i64" + )] + fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vrshl_s64(a, b) } +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i64" + )] + fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vrshlq_s64(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i8" + )] + fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vrshl_u8(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v16i8" + )] + fn _vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vrshlq_u8(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i16" + )] + fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vrshl_u16(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i16" + )] + fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vrshlq_u16(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i32" + )] + fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vrshl_u32(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i32" + )] + fn _vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vrshlq_u32(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v1i64" + )] + fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vrshl_u64(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i64" + )] + fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vrshlq_u64(a, b) } +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_s8(a, vdup_n_s8(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vrshlq_s8(a, vdupq_n_s8(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + vrshl_s16(a, vdup_n_s16(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vrshlq_s16(a, vdupq_n_s16(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + vrshl_s32(a, vdup_n_s32(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vrshlq_s32(a, vdupq_n_s32(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + vrshl_s64(a, vdup_n_s64(-N as _)) +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + vrshlq_s64(a, vdupq_n_s64(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_u8(a, vdup_n_s8(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vrshlq_u8(a, vdupq_n_s8(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_u16(a: uint16x4_t) -> 
uint16x4_t { + static_assert!(N >= 1 && N <= 16); + vrshl_u16(a, vdup_n_s16(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vrshlq_u16(a, vdupq_n_s16(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + vrshl_u32(a, vdup_n_s32(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vrshlq_u32(a, vdupq_n_s32(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + vrshl_u64(a, vdup_n_s64(-N as _)) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + vrshlq_u64(a, vdupq_n_s64(-N as _)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] + fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { + _vrshrn_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) + } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] + fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { + _vrshrn_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] + fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v8i8" + )] + fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vrshrn_n_s16(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v4i16" + )] + 
fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vrshrn_n_s32(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v2i32" + )] + fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vrshrn_n_s64(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rshrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vrshrn_n_s16::(transmute(a))) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rshrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vrshrn_n_s32::(transmute(a))) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rshrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vrshrn_n_s64::(transmute(a))) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrsqrte.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f16" + )] + fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrsqrte_f16(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v8f16" + )] + fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrsqrteq_f16(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v2f32" + )] + fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrsqrte_f32(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f32" + )] + fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrsqrteq_f32(a) } +} +#[doc = "Unsigned reciprocal square root estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursqrte) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrsqrte.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ursqrte.v2i32" + )] + fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t; + } + unsafe { _vrsqrte_u32(a) } +} +#[doc = "Unsigned reciprocal square root estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursqrte) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ursqrte.v4i32" + )] + fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t; + } + unsafe { _vrsqrteq_u32(a) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f16" + )] + fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vrsqrts_f16(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v8f16" + )] + fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vrsqrtsq_f16(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v2f32" + )] + fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vrsqrts_f32(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f32" + )] + fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vrsqrtsq_f32(a, b) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshr_n_s8::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshrq_n_s8::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshr_n_s16::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshrq_n_s16::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshr_n_s32::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, 
N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshrq_n_s32::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshr_n_s64::(b)) } +} +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshrq_n_s64::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshr_n_u8::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshrq_n_u8::(b)) } +} +#[doc = "Unsigned rounding 
shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshr_n_u16::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshrq_n_u16::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshr_n_u32::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshrq_n_u32::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshr_n_u64::(b)) } +} +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshrq_n_u64::(b)) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrsubhn.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v8i8" + )] + fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + } + unsafe { _vrsubhn_s16(a, b) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v4i16" + )] + fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } + unsafe { _vrsubhn_s32(a, b) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_s64(a: 
int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v2i32" + )] + fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; + } + unsafe { _vrsubhn_s64(a, b) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let a: uint16x8_t = unsafe { 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vrsubhn_s16(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint32x4_t = unsafe { simd_shuffle!(b, 
b, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(vrsubhn_s32(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(vrsubhn_s64(transmute(a), transmute(b))); + 
simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] 
+#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { 
simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] +#[inline] +#[target_feature(enable 
= "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, 
LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] 
+pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "SHA1 hash update accelerator, choose."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1c))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1c" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] + fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) } +} +#[doc = "SHA1 fixed rotate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1h))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1h_u32(hash_e: u32) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")] + fn _vsha1h_u32(hash_e: u32) -> u32; + } + unsafe { _vsha1h_u32(hash_e) } +} +#[doc = "SHA1 hash update accelerator, majority"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1m))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1m" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] + fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) } +} +#[doc = "SHA1 hash update accelerator, parity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1p))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) 
-> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1p" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] + fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) } +} +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1su0))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] + fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) } +} +#[doc = "SHA1 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1su1))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: 
uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] + fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1su1q_u32(tw0_3, w12_15) } +} +#[doc = "SHA1 schedule update accelerator, upper part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256h2))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h2" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] + fn _vsha256h2q_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) } +} +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256h))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn 
vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] + fn _vsha256hq_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) } +} +#[doc = "SHA256 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256su0))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] + fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha256su0q_u32(w0_3, w4_7) } +} +#[doc = "SHA256 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] +#[inline] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256su1))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] + fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) + -> uint32x4_t; + } + unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftins_v16i8(a, b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftins_v1i64(a, b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftins_v2i32(a, b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftins_v2i64(a, b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftins_v4i16(a, 
b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftins_v4i32(a, b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftins_v8i16(a, b, c) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftins_v8i8(a: int8x8_t, b: int8x8_t, 
c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftins_v8i8(a, b, c) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdup_n_s8(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdupq_n_s8(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 
2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdup_n_s16(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdupq_n_s16(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdup_n_s32(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdupq_n_s32(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdup_n_s64(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdupq_n_s64(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdup_n_u8(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdupq_n_u8(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdup_n_u16(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdupq_n_u16(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdup_n_u32(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdupq_n_u32(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdup_n_u64(N as _)) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdupq_n_u64(N as _)) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i8" + )] + fn 
_vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vshl_s8(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v16i8" + )] + fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vshlq_s8(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.sshl.v4i16" + )] + fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vshl_s16(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i16" + )] + fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vshlq_s16(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i32" + )] + fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vshl_s32(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i32" + )] + fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vshlq_s32(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v1i64" + )] + fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vshl_s64(a, b) } +} +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i64" + )] + fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vshlq_s64(a, b) } +} +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v8i8" + )] + fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vshl_u8(a, b) } +} +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v16i8" + )] + fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vshlq_u8(a, b) } +} +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> 
uint16x4_t {
    unsafe extern "unadjusted" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v4i16")]
        fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
    }
    unsafe { _vshl_u16(a, b) }
}
// NOTE(review): this region is machine-generated (stdarch-gen-arm). The
// extracted text had lost the `<const N: i32>` generic parameter lists
// (angle-bracket stripping); they are restored below — `static_assert!(N ...)`
// and `#[rustc_legacy_const_generics(...)]` require them.
//
// ---- vshl: unsigned shift left by a (possibly negative) per-lane amount ----
#[doc = "Unsigned Shift left"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
    unsafe extern "unadjusted" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v8i16")]
        fn _vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
    }
    unsafe { _vshlq_u16(a, b) }
}
#[doc = "Unsigned Shift left"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
    unsafe extern "unadjusted" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")]
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v2i32")]
        fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
    }
    unsafe { _vshl_u32(a, b) }
}
#[doc = "Unsigned Shift left"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
    unsafe extern "unadjusted" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v4i32")]
        fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
    }
    unsafe { _vshlq_u32(a, b) }
}
#[doc = "Unsigned Shift left"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
    unsafe extern "unadjusted" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v1i64")]
        fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
    }
    unsafe { _vshl_u64(a, b) }
}
#[doc = "Unsigned Shift left"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
    unsafe extern "unadjusted" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v2i64")]
        fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
    }
    unsafe { _vshlq_u64(a, b) }
}
// ---- vshll_n: shift left long (widen) by an immediate 0..=lane_bits ----
// NOTE(review): the generator labels the unsigned vshll variants
// "Signed shift left long" too; kept as generated to avoid diverging
// from the upstream generated file.
#[doc = "Signed shift left long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshll, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
    static_assert!(N >= 0 && N <= 16);
    unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) }
}
#[doc = "Signed shift left long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshll, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
    static_assert!(N >= 0 && N <= 32);
    unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) }
}
#[doc = "Signed shift left long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshll, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
    static_assert!(N >= 0 && N <= 8);
    unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) }
}
#[doc = "Signed shift left long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushll, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
    static_assert!(N >= 0 && N <= 16);
    unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) }
}
#[doc = "Signed shift left long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushll, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
    static_assert!(N >= 0 && N <= 32);
    unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) }
}
#[doc = "Signed shift left long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushll, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
    static_assert!(N >= 0 && N <= 8);
    unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) }
}
// ---- vshr_n: shift right by an immediate 1..=lane_bits.
// Signed: a shift by lane_bits is clamped to lane_bits-1 (arithmetic shift
// saturates to the sign bit). Unsigned: a shift by lane_bits returns zero. ----
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
    static_assert!(N >= 1 && N <= 8);
    let n: i32 = if N == 8 { 7 } else { N };
    unsafe { simd_shr(a, vdup_n_s8(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
    static_assert!(N >= 1 && N <= 8);
    let n: i32 = if N == 8 { 7 } else { N };
    unsafe { simd_shr(a, vdupq_n_s8(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
    static_assert!(N >= 1 && N <= 16);
    let n: i32 = if N == 16 { 15 } else { N };
    unsafe { simd_shr(a, vdup_n_s16(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
    static_assert!(N >= 1 && N <= 16);
    let n: i32 = if N == 16 { 15 } else { N };
    unsafe { simd_shr(a, vdupq_n_s16(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
    static_assert!(N >= 1 && N <= 32);
    let n: i32 = if N == 32 { 31 } else { N };
    unsafe { simd_shr(a, vdup_n_s32(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
    static_assert!(N >= 1 && N <= 32);
    let n: i32 = if N == 32 { 31 } else { N };
    unsafe { simd_shr(a, vdupq_n_s32(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 { 63 } else { N };
    unsafe { simd_shr(a, vdup_n_s64(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 { 63 } else { N };
    unsafe { simd_shr(a, vdupq_n_s64(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
    static_assert!(N >= 1 && N <= 8);
    let n: i32 = if N == 8 {
        // A full-width logical shift right is all zeros.
        return vdup_n_u8(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdup_n_u8(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
    static_assert!(N >= 1 && N <= 8);
    let n: i32 = if N == 8 {
        return vdupq_n_u8(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdupq_n_u8(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
    static_assert!(N >= 1 && N <= 16);
    let n: i32 = if N == 16 {
        return vdup_n_u16(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdup_n_u16(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
    static_assert!(N >= 1 && N <= 16);
    let n: i32 = if N == 16 {
        return vdupq_n_u16(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdupq_n_u16(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
    static_assert!(N >= 1 && N <= 32);
    let n: i32 = if N == 32 {
        return vdup_n_u32(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdup_n_u32(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
    static_assert!(N >= 1 && N <= 32);
    let n: i32 = if N == 32 {
        return vdupq_n_u32(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdupq_n_u32(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 {
        return vdup_n_u64(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdup_n_u64(n as _)) }
}
#[doc = "Shift right"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 {
        return vdupq_n_u64(0);
    } else {
        N
    };
    unsafe { simd_shr(a, vdupq_n_u64(n as _)) }
}
// ---- vshrn_n: shift right by an immediate, then narrow to half-width lanes ----
#[doc = "Shift right narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
    static_assert!(N >= 1 && N <= 8);
    unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) }
}
#[doc = "Shift right narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
    static_assert!(N >= 1 && N <= 16);
    unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) }
}
#[doc = "Shift right narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
    static_assert!(N >= 1 && N <= 32);
    unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) }
}
#[doc = "Shift right narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
    static_assert!(N >= 1 && N <= 8);
    unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) }
}
#[doc = "Shift right narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
    static_assert!(N >= 1 && N <= 16);
    unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) }
}
#[doc = "Shift right narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
pub fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
    static_assert!(N >= 1 && N <= 32);
    unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) }
}
// ---- vsli_n: shift left and insert (Arm-only fallbacks via vshiftins_*) ----
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    static_assert_uimm_bits!(N, 3);
    vshiftins_v8i8(a, b, int8x8_t::splat(N as i8))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
    static_assert_uimm_bits!(N, 3);
    vshiftins_v16i8(a, b, int8x16_t::splat(N as i8))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    static_assert_uimm_bits!(N, 4);
    vshiftins_v4i16(a, b, int16x4_t::splat(N as i16))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
    static_assert_uimm_bits!(N, 4);
    vshiftins_v8i16(a, b, int16x8_t::splat(N as i16))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    static_assert!(N >= 0 && N <= 31);
    vshiftins_v2i32(a, b, int32x2_t::splat(N))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
    static_assert!(N >= 0 && N <= 31);
    vshiftins_v4i32(a, b, int32x4_t::splat(N))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    static_assert!(N >= 0 && N <= 63);
    vshiftins_v1i64(a, b, int64x1_t::splat(N as i64))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
    static_assert!(N >= 0 && N <= 63);
    vshiftins_v2i64(a, b, int64x2_t::splat(N as i64))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    static_assert_uimm_bits!(N, 3);
    // Unsigned lanes reuse the signed shift-insert intrinsic bit-for-bit.
    unsafe {
        transmute(vshiftins_v8i8(
            transmute(a),
            transmute(b),
            int8x8_t::splat(N as i8),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
    static_assert_uimm_bits!(N, 3);
    unsafe {
        transmute(vshiftins_v16i8(
            transmute(a),
            transmute(b),
            int8x16_t::splat(N as i8),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
    static_assert_uimm_bits!(N, 4);
    unsafe {
        transmute(vshiftins_v4i16(
            transmute(a),
            transmute(b),
            int16x4_t::splat(N as i16),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
    static_assert_uimm_bits!(N, 4);
    unsafe {
        transmute(vshiftins_v8i16(
            transmute(a),
            transmute(b),
            int16x8_t::splat(N as i16),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
    static_assert!(N >= 0 && N <= 31);
    unsafe {
        transmute(vshiftins_v2i32(
            transmute(a),
            transmute(b),
            int32x2_t::splat(N as i32),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
    static_assert!(N >= 0 && N <= 31);
    unsafe {
        transmute(vshiftins_v4i32(
            transmute(a),
            transmute(b),
            int32x4_t::splat(N as i32),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    static_assert!(N >= 0 && N <= 63);
    unsafe {
        transmute(vshiftins_v1i64(
            transmute(a),
            transmute(b),
            int64x1_t::splat(N as i64),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
    static_assert!(N >= 0 && N <= 63);
    unsafe {
        transmute(vshiftins_v2i64(
            transmute(a),
            transmute(b),
            int64x2_t::splat(N as i64),
        ))
    }
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
#[rustc_legacy_const_generics(2)]
pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t)
-> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t::splat(N as i8), + )) + } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t::splat(N as i8), + )) + } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t::splat(N as i16), + )) + } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + transmute(vshiftins_v8i16( + 
transmute(a), + transmute(b), + int16x8_t::splat(N as i16), + )) + } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshr_n_s8::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshrq_n_s8::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshr_n_s16::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshrq_n_s16::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshr_n_s32::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshrq_n_s32::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, 
vshr_n_s64::(b)) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshrq_n_s64::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshr_n_u8::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshrq_n_u8::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshr_n_u16::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshrq_n_u16::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshr_n_u32::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, 
vshrq_n_u32::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshr_n_u64::(b)) } +} +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshrq_n_u64::(b)) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(1 <= N && N <= 8); + vshiftins_v8i8(a, b, int8x8_t::splat(-N as i8)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(1 <= N && N <= 8); + vshiftins_v16i8(a, b, int8x16_t::splat(-N as i8)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(1 <= N && N <= 16); + vshiftins_v4i16(a, b, int16x4_t::splat(-N as i16)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(1 <= N && N <= 16); + 
vshiftins_v8i16(a, b, int16x8_t::splat(-N as i16)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(1 <= N && N <= 32); + vshiftins_v2i32(a, b, int32x2_t::splat(-N as i32)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(1 <= N && N <= 32); + vshiftins_v4i32(a, b, int32x4_t::splat(-N as i32)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(1 <= N && N <= 64); + vshiftins_v1i64(a, b, int64x1_t::splat(-N as i64)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] +#[inline] +#[target_feature(enable = 
"neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(1 <= N && N <= 64); + vshiftins_v2i64(a, b, int64x2_t::splat(-N as i64)) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(1 <= N && N <= 8); + unsafe { + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t::splat(-N as i8), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(1 <= N && N <= 8); + unsafe { + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t::splat(-N as i8), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(1 <= N && N <= 16); + unsafe { + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t::splat(-N as i16), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(1 <= N && N <= 16); + unsafe { + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t::splat(-N as i16), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(1 <= N && N <= 32); + unsafe { + transmute(vshiftins_v2i32( + transmute(a), + transmute(b), + int32x2_t::splat(-N), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub 
fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(1 <= N && N <= 32); + unsafe { + transmute(vshiftins_v4i32( + transmute(a), + transmute(b), + int32x4_t::splat(-N), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(1 <= N && N <= 64); + unsafe { + transmute(vshiftins_v1i64( + transmute(a), + transmute(b), + int64x1_t::splat(-N as i64), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(1 <= N && N <= 64); + unsafe { + transmute(vshiftins_v2i64( + transmute(a), + transmute(b), + int64x2_t::splat(-N as i64), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(1 <= N && 
N <= 8); + unsafe { + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t::splat(-N as i8), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(1 <= N && N <= 8); + unsafe { + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t::splat(-N as i8), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(1 <= N && N <= 16); + unsafe { + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t::splat(-N as i16), + )) + } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(1 <= N && N <= 16); + unsafe { + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), 
+ int16x8_t::splat(-N as i16), + )) + } +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + vst1_v4f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + vst1q_v8f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) 
{ + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")] + fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t); + } + _vst1_f16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")] + fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); + } + _vst1q_f16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4f16.p0" + )] + fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); + } + _vst1_f16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8f16.p0" + )] + fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); + } + _vst1q_f16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")] + fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); + } + _vst1_f16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst1x3.p0.v8f16")] + fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t); + } + _vst1q_f16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4f16.p0" + )] + fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); + } + _vst1_f16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8f16.p0" + )] + fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); + } + _vst1q_f16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")] + fn _vst1_f16_x4( + ptr: *mut f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ); + } + _vst1_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")] + fn _vst1q_f16_x4( + ptr: *mut f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ); + } + _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4f16.p0" + )] + fn _vst1_f16_x4( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ptr: *mut f16, + ); + } + _vst1_f16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8f16.p0" + )] + fn _vst1q_f16_x4( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ptr: *mut f16, + ); + } + _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + vst1_v2f32( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + vst1q_v4f32( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + vst1_v8i8(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + vst1q_v16i8(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element structures from one, two, 
three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + vst1_v4i16(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + vst1q_v8i16(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + vst1_v2i32(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element 
structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + vst1q_v4i32(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + vst1_v1i64(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + vst1q_v2i64(ptr as *const i8, a, crate::mem::align_of::() as i32) +} +#[doc = 
"Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + vst1_v8i8( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + vst1q_v16i8( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + vst1_v4i16( + 
ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { + vst1q_v8i16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + vst1_v2i32( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vst1.32"))] +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + vst1q_v4i32( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + vst1_v1i64( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + vst1q_v2i64( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + vst1_v8i8( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + vst1q_v16i8( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + vst1_v4i16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = 
"arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + vst1q_v8i16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { + vst1_v1i64( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { + vst1q_v2i64( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "## Safety"] +#[doc = 
" * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")] + fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t); + } + _vst1_f32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")] + fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t); + } + _vst1q_f32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2f32.p0" + )] + fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32); + } + _vst1_f32_x2(b.0, b.1, a) +} +#[doc = "Store 
multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4f32.p0" + )] + fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32); + } + _vst1q_f32_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2f32.p0" + )] + fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); + } + _vst1_f32_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn 
vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4f32.p0" + )] + fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); + } + _vst1q_f32_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")] + fn _vst1_f32_x4( + ptr: *mut f32, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + ); + } + _vst1_f32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")] + fn _vst1q_f32_x4( + ptr: *mut f32, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + ); + } + _vst1q_f32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2f32.p0" + )] + fn _vst1_f32_x4( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + ptr: *mut f32, + ); + } + _vst1_f32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4f32.p0" + )] + fn _vst1q_f32_x4( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + ptr: *mut f32, + ); + } + _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_lane_f16(a: *mut f16, b: float16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { + static_assert_uimm_bits!(LANE, 3); + 
*a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from 
one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8i8.p0" + )] + fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v16i8.p0" + )] + fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4i16.p0" + )] + fn _vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8i16.p0" + )] + fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, 
ptr: *mut i16); + } + _vst1q_s16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2i32.p0" + )] + fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4i32.p0" + )] + fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, 
assert_instr(st1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v1i64.p0" + )] + fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2i64.p0" + )] + fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")] + fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t); + } + _vst1_s8_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")] + fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t); + } + _vst1q_s8_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")] + fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t); + } + _vst1_s16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")] + fn _vst1q_s16_x2(ptr: *mut i16, 
a: int16x8_t, b: int16x8_t); + } + _vst1q_s16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")] + fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t); + } + _vst1_s32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")] + fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t); + } + _vst1q_s32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn 
vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")] + fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t); + } + _vst1_s64_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")] + fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t); + } + _vst1q_s64_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i8.p0" + )] + fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v16i8.p0" + )] + fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i16.p0" + )] + fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i16.p0" + )] + fn _vst1q_s16_x3(a: int16x8_t, b: 
int16x8_t, c: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i32.p0" + )] + fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i32.p0" + )] + fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
+#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v1i64.p0" + )] + fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i64.p0" + )] + fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")] + fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); + } + _vst1_s8_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from 
one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")] + fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); + } + _vst1q_s8_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")] + fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + } + _vst1_s16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")] + fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); + } + _vst1q_s16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")] + fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + } + _vst1_s32_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")] + fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + } + _vst1q_s32_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable 
= "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")] + fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + } + _vst1_s64_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")] + fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); + } + _vst1q_s64_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i8.p0" + )] + fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple 
single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v16i8.p0" + )] + fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i16.p0" + )] + fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, 
assert_instr(st1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i16.p0" + )] + fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i32.p0" + )] + fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i32.p0" + )] + fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple 
single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v1i64.p0" + )] + fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i64.p0" + )] + fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
+#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")] + fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); + } + _vst1_s8_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")] + fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); + } + _vst1q_s8_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")] + fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); + } + _vst1_s16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")] + fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); + } + _vst1q_s16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")] + fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); + } + _vst1_s32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")] + fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t); + } + _vst1q_s32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")] + fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); + } + _vst1_s64_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")] + fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); + } + _vst1q_s64_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { + 
vst1q_s8_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { + vst1_s32_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { + vst1_s32_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { + vst1_s32_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { + vst1q_s32_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { + vst1q_s32_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { + vst1q_s32_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") 
+)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to 
one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) +} +#[doc = 
"Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) 
{ + vst1q_s16_x3(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v1i64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] + fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); + } + _vst1_v1i64(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2f32)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] + fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); + } + _vst1_v2f32(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2i32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] + fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); + } + _vst1_v2i32(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4i16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1_v4i16(addr: 
*const i8, val: int16x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] + fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); + } + _vst1_v4i16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v8i8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] + fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); + } + _vst1_v8i8(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v16i8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] + fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); + } + _vst1q_v16i8(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four 
registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v2i64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] + fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); + } + _vst1q_v2i64(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")] + fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); + } + _vst1q_v4f32(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4i32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] + fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); + } + _vst1q_v4i32(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8i16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] + fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); + } + _vst1q_v8i16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] + fn 
_vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); + } + _vst1_v4f16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] + fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); + } + _vst1q_v8f16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f16.p0" + )] + fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); + } + _vst2_f16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8f16.p0" + )] + fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); + } + _vst2q_f16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vst2.p0.v4f16")] + fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); + } + _vst2_f16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")] + fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); + } + _vst2q_f16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v2f32.p0" + )] + fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8); + } + _vst2_f32(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f32.p0" + )] + fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8); + } + _vst2q_f32(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8i8.p0" + )] + fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } + _vst2_s8(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v16i8.p0" + )] + fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } + _vst2q_s8(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4i16.p0" + )] + fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8); + } + _vst2_s16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8i16.p0" + )] + fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8); + } + _vst2q_s16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v2i32.p0" + )] + fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8); + } + _vst2_s32(b.0, b.1, a as _) +} +#[doc = 
"Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4i32.p0" + )] + fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8); + } + _vst2q_s32(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")] + fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + } + _vst2_f32(a as _, b.0, b.1, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst2.v4f32.p0")] + fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32); + } + _vst2q_f32(a as _, b.0, b.1, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")] + fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); + } + _vst2_s8(a as _, b.0, b.1, 1) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")] + fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); + } + _vst2q_s8(a as _, b.0, b.1, 1) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s16(a: 
*mut i16, b: int16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")] + fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32); + } + _vst2_s16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")] + fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32); + } + _vst2q_s16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")] + fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32); + } + _vst2_s32(a as _, b.0, b.1, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")] + fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32); + } + _vst2q_s32(a as _, b.0, b.1, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f16.p0" + )] + fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8f16.p0" + )] + fn _vst2q_lane_f16(a: float16x8_t, b: 
float16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")] + fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); + } + _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")] + fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); + } + _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2f32.p0" + )] + fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f32.p0" + )] + fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] 
+#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8i8.p0" + )] + fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s8(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i16.p0" + )] + fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.st2lane.v8i16.p0" + )] + fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2i32.p0" + )] + fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i32.p0" + )] + fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")] + fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + } + _vst2_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")] + fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); + } + _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] 
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")] + fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); + } + _vst2_lane_s8(a as _, b.0, b.1, LANE, 1) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")] + fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32); + } + _vst2_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")] + fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: 
i32); + } + _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")] + fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); + } + _vst2_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")] + fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32); + } + _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_lane_u8(a: *mut u8, b: uint8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2q_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2_lane_s32::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2q_lane_s32::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature 
= "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_lane_p16(a: *mut p16, b: poly16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2q_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vst2_p64(a: *mut p64, b: poly64x1x2_t) { + vst2_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")] + fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32); + } + _vst2_s64(a as _, b.0, b.1, 8) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v1i64.p0" + )] + fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8); + } + _vst2_s64(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { + vst2_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) { + vst2_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { + vst2q_s8(transmute(a), transmute(b)) +} +#[doc = 
"Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) { + vst2_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { + vst2q_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { + vst2_s32(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { + vst2q_s32(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) { + vst2_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { + vst2q_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) { + vst2_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { + vst2q_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f16")] + fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); + } + _vst3_f16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern 
"unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")] + fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32); + } + _vst3q_f16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f16.p0" + )] + fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); + } + _vst3_f16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8f16.p0" + )] + fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); + } + _vst3q_f16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2f32")] + fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32); + } + _vst3_f32(a as _, b.0, b.1, b.2, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f32")] + fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32); + } + _vst3q_f32(a as _, b.0, b.1, b.2, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i8")] + fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32); + } + _vst3_s8(a as _, b.0, b.1, b.2, 1) +} +#[doc = "Store multiple 3-element structures from three registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v16i8")] + fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32); + } + _vst3q_s8(a as _, b.0, b.1, b.2, 1) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i16")] + fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32); + } + _vst3_s16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i16")] + fn _vst3q_s16(ptr: *mut 
i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32); + } + _vst3q_s16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2i32")] + fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32); + } + _vst3_s32(a as _, b.0, b.1, b.2, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i32")] + fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32); + } + _vst3q_s32(a as _, b.0, b.1, b.2, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f32(a: 
*mut f32, b: float32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v2f32.p0" + )] + fn _vst3_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8); + } + _vst3_f32(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f32.p0" + )] + fn _vst3q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut i8); + } + _vst3q_f32(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8i8.p0" + )] + fn _vst3_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + } + _vst3_s8(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] +#[doc = "## Safety"] +#[doc = 
" * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v16i8.p0" + )] + fn _vst3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } + _vst3q_s8(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4i16.p0" + )] + fn _vst3_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8); + } + _vst3_s16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8i16.p0" + )] + fn _vst3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8); + } + _vst3q_s16(b.0, b.1, 
b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v2i32.p0" + )] + fn _vst3_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8); + } + _vst3_s32(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4i32.p0" + )] + fn _vst3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8); + } + _vst3q_s32(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f16")] + fn _vst3_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ); + } + _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")] + fn _vst3q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4f16.p0" + )] + fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); + } + _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8f16.p0" + )] + fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")] + fn _vst3_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i32, + size: i32, + ); + } + _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) +} 
+#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")] + fn _vst3q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")] + fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); + } + _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")] + fn _vst3_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ); + } + _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")] + fn _vst3q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { + 
static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")] + fn _vst3_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, + ); + } + _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")] + fn _vst3q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2f32.p0" + )] + fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8); + 
} + _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4f32.p0" + )] + fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i8.p0" + )] + fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4i16.p0" + )] + fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i16.p0" + )] + fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2i32.p0" + )] + fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4i32.p0" + )] + fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_u16(a: *mut 
u16, b: uint16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3_lane_s32::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3q_lane_s32::(transmute(a), transmute(b)) +} 
+#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_p16(a: *mut p16, b: poly16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v1i64.p0" + )] + fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8); + } + _vst3_s64(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v1i64")] + fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32); + } + _vst3_s64(a as _, b.0, b.1, b.2, 8) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { + vst3_s64(transmute(a), 
transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { + vst3_s32(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { + vst3q_s32(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")] + fn _vst4_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + size: i32, + ); + } + _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")] + fn _vst4q_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, 
+ size: i32, + ); + } + _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4f16.p0" + )] + fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); + } + _vst4_f16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v8f16.p0" + )] + fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); + } + _vst4q_f16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")] + fn _vst4_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + size: i32, + ); + } + _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")] + fn _vst4q_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + size: i32, + ); + } + _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")] + fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32); + } + _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")] + fn _vst4q_s8( + ptr: *mut i8, + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: int8x16_t, + size: i32, + ); + } + _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")] + fn _vst4_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + size: i32, + ); + } + _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vst4.p0.v8i16")] + fn _vst4q_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + size: i32, + ); + } + _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")] + fn _vst4_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + size: i32, + ); + } + _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")] + fn _vst4q_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + size: i32, + ); + } + _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v2f32.p0" + )] + fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8); + } + _vst4_f32(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4f32.p0" + )] + fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8); + } + _vst4q_f32(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v8i8.p0" + )] + fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } + _vst4_s8(b.0, b.1, b.2, 
b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v16i8.p0" + )] + fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } + _vst4q_s8(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4i16.p0" + )] + fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8); + } + _vst4_s16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: 
int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v8i16.p0" + )] + fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8); + } + _vst4q_s16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v2i32.p0" + )] + fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8); + } + _vst4_s32(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4i32.p0" + )] + fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8); + } + _vst4q_s32(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")] + fn _vst4_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ); + } + _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8f16")] + fn _vst4q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f16.p0" + )] + fn _vst4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8f16.p0" + )] + fn _vst4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] 
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")] + fn _vst4_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ); + } + _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")] + fn _vst4q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + 
static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")] + fn _vst4_lane_s8( + ptr: *mut i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ); + } + _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")] + fn _vst4_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ); + } + _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")] + fn _vst4q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: 
int16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")] + fn _vst4_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: i32, + ); + } + _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")] + fn _vst4q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2f32.p0" + )] + fn _vst4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f32.p0" + )] + fn _vst4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8i8.p0" + )] + fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); + } + _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4i16.p0" + )] + fn _vst4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub 
unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8i16.p0" + )] + fn _vst4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2i32.p0" + )] + fn _vst4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4i32.p0" + )] + fn _vst4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vst4_lane_u16(a: *mut u16, b: uint16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4_lane_s32::(transmute(a), 
transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4q_lane_s32::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { + vst4_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v1i64")] + fn _vst4_s64( + ptr: *mut i8, + a: int64x1_t, + b: int64x1_t, + c: int64x1_t, + d: int64x1_t, + size: i32, + ); + } + _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.st4.v1i64.p0" + )] + fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8); + } + _vst4_s64(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) { + vst4_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) { + vst4_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) { + vst4q_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) { + vst4_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) { + vst4q_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) { + vst4_s32(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vst4q_u32(a: *mut u32, b: uint32x4x4_t) { + vst4q_s32(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) { + vst4_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) { + vst4q_s8(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) { + vst4_s16(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { + vst4q_s16(transmute(a), transmute(b)) +} +#[doc = "Store SIMD&FP register (immediate offset)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { + *a = b +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let d: int8x8_t = vsubhn_s16(b, c); + unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800") +)] +pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let d: int16x4_t = vsubhn_s32(b, c); + unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let d: int32x2_t = vsubhn_s64(b, c); + unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let d: uint8x8_t = vsubhn_u16(b, c); + unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let d: uint16x4_t = vsubhn_u32(b, c); + unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let d: uint32x2_t = vsubhn_u64(b, c); + unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch 
= "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + let c: i32x4 = i32x4::new(16, 16, 16, 16); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { 
+ let c: i64x2 = i64x2::new(32, 32); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let c: u32x4 = u32x4::new(16, 16, 16, 16); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +} +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let c: u64x2 = u64x2::new(32, 32); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +} +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) + } +} +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) + } +} +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { + let c: int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) + } +} +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) + } +} +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) + } +} +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) + } +} +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") 
+)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Dot product index form with signed 
and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vusdot_s32(a, transmute(c), b) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + 
vusdotq_s32(a, transmute(c), b) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")] + fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vtbl1(a, b) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vtbl1(a, b) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbl1(transmute(a), transmute(b))) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature 
= "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbl1(transmute(a), transmute(b))) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbl1(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = 
"arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")] + fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vtbl2(a, b, c) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vtbl2(a.0, a.1, b) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x8x2_t = a; 
+ a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x8x2_t = a; + a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")] + fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + } + unsafe { _vtbl3(a, b, c, d) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + vtbl3(a.0, a.1, a.2, b) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] 
+#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x8x3_t = a; + a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x8x3_t = a; + a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 
4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")] + fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; + } + unsafe { _vtbl4(a, b, c, d, e) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + vtbl4(a.0, a.1, a.2, a.3, b) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + 
transmute(b), + )) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x8x4_t = a; + a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.3 = unsafe { simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x8x4_t = a; + a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + a.3 = unsafe { simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")] + fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vtbx1(a, b, c) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + vtbx1(a, b, c) +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] 
+#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] 
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let b: poly8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")] + fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + } + unsafe { _vtbx2(a, b, c, d) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + vtbx2(a, b.0, b.1, c) +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, 
assert_instr(vtbx))] +pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x8x2_t = b; + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] 
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x8x2_t = b; + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")] + fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; + } + unsafe { _vtbx3(a, b, c, d, e) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + vtbx3(a, b.0, b.1, b.2, c) +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x8x3_t = b; + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + 
transmute(c), + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x8x3_t = b; + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")] + fn _vtbx4( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + e: int8x8_t, + f: int8x8_t, + ) -> int8x8_t; + } + unsafe { _vtbx4(a, b, c, d, e, f) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[inline] +#[cfg(target_endian = "little")] 
+#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + unsafe { + vtbx4( + a, + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + ) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + let mut b: int8x8x4_t = b; + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = vtbx4( + a, + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + 
transmute(b.2), + transmute(b.3), + transmute(c), + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x8x4_t = b; + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[inline] +#[cfg(target_endian = "big")] 
+#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x8x4_t = b; + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; + b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + unsafe { + let a1: int32x2_t 
= simd_shuffle!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + unsafe { + let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + unsafe { + let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + unsafe { + let a1: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + unsafe { + let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + unsafe { + let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + unsafe { + let a1: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: uint8x16_t = 
simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + unsafe { + let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + unsafe { + let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + unsafe { + let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + unsafe { + let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + unsafe { + let a1: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe { + let c: int8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { 
+ let c: int8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { + let c: int16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe { + let c: int16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe { + let c: int32x2_t = simd_and(a, b); + let d: i32x2 = i32x2::new(0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe { + let c: int32x4_t = simd_and(a, b); + let d: i32x4 = i32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + unsafe { + let c: poly8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + unsafe { + let c: poly8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { + unsafe { + let c: poly16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { + unsafe { + let c: poly16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let c: uint8x8_t = simd_and(a, b); + let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let c: uint8x16_t = simd_and(a, b); + let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let c: uint16x4_t = simd_and(a, b); + let d: u16x4 = u16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let c: uint16x8_t = simd_and(a, b); + let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let c: uint32x2_t = simd_and(a, b); + let d: u32x2 = u32x2::new(0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let c: uint32x4_t = simd_and(a, b); + let d: u32x4 = u32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vusdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vusdotq_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] + fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t; + } + unsafe { _vusdot_s32(a, b, c) } +} +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue 
= "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] + fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vusdotq_s32(a, b, c) } +} +#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usmmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")] + fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vusmmlaq_s32(a, b, c) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) + } +} 
+#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + unsafe { + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + unsafe { + let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + unsafe { + let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + unsafe { + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + unsafe { + let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 
12, 14]); + let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + unsafe { + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + unsafe { + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + unsafe { + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + unsafe { + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + unsafe { + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + unsafe { + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + unsafe { + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a0: poly16x4_t = 
simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vzip.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_s32(a: int32x2_t, b: 
int32x2_t) -> int32x2x2_t { + unsafe { + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + unsafe { + let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + unsafe { + let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + unsafe { + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + unsafe { + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + unsafe { + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + unsafe { + let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + unsafe { + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [8, 
24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + unsafe { + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + unsafe { + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + unsafe { + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + unsafe { + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + transmute((a0, b0)) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) + } +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs new file mode 100644 index 000000000000..bdf511ecf881 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs @@ -0,0 +1,206 @@ +//! Tests for ARM+v7+neon load (vld1) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; +use std::mem; +use stdarch_test::simd_test; +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s8() { + let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s8() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s16() { + let a: [i16; 5] = [0, 1, 2, 3, 4]; + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s16() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr())); + 
assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s32() { + let a: [i32; 3] = [0, 1, 2]; + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s32() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let e = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s64() { + let a: [i64; 2] = [0, 1]; + let e = i64x1::new(1); + let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s64() { + let a: [i64; 3] = [0, 1, 2]; + let e = i64x2::new(1, 2); + let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u8() { + let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u8() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u16() { + let a: [u16; 5] = [0, 1, 2, 3, 4]; + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u16() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u32() { + let a: [u32; 3] = [0, 1, 2]; + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr())); + assert_eq!(r, e) +} + 
+#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u32() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u64() { + let a: [u64; 2] = [0, 1]; + let e = u64x1::new(1); + let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u64() { + let a: [u64; 3] = [0, 1, 2]; + let e = u64x2::new(1, 2); + let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_p8() { + let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_p8() { + let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_p16() { + let a: [p16; 5] = [0, 1, 2, 3, 4]; + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_p16() { + let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vld1_p64() { + let a: [p64; 2] = [0, 1]; + let e = u64x1::new(1); + let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vld1q_p64() { + let a: [p64; 3] = [0, 1, 2]; + let e = u64x2::new(1, 2); + let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = 
"neon")] +unsafe fn test_vld1_f32() { + let a: [f32; 3] = [0., 1., 2.]; + let e = f32x2::new(1., 2.); + let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_f32() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr())); + assert_eq!(r, e) +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs new file mode 100644 index 000000000000..0683d48ed327 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs @@ -0,0 +1,5531 @@ +//! ARMv7 NEON intrinsics + +#[rustfmt::skip] +mod generated; +#[rustfmt::skip] +#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] +#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +pub use self::generated::*; + +use crate::{core_arch::simd::*, hint::unreachable_unchecked, intrinsics::simd::*, mem::transmute}; +#[cfg(test)] +use stdarch_test::assert_instr; + +pub(crate) trait AsUnsigned { + type Unsigned; + fn as_unsigned(self) -> Self::Unsigned; +} + +pub(crate) trait AsSigned { + type Signed; + fn as_signed(self) -> Self::Signed; +} + +macro_rules! impl_sign_conversions_neon { + ($(($signed:ty, $unsigned:ty))*) => ($( + impl AsUnsigned for $signed { + type Unsigned = $unsigned; + + #[inline(always)] + fn as_unsigned(self) -> $unsigned { + unsafe { transmute(self) } + } + } + + impl AsSigned for $unsigned { + type Signed = $signed; + + #[inline(always)] + fn as_signed(self) -> $signed { + unsafe { transmute(self) } + } + } + )*) +} + +pub(crate) type p8 = u8; +pub(crate) type p16 = u16; +pub(crate) type p64 = u64; +pub(crate) type p128 = u128; + +types! 
{ + #![cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] + #![cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] + + /// Arm-specific 64-bit wide vector of eight packed `i8`. + pub struct int8x8_t(8 x pub(crate) i8); + /// Arm-specific 64-bit wide vector of eight packed `u8`. + pub struct uint8x8_t(8 x pub(crate) u8); + /// Arm-specific 64-bit wide polynomial vector of eight packed `p8`. + pub struct poly8x8_t(8 x pub(crate) p8); + /// Arm-specific 64-bit wide vector of four packed `i16`. + pub struct int16x4_t(4 x pub(crate) i16); + /// Arm-specific 64-bit wide vector of four packed `u16`. + pub struct uint16x4_t(4 x pub(crate) u16); + /// Arm-specific 64-bit wide vector of four packed `p16`. + pub struct poly16x4_t(4 x pub(crate) p16); + /// Arm-specific 64-bit wide vector of two packed `i32`. + pub struct int32x2_t(2 x pub(crate) i32); + /// Arm-specific 64-bit wide vector of two packed `u32`. + pub struct uint32x2_t(2 x pub(crate) u32); + /// Arm-specific 64-bit wide vector of two packed `f32`. + pub struct float32x2_t(2 x pub(crate) f32); + /// Arm-specific 64-bit wide vector of one packed `i64`. + pub struct int64x1_t(1 x pub(crate) i64); + /// Arm-specific 64-bit wide vector of one packed `u64`. + pub struct uint64x1_t(1 x pub(crate) u64); + /// Arm-specific 64-bit wide vector of one packed `p64`. + pub struct poly64x1_t(1 x pub(crate) p64); + + /// Arm-specific 128-bit wide vector of sixteen packed `i8`. + pub struct int8x16_t(16 x pub(crate) i8); + /// Arm-specific 128-bit wide vector of sixteen packed `u8`. + pub struct uint8x16_t(16 x pub(crate) u8); + /// Arm-specific 128-bit wide vector of sixteen packed `p8`. + pub struct poly8x16_t(16 x pub(crate) p8); + /// Arm-specific 128-bit wide vector of eight packed `i16`. + pub struct int16x8_t(8 x pub(crate) i16); + /// Arm-specific 128-bit wide vector of eight packed `u16`. 
+ pub struct uint16x8_t(8 x pub(crate) u16); + /// Arm-specific 128-bit wide vector of eight packed `p16`. + pub struct poly16x8_t(8 x pub(crate) p16); + /// Arm-specific 128-bit wide vector of four packed `i32`. + pub struct int32x4_t(4 x pub(crate) i32); + /// Arm-specific 128-bit wide vector of four packed `u32`. + pub struct uint32x4_t(4 x pub(crate) u32); + /// Arm-specific 128-bit wide vector of four packed `f32`. + pub struct float32x4_t(4 x pub(crate) f32); + /// Arm-specific 128-bit wide vector of two packed `i64`. + pub struct int64x2_t(2 x pub(crate) i64); + /// Arm-specific 128-bit wide vector of two packed `u64`. + pub struct uint64x2_t(2 x pub(crate) u64); + /// Arm-specific 128-bit wide vector of two packed `p64`. + pub struct poly64x2_t(2 x pub(crate) p64); +} + +types! { + #![unstable(feature = "stdarch_neon_f16", issue = "136306")] + + /// Arm-specific 64-bit wide vector of four packed `f16`. + pub struct float16x4_t(4 x pub(crate) f16); + /// Arm-specific 128-bit wide vector of eight packed `f16`. + pub struct float16x8_t(8 x pub(crate) f16); +} + +/// Arm-specific type containing two `int8x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t); +/// Arm-specific type containing three `int8x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t); +/// Arm-specific type containing four `int8x8_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); + +/// Arm-specific type containing two `int8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); +/// Arm-specific type containing three `int8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); +/// Arm-specific type containing four `int8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t); + +/// Arm-specific type containing two `uint8x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); +/// Arm-specific type containing three `uint8x8_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); +/// Arm-specific type containing four `uint8x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); + +/// Arm-specific type containing two `uint8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); +/// Arm-specific type containing three `uint8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t); +/// Arm-specific type containing four `uint8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint8x16x4_t( + pub uint8x16_t, + pub uint8x16_t, + pub uint8x16_t, + pub uint8x16_t, +); + +/// Arm-specific type containing two `poly8x8_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); +/// Arm-specific type containing three `poly8x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); +/// Arm-specific type containing four `poly8x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); + +/// Arm-specific type containing two `poly8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); +/// Arm-specific type containing three `poly8x16_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t); +/// Arm-specific type containing four `poly8x16_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly8x16x4_t( + pub poly8x16_t, + pub poly8x16_t, + pub poly8x16_t, + pub poly8x16_t, +); + +/// Arm-specific type containing two `int16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); +/// Arm-specific type containing three `int16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int16x4x3_t(pub int16x4_t, pub int16x4_t, pub int16x4_t); +/// Arm-specific type containing four `int16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int16x4x4_t(pub int16x4_t, pub int16x4_t, pub int16x4_t, pub int16x4_t); + +/// Arm-specific type containing two `int16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int16x8x2_t(pub int16x8_t, pub int16x8_t); +/// Arm-specific type containing three `int16x8_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int16x8x3_t(pub int16x8_t, pub int16x8_t, pub int16x8_t); +/// Arm-specific type containing four `int16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int16x8x4_t(pub int16x8_t, pub int16x8_t, pub int16x8_t, pub int16x8_t); + +/// Arm-specific type containing two `uint16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint16x4x2_t(pub uint16x4_t, pub uint16x4_t); +/// Arm-specific type containing three `uint16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint16x4x3_t(pub uint16x4_t, pub uint16x4_t, pub uint16x4_t); +/// Arm-specific type containing four `uint16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint16x4x4_t( + pub uint16x4_t, + pub uint16x4_t, + pub uint16x4_t, + pub uint16x4_t, +); + +/// Arm-specific type containing two `uint16x8_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint16x8x2_t(pub uint16x8_t, pub uint16x8_t); +/// Arm-specific type containing three `uint16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint16x8x3_t(pub uint16x8_t, pub uint16x8_t, pub uint16x8_t); +/// Arm-specific type containing four `uint16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint16x8x4_t( + pub uint16x8_t, + pub uint16x8_t, + pub uint16x8_t, + pub uint16x8_t, +); + +/// Arm-specific type containing two `poly16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly16x4x2_t(pub poly16x4_t, pub poly16x4_t); +/// Arm-specific type containing three `poly16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly16x4x3_t(pub poly16x4_t, pub poly16x4_t, pub poly16x4_t); +/// Arm-specific type containing four `poly16x4_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly16x4x4_t( + pub poly16x4_t, + pub poly16x4_t, + pub poly16x4_t, + pub poly16x4_t, +); + +/// Arm-specific type containing two `poly16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly16x8x2_t(pub poly16x8_t, pub poly16x8_t); +/// Arm-specific type containing three `poly16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly16x8x3_t(pub poly16x8_t, pub poly16x8_t, pub poly16x8_t); +/// Arm-specific type containing four `poly16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly16x8x4_t( + pub poly16x8_t, + pub poly16x8_t, + pub poly16x8_t, + pub poly16x8_t, +); + +/// Arm-specific type containing two `int32x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int32x2x2_t(pub int32x2_t, pub int32x2_t); +/// Arm-specific type containing three `int32x2_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int32x2x3_t(pub int32x2_t, pub int32x2_t, pub int32x2_t); +/// Arm-specific type containing four `int32x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int32x2x4_t(pub int32x2_t, pub int32x2_t, pub int32x2_t, pub int32x2_t); + +/// Arm-specific type containing two `int32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int32x4x2_t(pub int32x4_t, pub int32x4_t); +/// Arm-specific type containing three `int32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int32x4x3_t(pub int32x4_t, pub int32x4_t, pub int32x4_t); +/// Arm-specific type containing four `int32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int32x4x4_t(pub int32x4_t, pub int32x4_t, pub int32x4_t, pub int32x4_t); + +/// Arm-specific type containing two `uint32x2_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint32x2x2_t(pub uint32x2_t, pub uint32x2_t); +/// Arm-specific type containing three `uint32x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint32x2x3_t(pub uint32x2_t, pub uint32x2_t, pub uint32x2_t); +/// Arm-specific type containing four `uint32x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint32x2x4_t( + pub uint32x2_t, + pub uint32x2_t, + pub uint32x2_t, + pub uint32x2_t, +); + +/// Arm-specific type containing two `uint32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint32x4x2_t(pub uint32x4_t, pub uint32x4_t); +/// Arm-specific type containing three `uint32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint32x4x3_t(pub uint32x4_t, pub uint32x4_t, pub uint32x4_t); +/// Arm-specific type containing four `uint32x4_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint32x4x4_t( + pub uint32x4_t, + pub uint32x4_t, + pub uint32x4_t, + pub uint32x4_t, +); + +/// Arm-specific type containing two `float16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x4x2_t(pub float16x4_t, pub float16x4_t); + +/// Arm-specific type containing three `float16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x4x3_t(pub float16x4_t, pub float16x4_t, pub float16x4_t); + +/// Arm-specific type containing four `float16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x4x4_t( + pub float16x4_t, + pub float16x4_t, + pub float16x4_t, + pub float16x4_t, +); + +/// Arm-specific type containing two `float16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x8x2_t(pub float16x8_t, pub float16x8_t); + +/// Arm-specific type containing three `float16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] + +pub struct float16x8x3_t(pub float16x8_t, pub float16x8_t, pub float16x8_t); +/// Arm-specific type containing four `float16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x8x4_t( + pub float16x8_t, + pub float16x8_t, + pub float16x8_t, + pub float16x8_t, +); + +/// Arm-specific type containing two `float32x2_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct float32x2x2_t(pub float32x2_t, pub float32x2_t); +/// Arm-specific type containing three `float32x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct float32x2x3_t(pub float32x2_t, pub float32x2_t, pub float32x2_t); +/// Arm-specific type containing four `float32x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct float32x2x4_t( + pub float32x2_t, + pub float32x2_t, + pub float32x2_t, + pub float32x2_t, +); + +/// Arm-specific type containing two `float32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct float32x4x2_t(pub float32x4_t, pub float32x4_t); +/// Arm-specific type containing three `float32x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct float32x4x3_t(pub float32x4_t, pub float32x4_t, pub float32x4_t); +/// Arm-specific type containing four `float32x4_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct float32x4x4_t( + pub float32x4_t, + pub float32x4_t, + pub float32x4_t, + pub float32x4_t, +); + +/// Arm-specific type containing two `int64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int64x1x2_t(pub int64x1_t, pub int64x1_t); +/// Arm-specific type containing three `int64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int64x1x3_t(pub int64x1_t, pub int64x1_t, pub int64x1_t); +/// Arm-specific type containing four `int64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int64x1x4_t(pub int64x1_t, pub int64x1_t, pub int64x1_t, pub int64x1_t); + +/// Arm-specific type containing two `int64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int64x2x2_t(pub int64x2_t, pub int64x2_t); +/// Arm-specific type containing three `int64x2_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int64x2x3_t(pub int64x2_t, pub int64x2_t, pub int64x2_t); +/// Arm-specific type containing four `int64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct int64x2x4_t(pub int64x2_t, pub int64x2_t, pub int64x2_t, pub int64x2_t); + +/// Arm-specific type containing two `uint64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint64x1x2_t(pub uint64x1_t, pub uint64x1_t); +/// Arm-specific type containing three `uint64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint64x1x3_t(pub uint64x1_t, pub uint64x1_t, pub uint64x1_t); +/// Arm-specific type containing four `uint64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint64x1x4_t( + pub uint64x1_t, + pub uint64x1_t, + pub uint64x1_t, + pub uint64x1_t, +); + +/// Arm-specific type containing two `uint64x2_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint64x2x2_t(pub uint64x2_t, pub uint64x2_t); +/// Arm-specific type containing three `uint64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint64x2x3_t(pub uint64x2_t, pub uint64x2_t, pub uint64x2_t); +/// Arm-specific type containing four `uint64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct uint64x2x4_t( + pub uint64x2_t, + pub uint64x2_t, + pub uint64x2_t, + pub uint64x2_t, +); + +/// Arm-specific type containing two `poly64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly64x1x2_t(pub poly64x1_t, pub poly64x1_t); +/// Arm-specific type containing three `poly64x1_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly64x1x3_t(pub poly64x1_t, pub poly64x1_t, pub poly64x1_t); +/// Arm-specific type containing four `poly64x1_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly64x1x4_t( + pub poly64x1_t, + pub poly64x1_t, + pub poly64x1_t, + pub poly64x1_t, +); + +/// Arm-specific type containing two `poly64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly64x2x2_t(pub poly64x2_t, pub poly64x2_t); +/// Arm-specific type containing three `poly64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly64x2x3_t(pub poly64x2_t, pub poly64x2_t, pub poly64x2_t); +/// Arm-specific type containing four `poly64x2_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub struct poly64x2x4_t( + pub poly64x2_t, + pub poly64x2_t, + pub poly64x2_t, + pub poly64x2_t, +); + +impl_sign_conversions_neon! 
{ + (i8, u8) + (i16, u16) + (i32, u32) + (i64, u64) + (*const i8, *const u8) + (*const i16, *const u16) + (*const i32, *const u32) + (*const i64, *const u64) + (*mut i8, *mut u8) + (*mut i16, *mut u16) + (*mut i32, *mut u32) + (*mut i64, *mut u64) + (int16x4_t, uint16x4_t) + (int16x8_t, uint16x8_t) + (int32x2_t, uint32x2_t) + (int32x4_t, uint32x4_t) + (int64x1_t, uint64x1_t) + (int64x2_t, uint64x2_t) + (int8x16_t, uint8x16_t) + (int8x8_t, uint8x8_t) + (uint16x4_t, int16x4_t) + (uint16x8_t, int16x8_t) + (uint32x2_t, int32x2_t) + (uint32x4_t, int32x4_t) + (uint64x1_t, int64x1_t) + (uint64x2_t, int64x2_t) + (uint8x16_t, int8x16_t) + (uint8x8_t, int8x8_t) + (int16x4x2_t, uint16x4x2_t) + (int16x4x3_t, uint16x4x3_t) + (int16x4x4_t, uint16x4x4_t) + (int16x8x2_t, uint16x8x2_t) + (int16x8x3_t, uint16x8x3_t) + (int16x8x4_t, uint16x8x4_t) + (int32x2x2_t, uint32x2x2_t) + (int32x2x3_t, uint32x2x3_t) + (int32x2x4_t, uint32x2x4_t) + (int32x4x2_t, uint32x4x2_t) + (int32x4x3_t, uint32x4x3_t) + (int32x4x4_t, uint32x4x4_t) + (int64x1x2_t, uint64x1x2_t) + (int64x1x3_t, uint64x1x3_t) + (int64x1x4_t, uint64x1x4_t) + (int64x2x2_t, uint64x2x2_t) + (int64x2x3_t, uint64x2x3_t) + (int64x2x4_t, uint64x2x4_t) + (int8x16x2_t, uint8x16x2_t) + (int8x16x3_t, uint8x16x3_t) + (int8x16x4_t, uint8x16x4_t) + (int8x8x2_t, uint8x8x2_t) + (int8x8x3_t, uint8x8x3_t) + (int8x8x4_t, uint8x8x4_t) + (uint16x4x2_t, int16x4x2_t) + (uint16x4x3_t, int16x4x3_t) + (uint16x4x4_t, int16x4x4_t) + (uint16x8x2_t, int16x8x2_t) + (uint16x8x3_t, int16x8x3_t) + (uint16x8x4_t, int16x8x4_t) + (uint32x2x2_t, int32x2x2_t) + (uint32x2x3_t, int32x2x3_t) + (uint32x2x4_t, int32x2x4_t) + (uint32x4x2_t, int32x4x2_t) + (uint32x4x3_t, int32x4x3_t) + (uint32x4x4_t, int32x4x4_t) + (uint64x1x2_t, int64x1x2_t) + (uint64x1x3_t, int64x1x3_t) + (uint64x1x4_t, int64x1x4_t) + (uint64x2x2_t, int64x2x2_t) + (uint64x2x3_t, int64x2x3_t) + (uint64x2x4_t, int64x2x4_t) + (uint8x16x2_t, int8x16x2_t) + (uint8x16x3_t, int8x16x3_t) + (uint8x16x4_t, 
int8x16x4_t) + (uint8x8x2_t, int8x8x2_t) + (uint8x8x3_t, int8x8x3_t) + (uint8x8x4_t, int8x8x4_t) +} + +#[cfg(test)] +mod tests { + use super::*; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + use crate::core_arch::aarch64::*; + #[cfg(target_arch = "arm")] + use crate::core_arch::arm::*; + use crate::core_arch::arm_shared::test_support::*; + use crate::core_arch::simd::*; + use std::{mem::transmute, vec::Vec}; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: i8 = 42; + let e = i8x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: i8x8 = transmute(vld1_lane_s8::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let elem: i8 = 42; + let e = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42); + let r: i8x16 = transmute(vld1q_lane_s8::<15>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s16() { + let a = i16x4::new(0, 1, 2, 3); + let elem: i16 = 42; + let e = i16x4::new(0, 1, 2, 42); + let r: i16x4 = transmute(vld1_lane_s16::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: i16 = 42; + let e = i16x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: i16x8 = transmute(vld1q_lane_s16::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s32() { + let a = i32x2::new(0, 1); + let elem: i32 = 42; + let e = i32x2::new(0, 42); + let r: i32x2 = transmute(vld1_lane_s32::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s32() { + let a = i32x4::new(0, 1, 2, 3); + let elem: i32 = 42; + let e = i32x4::new(0, 1, 2, 42); 
+ let r: i32x4 = transmute(vld1q_lane_s32::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s64() { + let a = i64x1::new(0); + let elem: i64 = 42; + let e = i64x1::new(42); + let r: i64x1 = transmute(vld1_lane_s64::<0>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s64() { + let a = i64x2::new(0, 1); + let elem: i64 = 42; + let e = i64x2::new(0, 42); + let r: i64x2 = transmute(vld1q_lane_s64::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: u8 = 42; + let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u8x8 = transmute(vld1_lane_u8::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let elem: u8 = 42; + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42); + let r: u8x16 = transmute(vld1q_lane_u8::<15>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u16() { + let a = u16x4::new(0, 1, 2, 3); + let elem: u16 = 42; + let e = u16x4::new(0, 1, 2, 42); + let r: u16x4 = transmute(vld1_lane_u16::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: u16 = 42; + let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u16x8 = transmute(vld1q_lane_u16::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u32() { + let a = u32x2::new(0, 1); + let elem: u32 = 42; + let e = u32x2::new(0, 42); + let r: u32x2 = transmute(vld1_lane_u32::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vld1q_lane_u32() { + let a = u32x4::new(0, 1, 2, 3); + let elem: u32 = 42; + let e = u32x4::new(0, 1, 2, 42); + let r: u32x4 = transmute(vld1q_lane_u32::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u64() { + let a = u64x1::new(0); + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_lane_u64::<0>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u64() { + let a = u64x2::new(0, 1); + let elem: u64 = 42; + let e = u64x2::new(0, 42); + let r: u64x2 = transmute(vld1q_lane_u64::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: p8 = 42; + let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u8x8 = transmute(vld1_lane_p8::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let elem: p8 = 42; + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42); + let r: u8x16 = transmute(vld1q_lane_p8::<15>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_p16() { + let a = u16x4::new(0, 1, 2, 3); + let elem: p16 = 42; + let e = u16x4::new(0, 1, 2, 42); + let r: u16x4 = transmute(vld1_lane_p16::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_p16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: p16 = 42; + let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u16x8 = transmute(vld1q_lane_p16::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_lane_p64() { + let a = u64x1::new(0); + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = 
transmute(vld1_lane_p64::<0>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_lane_p64() { + let a = u64x2::new(0, 1); + let elem: u64 = 42; + let e = u64x2::new(0, 42); + let r: u64x2 = transmute(vld1q_lane_p64::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_f32() { + let a = f32x2::new(0., 1.); + let elem: f32 = 42.; + let e = f32x2::new(0., 42.); + let r: f32x2 = transmute(vld1_lane_f32::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_f32() { + let a = f32x4::new(0., 1., 2., 3.); + let elem: f32 = 42.; + let e = f32x4::new(0., 1., 2., 42.); + let r: f32x4 = transmute(vld1q_lane_f32::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s8() { + let elem: i8 = 42; + let e = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: i8x8 = transmute(vld1_dup_s8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s8() { + let elem: i8 = 42; + let e = i8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: i8x16 = transmute(vld1q_dup_s8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s16() { + let elem: i16 = 42; + let e = i16x4::new(42, 42, 42, 42); + let r: i16x4 = transmute(vld1_dup_s16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s16() { + let elem: i16 = 42; + let e = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: i16x8 = transmute(vld1q_dup_s16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s32() { + let elem: i32 = 42; + let e = i32x2::new(42, 42); + let r: i32x2 = transmute(vld1_dup_s32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s32() { + let elem: i32 = 42; + 
let e = i32x4::new(42, 42, 42, 42); + let r: i32x4 = transmute(vld1q_dup_s32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s64() { + let elem: i64 = 42; + let e = i64x1::new(42); + let r: i64x1 = transmute(vld1_dup_s64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s64() { + let elem: i64 = 42; + let e = i64x2::new(42, 42); + let r: i64x2 = transmute(vld1q_dup_s64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u8() { + let elem: u8 = 42; + let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u8x8 = transmute(vld1_dup_u8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u8() { + let elem: u8 = 42; + let e = u8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: u8x16 = transmute(vld1q_dup_u8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u16() { + let elem: u16 = 42; + let e = u16x4::new(42, 42, 42, 42); + let r: u16x4 = transmute(vld1_dup_u16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u16() { + let elem: u16 = 42; + let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u16x8 = transmute(vld1q_dup_u16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u32() { + let elem: u32 = 42; + let e = u32x2::new(42, 42); + let r: u32x2 = transmute(vld1_dup_u32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u32() { + let elem: u32 = 42; + let e = u32x4::new(42, 42, 42, 42); + let r: u32x4 = transmute(vld1q_dup_u32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u64() { + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_dup_u64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe 
fn test_vld1q_dup_u64() { + let elem: u64 = 42; + let e = u64x2::new(42, 42); + let r: u64x2 = transmute(vld1q_dup_u64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_p8() { + let elem: p8 = 42; + let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u8x8 = transmute(vld1_dup_p8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_p8() { + let elem: p8 = 42; + let e = u8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: u8x16 = transmute(vld1q_dup_p8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_p16() { + let elem: p16 = 42; + let e = u16x4::new(42, 42, 42, 42); + let r: u16x4 = transmute(vld1_dup_p16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_p16() { + let elem: p16 = 42; + let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u16x8 = transmute(vld1q_dup_p16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_dup_p64() { + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_dup_p64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_dup_p64() { + let elem: u64 = 42; + let e = u64x2::new(42, 42); + let r: u64x2 = transmute(vld1q_dup_p64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_f32() { + let elem: f32 = 42.; + let e = f32x2::new(42., 42.); + let r: f32x2 = transmute(vld1_dup_f32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_f32() { + let elem: f32 = 42.; + let e = f32x4::new(42., 42., 42., 42.); + let r: f32x4 = transmute(vld1q_dup_f32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u8() { + let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = vget_lane_u8::<1>(transmute(v)); + assert_eq!(r, 
2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u32() { + let v = i32x4::new(1, 2, 3, 4); + let r = vgetq_lane_u32::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s32() { + let v = i32x4::new(1, 2, 3, 4); + let r = vgetq_lane_s32::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u64() { + let v: u64 = 1; + let r = vget_lane_u64::<0>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u16() { + let v = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = vgetq_lane_u16::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s8() { + let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vget_lane_s8::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_s8::<4>(transmute(v)); + assert_eq!(r, 4); + let r = vget_lane_s8::<5>(transmute(v)); + assert_eq!(r, 5); + } + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_p8() { + let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vget_lane_p8::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_p8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_p8::<5>(transmute(v)); + assert_eq!(r, 5); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_p16() { + let v = u16x4::new(0, 1, 2, 3); + let r = vget_lane_p16::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_p16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_p16::<0>(transmute(v)); + assert_eq!(r, 0); + let r = vget_lane_p16::<1>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s16() { + let v = i16x4::new(0, 1, 2, 3); + let r = vget_lane_s16::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_s16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_s16::<0>(transmute(v)); + assert_eq!(r, 0); + let r = 
vget_lane_s16::<1>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u16() { + let v = u16x4::new(0, 1, 2, 3); + let r = vget_lane_u16::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_u16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_u16::<0>(transmute(v)); + assert_eq!(r, 0); + let r = vget_lane_u16::<1>(transmute(v)); + assert_eq!(r, 1); + } + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_f32() { + let v = f32x2::new(0.0, 1.0); + let r = vget_lane_f32::<1>(transmute(v)); + assert_eq!(r, 1.0); + let r = vget_lane_f32::<0>(transmute(v)); + assert_eq!(r, 0.0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s32() { + let v = i32x2::new(0, 1); + let r = vget_lane_s32::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vget_lane_s32::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u32() { + let v = u32x2::new(0, 1); + let r = vget_lane_u32::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vget_lane_u32::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s64() { + let v = i64x1::new(1); + let r = vget_lane_s64::<0>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_p64() { + let v = u64x1::new(1); + let r = vget_lane_p64::<0>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s8() { + let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = vgetq_lane_s8::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_s8::<13>(transmute(v)); + assert_eq!(r, 13); + let r = vgetq_lane_s8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_s8::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_p8() { + let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = 
vgetq_lane_p8::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_p8::<13>(transmute(v)); + assert_eq!(r, 13); + let r = vgetq_lane_p8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_p8::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u8() { + let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = vgetq_lane_u8::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_u8::<13>(transmute(v)); + assert_eq!(r, 13); + let r = vgetq_lane_u8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_u8::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s16() { + let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vgetq_lane_s16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_s16::<6>(transmute(v)); + assert_eq!(r, 6); + let r = vgetq_lane_s16::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_p16() { + let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vgetq_lane_p16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_p16::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_p16::<1>(transmute(v)); + assert_eq!(r, 1); + } + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_f32() { + let v = f32x4::new(0.0, 1.0, 2.0, 3.0); + let r = vgetq_lane_f32::<3>(transmute(v)); + assert_eq!(r, 3.0); + let r = vgetq_lane_f32::<0>(transmute(v)); + assert_eq!(r, 0.0); + let r = vgetq_lane_f32::<2>(transmute(v)); + assert_eq!(r, 2.0); + let r = vgetq_lane_f32::<1>(transmute(v)); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s64() { + let v = i64x2::new(0, 1); + let r = vgetq_lane_s64::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vgetq_lane_s64::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_p64() { + let v = u64x2::new(0, 
1); + let r = vgetq_lane_p64::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vgetq_lane_p64::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vext_s64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vext_u64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x8 = transmute(vget_high_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i16x4::new(5, 6, 7, 8); + let r: i16x4 = transmute(vget_high_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i32x2::new(3, 4); + let r: i32x2 = transmute(vget_high_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s64() { + let a = i64x2::new(1, 2); + let e = i64x1::new(2); + let r: i64x1 = transmute(vget_high_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x8 = transmute(vget_high_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(5, 6, 7, 8); + let 
r: u16x4 = transmute(vget_high_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u32x2::new(3, 4); + let r: u32x2 = transmute(vget_high_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u64() { + let a = u64x2::new(1, 2); + let e = u64x1::new(2); + let r: u64x1 = transmute(vget_high_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_p8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x8 = transmute(vget_high_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_p16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(5, 6, 7, 8); + let r: u16x4 = transmute(vget_high_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_f32() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let e = f32x2::new(3.0, 4.0); + let r: f32x2 = transmute(vget_high_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vget_low_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vget_low_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vget_low_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s64() { + let 
a = i64x2::new(1, 2); + let e = i64x1::new(1); + let r: i64x1 = transmute(vget_low_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vget_low_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vget_low_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vget_low_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u64() { + let a = u64x2::new(1, 2); + let e = u64x1::new(1); + let r: u64x1 = transmute(vget_low_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_p8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vget_low_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_p16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vget_low_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_f32() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let e = f32x2::new(1.0, 2.0); + let r: f32x2 = transmute(vget_low_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s8() { + let v: i8 = 42; + let e = i8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: i8x16 = transmute(vdupq_n_s8(v)); + assert_eq!(r, e); + } + + 
#[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s16() { + let v: i16 = 64; + let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i16x8 = transmute(vdupq_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s32() { + let v: i32 = 64; + let e = i32x4::new(64, 64, 64, 64); + let r: i32x4 = transmute(vdupq_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s64() { + let v: i64 = 64; + let e = i64x2::new(64, 64); + let r: i64x2 = transmute(vdupq_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u8() { + let v: u8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vdupq_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u16() { + let v: u16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vdupq_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u32() { + let v: u32 = 64; + let e = u32x4::new(64, 64, 64, 64); + let r: u32x4 = transmute(vdupq_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u64() { + let v: u64 = 64; + let e = u64x2::new(64, 64); + let r: u64x2 = transmute(vdupq_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_p8() { + let v: p8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vdupq_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_p16() { + let v: p16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vdupq_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_f32() { + let v: f32 = 64.0; + let e = f32x4::new(64.0, 64.0, 64.0, 64.0); + let 
r: f32x4 = transmute(vdupq_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s8() { + let v: i8 = 64; + let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i8x8 = transmute(vdup_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s16() { + let v: i16 = 64; + let e = i16x4::new(64, 64, 64, 64); + let r: i16x4 = transmute(vdup_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s32() { + let v: i32 = 64; + let e = i32x2::new(64, 64); + let r: i32x2 = transmute(vdup_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s64() { + let v: i64 = 64; + let e = i64x1::new(64); + let r: i64x1 = transmute(vdup_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u8() { + let v: u8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vdup_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u16() { + let v: u16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vdup_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u32() { + let v: u32 = 64; + let e = u32x2::new(64, 64); + let r: u32x2 = transmute(vdup_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u64() { + let v: u64 = 64; + let e = u64x1::new(64); + let r: u64x1 = transmute(vdup_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_p8() { + let v: p8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vdup_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_p16() { + let v: p16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vdup_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe 
fn test_vdup_n_f32() { + let v: f32 = 64.0; + let e = f32x2::new(64.0, 64.0); + let r: f32x2 = transmute(vdup_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vldrq_p128() { + let v: [p128; 2] = [1, 2]; + let e: p128 = 2; + let r: p128 = vldrq_p128(v[1..].as_ptr()); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vstrq_p128() { + let v: [p128; 2] = [1, 2]; + let e: p128 = 2; + let mut r: p128 = 1; + vstrq_p128(&mut r, v[1]); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s8() { + let v: i8 = 64; + let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i8x8 = transmute(vmov_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s16() { + let v: i16 = 64; + let e = i16x4::new(64, 64, 64, 64); + let r: i16x4 = transmute(vmov_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s32() { + let v: i32 = 64; + let e = i32x2::new(64, 64); + let r: i32x2 = transmute(vmov_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s64() { + let v: i64 = 64; + let e = i64x1::new(64); + let r: i64x1 = transmute(vmov_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u8() { + let v: u8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vmov_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u16() { + let v: u16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vmov_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u32() { + let v: u32 = 64; + let e = u32x2::new(64, 64); + let r: u32x2 = transmute(vmov_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u64() { + let v: u64 = 64; + let e = u64x1::new(64); + let r: u64x1 = transmute(vmov_n_u64(v)); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_p8() { + let v: p8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vmov_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_p16() { + let v: p16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vmov_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_f32() { + let v: f32 = 64.0; + let e = f32x2::new(64.0, 64.0); + let r: f32x2 = transmute(vmov_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s8() { + let v: i8 = 64; + let e = i8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: i8x16 = transmute(vmovq_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s16() { + let v: i16 = 64; + let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i16x8 = transmute(vmovq_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s32() { + let v: i32 = 64; + let e = i32x4::new(64, 64, 64, 64); + let r: i32x4 = transmute(vmovq_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s64() { + let v: i64 = 64; + let e = i64x2::new(64, 64); + let r: i64x2 = transmute(vmovq_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u8() { + let v: u8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vmovq_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u16() { + let v: u16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vmovq_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u32() { + let v: u32 = 64; + let e = u32x4::new(64, 64, 64, 
64); + let r: u32x4 = transmute(vmovq_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u64() { + let v: u64 = 64; + let e = u64x2::new(64, 64); + let r: u64x2 = transmute(vmovq_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_p8() { + let v: p8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vmovq_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_p16() { + let v: p16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vmovq_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_f32() { + let v: f32 = 64.0; + let e = f32x4::new(64.0, 64.0, 64.0, 64.0); + let r: f32x4 = transmute(vmovq_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u64() { + let v = i64x2::new(1, 2); + let r = vgetq_lane_u64::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s8() { + test_ari_s8( + |i, j| vadd_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s8() { + testq_ari_s8( + |i, j| vaddq_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s16() { + test_ari_s16( + |i, j| vadd_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s16() { + testq_ari_s16( + |i, j| vaddq_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s32() { + test_ari_s32( + |i, j| vadd_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s32() { + testq_ari_s32( + |i, j| 
vaddq_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u8() { + test_ari_u8( + |i, j| vadd_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u8() { + testq_ari_u8( + |i, j| vaddq_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u16() { + test_ari_u16( + |i, j| vadd_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u16() { + testq_ari_u16( + |i, j| vaddq_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u32() { + test_ari_u32( + |i, j| vadd_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u32() { + testq_ari_u32( + |i, j| vaddq_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_f32() { + test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_f32() { + testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s8() { + let v = i8::MAX; + let a = i8x8::new(v, v, v, v, v, v, v, v); + let v = 2 * (v as i16); + let e = i16x8::new(v, v, v, v, v, v, v, v); + let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s16() { + let v = i16::MAX; + let a = i16x4::new(v, v, v, v); + let v = 2 * (v as i32); + let e = i32x4::new(v, v, v, v); + let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s32() { + let v = 
i32::MAX; + let a = i32x2::new(v, v); + let v = 2 * (v as i64); + let e = i64x2::new(v, v); + let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u8() { + let v = u8::MAX; + let a = u8x8::new(v, v, v, v, v, v, v, v); + let v = 2 * (v as u16); + let e = u16x8::new(v, v, v, v, v, v, v, v); + let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u16() { + let v = u16::MAX; + let a = u16x4::new(v, v, v, v); + let v = 2 * (v as u32); + let e = u32x4::new(v, v, v, v); + let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u32() { + let v = u32::MAX; + let a = u32x2::new(v, v); + let v = 2 * (v as u64); + let e = u64x2::new(v, v); + let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let x = i8::MAX; + let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); + let x = x as i16; + let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15); + let r: i16x8 = transmute(vaddl_high_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let x = i16::MAX; + let b = i16x8::new(x, x, x, x, x, x, x, x); + let x = x as i32; + let e = i32x4::new(x + 4, x + 5, x + 6, x + 7); + let r: i32x4 = transmute(vaddl_high_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_s32() { + let a = i32x4::new(0, 1, 2, 3); + let x = i32::MAX; + let b = i32x4::new(x, x, x, x); + let x = x as i64; + let e = i64x2::new(x + 2, x + 3); + 
let r: i64x2 = transmute(vaddl_high_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let x = u8::MAX; + let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); + let x = x as u16; + let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15); + let r: u16x8 = transmute(vaddl_high_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let x = u16::MAX; + let b = u16x8::new(x, x, x, x, x, x, x, x); + let x = x as u32; + let e = u32x4::new(x + 4, x + 5, x + 6, x + 7); + let r: u32x4 = transmute(vaddl_high_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_u32() { + let a = u32x4::new(0, 1, 2, 3); + let x = u32::MAX; + let b = u32x4::new(x, x, x, x); + let x = x as u64; + let e = u64x2::new(x + 2, x + 3); + let r: u64x2 = transmute(vaddl_high_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_s8() { + let x = i16::MAX; + let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = i8::MAX; + let b = i8x8::new(y, y, y, y, y, y, y, y); + let y = y as i16; + let e = i16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: i16x8 = transmute(vaddw_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_s16() { + let x = i32::MAX; + let a = i32x4::new(x, 1, 2, 3); + let y = i16::MAX; + let b = i16x4::new(y, y, y, y); + let y = y as i32; + let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: i32x4 = transmute(vaddw_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vaddw_s32() { + let x = i64::MAX; + let a = i64x2::new(x, 1); + let y = i32::MAX; + let b = i32x2::new(y, y); + let y = y as i64; + let e = i64x2::new(x.wrapping_add(y), 1 + y); + let r: i64x2 = transmute(vaddw_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_u8() { + let x = u16::MAX; + let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = u8::MAX; + let b = u8x8::new(y, y, y, y, y, y, y, y); + let y = y as u16; + let e = u16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: u16x8 = transmute(vaddw_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_u16() { + let x = u32::MAX; + let a = u32x4::new(x, 1, 2, 3); + let y = u16::MAX; + let b = u16x4::new(y, y, y, y); + let y = y as u32; + let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: u32x4 = transmute(vaddw_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_u32() { + let x = u64::MAX; + let a = u64x2::new(x, 1); + let y = u32::MAX; + let b = u32x2::new(y, y); + let y = y as u64; + let e = u64x2::new(x.wrapping_add(y), 1 + y); + let r: u64x2 = transmute(vaddw_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_s8() { + let x = i16::MAX; + let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = i8::MAX; + let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y); + let y = y as i16; + let e = i16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: i16x8 = transmute(vaddw_high_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_s16() { + let x = i32::MAX; + let a = i32x4::new(x, 1, 2, 3); + let y = i16::MAX; + let b = i16x8::new(0, 0, 0, 0, y, y, y, y); + 
let y = y as i32; + let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: i32x4 = transmute(vaddw_high_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_s32() { + let x = i64::MAX; + let a = i64x2::new(x, 1); + let y = i32::MAX; + let b = i32x4::new(0, 0, y, y); + let y = y as i64; + let e = i64x2::new(x.wrapping_add(y), 1 + y); + let r: i64x2 = transmute(vaddw_high_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_u8() { + let x = u16::MAX; + let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = u8::MAX; + let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y); + let y = y as u16; + let e = u16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: u16x8 = transmute(vaddw_high_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_u16() { + let x = u32::MAX; + let a = u32x4::new(x, 1, 2, 3); + let y = u16::MAX; + let b = u16x8::new(0, 0, 0, 0, y, y, y, y); + let y = y as u32; + let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: u32x4 = transmute(vaddw_high_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_u32() { + let x = u64::MAX; + let a = u64x2::new(x, 1); + let y = u32::MAX; + let b = u32x4::new(0, 0, y, y); + let y = y as u64; + let e = u64x2::new(x.wrapping_add(y), 1 + y); + let r: u64x2 = transmute(vaddw_high_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8); + let r: i8x8 = transmute(vmvn_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s8() { + let a = i8x16::new(0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = i8x16::new( + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, + ); + let r: i8x16 = transmute(vmvnq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s16() { + let a = i16x4::new(0, 1, 2, 3); + let e = i16x4::new(-1, -2, -3, -4); + let r: i16x4 = transmute(vmvn_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8); + let r: i16x8 = transmute(vmvnq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s32() { + let a = i32x2::new(0, 1); + let e = i32x2::new(-1, -2); + let r: i32x2 = transmute(vmvn_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s32() { + let a = i32x4::new(0, 1, 2, 3); + let e = i32x4::new(-1, -2, -3, -4); + let r: i32x4 = transmute(vmvnq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); + let r: u8x8 = transmute(vmvn_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = u8x16::new( + 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, + ); + let r: u8x16 = transmute(vmvnq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u16() { + let a = u16x4::new(0, 1, 2, 3); + let e = u16x4::new(65_535, 65_534, 65_533, 65_532); + let r: u16x4 = transmute(vmvn_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e 
= u16x8::new( + 65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528, + ); + let r: u16x8 = transmute(vmvnq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u32() { + let a = u32x2::new(0, 1); + let e = u32x2::new(4_294_967_295, 4_294_967_294); + let r: u32x2 = transmute(vmvn_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292); + let r: u32x4 = transmute(vmvnq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); + let r: u8x8 = transmute(vmvn_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = u8x16::new( + 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, + ); + let r: u8x16 = transmute(vmvnq_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s8() { + let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = i8x8::new(0, -2, -2, -4, -4, -6, -6, -8); + let r: i8x8 = transmute(vbic_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s8() { + let a = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, + ); + let b = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = i8x16::new( + 0, -2, -2, -4, -4, -6, -6, -8, -8, -10, -10, -12, -12, -14, -14, -16, + ); + let r: i8x16 = transmute(vbicq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s16() { + let 
a = i16x4::new(0, -1, -2, -3); + let b = i16x4::new(1, 1, 1, 1); + let e = i16x4::new(0, -2, -2, -4); + let r: i16x4 = transmute(vbic_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s16() { + let a = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = i16x8::new(0, -2, -2, -4, -4, -6, -6, -8); + let r: i16x8 = transmute(vbicq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s32() { + let a = i32x2::new(0, -1); + let b = i32x2::new(1, 1); + let e = i32x2::new(0, -2); + let r: i32x2 = transmute(vbic_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s32() { + let a = i32x4::new(0, -1, -2, -3); + let b = i32x4::new(1, 1, 1, 1); + let e = i32x4::new(0, -2, -2, -4); + let r: i32x4 = transmute(vbicq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s64() { + let a = i64x1::new(-1); + let b = i64x1::new(1); + let e = i64x1::new(-2); + let r: i64x1 = transmute(vbic_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s64() { + let a = i64x2::new(0, -1); + let b = i64x2::new(1, 1); + let e = i64x2::new(0, -2); + let r: i64x2 = transmute(vbicq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = u8x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: u8x8 = transmute(vbic_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = u8x16::new(0, 0, 
2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); + let r: u8x16 = transmute(vbicq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u16() { + let a = u16x4::new(0, 1, 2, 3); + let b = u16x4::new(1, 1, 1, 1); + let e = u16x4::new(0, 0, 2, 2); + let r: u16x4 = transmute(vbic_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = u16x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: u16x8 = transmute(vbicq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u32() { + let a = u32x2::new(0, 1); + let b = u32x2::new(1, 1); + let e = u32x2::new(0, 0); + let r: u32x2 = transmute(vbic_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let b = u32x4::new(1, 1, 1, 1); + let e = u32x4::new(0, 0, 2, 2); + let r: u32x4 = transmute(vbicq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u64() { + let a = u64x1::new(1); + let b = u64x1::new(1); + let e = u64x1::new(0); + let r: u64x1 = transmute(vbic_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u64() { + let a = u64x2::new(0, 1); + let b = u64x2::new(1, 1); + let e = u64x2::new(0, 0); + let r: u64x2 = transmute(vbicq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s8() { + let a = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, 0, u8::MAX, 0); + let b = i8x8::new( + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + ); + let c = i8x8::new( + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + 
i8::MIN, + ); + let e = i8x8::new( + i8::MAX, + i8::MIN | 1, + i8::MAX, + i8::MIN | 2, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + ); + let r: i8x8 = transmute(vbsl_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s16() { + let a = u16x4::new(u16::MAX, 0, 1, 2); + let b = i16x4::new(i16::MAX, i16::MAX, i16::MAX, i16::MAX); + let c = i16x4::new(i16::MIN, i16::MIN, i16::MIN, i16::MIN); + let e = i16x4::new(i16::MAX, i16::MIN, i16::MIN | 1, i16::MIN | 2); + let r: i16x4 = transmute(vbsl_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s32() { + let a = u32x2::new(u32::MAX, 1); + let b = i32x2::new(i32::MAX, i32::MAX); + let c = i32x2::new(i32::MIN, i32::MIN); + let e = i32x2::new(i32::MAX, i32::MIN | 1); + let r: i32x2 = transmute(vbsl_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s64() { + let a = u64x1::new(1); + let b = i64x1::new(i64::MAX); + let c = i64x1::new(i64::MIN); + let e = i64x1::new(i64::MIN | 1); + let r: i64x1 = transmute(vbsl_s64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u8() { + let a = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, 0, u8::MAX, 0); + let b = u8x8::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x8::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, u8::MIN, u8::MAX, u8::MIN); + let r: u8x8 = transmute(vbsl_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u16() { + let a = u16x4::new(u16::MAX, 0, 1, 2); + let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX); + let c = u16x4::new(u16::MIN, 
u16::MIN, u16::MIN, u16::MIN); + let e = u16x4::new(u16::MAX, 0, 1, 2); + let r: u16x4 = transmute(vbsl_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u32() { + let a = u32x2::new(u32::MAX, 2); + let b = u32x2::new(u32::MAX, u32::MAX); + let c = u32x2::new(u32::MIN, u32::MIN); + let e = u32x2::new(u32::MAX, 2); + let r: u32x2 = transmute(vbsl_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u64() { + let a = u64x1::new(2); + let b = u64x1::new(u64::MAX); + let c = u64x1::new(u64::MIN); + let e = u64x1::new(2); + let r: u64x1 = transmute(vbsl_u64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_f32() { + let a = u32x2::new(1, 0x80000000); + let b = f32x2::new(8388609f32, -1.23f32); + let c = f32x2::new(2097152f32, 2.34f32); + let e = f32x2::new(2097152.25f32, -2.34f32); + let r: f32x2 = transmute(vbsl_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_p8() { + let a = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, 0, u8::MAX, 0); + let b = u8x8::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x8::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, u8::MIN, u8::MAX, u8::MIN); + let r: u8x8 = transmute(vbsl_p8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_p16() { + let a = u16x4::new(u16::MAX, 0, 1, 2); + let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX); + let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN); + let e = u16x4::new(u16::MAX, 0, 1, 2); + let r: u16x4 = transmute(vbsl_p16(transmute(a), transmute(b), 
transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s8() { + let a = u8x16::new( + u8::MAX, + 1, + u8::MAX, + 2, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + ); + let b = i8x16::new( + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + ); + let c = i8x16::new( + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + ); + let e = i8x16::new( + i8::MAX, + i8::MIN | 1, + i8::MAX, + i8::MIN | 2, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + ); + let r: i8x16 = transmute(vbslq_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s16() { + let a = u16x8::new(u16::MAX, 1, u16::MAX, 2, u16::MAX, 0, u16::MAX, 0); + let b = i16x8::new( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + ); + let c = i16x8::new( + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let e = i16x8::new( + i16::MAX, + i16::MIN | 1, + i16::MAX, + i16::MIN | 2, + i16::MAX, + i16::MIN, + i16::MAX, + i16::MIN, + ); + let r: i16x8 = transmute(vbslq_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s32() { + let a = u32x4::new(u32::MAX, 1, u32::MAX, 2); + let b = i32x4::new(i32::MAX, i32::MAX, i32::MAX, i32::MAX); + let c = i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN); + let e = i32x4::new(i32::MAX, i32::MIN | 1, i32::MAX, i32::MIN | 2); + let r: i32x4 = transmute(vbslq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, 
e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s64() { + let a = u64x2::new(u64::MAX, 1); + let b = i64x2::new(i64::MAX, i64::MAX); + let c = i64x2::new(i64::MIN, i64::MIN); + let e = i64x2::new(i64::MAX, i64::MIN | 1); + let r: i64x2 = transmute(vbslq_s64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u8() { + let a = u8x16::new( + u8::MAX, + 1, + u8::MAX, + 2, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + ); + let b = u8x16::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x16::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x16::new( + u8::MAX, + 1, + u8::MAX, + 2, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + ); + let r: u8x16 = transmute(vbslq_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u16() { + let a = u16x8::new(u16::MAX, 1, u16::MAX, 2, u16::MAX, 0, u16::MAX, 0); + let b = u16x8::new( + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + ); + let c = u16x8::new( + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + ); + let e = u16x8::new( + u16::MAX, + 1, + u16::MAX, + 2, + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + ); + let r: u16x8 = transmute(vbslq_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u32() { + let a = u32x4::new(u32::MAX, 1, u32::MAX, 2); + let b = u32x4::new(u32::MAX, 
u32::MAX, u32::MAX, u32::MAX); + let c = u32x4::new(u32::MIN, u32::MIN, u32::MIN, u32::MIN); + let e = u32x4::new(u32::MAX, 1, u32::MAX, 2); + let r: u32x4 = transmute(vbslq_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u64() { + let a = u64x2::new(u64::MAX, 1); + let b = u64x2::new(u64::MAX, u64::MAX); + let c = u64x2::new(u64::MIN, u64::MIN); + let e = u64x2::new(u64::MAX, 1); + let r: u64x2 = transmute(vbslq_u64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_f32() { + let a = u32x4::new(u32::MAX, 0, 1, 0x80000000); + let b = f32x4::new(-1.23f32, -1.23f32, 8388609f32, -1.23f32); + let c = f32x4::new(2.34f32, 2.34f32, 2097152f32, 2.34f32); + let e = f32x4::new(-1.23f32, 2.34f32, 2097152.25f32, -2.34f32); + let r: f32x4 = transmute(vbslq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_p8() { + let a = u8x16::new( + u8::MAX, + 1, + u8::MAX, + 2, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + ); + let b = u8x16::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x16::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x16::new( + u8::MAX, + 1, + u8::MAX, + 2, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + ); + let r: u8x16 = transmute(vbslq_p8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_p16() { + let a = u16x8::new(u16::MAX, 1, 
u16::MAX, 2, u16::MAX, 0, u16::MAX, 0); + let b = u16x8::new( + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + ); + let c = u16x8::new( + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + ); + let e = u16x8::new( + u16::MAX, + 1, + u16::MAX, + 2, + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + ); + let r: u16x8 = transmute(vbslq_p16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s8() { + let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i8x8::new(-2, -2, -2, -2, -2, -2, -2, -2); + let e = i8x8::new(1, -1, -1, -3, -3, -5, -5, -7); + let r: i8x8 = transmute(vorn_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s8() { + let a = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, + ); + let b = i8x16::new( + -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + ); + let e = i8x16::new( + 1, -1, -1, -3, -3, -5, -5, -7, -7, -9, -9, -11, -11, -13, -13, -15, + ); + let r: i8x16 = transmute(vornq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s16() { + let a = i16x4::new(0, -1, -2, -3); + let b = i16x4::new(-2, -2, -2, -2); + let e = i16x4::new(1, -1, -1, -3); + let r: i16x4 = transmute(vorn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s16() { + let a = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i16x8::new(-2, -2, -2, -2, -2, -2, -2, -2); + let e = i16x8::new(1, -1, -1, -3, -3, -5, -5, -7); + let r: i16x8 = transmute(vornq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s32() { + let a = i32x2::new(0, -1); + let b = i32x2::new(-2, -2); + let e = i32x2::new(1, -1); + let r: i32x2 
= transmute(vorn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s32() { + let a = i32x4::new(0, -1, -2, -3); + let b = i32x4::new(-2, -2, -2, -2); + let e = i32x4::new(1, -1, -1, -3); + let r: i32x4 = transmute(vornq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s64() { + let a = i64x1::new(0); + let b = i64x1::new(-2); + let e = i64x1::new(1); + let r: i64x1 = transmute(vorn_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s64() { + let a = i64x2::new(0, -1); + let b = i64x2::new(-2, -2); + let e = i64x2::new(1, -1); + let r: i64x2 = transmute(vornq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let t = u8::MAX - 1; + let b = u8x8::new(t, t, t, t, t, t, t, t); + let e = u8x8::new(1, 1, 3, 3, 5, 5, 7, 7); + let r: u8x8 = transmute(vorn_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let t = u8::MAX - 1; + let b = u8x16::new(t, t, t, t, t, t, t, t, t, t, t, t, t, t, t, t); + let e = u8x16::new(1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); + let r: u8x16 = transmute(vornq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u16() { + let a = u16x4::new(0, 1, 2, 3); + let t = u16::MAX - 1; + let b = u16x4::new(t, t, t, t); + let e = u16x4::new(1, 1, 3, 3); + let r: u16x4 = transmute(vorn_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let t = u16::MAX - 1; + let b = u16x8::new(t, t, t, t, t, t, t, t); + let e = 
u16x8::new(1, 1, 3, 3, 5, 5, 7, 7); + let r: u16x8 = transmute(vornq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u32() { + let a = u32x2::new(0, 1); + let t = u32::MAX - 1; + let b = u32x2::new(t, t); + let e = u32x2::new(1, 1); + let r: u32x2 = transmute(vorn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let t = u32::MAX - 1; + let b = u32x4::new(t, t, t, t); + let e = u32x4::new(1, 1, 3, 3); + let r: u32x4 = transmute(vornq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u64() { + let a = u64x1::new(0); + let t = u64::MAX - 1; + let b = u64x1::new(t); + let e = u64x1::new(1); + let r: u64x1 = transmute(vorn_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u64() { + let a = u64x2::new(0, 1); + let t = u64::MAX - 1; + let b = u64x2::new(t, t); + let e = u64x2::new(1, 1); + let r: u64x2 = transmute(vornq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vmovn_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vmovn_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s64() { + let a = i64x2::new(1, 2); + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vmovn_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = 
transmute(vmovn_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vmovn_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u64() { + let a = u64x2::new(1, 2); + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vmovn_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s8() { + let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vmovl_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s16() { + let e = i32x4::new(1, 2, 3, 4); + let a = i16x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vmovl_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s32() { + let e = i64x2::new(1, 2); + let a = i32x2::new(1, 2); + let r: i64x2 = transmute(vmovl_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u8() { + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vmovl_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u16() { + let e = u32x4::new(1, 2, 3, 4); + let a = u16x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vmovl_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u32() { + let e = u64x2::new(1, 2); + let a = u32x2::new(1, 2); + let r: u64x2 = transmute(vmovl_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s8() { + test_bit_s8(|i, j| vand_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s8() { + testq_bit_s8(|i, j| vandq_s8(i, j), |a: i8, b: i8| -> 
i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s16() { + test_bit_s16(|i, j| vand_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s16() { + testq_bit_s16(|i, j| vandq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s32() { + test_bit_s32(|i, j| vand_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s32() { + testq_bit_s32(|i, j| vandq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + test_bit_s64(|i, j| vand_s64(i, j), |a: i64, b: i64| -> i64 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + testq_bit_s64(|i, j| vandq_s64(i, j), |a: i64, b: i64| -> i64 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u8() { + test_bit_u8(|i, j| vand_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + testq_bit_u8(|i, j| vandq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + test_bit_u16(|i, j| vand_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + testq_bit_u16(|i, j| vandq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + test_bit_u32(|i, j| vand_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + testq_bit_u32(|i, j| vandq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + test_bit_u64(|i, j| vand_u64(i, j), |a: u64, b: u64| -> u64 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + testq_bit_u64(|i, j| vandq_u64(i, j), |a: u64, b: u64| -> u64 { a & b }); + } + + 
#[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + test_bit_s8(|i, j| vorr_s8(i, j), |a: i8, b: i8| -> i8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + testq_bit_s8(|i, j| vorrq_s8(i, j), |a: i8, b: i8| -> i8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + test_bit_s16(|i, j| vorr_s16(i, j), |a: i16, b: i16| -> i16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s16() { + testq_bit_s16(|i, j| vorrq_s16(i, j), |a: i16, b: i16| -> i16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + test_bit_s32(|i, j| vorr_s32(i, j), |a: i32, b: i32| -> i32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + testq_bit_s32(|i, j| vorrq_s32(i, j), |a: i32, b: i32| -> i32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + test_bit_s64(|i, j| vorr_s64(i, j), |a: i64, b: i64| -> i64 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + testq_bit_s64(|i, j| vorrq_s64(i, j), |a: i64, b: i64| -> i64 { a | b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + test_bit_u8(|i, j| vorr_u8(i, j), |a: u8, b: u8| -> u8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + testq_bit_u8(|i, j| vorrq_u8(i, j), |a: u8, b: u8| -> u8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + test_bit_u16(|i, j| vorr_u16(i, j), |a: u16, b: u16| -> u16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + testq_bit_u16(|i, j| vorrq_u16(i, j), |a: u16, b: u16| -> u16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + test_bit_u32(|i, j| vorr_u32(i, j), |a: u32, b: u32| -> u32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + testq_bit_u32(|i, j| vorrq_u32(i, j), |a: u32, b: u32| -> u32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe 
fn test_vorr_u64() { + test_bit_u64(|i, j| vorr_u64(i, j), |a: u64, b: u64| -> u64 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u64() { + testq_bit_u64(|i, j| vorrq_u64(i, j), |a: u64, b: u64| -> u64 { a | b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { + test_bit_s8(|i, j| veor_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s8() { + testq_bit_s8(|i, j| veorq_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s16() { + test_bit_s16(|i, j| veor_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s16() { + testq_bit_s16(|i, j| veorq_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s32() { + test_bit_s32(|i, j| veor_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s32() { + testq_bit_s32(|i, j| veorq_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s64() { + test_bit_s64(|i, j| veor_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + testq_bit_s64(|i, j| veorq_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u8() { + test_bit_u8(|i, j| veor_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + testq_bit_u8(|i, j| veorq_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u16() { + test_bit_u16(|i, j| veor_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + testq_bit_u16(|i, j| veorq_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u32() { + 
test_bit_u32(|i, j| veor_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + testq_bit_u32(|i, j| veorq_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u64() { + test_bit_u64(|i, j| veor_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + testq_bit_u64(|i, j| veorq_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s8() { + test_cmp_s8( + |i, j| vceq_s8(i, j), + |a: i8, b: i8| -> u8 { if a == b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s8() { + testq_cmp_s8( + |i, j| vceqq_s8(i, j), + |a: i8, b: i8| -> u8 { if a == b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s16() { + test_cmp_s16( + |i, j| vceq_s16(i, j), + |a: i16, b: i16| -> u16 { if a == b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s16() { + testq_cmp_s16( + |i, j| vceqq_s16(i, j), + |a: i16, b: i16| -> u16 { if a == b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s32() { + test_cmp_s32( + |i, j| vceq_s32(i, j), + |a: i32, b: i32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s32() { + testq_cmp_s32( + |i, j| vceqq_s32(i, j), + |a: i32, b: i32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u8() { + test_cmp_u8( + |i, j| vceq_u8(i, j), + |a: u8, b: u8| -> u8 { if a == b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u8() { + testq_cmp_u8( + |i, j| vceqq_u8(i, j), + |a: u8, b: u8| -> u8 { if a == b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u16() { + test_cmp_u16( + |i, j| vceq_u16(i, j), + 
|a: u16, b: u16| -> u16 { if a == b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u16() { + testq_cmp_u16( + |i, j| vceqq_u16(i, j), + |a: u16, b: u16| -> u16 { if a == b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u32() { + test_cmp_u32( + |i, j| vceq_u32(i, j), + |a: u32, b: u32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u32() { + testq_cmp_u32( + |i, j| vceqq_u32(i, j), + |a: u32, b: u32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f32() { + test_cmp_f32( + |i, j| vcge_f32(i, j), + |a: f32, b: f32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f32() { + testq_cmp_f32( + |i, j| vcgeq_f32(i, j), + |a: f32, b: f32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s8() { + test_cmp_s8( + |i, j| vcgt_s8(i, j), + |a: i8, b: i8| -> u8 { if a > b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s8() { + testq_cmp_s8( + |i, j| vcgtq_s8(i, j), + |a: i8, b: i8| -> u8 { if a > b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s16() { + test_cmp_s16( + |i, j| vcgt_s16(i, j), + |a: i16, b: i16| -> u16 { if a > b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s16() { + testq_cmp_s16( + |i, j| vcgtq_s16(i, j), + |a: i16, b: i16| -> u16 { if a > b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s32() { + test_cmp_s32( + |i, j| vcgt_s32(i, j), + |a: i32, b: i32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s32() { + testq_cmp_s32( + |i, j| vcgtq_s32(i, j), + |a: i32, b: i32| -> u32 { if a > b { 0xFFFFFFFF } 
else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u8() { + test_cmp_u8( + |i, j| vcgt_u8(i, j), + |a: u8, b: u8| -> u8 { if a > b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u8() { + testq_cmp_u8( + |i, j| vcgtq_u8(i, j), + |a: u8, b: u8| -> u8 { if a > b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u16() { + test_cmp_u16( + |i, j| vcgt_u16(i, j), + |a: u16, b: u16| -> u16 { if a > b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u16() { + testq_cmp_u16( + |i, j| vcgtq_u16(i, j), + |a: u16, b: u16| -> u16 { if a > b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u32() { + test_cmp_u32( + |i, j| vcgt_u32(i, j), + |a: u32, b: u32| -> u32 { if a > b { 0xFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u32() { + testq_cmp_u32( + |i, j| vcgtq_u32(i, j), + |a: u32, b: u32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f32() { + test_cmp_f32( + |i, j| vcgt_f32(i, j), + |a: f32, b: f32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f32() { + testq_cmp_f32( + |i, j| vcgtq_f32(i, j), + |a: f32, b: f32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s8() { + test_cmp_s8( + |i, j| vclt_s8(i, j), + |a: i8, b: i8| -> u8 { if a < b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s8() { + testq_cmp_s8( + |i, j| vcltq_s8(i, j), + |a: i8, b: i8| -> u8 { if a < b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s16() { + test_cmp_s16( + |i, j| vclt_s16(i, j), + |a: i16, b: i16| -> u16 { if a < b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn 
test_vcltq_s16() { + testq_cmp_s16( + |i, j| vcltq_s16(i, j), + |a: i16, b: i16| -> u16 { if a < b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s32() { + test_cmp_s32( + |i, j| vclt_s32(i, j), + |a: i32, b: i32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s32() { + testq_cmp_s32( + |i, j| vcltq_s32(i, j), + |a: i32, b: i32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u8() { + test_cmp_u8( + |i, j| vclt_u8(i, j), + |a: u8, b: u8| -> u8 { if a < b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u8() { + testq_cmp_u8( + |i, j| vcltq_u8(i, j), + |a: u8, b: u8| -> u8 { if a < b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u16() { + test_cmp_u16( + |i, j| vclt_u16(i, j), + |a: u16, b: u16| -> u16 { if a < b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u16() { + testq_cmp_u16( + |i, j| vcltq_u16(i, j), + |a: u16, b: u16| -> u16 { if a < b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u32() { + test_cmp_u32( + |i, j| vclt_u32(i, j), + |a: u32, b: u32| -> u32 { if a < b { 0xFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u32() { + testq_cmp_u32( + |i, j| vcltq_u32(i, j), + |a: u32, b: u32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f32() { + test_cmp_f32( + |i, j| vclt_f32(i, j), + |a: f32, b: f32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f32() { + testq_cmp_f32( + |i, j| vcltq_f32(i, j), + |a: f32, b: f32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s8() { + test_cmp_s8( + |i, j| vcle_s8(i, j), 
+ |a: i8, b: i8| -> u8 { if a <= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s8() { + testq_cmp_s8( + |i, j| vcleq_s8(i, j), + |a: i8, b: i8| -> u8 { if a <= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s16() { + test_cmp_s16( + |i, j| vcle_s16(i, j), + |a: i16, b: i16| -> u16 { if a <= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s16() { + testq_cmp_s16( + |i, j| vcleq_s16(i, j), + |a: i16, b: i16| -> u16 { if a <= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s32() { + test_cmp_s32( + |i, j| vcle_s32(i, j), + |a: i32, b: i32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s32() { + testq_cmp_s32( + |i, j| vcleq_s32(i, j), + |a: i32, b: i32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u8() { + test_cmp_u8( + |i, j| vcle_u8(i, j), + |a: u8, b: u8| -> u8 { if a <= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u8() { + testq_cmp_u8( + |i, j| vcleq_u8(i, j), + |a: u8, b: u8| -> u8 { if a <= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u16() { + test_cmp_u16( + |i, j| vcle_u16(i, j), + |a: u16, b: u16| -> u16 { if a <= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u16() { + testq_cmp_u16( + |i, j| vcleq_u16(i, j), + |a: u16, b: u16| -> u16 { if a <= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u32() { + test_cmp_u32( + |i, j| vcle_u32(i, j), + |a: u32, b: u32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u32() { + testq_cmp_u32( + |i, j| vcleq_u32(i, j), + |a: u32, b: u32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } }, + ); 
+ } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f32() { + test_cmp_f32( + |i, j| vcle_f32(i, j), + |a: f32, b: f32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f32() { + testq_cmp_f32( + |i, j| vcleq_f32(i, j), + |a: f32, b: f32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s8() { + test_cmp_s8( + |i, j| vcge_s8(i, j), + |a: i8, b: i8| -> u8 { if a >= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s8() { + testq_cmp_s8( + |i, j| vcgeq_s8(i, j), + |a: i8, b: i8| -> u8 { if a >= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s16() { + test_cmp_s16( + |i, j| vcge_s16(i, j), + |a: i16, b: i16| -> u16 { if a >= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s16() { + testq_cmp_s16( + |i, j| vcgeq_s16(i, j), + |a: i16, b: i16| -> u16 { if a >= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s32() { + test_cmp_s32( + |i, j| vcge_s32(i, j), + |a: i32, b: i32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s32() { + testq_cmp_s32( + |i, j| vcgeq_s32(i, j), + |a: i32, b: i32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u8() { + test_cmp_u8( + |i, j| vcge_u8(i, j), + |a: u8, b: u8| -> u8 { if a >= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u8() { + testq_cmp_u8( + |i, j| vcgeq_u8(i, j), + |a: u8, b: u8| -> u8 { if a >= b { 0xFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u16() { + test_cmp_u16( + |i, j| vcge_u16(i, j), + |a: u16, b: u16| -> u16 { if a >= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u16() { + 
testq_cmp_u16( + |i, j| vcgeq_u16(i, j), + |a: u16, b: u16| -> u16 { if a >= b { 0xFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u32() { + test_cmp_u32( + |i, j| vcge_u32(i, j), + |a: u32, b: u32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u32() { + testq_cmp_u32( + |i, j| vcgeq_u32(i, j), + |a: u32, b: u32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f32() { + test_cmp_f32( + |i, j| vcge_f32(i, j), + |a: f32, b: f32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f32() { + testq_cmp_f32( + |i, j| vcgeq_f32(i, j), + |a: f32, b: f32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s8() { + test_ari_s8( + |i, j| vqsub_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s8() { + testq_ari_s8( + |i, j| vqsubq_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s16() { + test_ari_s16( + |i, j| vqsub_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s16() { + testq_ari_s16( + |i, j| vqsubq_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s32() { + test_ari_s32( + |i, j| vqsub_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s32() { + testq_ari_s32( + |i, j| vqsubq_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_sub(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u8() { + test_ari_u8( + |i, j| vqsub_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_sub(b) }, + ); + } + 
#[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u8() { + testq_ari_u8( + |i, j| vqsubq_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u16() { + test_ari_u16( + |i, j| vqsub_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u16() { + testq_ari_u16( + |i, j| vqsubq_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u32() { + test_ari_u32( + |i, j| vqsub_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u32() { + testq_ari_u32( + |i, j| vqsubq_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_sub(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s8() { + test_ari_s8(|i, j| vhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s8() { + testq_ari_s8(|i, j| vhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s16() { + test_ari_s16(|i, j| vhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s16() { + testq_ari_s16(|i, j| vhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s32() { + test_ari_s32(|i, j| vhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s32() { + testq_ari_s32(|i, j| vhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u8() { + test_ari_u8(|i, j| vhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u8() { + testq_ari_u8(|i, j| vhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn 
test_vhadd_u16() { + test_ari_u16(|i, j| vhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u16() { + testq_ari_u16(|i, j| vhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u32() { + test_ari_u32(|i, j| vhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u32() { + testq_ari_u32(|i, j| vhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s8() { + test_ari_s8(|i, j| vrhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s8() { + testq_ari_s8(|i, j| vrhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s16() { + test_ari_s16(|i, j| vrhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s16() { + testq_ari_s16(|i, j| vrhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s32() { + test_ari_s32(|i, j| vrhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s32() { + testq_ari_s32(|i, j| vrhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u8() { + test_ari_u8(|i, j| vrhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u8() { + testq_ari_u8(|i, j| vrhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u16() { + test_ari_u16(|i, j| vrhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u16() { + testq_ari_u16(|i, j| vrhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + 
unsafe fn test_vrhadd_u32() { + test_ari_u32(|i, j| vrhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u32() { + testq_ari_u32(|i, j| vrhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s8() { + test_ari_s8( + |i, j| vqadd_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s8() { + testq_ari_s8( + |i, j| vqaddq_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s16() { + test_ari_s16( + |i, j| vqadd_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s16() { + testq_ari_s16( + |i, j| vqaddq_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s32() { + test_ari_s32( + |i, j| vqadd_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s32() { + testq_ari_s32( + |i, j| vqaddq_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_add(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u8() { + test_ari_u8( + |i, j| vqadd_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u8() { + testq_ari_u8( + |i, j| vqaddq_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u16() { + test_ari_u16( + |i, j| vqadd_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u16() { + testq_ari_u16( + |i, j| vqaddq_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u32() { + test_ari_u32( + |i, j| 
vqadd_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u32() { + testq_ari_u32( + |i, j| vqaddq_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_add(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s8() { + test_ari_s8( + |i, j| vmul_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s8() { + testq_ari_s8( + |i, j| vmulq_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s16() { + test_ari_s16( + |i, j| vmul_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s16() { + testq_ari_s16( + |i, j| vmulq_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s32() { + test_ari_s32( + |i, j| vmul_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s32() { + testq_ari_s32( + |i, j| vmulq_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u8() { + test_ari_u8( + |i, j| vmul_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u8() { + testq_ari_u8( + |i, j| vmulq_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u16() { + test_ari_u16( + |i, j| vmul_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u16() { + testq_ari_u16( + |i, j| vmulq_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u32() { + test_ari_u32( + |i, j| vmul_u32(i, 
j), + |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u32() { + testq_ari_u32( + |i, j| vmulq_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f32() { + test_ari_f32(|i, j| vmul_f32(i, j), |a: f32, b: f32| -> f32 { a * b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f32() { + testq_ari_f32(|i, j| vmulq_f32(i, j), |a: f32, b: f32| -> f32 { a * b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s8() { + test_ari_s8(|i, j| vsub_s8(i, j), |a: i8, b: i8| -> i8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s8() { + testq_ari_s8(|i, j| vsubq_s8(i, j), |a: i8, b: i8| -> i8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s16() { + test_ari_s16(|i, j| vsub_s16(i, j), |a: i16, b: i16| -> i16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s16() { + testq_ari_s16(|i, j| vsubq_s16(i, j), |a: i16, b: i16| -> i16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s32() { + test_ari_s32(|i, j| vsub_s32(i, j), |a: i32, b: i32| -> i32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s32() { + testq_ari_s32(|i, j| vsubq_s32(i, j), |a: i32, b: i32| -> i32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u8() { + test_ari_u8(|i, j| vsub_u8(i, j), |a: u8, b: u8| -> u8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u8() { + testq_ari_u8(|i, j| vsubq_u8(i, j), |a: u8, b: u8| -> u8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u16() { + test_ari_u16(|i, j| vsub_u16(i, j), |a: u16, b: u16| -> u16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u16() { + testq_ari_u16(|i, j| vsubq_u16(i, j), |a: u16, b: u16| -> u16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u32() { + test_ari_u32(|i, j| 
vsub_u32(i, j), |a: u32, b: u32| -> u32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u32() { + testq_ari_u32(|i, j| vsubq_u32(i, j), |a: u32, b: u32| -> u32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_f32() { + test_ari_f32(|i, j| vsub_f32(i, j), |a: f32, b: f32| -> f32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_f32() { + testq_ari_f32(|i, j| vsubq_f32(i, j), |a: f32, b: f32| -> f32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s8() { + test_ari_s8( + |i, j| vhsub_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s8() { + testq_ari_s8( + |i, j| vhsubq_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s16() { + test_ari_s16( + |i, j| vhsub_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s16() { + testq_ari_s16( + |i, j| vhsubq_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s32() { + test_ari_s32( + |i, j| vhsub_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s32() { + testq_ari_s32( + |i, j| vhsubq_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u8() { + test_ari_u8( + |i, j| vhsub_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u8() { + testq_ari_u8( + |i, j| vhsubq_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn 
test_vhsub_u16() { + test_ari_u16( + |i, j| vhsub_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u16) - (b as u16)) / 2) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u16() { + testq_ari_u16( + |i, j| vhsubq_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u16) - (b as u16)) / 2) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u32() { + test_ari_u32( + |i, j| vhsub_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u32() { + testq_ari_u32( + |i, j| vhsubq_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaba_s8() { + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = i8x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: i8x8 = transmute(vaba_s8(transmute(a), transmute(b), transmute(c))); + let e = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_s16() { + let a = i16x4::new(1, 2, 3, 4); + let b = i16x4::new(1, 1, 1, 1); + let c = i16x4::new(10, 9, 8, 7); + let r: i16x4 = transmute(vaba_s16(transmute(a), transmute(b), transmute(c))); + let e = i16x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_s32() { + let a = i32x2::new(1, 2); + let b = i32x2::new(1, 1); + let c = i32x2::new(10, 9); + let r: i32x2 = transmute(vaba_s32(transmute(a), transmute(b), transmute(c))); + let e = i32x2::new(10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = u8x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: u8x8 = transmute(vaba_u8(transmute(a), transmute(b), transmute(c))); + let e = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + 
#[simd_test(enable = "neon")] + unsafe fn test_vaba_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(1, 1, 1, 1); + let c = u16x4::new(10, 9, 8, 7); + let r: u16x4 = transmute(vaba_u16(transmute(a), transmute(b), transmute(c))); + let e = u16x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(1, 1); + let c = u32x2::new(10, 9); + let r: u32x2 = transmute(vaba_u32(transmute(a), transmute(b), transmute(c))); + let e = u32x2::new(10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2); + let b = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let c = i8x16::new(10, 9, 8, 7, 6, 5, 4, 3, 12, 13, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vabaq_s8(transmute(a), transmute(b), transmute(c))); + let e = i8x16::new( + 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, + ); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = i16x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: i16x8 = transmute(vabaq_s16(transmute(a), transmute(b), transmute(c))); + let e = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_s32() { + let a = i32x4::new(1, 2, 3, 4); + let b = i32x4::new(1, 1, 1, 1); + let c = i32x4::new(10, 9, 8, 7); + let r: i32x4 = transmute(vabaq_s32(transmute(a), transmute(b), transmute(c))); + let e = i32x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2); + let b = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let c = u8x16::new(10, 9, 8, 7, 6, 5, 4, 3, 12, 13, 14, 15, 
16, 17, 18, 19); + let r: u8x16 = transmute(vabaq_u8(transmute(a), transmute(b), transmute(c))); + let e = u8x16::new( + 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, + ); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = u16x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: u16x8 = transmute(vabaq_u16(transmute(a), transmute(b), transmute(c))); + let e = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_u32() { + let a = u32x4::new(1, 2, 3, 4); + let b = u32x4::new(1, 1, 1, 1); + let c = u32x4::new(10, 9, 8, 7); + let r: u32x4 = transmute(vabaq_u32(transmute(a), transmute(b), transmute(c))); + let e = u32x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrev16_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i8x8 = transmute(vrev16_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16q_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + let e: i8x16 = transmute(vrev16q_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: u8x8 = transmute(vrev16_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16q_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + let e: u8x16 = transmute(vrev16q_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16_p8() { + 
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i8x8 = transmute(vrev16_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16q_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + let e: u8x16 = transmute(vrev16q_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: i8x8 = transmute(vrev32_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + let e: i8x16 = transmute(vrev32q_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u8x8 = transmute(vrev32_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + let e: u8x16 = transmute(vrev32q_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_s16() { + let a = i16x4::new(0, 1, 2, 3); + let r = i16x4::new(1, 0, 3, 2); + let e: i16x4 = transmute(vrev32_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i16x8 = transmute(vrev32q_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn 
test_vrev32_p16() { + let a = i16x4::new(0, 1, 2, 3); + let r = i16x4::new(1, 0, 3, 2); + let e: i16x4 = transmute(vrev32_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_p16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i16x8 = transmute(vrev32q_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_u16() { + let a = u16x4::new(0, 1, 2, 3); + let r = u16x4::new(1, 0, 3, 2); + let e: u16x4 = transmute(vrev32_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: u16x8 = transmute(vrev32q_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u8x8 = transmute(vrev32_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + let e: u8x16 = transmute(vrev32q_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(7, 6, 5, 4, 3, 2, 1, 0); + let e: i8x8 = transmute(vrev64_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + let e: i8x16 = transmute(vrev64q_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_s16() { + let a = i16x4::new(0, 1, 2, 3); + let r 
= i16x4::new(3, 2, 1, 0); + let e: i16x4 = transmute(vrev64_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i16x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: i16x8 = transmute(vrev64q_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_s32() { + let a = i32x2::new(0, 1); + let r = i32x2::new(1, 0); + let e: i32x2 = transmute(vrev64_s32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_s32() { + let a = i32x4::new(0, 1, 2, 3); + let r = i32x4::new(1, 0, 3, 2); + let e: i32x4 = transmute(vrev64q_s32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(7, 6, 5, 4, 3, 2, 1, 0); + let e: u8x8 = transmute(vrev64_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + let e: u8x16 = transmute(vrev64q_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_u16() { + let a = u16x4::new(0, 1, 2, 3); + let r = u16x4::new(3, 2, 1, 0); + let e: u16x4 = transmute(vrev64_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u16x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u16x8 = transmute(vrev64q_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_u32() { + let a = u32x2::new(0, 1); + let r = u32x2::new(1, 0); + let e: u32x2 = transmute(vrev64_u32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_u32() { + let a = 
u32x4::new(0, 1, 2, 3); + let r = u32x4::new(1, 0, 3, 2); + let e: u32x4 = transmute(vrev64q_u32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_f32() { + let a = f32x2::new(1.0, 2.0); + let r = f32x2::new(2.0, 1.0); + let e: f32x2 = transmute(vrev64_f32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_f32() { + let a = f32x4::new(1.0, 2.0, -2.0, -1.0); + let r = f32x4::new(2.0, 1.0, -1.0, -2.0); + let e: f32x4 = transmute(vrev64q_f32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(7, 6, 5, 4, 3, 2, 1, 0); + let e: u8x8 = transmute(vrev64_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + let e: u8x16 = transmute(vrev64q_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_p16() { + let a = u16x4::new(0, 1, 2, 3); + let r = u16x4::new(3, 2, 1, 0); + let e: u16x4 = transmute(vrev64_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_p16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u16x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u16x8 = transmute(vrev64q_p16(transmute(a))); + assert_eq!(r, e); + } + + macro_rules! 
test_vcombine { + ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => { + #[allow(unused_assignments)] + #[simd_test(enable = "neon")] + unsafe fn $test_id() { + let a = [$($a),*]; + let b = [$($b),*]; + let e = [$($a),* $(, $b)*]; + let c = $fn_id(transmute(a), transmute(b)); + let mut d = e; + d = transmute(c); + assert_eq!(d, e); + } + } + } + + test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110])); + test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110])); + test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110])); + + test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16])); + test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16])); + test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16])); + test_vcombine!(test_vcombine_f16 => vcombine_f16([3_f16, 4., 5., 6.], + [13_f16, 14., 15., 16.])); + + test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14])); + test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14])); + // note: poly32x4 does not exist, and neither does vcombine_p32 + test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.])); + + test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64])); + test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64])); + test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64])); + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64])); +} + +#[cfg(all(test, target_arch = "arm"))] +mod table_lookup_tests; + +#[cfg(all(test, target_arch = "arm"))] +mod shift_and_insert_tests; + +#[cfg(all(test, target_arch = "arm"))] +mod load_tests; + +#[cfg(all(test, 
target_arch = "arm"))] +mod store_tests; diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs new file mode 100644 index 000000000000..cfb1a2843a31 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs @@ -0,0 +1,93 @@ +//! Tests for ARM+v7+neon shift and insert (vsli[q]_n, vsri[q]_n) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use crate::core_arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +use crate::core_arch::simd::*; +use std::mem::transmute; +use stdarch_test::simd_test; + +macro_rules! test_vsli { + ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => { + #[simd_test(enable = "neon")] + #[allow(unused_assignments)] + unsafe fn $test_id() { + let a = [$($a as $t),*]; + let b = [$($b as $t),*]; + let n_bit_mask: $t = (1 << $n) - 1; + let e = [$(($a as $t & n_bit_mask) | (($b as $t) << $n)),*]; + let r = $fn_id::<$n>(transmute(a), transmute(b)); + let mut d = e; + d = transmute(r); + assert_eq!(d, e); + } + } +} +test_vsli!(test_vsli_n_s8, i8 => vsli_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5)); +test_vsli!(test_vsliq_n_s8, i8 => vsliq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2)); +test_vsli!(test_vsli_n_s16, i16 => vsli_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7)); +test_vsli!(test_vsliq_n_s16, i16 => vsliq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14)); +test_vsli!(test_vsli_n_s32, i32 => vsli_n_s32([125683, -78901], [-128, -112944], 23)); +test_vsli!(test_vsliq_n_s32, i32 => vsliq_n_s32([125683, -78901, 
127, -12009], [-128, -112944, 125, -707], 15)); +test_vsli!(test_vsli_n_s64, i64 => vsli_n_s64([-333333], [1028], 45)); +test_vsli!(test_vsliq_n_s64, i64 => vsliq_n_s64([-333333, -52023], [1028, -99814], 33)); +test_vsli!(test_vsli_n_u8, u8 => vsli_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsli!(test_vsliq_n_u8, u8 => vsliq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsli!(test_vsli_n_u16, u16 => vsli_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsli!(test_vsliq_n_u16, u16 => vsliq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +test_vsli!(test_vsli_n_u32, u32 => vsli_n_u32([125683, 78901], [128, 112944], 23)); +test_vsli!(test_vsliq_n_u32, u32 => vsliq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15)); +test_vsli!(test_vsli_n_u64, u64 => vsli_n_u64([333333], [1028], 45)); +test_vsli!(test_vsliq_n_u64, u64 => vsliq_n_u64([333333, 52023], [1028, 99814], 33)); +test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +//test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45)); +//test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33)); + +macro_rules! 
test_vsri { + ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => { + #[simd_test(enable = "neon")] + #[allow(unused_assignments)] + unsafe fn $test_id() { + let a = [$($a as $t),*]; + let b = [$($b as $t),*]; + let n_bit_mask = (((1 as $t) << $n) - 1).rotate_right($n); + let e = [$(($a as $t & n_bit_mask) | (($b as $t >> $n) & !n_bit_mask)),*]; + let r = $fn_id::<$n>(transmute(a), transmute(b)); + let mut d = e; + d = transmute(r); + assert_eq!(d, e); + } + } +} +test_vsri!(test_vsri_n_s8, i8 => vsri_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5)); +test_vsri!(test_vsriq_n_s8, i8 => vsriq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2)); +test_vsri!(test_vsri_n_s16, i16 => vsri_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7)); +test_vsri!(test_vsriq_n_s16, i16 => vsriq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14)); +test_vsri!(test_vsri_n_s32, i32 => vsri_n_s32([125683, -78901], [-128, -112944], 23)); +test_vsri!(test_vsriq_n_s32, i32 => vsriq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15)); +test_vsri!(test_vsri_n_s64, i64 => vsri_n_s64([-333333], [1028], 45)); +test_vsri!(test_vsriq_n_s64, i64 => vsriq_n_s64([-333333, -52023], [1028, -99814], 33)); +test_vsri!(test_vsri_n_u8, u8 => vsri_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsri!(test_vsriq_n_u8, u8 => vsriq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsri!(test_vsri_n_u16, u16 => vsri_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsri!(test_vsriq_n_u16, u16 => vsriq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); 
+test_vsri!(test_vsri_n_u32, u32 => vsri_n_u32([125683, 78901], [128, 112944], 23)); +test_vsri!(test_vsriq_n_u32, u32 => vsriq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15)); +test_vsri!(test_vsri_n_u64, u64 => vsri_n_u64([333333], [1028], 45)); +test_vsri!(test_vsriq_n_u64, u64 => vsriq_n_u64([333333, 52023], [1028, 99814], 33)); +test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +//test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45)); +//test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33)); diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs new file mode 100644 index 000000000000..6b5d4a19ad57 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs @@ -0,0 +1,389 @@ +//! Tests for ARM+v7+neon store (vst1) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. 
+ +use super::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; +use stdarch_test::simd_test; + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s8() { + let mut vals = [0_i8; 9]; + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_s8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s8() { + let mut vals = [0_i8; 17]; + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_s8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s16() { + let mut vals = [0_i16; 5]; + let a = i16x4::new(1, 2, 3, 4); + + vst1_s16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s16() { + let mut vals = [0_i16; 9]; + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_s16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + 
assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s32() { + let mut vals = [0_i32; 3]; + let a = i32x2::new(1, 2); + + vst1_s32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s32() { + let mut vals = [0_i32; 5]; + let a = i32x4::new(1, 2, 3, 4); + + vst1q_s32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s64() { + let mut vals = [0_i64; 2]; + let a = i64x1::new(1); + + vst1_s64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s64() { + let mut vals = [0_i64; 3]; + let a = i64x2::new(1, 2); + + vst1q_s64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u8() { + let mut vals = [0_u8; 9]; + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_u8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u8() { + let mut vals = [0_u8; 17]; + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_u8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + 
assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u16() { + let mut vals = [0_u16; 5]; + let a = u16x4::new(1, 2, 3, 4); + + vst1_u16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u16() { + let mut vals = [0_u16; 9]; + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_u16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u32() { + let mut vals = [0_u32; 3]; + let a = u32x2::new(1, 2); + + vst1_u32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u32() { + let mut vals = [0_u32; 5]; + let a = u32x4::new(1, 2, 3, 4); + + vst1q_u32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u64() { + let mut vals = [0_u64; 2]; + let a = u64x1::new(1); + + vst1_u64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u64() { + let mut vals = [0_u64; 3]; + let a = u64x2::new(1, 2); + + vst1q_u64(vals[1..].as_mut_ptr(), transmute(a)); + + 
assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_p8() { + let mut vals = [0_u8; 9]; + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_p8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_p8() { + let mut vals = [0_u8; 17]; + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_p8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_p16() { + let mut vals = [0_u16; 5]; + let a = u16x4::new(1, 2, 3, 4); + + vst1_p16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_p16() { + let mut vals = [0_u16; 9]; + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_p16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vst1_p64() { + let 
mut vals = [0_u64; 2]; + let a = u64x1::new(1); + + vst1_p64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vst1q_p64() { + let mut vals = [0_u64; 3]; + let a = u64x2::new(1, 2); + + vst1q_p64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_f32() { + let mut vals = [0_f32; 3]; + let a = f32x2::new(1., 2.); + + vst1_f32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_f32() { + let mut vals = [0_f32; 5]; + let a = f32x4::new(1., 2., 3., 4.); + + vst1q_f32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); + assert_eq!(vals[3], 3.); + assert_eq!(vals[4], 4.); +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs new file mode 100644 index 000000000000..9403855f00e0 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs @@ -0,0 +1,1044 @@ +//! Tests for ARM+v7+neon table lookup (vtbl, vtbx) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use crate::core_arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +use crate::core_arch::simd::*; +use std::mem; +use stdarch_test::simd_test; + +macro_rules! 
test_vtbl { + ($test_name:ident => $fn_id:ident: + - table[$table_t:ident]: [$($table_v:expr),*] | + $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* + ) => { + #[cfg(target_endian = "little")] + #[simd_test(enable = "neon")] + unsafe fn $test_name() { + // create table as array, and transmute it to + // arm's table type + let table: $table_t = mem::transmute([$($table_v),*]); + + // For each control vector, perform a table lookup and + // verify the result: + $( + { + let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]); + let result = $fn_id(table, mem::transmute(ctrl)); + let result: $ctrl_t = mem::transmute(result); + let expected: $ctrl_t = mem::transmute([$($exp_v),*]); + assert_eq!(result, expected); + } + )* + } + } +} + +// ARM+v7+neon and AArch64+neon tests + +test_vtbl!( + test_vtbl1_s8 => vtbl1_s8: + - table[int8x8_t]: [0_i8, -11, 2, 3, 4, 5, 6, 7] | + - ctrl[i8x8]: [3_i8, 4, 1, 6, 0, 2, 7, 5] => [3_i8, 4, -11, 6, 0, 2, 7, 5] | + - ctrl[i8x8]: [3_i8, 8, 1, -9, 10, 2, 15, 5] => [3_i8, 0, -11, 0, 0, 2, 0, 5] +); + +test_vtbl!( + test_vtbl1_u8 => vtbl1_u8: + - table[uint8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 0, 1, 0, 0, 2, 0, 5] +); + +test_vtbl!( + test_vtbl1_p8 => vtbl1_p8: + - table[poly8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 0, 1, 0, 0, 2, 0, 5] +); + +test_vtbl!( + test_vtbl2_s8 => vtbl2_s8: + - table[int8x8x2_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [0_i8, -121, -17, -72, 34, -116, 51, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, -19, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, 0] +); + +test_vtbl!( + test_vtbl2_u8 => vtbl2_u8: + - table[uint8x8x2_t]: [ + 0_u8, 17, 
34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 255, 17, 238, 34, 221, 51, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 0] +); + +test_vtbl!( + test_vtbl2_p8 => vtbl2_p8: + - table[poly8x8x2_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 255, 17, 238, 34, 221, 51, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 0] +); + +test_vtbl!( + test_vtbl3_s8 => vtbl3_s8: + - table[int8x8x3_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121, + 0, 1, -2, 3, 4, -5, 6, 7 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 19, 2, 13, 21, 12] => [0_i8, -121, -17, 3, 34, -116, -5, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, -27, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, -2] +); + +test_vtbl!( + test_vtbl3_u8 => vtbl3_u8: + - table[uint8x8x3_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 2] +); + +test_vtbl!( + test_vtbl3_p8 => vtbl3_p8: + - table[poly8x8x3_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 2] +); + +test_vtbl!( + test_vtbl4_s8 => vtbl4_s8: + - table[int8x8x4_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121, + 0, 1, -2, 3, 4, -5, 6, 7, + 8, -9, 10, 11, 12, -13, 14, 15 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 19, 2, 13, 25, 12] => [0_i8, 
-121, -17, 3, 34, -116, -9, -104] | + - ctrl[i8x8]: [4_i8, 11, 32, 10, -33, 27, 7, 18] => [68_i8, -117, 0, -84, 0, 11, 119, -2] +); + +test_vtbl!( + test_vtbl4_u8 => vtbl4_u8: + - table[uint8x8x4_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 11, 119, 2] +); + +test_vtbl!( + test_vtbl4_p8 => vtbl4_p8: + - table[poly8x8x4_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 11, 119, 2] +); + +macro_rules! test_vtbx { + ($test_name:ident => $fn_id:ident: + - table[$table_t:ident]: [$($table_v:expr),*] | + - ext[$ext_t:ident]: [$($ext_v:expr),*] | + $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* + ) => { + #[cfg(target_endian = "little")] + #[simd_test(enable = "neon")] + unsafe fn $test_name() { + // create table as array, and transmute it to + // arm's table type + let table: $table_t = mem::transmute([$($table_v),*]); + let ext: $ext_t = mem::transmute([$($ext_v),*]); + + // For each control vector, perform a table lookup and + // verify the result: + $( + { + let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]); + let result = $fn_id(ext, table, mem::transmute(ctrl)); + let result: $ctrl_t = mem::transmute(result); + let expected: $ctrl_t = mem::transmute([$($exp_v),*]); + assert_eq!(result, expected); + } + )* + } + } +} + +test_vtbx!( + test_vtbx1_s8 => vtbx1_s8: + - table[int8x8_t]: [0_i8, 1, 2, -3, 4, 5, 6, 7] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 4, 1, 6, 0, 2, 7, 5] 
=> [-3_i8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[i8x8]: [3_i8, 8, 1, 9, 10, 2, -15, 5] => [-3_i8, 51, 1, 53, 54, 2, 56, 5] +); + +test_vtbx!( + test_vtbx1_u8 => vtbx1_u8: + - table[uint8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ext[uint8x8_t]: [50_u8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 51, 1, 53, 54, 2, 56, 5] +); + +test_vtbx!( + test_vtbx1_p8 => vtbx1_p8: + - table[poly8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ext[poly8x8_t]: [50_u8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 51, 1, 53, 54, 2, 56, 5] +); + +test_vtbx!( + test_vtbx2_s8 => vtbx2_s8: + - table[int8x8x2_t]: [0_i8, 1, 2, -3, 4, 5, 6, 7, 8, 9, -10, 11, 12, -13, 14, 15] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 4, 1, 6, 10, 2, 7, 15] => [-3_i8, 4, 1, 6, -10, 2, 7, 15] | + - ctrl[i8x8]: [3_i8, 8, 1, 10, 17, 2, 15, -19] => [-3_i8, 8, 1, -10, 54, 2, 15, 57] +); + +test_vtbx!( + test_vtbx2_u8 => vtbx2_u8: + - table[uint8x8x2_t]: [0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | + - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 10, 2, 7, 15] => [3_i8, 4, 1, 6, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 1, 10, 17, 2, 15, 19] => [3_i8, 8, 1, 10, 54, 2, 15, 57] +); + +test_vtbx!( + test_vtbx2_p8 => vtbx2_p8: + - table[poly8x8x2_t]: [0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | + - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 10, 2, 7, 15] => [3_i8, 4, 1, 6, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 1, 10, 17, 2, 15, 19] => [3_i8, 8, 1, 10, 54, 2, 15, 57] +); + +test_vtbx!( + test_vtbx3_s8 => vtbx3_s8: + - table[int8x8x3_t]: [ + 0_i8, 1, 2, -3, 4, 5, 6, 7, + 8, 9, -10, 11, 12, -13, 14, 15, + 16, -17, 18, 19, 20, 21, 22, 23 ] | + - 
ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 4, 17, 22, 10, 2, 7, 15] => [-3_i8, 4, -17, 22, -10, 2, 7, 15] | + - ctrl[i8x8]: [3_i8, 8, 17, 10, 37, 2, 19, -29] => [-3_i8, 8, -17, -10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx3_u8 => vtbx3_u8: + - table[uint8x8x3_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23 ] | + - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 17, 22, 10, 2, 7, 15] => [3_i8, 4, 17, 22, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 29] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx3_p8 => vtbx3_p8: + - table[poly8x8x3_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23 ] | + - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 17, 22, 10, 2, 7, 15] => [3_i8, 4, 17, 22, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 29] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx4_s8 => vtbx4_s8: + - table[int8x8x4_t]: [ + 0_i8, 1, 2, -3, 4, 5, 6, 7, + 8, 9, -10, 11, 12, -13, 14, 15, + 16, -17, 18, 19, 20, 21, 22, 23, + -24, 25, 26, -27, 28, -29, 30, 31] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 31, 17, 22, 10, 29, 7, 15] => [-3_i8, 31, -17, 22, -10, -29, 7, 15] | + - ctrl[i8x8]: [3_i8, 8, 17, 10, 37, 2, 19, -42] => [-3_i8, 8, -17, -10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx4_u8 => vtbx4_u8: + - table[uint8x8x4_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31] | + - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 31, 17, 22, 10, 29, 7, 15] => [3_i8, 31, 17, 22, 10, 29, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 42] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx4_p8 => vtbx4_p8: + - table[poly8x8x4_t]: [ + 0_i8, 
1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31] | + - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 31, 17, 22, 10, 29, 7, 15] => [3_i8, 31, 17, 22, 10, 29, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 42] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +// Aarch64 tests + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl1_s8 => vqtbl1_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [0_i8, -121, -17, -72, 34, -116, 51, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, 19, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, 0] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl1q_s8 => vqtbl1q_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ctrl[i8x16]: [127_i8, 15, 1, 14, 2, 13, 3, 12, 4_i8, 11, 16, 10, 6, 19, 7, 18] + => [0_i8, -121, -17, -72, 34, -116, 51, -104, 68, -117, 0, -84, 102, 0, 119, 0] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl1_u8 => vqtbl1_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl1q_u8 => vqtbl1q_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [0_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(any(target_arch = 
"aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl1_p8 => vqtbl1_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl1q_p8 => vqtbl1q_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [0_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl2_s8 => vqtbl2_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 31, 32, 10, 6, 49, 7, 18] => [4_i8, -31, 0, 10, 6, 0, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl2q_s8 => vqtbl2q_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 31, 32, 10, 6, 49, 7, 18] + => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, -31, 0, 10, 6, 0, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl2_u8 => vqtbl2_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 
24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl2q_u8 => vqtbl2q_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl2_p8 => vqtbl2_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl2q_p8 => vqtbl2q_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl3_s8 => vqtbl3_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 32, 46, 51, 6, 49, 7, 18] => [4_i8, 32, 46, 0, 6, 0, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] 
+test_vtbl!( + test_vqtbl3q_s8 => vqtbl3q_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 32, 46, 51, 6, 49, 7, 18] + => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, 32, 46, 0, 6, 0, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl3_u8 => vqtbl3_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl3q_u8 => vqtbl3q_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl3_p8 => vqtbl3_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = 
"aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl3q_p8 => vqtbl3q_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl4_s8 => vqtbl4_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 46, 64, 51, 6, 71, 7, 18] => [4_i8, 46, 0, -51, 6, 0, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl4q_s8 => vqtbl4q_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 46, 64, 51, 6, 71, 7, 18] + => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, 46, 0, -51, 6, 0, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl4_u8 => vqtbl4_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 
45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl4q_u8 => vqtbl4q_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl4_p8 => vqtbl4_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbl!( + test_vqtbl4q_p8 => vqtbl4q_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", 
target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx1_s8 => vqtbx1_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [100_i8, -121, -17, -72, 34, -116, 51, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, 19, 7, 18] => [68_i8, -117, 102, -84, 102, -105, 119, -107] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx1q_s8 => vqtbx1q_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [127_i8, 15, 1, 14, 2, 13, 3, 12, 4_i8, 11, 16, 10, 6, 19, 7, 18] + => [100_i8, -121, -17, -72, 34, -116, 51, -104, 68, -117, 110, -84, 102, -113, 119, -115] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx1_u8 => vqtbx1_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [100_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 102, 84, 102, 105, 119, 107] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx1q_u8 => vqtbx1q_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [100_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 110, 84, 102, 113, 119, 115] +); + +#[cfg(any(target_arch = "aarch64", 
target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx1_p8 => vqtbx1_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [100_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 102, 84, 102, 105, 119, 107] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx1q_p8 => vqtbx1q_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [100_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 110, 84, 102, 113, 119, 115] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx2_s8 => vqtbx2_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 31, 32, 10, 6, 49, 7, 18] => [4_i8, -31, 102, 10, 6, -105, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx2q_s8 => vqtbx2q_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 31, 32, 10, 6, 49, 7, 18] + => [100_i8, -15, -1, 24, 
2, -13, -3, -29, 4, -31, 110, 10, 6, -113, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx2_u8 => vqtbx2_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 102, 10, 6, 105, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx2q_u8 => vqtbx2q_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 110, 10, 6, 113, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx2_p8 => vqtbx2_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 102, 10, 6, 105, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx2q_p8 => vqtbx2q_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 
114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 110, 10, 6, 113, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx3_s8 => vqtbx3_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 32, 46, 51, 6, 49, 7, 18] => [4_i8, 32, 46, -103, 6, -105, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx3q_s8 => vqtbx3q_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 32, 46, 51, 6, 49, 7, 18] + => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, 32, 46, -111, 6, -113, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx3_u8 => vqtbx3_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 
103, 6, 105, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx3q_u8 => vqtbx3q_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 111, 6, 113, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx3_p8 => vqtbx3_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 103, 6, 105, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx3q_p8 => vqtbx3q_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 111, 6, 113, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx4_s8 => vqtbx4_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 
8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 46, 64, 51, 6, 71, 7, 18] => [4_i8, 46, 102, -51, 6, -105, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx4q_s8 => vqtbx4q_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 46, 64, 51, 6, 71, 7, 18] + => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, 46, 110, -51, 6, -113, -7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx4_u8 => vqtbx4_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 102, 51, 6, 105, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx4q_u8 => 
vqtbx4q_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 110, 51, 6, 113, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx4_p8 => vqtbx4_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 102, 51, 6, 105, 7, 18] +); + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +test_vtbx!( + test_vqtbx4q_p8 => vqtbx4q_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 110, 51, 6, 113, 7, 18] +); diff --git a/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs 
b/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs new file mode 100644 index 000000000000..e2828f85561d --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs @@ -0,0 +1,836 @@ +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; +use std::{mem::transmute, vec::Vec}; + +macro_rules! V_u8 { + () => { + vec![0x00u8, 0x01u8, 0x02u8, 0x0Fu8, 0x80u8, 0xF0u8, 0xFFu8] + }; +} +macro_rules! V_u16 { + () => { + vec![ + 0x0000u16, 0x0101u16, 0x0202u16, 0x0F0Fu16, 0x8000u16, 0xF0F0u16, 0xFFFFu16, + ] + }; +} +macro_rules! V_u32 { + () => { + vec![ + 0x00000000u32, + 0x01010101u32, + 0x02020202u32, + 0x0F0F0F0Fu32, + 0x80000000u32, + 0xF0F0F0F0u32, + 0xFFFFFFFFu32, + ] + }; +} +macro_rules! V_u64 { + () => { + vec![ + 0x0000000000000000u64, + 0x0101010101010101u64, + 0x0202020202020202u64, + 0x0F0F0F0F0F0F0F0Fu64, + 0x8080808080808080u64, + 0xF0F0F0F0F0F0F0F0u64, + 0xFFFFFFFFFFFFFFFFu64, + ] + }; +} + +macro_rules! V_i8 { + () => { + vec![ + 0x00i8, 0x01i8, 0x02i8, 0x0Fi8, -128i8, /* 0x80 */ + -16i8, /* 0xF0 */ + -1i8, /* 0xFF */ + ] + }; +} +macro_rules! V_i16 { + () => { + vec![ + 0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */ + -3856i16, /* 0xF0F0 */ + -1i16, /* 0xFFF */ + ] + }; +} +macro_rules! V_i32 { + () => { + vec![ + 0x00000000i32, + 0x01010101i32, + 0x02020202i32, + 0x0F0F0F0Fi32, + -2139062144i32, /* 0x80000000 */ + -252645136i32, /* 0xF0F0F0F0 */ + -1i32, /* 0xFFFFFFFF */ + ] + }; +} + +macro_rules! V_i64 { + () => { + vec![ + 0x0000000000000000i64, + 0x0101010101010101i64, + 0x0202020202020202i64, + 0x0F0F0F0F0F0F0F0Fi64, + -9223372036854775808i64, /* 0x8000000000000000 */ + -1152921504606846976i64, /* 0xF000000000000000 */ + -1i64, /* 0xFFFFFFFFFFFFFFFF */ + ] + }; +} + +macro_rules! 
V_f32 { + () => { + vec![ + 0.0f32, + 1.0f32, + -1.0f32, + 1.2f32, + 2.4f32, + f32::MAX, + f32::MIN, + f32::INFINITY, + f32::NEG_INFINITY, + f32::NAN, + ] + }; +} + +macro_rules! to64 { + ($t : ident) => { + |v: $t| -> u64 { transmute(v) } + }; +} + +macro_rules! to128 { + ($t : ident) => { + |v: $t| -> u128 { transmute(v) } + }; +} + +pub(crate) fn test( + vals: Vec, + fill1: fn(T) -> V, + fill2: fn(U) -> W, + cast: fn(W) -> X, + test_fun: fn(V, V) -> W, + verify_fun: fn(T, T) -> U, +) where + T: Copy + core::fmt::Debug + std::cmp::PartialEq, + U: Copy + core::fmt::Debug + std::cmp::PartialEq, + V: Copy + core::fmt::Debug, + W: Copy + core::fmt::Debug, + X: Copy + core::fmt::Debug + std::cmp::PartialEq, +{ + let pairs = vals.iter().zip(vals.iter()); + + for (i, j) in pairs { + let a: V = fill1(*i); + let b: V = fill1(*j); + + let actual_pre: W = test_fun(a, b); + let expected_pre: W = fill2(verify_fun(*i, *j)); + + let actual: X = cast(actual_pre); + let expected: X = cast(expected_pre); + + assert_eq!( + actual, expected, + "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n", + *i, *j, &a, &b, actual_pre, &a, &b, expected_pre + ); + } +} + +macro_rules! gen_test_fn { + ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => { + pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) { + unsafe { + test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun) + }; + } + }; +} + +macro_rules! 
gen_fill_fn { + ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => { + pub(crate) fn $id(val: $in_t) -> $out_t { + let initial: [$in_t; $num_els] = [val; $num_els]; + let result: $cmp_t = unsafe { transmute(initial) }; + let result_out: $out_t = unsafe { transmute(result) }; + + // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits()); + + result_out + } + }; +} + +gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64); +gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64); +gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128); +gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128); + +gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64); +gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64); +gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128); +gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128); + +gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64); +gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64); +gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128); +gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128); + +gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64); +gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64); +gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128); +gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128); + +gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64); +gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128); + +gen_test_fn!( + test_ari_u8, + u8, + u8, + uint8x8_t, + uint8x8_t, + u64, + V_u8!(), + fill_u8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + test_bit_u8, + u8, + u8, + uint8x8_t, + uint8x8_t, + u64, + V_u8!(), + fill_u8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + test_cmp_u8, + u8, + u8, + uint8x8_t, + uint8x8_t, + u64, + V_u8!(), + fill_u8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + testq_ari_u8, + u8, + u8, + uint8x16_t, + uint8x16_t, + u128, + V_u8!(), + fillq_u8, + fillq_u8, + to128!(uint8x16_t) +); 
+gen_test_fn!( + testq_bit_u8, + u8, + u8, + uint8x16_t, + uint8x16_t, + u128, + V_u8!(), + fillq_u8, + fillq_u8, + to128!(uint8x16_t) +); +gen_test_fn!( + testq_cmp_u8, + u8, + u8, + uint8x16_t, + uint8x16_t, + u128, + V_u8!(), + fillq_u8, + fillq_u8, + to128!(uint8x16_t) +); + +gen_test_fn!( + test_ari_s8, + i8, + i8, + int8x8_t, + int8x8_t, + u64, + V_i8!(), + fill_s8, + fill_s8, + to64!(int8x8_t) +); +gen_test_fn!( + test_bit_s8, + i8, + i8, + int8x8_t, + int8x8_t, + u64, + V_i8!(), + fill_s8, + fill_s8, + to64!(int8x8_t) +); +gen_test_fn!( + test_cmp_s8, + i8, + u8, + int8x8_t, + uint8x8_t, + u64, + V_i8!(), + fill_s8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + testq_ari_s8, + i8, + i8, + int8x16_t, + int8x16_t, + u128, + V_i8!(), + fillq_s8, + fillq_s8, + to128!(int8x16_t) +); +gen_test_fn!( + testq_bit_s8, + i8, + i8, + int8x16_t, + int8x16_t, + u128, + V_i8!(), + fillq_s8, + fillq_s8, + to128!(int8x16_t) +); +gen_test_fn!( + testq_cmp_s8, + i8, + u8, + int8x16_t, + uint8x16_t, + u128, + V_i8!(), + fillq_s8, + fillq_u8, + to128!(uint8x16_t) +); + +gen_test_fn!( + test_ari_u16, + u16, + u16, + uint16x4_t, + uint16x4_t, + u64, + V_u16!(), + fill_u16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + test_bit_u16, + u16, + u16, + uint16x4_t, + uint16x4_t, + u64, + V_u16!(), + fill_u16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + test_cmp_u16, + u16, + u16, + uint16x4_t, + uint16x4_t, + u64, + V_u16!(), + fill_u16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + testq_ari_u16, + u16, + u16, + uint16x8_t, + uint16x8_t, + u128, + V_u16!(), + fillq_u16, + fillq_u16, + to128!(uint16x8_t) +); +gen_test_fn!( + testq_bit_u16, + u16, + u16, + uint16x8_t, + uint16x8_t, + u128, + V_u16!(), + fillq_u16, + fillq_u16, + to128!(uint16x8_t) +); +gen_test_fn!( + testq_cmp_u16, + u16, + u16, + uint16x8_t, + uint16x8_t, + u128, + V_u16!(), + fillq_u16, + fillq_u16, + to128!(uint16x8_t) +); + +gen_test_fn!( + test_ari_s16, + i16, + i16, + int16x4_t, + 
int16x4_t, + u64, + V_i16!(), + fill_s16, + fill_s16, + to64!(int16x4_t) +); +gen_test_fn!( + test_bit_s16, + i16, + i16, + int16x4_t, + int16x4_t, + u64, + V_i16!(), + fill_s16, + fill_s16, + to64!(int16x4_t) +); +gen_test_fn!( + test_cmp_s16, + i16, + u16, + int16x4_t, + uint16x4_t, + u64, + V_i16!(), + fill_s16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + testq_ari_s16, + i16, + i16, + int16x8_t, + int16x8_t, + u128, + V_i16!(), + fillq_s16, + fillq_s16, + to128!(int16x8_t) +); +gen_test_fn!( + testq_bit_s16, + i16, + i16, + int16x8_t, + int16x8_t, + u128, + V_i16!(), + fillq_s16, + fillq_s16, + to128!(int16x8_t) +); +gen_test_fn!( + testq_cmp_s16, + i16, + u16, + int16x8_t, + uint16x8_t, + u128, + V_i16!(), + fillq_s16, + fillq_u16, + to128!(uint16x8_t) +); + +gen_test_fn!( + test_ari_u32, + u32, + u32, + uint32x2_t, + uint32x2_t, + u64, + V_u32!(), + fill_u32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + test_bit_u32, + u32, + u32, + uint32x2_t, + uint32x2_t, + u64, + V_u32!(), + fill_u32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + test_cmp_u32, + u32, + u32, + uint32x2_t, + uint32x2_t, + u64, + V_u32!(), + fill_u32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + testq_ari_u32, + u32, + u32, + uint32x4_t, + uint32x4_t, + u128, + V_u32!(), + fillq_u32, + fillq_u32, + to128!(uint32x4_t) +); +gen_test_fn!( + testq_bit_u32, + u32, + u32, + uint32x4_t, + uint32x4_t, + u128, + V_u32!(), + fillq_u32, + fillq_u32, + to128!(uint32x4_t) +); +gen_test_fn!( + testq_cmp_u32, + u32, + u32, + uint32x4_t, + uint32x4_t, + u128, + V_u32!(), + fillq_u32, + fillq_u32, + to128!(uint32x4_t) +); + +gen_test_fn!( + test_ari_s32, + i32, + i32, + int32x2_t, + int32x2_t, + u64, + V_i32!(), + fill_s32, + fill_s32, + to64!(int32x2_t) +); +gen_test_fn!( + test_bit_s32, + i32, + i32, + int32x2_t, + int32x2_t, + u64, + V_i32!(), + fill_s32, + fill_s32, + to64!(int32x2_t) +); +gen_test_fn!( + test_cmp_s32, + i32, + u32, + int32x2_t, + uint32x2_t, + u64, + 
V_i32!(), + fill_s32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + testq_ari_s32, + i32, + i32, + int32x4_t, + int32x4_t, + u128, + V_i32!(), + fillq_s32, + fillq_s32, + to128!(int32x4_t) +); +gen_test_fn!( + testq_bit_s32, + i32, + i32, + int32x4_t, + int32x4_t, + u128, + V_i32!(), + fillq_s32, + fillq_s32, + to128!(int32x4_t) +); +gen_test_fn!( + testq_cmp_s32, + i32, + u32, + int32x4_t, + uint32x4_t, + u128, + V_i32!(), + fillq_s32, + fillq_u32, + to128!(uint32x4_t) +); + +gen_test_fn!( + test_ari_u64, + u64, + u64, + uint64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_u64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + test_bit_u64, + u64, + u64, + uint64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_u64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + test_cmp_u64, + u64, + u64, + uint64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_u64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + testq_ari_u64, + u64, + u64, + uint64x2_t, + uint64x2_t, + u128, + V_u64!(), + fillq_u64, + fillq_u64, + to128!(uint64x2_t) +); +gen_test_fn!( + testq_bit_u64, + u64, + u64, + uint64x2_t, + uint64x2_t, + u128, + V_u64!(), + fillq_u64, + fillq_u64, + to128!(uint64x2_t) +); +gen_test_fn!( + testq_cmp_u64, + u64, + u64, + uint64x2_t, + uint64x2_t, + u128, + V_u64!(), + fillq_u64, + fillq_u64, + to128!(uint64x2_t) +); + +gen_test_fn!( + test_ari_s64, + i64, + i64, + int64x1_t, + int64x1_t, + u64, + V_i64!(), + fill_s64, + fill_s64, + to64!(int64x1_t) +); +gen_test_fn!( + test_bit_s64, + i64, + i64, + int64x1_t, + int64x1_t, + u64, + V_i64!(), + fill_s64, + fill_s64, + to64!(int64x1_t) +); +gen_test_fn!( + test_cmp_s64, + i64, + u64, + int64x1_t, + uint64x1_t, + u64, + V_i64!(), + fill_s64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + testq_ari_s64, + i64, + i64, + int64x2_t, + int64x2_t, + u128, + V_i64!(), + fillq_s64, + fillq_s64, + to128!(int64x2_t) +); +gen_test_fn!( + testq_bit_s64, + i64, + i64, + int64x2_t, + int64x2_t, + u128, + V_i64!(), + 
fillq_s64, + fillq_s64, + to128!(int64x2_t) +); +gen_test_fn!( + testq_cmp_s64, + i64, + u64, + int64x2_t, + uint64x2_t, + u128, + V_i64!(), + fillq_s64, + fillq_u64, + to128!(uint64x2_t) +); + +gen_test_fn!( + test_ari_f32, + f32, + f32, + float32x2_t, + float32x2_t, + u64, + V_f32!(), + fill_f32, + fill_f32, + to64!(float32x2_t) +); +gen_test_fn!( + test_cmp_f32, + f32, + u32, + float32x2_t, + uint32x2_t, + u64, + V_f32!(), + fill_f32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + testq_ari_f32, + f32, + f32, + float32x4_t, + float32x4_t, + u128, + V_f32!(), + fillq_f32, + fillq_f32, + to128!(float32x4_t) +); +gen_test_fn!( + testq_cmp_f32, + f32, + u32, + float32x4_t, + uint32x4_t, + u128, + V_f32!(), + fillq_f32, + fillq_u32, + to128!(uint32x4_t) +); diff --git a/library/stdarch/crates/core_arch/src/core_arch_docs.md b/library/stdarch/crates/core_arch/src/core_arch_docs.md new file mode 100644 index 000000000000..bfa1b7228860 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/core_arch_docs.md @@ -0,0 +1,350 @@ +SIMD and vendor intrinsics module. + +This module is intended to be the gateway to architecture-specific +intrinsic functions, typically related to SIMD (but not always!). Each +architecture that Rust compiles to may contain a submodule here, which +means that this is not a portable module! If you're writing a portable +library take care when using these APIs! + +Under this module you'll find an architecture-named module, such as +`x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a +module entry here, only present on that particular target. For example the +`i686-pc-windows-msvc` target will have an `x86` module here, whereas +`x86_64-pc-windows-msvc` has `x86_64`. + +[rfc]: https://github.com/rust-lang/rfcs/pull/2325 +[tracked]: https://github.com/rust-lang/rust/issues/48556 + +# Overview + +This module exposes vendor-specific intrinsics that typically correspond to +a single machine instruction. 
These intrinsics are not portable: their +availability is architecture-dependent, and not all machines of that +architecture might provide the intrinsic. + +The `arch` module is intended to be a low-level implementation detail for +higher-level APIs. Using it correctly can be quite tricky as you need to +ensure at least a few guarantees are upheld: + +* The correct architecture's module is used. For example the `arm` module + isn't available on the `x86_64-unknown-linux-gnu` target. This is + typically done by ensuring that `#[cfg]` is used appropriately when using + this module. +* The CPU the program is currently running on supports the function being + called. For example it is unsafe to call an AVX2 function on a CPU that + doesn't actually support AVX2. + +As a result of the latter of these guarantees all intrinsics in this module +are `unsafe` and extra care needs to be taken when calling them! + +# CPU Feature Detection + +In order to call these APIs in a safe fashion there's a number of +mechanisms available to ensure that the correct CPU feature is available +to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64` +intrinsics on the `x86` and `x86_64` architectures. This function requires +the AVX2 feature as [documented by Intel][intel-dox] so to correctly call +this function we need to (a) guarantee we only call it on `x86`/`x86_64` +and (b) ensure that the CPU feature is available + +[intel-dox]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64&expand=100 + +## Static CPU Feature Detection + +The first option available to us is to conditionally compile code via the +`#[cfg]` attribute. 
CPU features correspond to the `target_feature` cfg +available, and can be used like so: + +```ignore +#[cfg( + all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx2" + ) +)] +fn foo() { + #[cfg(target_arch = "x86")] + use std::arch::x86::_mm256_add_epi64; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::_mm256_add_epi64; + + unsafe { + _mm256_add_epi64(...); + } +} +``` + +Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile +this function into our module. This means that if the `avx2` feature is +*enabled statically* then we'll use the `_mm256_add_epi64` function at +runtime. The `unsafe` block here can be justified through the usage of +`#[cfg]` to only compile the code in situations where the safety guarantees +are upheld. + +Statically enabling a feature is typically done with the `-C +target-feature` or `-C target-cpu` flags to the compiler. For example if +your local CPU supports AVX2 then you can compile the above function with: + +```sh +$ RUSTFLAGS='-C target-cpu=native' cargo build +``` + +Or otherwise you can specifically enable just the AVX2 feature: + +```sh +$ RUSTFLAGS='-C target-feature=+avx2' cargo build +``` + +Note that when you compile a binary with a particular feature enabled it's +important to ensure that you only run the binary on systems which satisfy +the required feature set. + +## Dynamic CPU Feature Detection + +Sometimes statically dispatching isn't quite what you want. Instead you +might want to build a portable binary that runs across a variety of CPUs, +but at runtime it selects the most optimized implementation available. This +allows you to build a "least common denominator" binary which has certain +sections more optimized for different CPUs. + +Taking our previous example from before, we're going to compile our binary +*without* AVX2 support, but we'd like to enable it for just one function. 
+We can do that in a manner like: + +```ignore +fn foo() { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("avx2") { + return unsafe { foo_avx2() }; + } + } + + // fallback implementation without using AVX2 +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "avx2")] +unsafe fn foo_avx2() { + #[cfg(target_arch = "x86")] + use std::arch::x86::_mm256_add_epi64; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::_mm256_add_epi64; + + unsafe { _mm256_add_epi64(...); } +} +``` + +There's a couple of components in play here, so let's go through them in +detail! + +* First up we notice the `is_x86_feature_detected!` macro. Provided by + the standard library, this macro will perform necessary runtime detection + to determine whether the CPU the program is running on supports the + specified feature. In this case the macro will expand to a boolean + expression evaluating to whether the local CPU has the AVX2 feature or + not. + + Note that this macro, like the `arch` module, is platform-specific. For + example calling `is_x86_feature_detected!("avx2")` on ARM will be a + compile time error. To ensure we don't hit this error a statement level + `#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`. + +* Next up we see our AVX2-enabled function, `foo_avx2`. This function is + decorated with the `#[target_feature]` attribute which enables a CPU + feature for just this one function. Using a compiler flag like `-C + target-feature=+avx2` will enable AVX2 for the entire program, but using + an attribute will only enable it for the one function. Usage of the + `#[target_feature]` attribute currently requires the function to also be + `unsafe`, as we see here. This is because the function can only be + correctly called on systems which have the AVX2 (like the intrinsics + themselves). + +And with all that we should have a working program! 
This program will run +across all machines and it'll use the optimized AVX2 implementation on +machines where support is detected. + +# Ergonomics + +It's important to note that using the `arch` module is not the easiest +thing in the world, so if you're curious to try it out you may want to +brace yourself for some wordiness! + +The primary purpose of this module is to enable stable crates on crates.io +to build up much more ergonomic abstractions which end up using SIMD under +the hood. Over time these abstractions may also move into the standard +library itself, but for now this module is tasked with providing the bare +minimum necessary to use vendor intrinsics on stable Rust. + +# Other architectures + +This documentation is only for one particular architecture, you can find +others at: + +* [`x86`] +* [`x86_64`] +* [`arm`] +* [`aarch64`] +* [`riscv32`] +* [`riscv64`] +* [`mips`] +* [`mips64`] +* [`powerpc`] +* [`powerpc64`] +* [`nvptx`] +* [`wasm32`] +* [`loongarch64`] +* [`s390x`] + +[`x86`]: ../../core/arch/x86/index.html +[`x86_64`]: ../../core/arch/x86_64/index.html +[`arm`]: ../../core/arch/arm/index.html +[`aarch64`]: ../../core/arch/aarch64/index.html +[`riscv32`]: ../../core/arch/riscv32/index.html +[`riscv64`]: ../../core/arch/riscv64/index.html +[`mips`]: ../../core/arch/mips/index.html +[`mips64`]: ../../core/arch/mips64/index.html +[`powerpc`]: ../../core/arch/powerpc/index.html +[`powerpc64`]: ../../core/arch/powerpc64/index.html +[`nvptx`]: ../../core/arch/nvptx/index.html +[`wasm32`]: ../../core/arch/wasm32/index.html +[`loongarch64`]: ../../core/arch/loongarch64/index.html +[`s390x`]: ../../core/arch/s390x/index.html + +# Examples + +First let's take a look at not actually using any intrinsics but instead +using LLVM's auto-vectorization to produce optimized vectorized code for +AVX2 and also for the default platform. 
+ +```rust +fn main() { + let mut dst = [0]; + add_quickly(&[1], &[2], &mut dst); + assert_eq!(dst[0], 3); +} + +fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // Note that this `unsafe` block is safe because we're testing + // that the `avx2` feature is indeed available on our CPU. + if is_x86_feature_detected!("avx2") { + return unsafe { add_quickly_avx2(a, b, c) }; + } + } + + add_quickly_fallback(a, b, c) +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "avx2")] +unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) { + add_quickly_fallback(a, b, c) // the function below is inlined here +} + +fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) { + for ((a, b), c) in a.iter().zip(b).zip(c) { + *c = *a + *b; + } +} +``` + +Next up let's take a look at an example of manually using intrinsics. Here +we'll be using SSE4.1 features to implement hex encoding. + +``` +fn main() { + let mut dst = [0; 32]; + hex_encode(b"\x01\x02\x03", &mut dst); + assert_eq!(&dst[..6], b"010203"); + + let mut src = [0; 16]; + for i in 0..16 { + src[i] = (i + 1) as u8; + } + hex_encode(&src, &mut dst); + assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10"); +} + +pub fn hex_encode(src: &[u8], dst: &mut [u8]) { + let len = src.len().checked_mul(2).unwrap(); + assert!(dst.len() >= len); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse4.1") { + return unsafe { hex_encode_sse41(src, dst) }; + } + } + + hex_encode_fallback(src, dst) +} + +// translated from +// +#[target_feature(enable = "sse4.1")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + unsafe { + let ascii_zero = _mm_set1_epi8(b'0' as i8); + let nines = _mm_set1_epi8(9); + 
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8); + let and4bits = _mm_set1_epi8(0xf); + + let mut i = 0_isize; + while src.len() >= 16 { + let invec = _mm_loadu_si128(src.as_ptr() as *const _); + + let masked1 = _mm_and_si128(invec, and4bits); + let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); + + // return 0xff corresponding to the elements > 9, or 0x00 otherwise + let cmpmask1 = _mm_cmpgt_epi8(masked1, nines); + let cmpmask2 = _mm_cmpgt_epi8(masked2, nines); + + // add '0' or the offset depending on the masks + let masked1 = _mm_add_epi8( + masked1, + _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1), + ); + let masked2 = _mm_add_epi8( + masked2, + _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2), + ); + + // interleave masked1 and masked2 bytes + let res1 = _mm_unpacklo_epi8(masked2, masked1); + let res2 = _mm_unpackhi_epi8(masked2, masked1); + + _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1); + _mm_storeu_si128( + dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, + res2, + ); + src = &src[16..]; + i += 16; + } + + let i = i as usize; + hex_encode_fallback(src, &mut dst[i * 2..]); + } +} + +fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) { + fn hex(byte: u8) -> u8 { + static TABLE: &[u8] = b"0123456789abcdef"; + TABLE[byte as usize] + } + + for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) { + slots[0] = hex((*byte >> 4) & 0xf); + slots[1] = hex(*byte & 0xf); + } +} +``` diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs new file mode 100644 index 000000000000..340c4c510d78 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -0,0 +1,94 @@ +#![doc = include_str!("core_arch_docs.md")] +#![allow(improper_ctypes_definitions)] +#![allow(dead_code)] +#![allow(unused_features)] +#![allow(internal_features)] +#![allow(unsafe_op_in_unsafe_fn)] +#![deny(rust_2018_idioms)] +#![feature( + custom_inner_attributes, + link_llvm_intrinsics, + repr_simd, + simd_ffi, + 
proc_macro_hygiene, + stmt_expr_attributes, + core_intrinsics, + no_core, + fmt_helpers_for_derive, + rustc_attrs, + staged_api, + doc_cfg, + tbm_target_feature, + sse4a_target_feature, + riscv_target_feature, + arm_target_feature, + mips_target_feature, + powerpc_target_feature, + s390x_target_feature, + loongarch_target_feature, + wasm_target_feature, + abi_unadjusted, + rtm_target_feature, + allow_internal_unstable, + decl_macro, + asm_experimental_arch, + x86_amx_intrinsics, + f16, + aarch64_unstable_target_feature, + bigint_helper_methods +)] +#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))] +#![deny(clippy::missing_inline_in_public_items)] +#![allow( + clippy::identity_op, + clippy::inline_always, + clippy::too_many_arguments, + clippy::cast_sign_loss, + clippy::cast_lossless, + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_precision_loss, + clippy::cognitive_complexity, + clippy::many_single_char_names, + clippy::missing_safety_doc, + clippy::shadow_reuse, + clippy::similar_names, + clippy::unusual_byte_groupings, + clippy::wrong_self_convention +)] +#![cfg_attr(test, allow(unused_imports))] +#![no_std] +#![stable(feature = "stdsimd", since = "1.27.0")] +#![doc( + test(attr(deny(warnings))), + test(attr(allow(dead_code, deprecated, unused_variables, unused_mut))) +)] +#![cfg_attr( + test, + feature( + stdarch_arm_feature_detection, + stdarch_powerpc_feature_detection, + stdarch_s390x_feature_detection + ) +)] + +#[cfg(test)] +#[macro_use] +extern crate std; +#[cfg(test)] +#[macro_use] +extern crate std_detect; +#[path = "mod.rs"] +mod core_arch; + +#[stable(feature = "stdsimd", since = "1.27.0")] +pub mod arch { + #[stable(feature = "stdsimd", since = "1.27.0")] + #[allow(unused_imports)] + pub use crate::core_arch::arch::*; + #[stable(feature = "stdsimd", since = "1.27.0")] + pub use core::arch::asm; +} + +#[allow(unused_imports)] +use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, 
ptr, sync}; diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs new file mode 100644 index 000000000000..2e56d8fb9b83 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -0,0 +1,7063 @@ +// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen-loongarch/lasx.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec +// ``` + +use super::types::*; + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.loongarch.lasx.xvsll.b"] + fn __lasx_xvsll_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsll.h"] + fn __lasx_xvsll_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsll.w"] + fn __lasx_xvsll_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsll.d"] + fn __lasx_xvsll_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslli.b"] + fn __lasx_xvslli_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslli.h"] + fn __lasx_xvslli_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslli.w"] + fn __lasx_xvslli_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslli.d"] + fn __lasx_xvslli_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsra.b"] + fn __lasx_xvsra_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsra.h"] + fn __lasx_xvsra_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsra.w"] + fn __lasx_xvsra_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsra.d"] + fn __lasx_xvsra_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrai.b"] + fn __lasx_xvsrai_b(a: v32i8, b: u32) -> v32i8; + #[link_name = 
"llvm.loongarch.lasx.xvsrai.h"] + fn __lasx_xvsrai_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrai.w"] + fn __lasx_xvsrai_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrai.d"] + fn __lasx_xvsrai_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrar.b"] + fn __lasx_xvsrar_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrar.h"] + fn __lasx_xvsrar_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrar.w"] + fn __lasx_xvsrar_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrar.d"] + fn __lasx_xvsrar_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrari.b"] + fn __lasx_xvsrari_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrari.h"] + fn __lasx_xvsrari_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrari.w"] + fn __lasx_xvsrari_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrari.d"] + fn __lasx_xvsrari_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrl.b"] + fn __lasx_xvsrl_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrl.h"] + fn __lasx_xvsrl_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrl.w"] + fn __lasx_xvsrl_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrl.d"] + fn __lasx_xvsrl_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrli.b"] + fn __lasx_xvsrli_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrli.h"] + fn __lasx_xvsrli_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrli.w"] + fn __lasx_xvsrli_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrli.d"] + fn __lasx_xvsrli_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrlr.b"] + fn __lasx_xvsrlr_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = 
"llvm.loongarch.lasx.xvsrlr.h"] + fn __lasx_xvsrlr_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrlr.w"] + fn __lasx_xvsrlr_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrlr.d"] + fn __lasx_xvsrlr_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrlri.b"] + fn __lasx_xvsrlri_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrlri.h"] + fn __lasx_xvsrlri_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrlri.w"] + fn __lasx_xvsrlri_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrlri.d"] + fn __lasx_xvsrlri_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvbitclr.b"] + fn __lasx_xvbitclr_b(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitclr.h"] + fn __lasx_xvbitclr_h(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvbitclr.w"] + fn __lasx_xvbitclr_w(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvbitclr.d"] + fn __lasx_xvbitclr_d(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvbitclri.b"] + fn __lasx_xvbitclri_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitclri.h"] + fn __lasx_xvbitclri_h(a: v16u16, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvbitclri.w"] + fn __lasx_xvbitclri_w(a: v8u32, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvbitclri.d"] + fn __lasx_xvbitclri_d(a: v4u64, b: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvbitset.b"] + fn __lasx_xvbitset_b(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitset.h"] + fn __lasx_xvbitset_h(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvbitset.w"] + fn __lasx_xvbitset_w(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvbitset.d"] + fn __lasx_xvbitset_d(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvbitseti.b"] + fn 
__lasx_xvbitseti_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitseti.h"] + fn __lasx_xvbitseti_h(a: v16u16, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvbitseti.w"] + fn __lasx_xvbitseti_w(a: v8u32, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvbitseti.d"] + fn __lasx_xvbitseti_d(a: v4u64, b: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvbitrev.b"] + fn __lasx_xvbitrev_b(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitrev.h"] + fn __lasx_xvbitrev_h(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvbitrev.w"] + fn __lasx_xvbitrev_w(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvbitrev.d"] + fn __lasx_xvbitrev_d(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvbitrevi.b"] + fn __lasx_xvbitrevi_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitrevi.h"] + fn __lasx_xvbitrevi_h(a: v16u16, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvbitrevi.w"] + fn __lasx_xvbitrevi_w(a: v8u32, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvbitrevi.d"] + fn __lasx_xvbitrevi_d(a: v4u64, b: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvadd.b"] + fn __lasx_xvadd_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvadd.h"] + fn __lasx_xvadd_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvadd.w"] + fn __lasx_xvadd_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvadd.d"] + fn __lasx_xvadd_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddi.bu"] + fn __lasx_xvaddi_bu(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvaddi.hu"] + fn __lasx_xvaddi_hu(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvaddi.wu"] + fn __lasx_xvaddi_wu(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvaddi.du"] + fn __lasx_xvaddi_du(a: v4i64, b: u32) -> v4i64; + #[link_name = 
"llvm.loongarch.lasx.xvsub.b"] + fn __lasx_xvsub_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsub.h"] + fn __lasx_xvsub_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsub.w"] + fn __lasx_xvsub_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsub.d"] + fn __lasx_xvsub_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubi.bu"] + fn __lasx_xvsubi_bu(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsubi.hu"] + fn __lasx_xvsubi_hu(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsubi.wu"] + fn __lasx_xvsubi_wu(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsubi.du"] + fn __lasx_xvsubi_du(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmax.b"] + fn __lasx_xvmax_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmax.h"] + fn __lasx_xvmax_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmax.w"] + fn __lasx_xvmax_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmax.d"] + fn __lasx_xvmax_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaxi.b"] + fn __lasx_xvmaxi_b(a: v32i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmaxi.h"] + fn __lasx_xvmaxi_h(a: v16i16, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmaxi.w"] + fn __lasx_xvmaxi_w(a: v8i32, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmaxi.d"] + fn __lasx_xvmaxi_d(a: v4i64, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmax.bu"] + fn __lasx_xvmax_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvmax.hu"] + fn __lasx_xvmax_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmax.wu"] + fn __lasx_xvmax_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmax.du"] + fn __lasx_xvmax_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = 
"llvm.loongarch.lasx.xvmaxi.bu"] + fn __lasx_xvmaxi_bu(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvmaxi.hu"] + fn __lasx_xvmaxi_hu(a: v16u16, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmaxi.wu"] + fn __lasx_xvmaxi_wu(a: v8u32, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmaxi.du"] + fn __lasx_xvmaxi_du(a: v4u64, b: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmin.b"] + fn __lasx_xvmin_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmin.h"] + fn __lasx_xvmin_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmin.w"] + fn __lasx_xvmin_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmin.d"] + fn __lasx_xvmin_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmini.b"] + fn __lasx_xvmini_b(a: v32i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmini.h"] + fn __lasx_xvmini_h(a: v16i16, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmini.w"] + fn __lasx_xvmini_w(a: v8i32, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmini.d"] + fn __lasx_xvmini_d(a: v4i64, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmin.bu"] + fn __lasx_xvmin_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvmin.hu"] + fn __lasx_xvmin_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmin.wu"] + fn __lasx_xvmin_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmin.du"] + fn __lasx_xvmin_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmini.bu"] + fn __lasx_xvmini_bu(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvmini.hu"] + fn __lasx_xvmini_hu(a: v16u16, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmini.wu"] + fn __lasx_xvmini_wu(a: v8u32, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmini.du"] + fn __lasx_xvmini_du(a: v4u64, b: u32) -> v4u64; + #[link_name = 
"llvm.loongarch.lasx.xvseq.b"] + fn __lasx_xvseq_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvseq.h"] + fn __lasx_xvseq_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvseq.w"] + fn __lasx_xvseq_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvseq.d"] + fn __lasx_xvseq_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvseqi.b"] + fn __lasx_xvseqi_b(a: v32i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvseqi.h"] + fn __lasx_xvseqi_h(a: v16i16, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvseqi.w"] + fn __lasx_xvseqi_w(a: v8i32, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvseqi.d"] + fn __lasx_xvseqi_d(a: v4i64, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslt.b"] + fn __lasx_xvslt_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslt.h"] + fn __lasx_xvslt_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslt.w"] + fn __lasx_xvslt_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslt.d"] + fn __lasx_xvslt_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslti.b"] + fn __lasx_xvslti_b(a: v32i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslti.h"] + fn __lasx_xvslti_h(a: v16i16, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslti.w"] + fn __lasx_xvslti_w(a: v8i32, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslti.d"] + fn __lasx_xvslti_d(a: v4i64, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslt.bu"] + fn __lasx_xvslt_bu(a: v32u8, b: v32u8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslt.hu"] + fn __lasx_xvslt_hu(a: v16u16, b: v16u16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslt.wu"] + fn __lasx_xvslt_wu(a: v8u32, b: v8u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslt.du"] + fn __lasx_xvslt_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslti.bu"] + fn 
__lasx_xvslti_bu(a: v32u8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslti.hu"] + fn __lasx_xvslti_hu(a: v16u16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslti.wu"] + fn __lasx_xvslti_wu(a: v8u32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslti.du"] + fn __lasx_xvslti_du(a: v4u64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsle.b"] + fn __lasx_xvsle_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsle.h"] + fn __lasx_xvsle_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsle.w"] + fn __lasx_xvsle_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsle.d"] + fn __lasx_xvsle_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslei.b"] + fn __lasx_xvslei_b(a: v32i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslei.h"] + fn __lasx_xvslei_h(a: v16i16, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslei.w"] + fn __lasx_xvslei_w(a: v8i32, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslei.d"] + fn __lasx_xvslei_d(a: v4i64, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsle.bu"] + fn __lasx_xvsle_bu(a: v32u8, b: v32u8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsle.hu"] + fn __lasx_xvsle_hu(a: v16u16, b: v16u16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsle.wu"] + fn __lasx_xvsle_wu(a: v8u32, b: v8u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsle.du"] + fn __lasx_xvsle_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvslei.bu"] + fn __lasx_xvslei_bu(a: v32u8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvslei.hu"] + fn __lasx_xvslei_hu(a: v16u16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvslei.wu"] + fn __lasx_xvslei_wu(a: v8u32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvslei.du"] + fn __lasx_xvslei_du(a: v4u64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsat.b"] + fn __lasx_xvsat_b(a: v32i8, 
b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsat.h"] + fn __lasx_xvsat_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsat.w"] + fn __lasx_xvsat_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsat.d"] + fn __lasx_xvsat_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsat.bu"] + fn __lasx_xvsat_bu(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvsat.hu"] + fn __lasx_xvsat_hu(a: v16u16, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvsat.wu"] + fn __lasx_xvsat_wu(a: v8u32, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvsat.du"] + fn __lasx_xvsat_du(a: v4u64, b: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvadda.b"] + fn __lasx_xvadda_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvadda.h"] + fn __lasx_xvadda_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvadda.w"] + fn __lasx_xvadda_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvadda.d"] + fn __lasx_xvadda_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsadd.b"] + fn __lasx_xvsadd_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsadd.h"] + fn __lasx_xvsadd_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsadd.w"] + fn __lasx_xvsadd_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsadd.d"] + fn __lasx_xvsadd_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsadd.bu"] + fn __lasx_xvsadd_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvsadd.hu"] + fn __lasx_xvsadd_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvsadd.wu"] + fn __lasx_xvsadd_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvsadd.du"] + fn __lasx_xvsadd_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvavg.b"] + fn __lasx_xvavg_b(a: v32i8, b: v32i8) -> v32i8; + 
#[link_name = "llvm.loongarch.lasx.xvavg.h"] + fn __lasx_xvavg_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvavg.w"] + fn __lasx_xvavg_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvavg.d"] + fn __lasx_xvavg_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvavg.bu"] + fn __lasx_xvavg_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvavg.hu"] + fn __lasx_xvavg_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvavg.wu"] + fn __lasx_xvavg_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvavg.du"] + fn __lasx_xvavg_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvavgr.b"] + fn __lasx_xvavgr_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvavgr.h"] + fn __lasx_xvavgr_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvavgr.w"] + fn __lasx_xvavgr_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvavgr.d"] + fn __lasx_xvavgr_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvavgr.bu"] + fn __lasx_xvavgr_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvavgr.hu"] + fn __lasx_xvavgr_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvavgr.wu"] + fn __lasx_xvavgr_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvavgr.du"] + fn __lasx_xvavgr_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvssub.b"] + fn __lasx_xvssub_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssub.h"] + fn __lasx_xvssub_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssub.w"] + fn __lasx_xvssub_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssub.d"] + fn __lasx_xvssub_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssub.bu"] + fn __lasx_xvssub_bu(a: v32u8, b: v32u8) -> v32u8; + 
#[link_name = "llvm.loongarch.lasx.xvssub.hu"] + fn __lasx_xvssub_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssub.wu"] + fn __lasx_xvssub_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvssub.du"] + fn __lasx_xvssub_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvabsd.b"] + fn __lasx_xvabsd_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvabsd.h"] + fn __lasx_xvabsd_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvabsd.w"] + fn __lasx_xvabsd_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvabsd.d"] + fn __lasx_xvabsd_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvabsd.bu"] + fn __lasx_xvabsd_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvabsd.hu"] + fn __lasx_xvabsd_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvabsd.wu"] + fn __lasx_xvabsd_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvabsd.du"] + fn __lasx_xvabsd_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmul.b"] + fn __lasx_xvmul_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmul.h"] + fn __lasx_xvmul_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmul.w"] + fn __lasx_xvmul_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmul.d"] + fn __lasx_xvmul_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmadd.b"] + fn __lasx_xvmadd_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmadd.h"] + fn __lasx_xvmadd_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmadd.w"] + fn __lasx_xvmadd_w(a: v8i32, b: v8i32, c: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmadd.d"] + fn __lasx_xvmadd_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmsub.b"] + fn 
__lasx_xvmsub_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmsub.h"] + fn __lasx_xvmsub_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmsub.w"] + fn __lasx_xvmsub_w(a: v8i32, b: v8i32, c: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmsub.d"] + fn __lasx_xvmsub_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvdiv.b"] + fn __lasx_xvdiv_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvdiv.h"] + fn __lasx_xvdiv_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvdiv.w"] + fn __lasx_xvdiv_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvdiv.d"] + fn __lasx_xvdiv_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvdiv.bu"] + fn __lasx_xvdiv_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvdiv.hu"] + fn __lasx_xvdiv_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvdiv.wu"] + fn __lasx_xvdiv_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvdiv.du"] + fn __lasx_xvdiv_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvhaddw.h.b"] + fn __lasx_xvhaddw_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvhaddw.w.h"] + fn __lasx_xvhaddw_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvhaddw.d.w"] + fn __lasx_xvhaddw_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvhaddw.hu.bu"] + fn __lasx_xvhaddw_hu_bu(a: v32u8, b: v32u8) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvhaddw.wu.hu"] + fn __lasx_xvhaddw_wu_hu(a: v16u16, b: v16u16) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvhaddw.du.wu"] + fn __lasx_xvhaddw_du_wu(a: v8u32, b: v8u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvhsubw.h.b"] + fn __lasx_xvhsubw_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvhsubw.w.h"] + fn 
__lasx_xvhsubw_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvhsubw.d.w"] + fn __lasx_xvhsubw_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvhsubw.hu.bu"] + fn __lasx_xvhsubw_hu_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvhsubw.wu.hu"] + fn __lasx_xvhsubw_wu_hu(a: v16u16, b: v16u16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvhsubw.du.wu"] + fn __lasx_xvhsubw_du_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmod.b"] + fn __lasx_xvmod_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmod.h"] + fn __lasx_xvmod_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmod.w"] + fn __lasx_xvmod_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmod.d"] + fn __lasx_xvmod_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmod.bu"] + fn __lasx_xvmod_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvmod.hu"] + fn __lasx_xvmod_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmod.wu"] + fn __lasx_xvmod_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmod.du"] + fn __lasx_xvmod_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvrepl128vei.b"] + fn __lasx_xvrepl128vei_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvrepl128vei.h"] + fn __lasx_xvrepl128vei_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvrepl128vei.w"] + fn __lasx_xvrepl128vei_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvrepl128vei.d"] + fn __lasx_xvrepl128vei_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpickev.b"] + fn __lasx_xvpickev_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvpickev.h"] + fn __lasx_xvpickev_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvpickev.w"] + fn __lasx_xvpickev_w(a: 
v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpickev.d"] + fn __lasx_xvpickev_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpickod.b"] + fn __lasx_xvpickod_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvpickod.h"] + fn __lasx_xvpickod_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvpickod.w"] + fn __lasx_xvpickod_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpickod.d"] + fn __lasx_xvpickod_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvilvh.b"] + fn __lasx_xvilvh_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvilvh.h"] + fn __lasx_xvilvh_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvilvh.w"] + fn __lasx_xvilvh_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvilvh.d"] + fn __lasx_xvilvh_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvilvl.b"] + fn __lasx_xvilvl_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvilvl.h"] + fn __lasx_xvilvl_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvilvl.w"] + fn __lasx_xvilvl_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvilvl.d"] + fn __lasx_xvilvl_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpackev.b"] + fn __lasx_xvpackev_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvpackev.h"] + fn __lasx_xvpackev_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvpackev.w"] + fn __lasx_xvpackev_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpackev.d"] + fn __lasx_xvpackev_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpackod.b"] + fn __lasx_xvpackod_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvpackod.h"] + fn __lasx_xvpackod_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = 
"llvm.loongarch.lasx.xvpackod.w"] + fn __lasx_xvpackod_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpackod.d"] + fn __lasx_xvpackod_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvshuf.b"] + fn __lasx_xvshuf_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvshuf.h"] + fn __lasx_xvshuf_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvshuf.w"] + fn __lasx_xvshuf_w(a: v8i32, b: v8i32, c: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvshuf.d"] + fn __lasx_xvshuf_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvand.v"] + fn __lasx_xvand_v(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvandi.b"] + fn __lasx_xvandi_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvor.v"] + fn __lasx_xvor_v(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvori.b"] + fn __lasx_xvori_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvnor.v"] + fn __lasx_xvnor_v(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvnori.b"] + fn __lasx_xvnori_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvxor.v"] + fn __lasx_xvxor_v(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvxori.b"] + fn __lasx_xvxori_b(a: v32u8, b: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitsel.v"] + fn __lasx_xvbitsel_v(a: v32u8, b: v32u8, c: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvbitseli.b"] + fn __lasx_xvbitseli_b(a: v32u8, b: v32u8, c: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvshuf4i.b"] + fn __lasx_xvshuf4i_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvshuf4i.h"] + fn __lasx_xvshuf4i_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvshuf4i.w"] + fn __lasx_xvshuf4i_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.b"] + fn 
__lasx_xvreplgr2vr_b(a: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.h"] + fn __lasx_xvreplgr2vr_h(a: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.w"] + fn __lasx_xvreplgr2vr_w(a: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.d"] + fn __lasx_xvreplgr2vr_d(a: i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpcnt.b"] + fn __lasx_xvpcnt_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvpcnt.h"] + fn __lasx_xvpcnt_h(a: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvpcnt.w"] + fn __lasx_xvpcnt_w(a: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpcnt.d"] + fn __lasx_xvpcnt_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvclo.b"] + fn __lasx_xvclo_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvclo.h"] + fn __lasx_xvclo_h(a: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvclo.w"] + fn __lasx_xvclo_w(a: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvclo.d"] + fn __lasx_xvclo_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvclz.b"] + fn __lasx_xvclz_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvclz.h"] + fn __lasx_xvclz_h(a: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvclz.w"] + fn __lasx_xvclz_w(a: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvclz.d"] + fn __lasx_xvclz_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfadd.s"] + fn __lasx_xvfadd_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfadd.d"] + fn __lasx_xvfadd_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfsub.s"] + fn __lasx_xvfsub_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfsub.d"] + fn __lasx_xvfsub_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfmul.s"] + fn __lasx_xvfmul_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmul.d"] + fn __lasx_xvfmul_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = 
"llvm.loongarch.lasx.xvfdiv.s"] + fn __lasx_xvfdiv_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfdiv.d"] + fn __lasx_xvfdiv_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfcvt.h.s"] + fn __lasx_xvfcvt_h_s(a: v8f32, b: v8f32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvfcvt.s.d"] + fn __lasx_xvfcvt_s_d(a: v4f64, b: v4f64) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmin.s"] + fn __lasx_xvfmin_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmin.d"] + fn __lasx_xvfmin_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfmina.s"] + fn __lasx_xvfmina_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmina.d"] + fn __lasx_xvfmina_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfmax.s"] + fn __lasx_xvfmax_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmax.d"] + fn __lasx_xvfmax_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfmaxa.s"] + fn __lasx_xvfmaxa_s(a: v8f32, b: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmaxa.d"] + fn __lasx_xvfmaxa_d(a: v4f64, b: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfclass.s"] + fn __lasx_xvfclass_s(a: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfclass.d"] + fn __lasx_xvfclass_d(a: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfsqrt.s"] + fn __lasx_xvfsqrt_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfsqrt.d"] + fn __lasx_xvfsqrt_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrecip.s"] + fn __lasx_xvfrecip_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrecip.d"] + fn __lasx_xvfrecip_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrecipe.s"] + fn __lasx_xvfrecipe_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrecipe.d"] + fn __lasx_xvfrecipe_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrsqrte.s"] + fn 
__lasx_xvfrsqrte_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrsqrte.d"] + fn __lasx_xvfrsqrte_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrint.s"] + fn __lasx_xvfrint_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrint.d"] + fn __lasx_xvfrint_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrsqrt.s"] + fn __lasx_xvfrsqrt_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrsqrt.d"] + fn __lasx_xvfrsqrt_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvflogb.s"] + fn __lasx_xvflogb_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvflogb.d"] + fn __lasx_xvflogb_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfcvth.s.h"] + fn __lasx_xvfcvth_s_h(a: v16i16) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfcvth.d.s"] + fn __lasx_xvfcvth_d_s(a: v8f32) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfcvtl.s.h"] + fn __lasx_xvfcvtl_s_h(a: v16i16) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfcvtl.d.s"] + fn __lasx_xvfcvtl_d_s(a: v8f32) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvftint.w.s"] + fn __lasx_xvftint_w_s(a: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftint.l.d"] + fn __lasx_xvftint_l_d(a: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftint.wu.s"] + fn __lasx_xvftint_wu_s(a: v8f32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvftint.lu.d"] + fn __lasx_xvftint_lu_d(a: v4f64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvftintrz.w.s"] + fn __lasx_xvftintrz_w_s(a: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrz.l.d"] + fn __lasx_xvftintrz_l_d(a: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrz.wu.s"] + fn __lasx_xvftintrz_wu_s(a: v8f32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvftintrz.lu.d"] + fn __lasx_xvftintrz_lu_d(a: v4f64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvffint.s.w"] + fn __lasx_xvffint_s_w(a: v8i32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvffint.d.l"] + 
fn __lasx_xvffint_d_l(a: v4i64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvffint.s.wu"] + fn __lasx_xvffint_s_wu(a: v8u32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvffint.d.lu"] + fn __lasx_xvffint_d_lu(a: v4u64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvreplve.b"] + fn __lasx_xvreplve_b(a: v32i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvreplve.h"] + fn __lasx_xvreplve_h(a: v16i16, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvreplve.w"] + fn __lasx_xvreplve_w(a: v8i32, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvreplve.d"] + fn __lasx_xvreplve_d(a: v4i64, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpermi.w"] + fn __lasx_xvpermi_w(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvandn.v"] + fn __lasx_xvandn_v(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvneg.b"] + fn __lasx_xvneg_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvneg.h"] + fn __lasx_xvneg_h(a: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvneg.w"] + fn __lasx_xvneg_w(a: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvneg.d"] + fn __lasx_xvneg_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmuh.b"] + fn __lasx_xvmuh_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmuh.h"] + fn __lasx_xvmuh_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmuh.w"] + fn __lasx_xvmuh_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmuh.d"] + fn __lasx_xvmuh_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmuh.bu"] + fn __lasx_xvmuh_bu(a: v32u8, b: v32u8) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvmuh.hu"] + fn __lasx_xvmuh_hu(a: v16u16, b: v16u16) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmuh.wu"] + fn __lasx_xvmuh_wu(a: v8u32, b: v8u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmuh.du"] + fn __lasx_xvmuh_du(a: v4u64, b: v4u64) -> v4u64; + 
#[link_name = "llvm.loongarch.lasx.xvsllwil.h.b"] + fn __lasx_xvsllwil_h_b(a: v32i8, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsllwil.w.h"] + fn __lasx_xvsllwil_w_h(a: v16i16, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsllwil.d.w"] + fn __lasx_xvsllwil_d_w(a: v8i32, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsllwil.hu.bu"] + fn __lasx_xvsllwil_hu_bu(a: v32u8, b: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvsllwil.wu.hu"] + fn __lasx_xvsllwil_wu_hu(a: v16u16, b: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvsllwil.du.wu"] + fn __lasx_xvsllwil_du_wu(a: v8u32, b: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvsran.b.h"] + fn __lasx_xvsran_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsran.h.w"] + fn __lasx_xvsran_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsran.w.d"] + fn __lasx_xvsran_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssran.b.h"] + fn __lasx_xvssran_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssran.h.w"] + fn __lasx_xvssran_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssran.w.d"] + fn __lasx_xvssran_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssran.bu.h"] + fn __lasx_xvssran_bu_h(a: v16u16, b: v16u16) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssran.hu.w"] + fn __lasx_xvssran_hu_w(a: v8u32, b: v8u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssran.wu.d"] + fn __lasx_xvssran_wu_d(a: v4u64, b: v4u64) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvsrarn.b.h"] + fn __lasx_xvsrarn_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrarn.h.w"] + fn __lasx_xvsrarn_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrarn.w.d"] + fn __lasx_xvsrarn_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrarn.b.h"] + fn __lasx_xvssrarn_b_h(a: 
v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrarn.h.w"] + fn __lasx_xvssrarn_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrarn.w.d"] + fn __lasx_xvssrarn_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrarn.bu.h"] + fn __lasx_xvssrarn_bu_h(a: v16u16, b: v16u16) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrarn.hu.w"] + fn __lasx_xvssrarn_hu_w(a: v8u32, b: v8u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrarn.wu.d"] + fn __lasx_xvssrarn_wu_d(a: v4u64, b: v4u64) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvsrln.b.h"] + fn __lasx_xvsrln_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrln.h.w"] + fn __lasx_xvsrln_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrln.w.d"] + fn __lasx_xvsrln_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrln.bu.h"] + fn __lasx_xvssrln_bu_h(a: v16u16, b: v16u16) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrln.hu.w"] + fn __lasx_xvssrln_hu_w(a: v8u32, b: v8u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrln.wu.d"] + fn __lasx_xvssrln_wu_d(a: v4u64, b: v4u64) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvsrlrn.b.h"] + fn __lasx_xvsrlrn_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrlrn.h.w"] + fn __lasx_xvsrlrn_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrlrn.w.d"] + fn __lasx_xvsrlrn_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrlrn.bu.h"] + fn __lasx_xvssrlrn_bu_h(a: v16u16, b: v16u16) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrlrn.hu.w"] + fn __lasx_xvssrlrn_hu_w(a: v8u32, b: v8u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrlrn.wu.d"] + fn __lasx_xvssrlrn_wu_d(a: v4u64, b: v4u64) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvfrstpi.b"] + fn __lasx_xvfrstpi_b(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = 
"llvm.loongarch.lasx.xvfrstpi.h"] + fn __lasx_xvfrstpi_h(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvfrstp.b"] + fn __lasx_xvfrstp_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvfrstp.h"] + fn __lasx_xvfrstp_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvshuf4i.d"] + fn __lasx_xvshuf4i_d(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvbsrl.v"] + fn __lasx_xvbsrl_v(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvbsll.v"] + fn __lasx_xvbsll_v(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvextrins.b"] + fn __lasx_xvextrins_b(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvextrins.h"] + fn __lasx_xvextrins_h(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvextrins.w"] + fn __lasx_xvextrins_w(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvextrins.d"] + fn __lasx_xvextrins_d(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmskltz.b"] + fn __lasx_xvmskltz_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmskltz.h"] + fn __lasx_xvmskltz_h(a: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmskltz.w"] + fn __lasx_xvmskltz_w(a: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmskltz.d"] + fn __lasx_xvmskltz_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsigncov.b"] + fn __lasx_xvsigncov_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsigncov.h"] + fn __lasx_xvsigncov_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsigncov.w"] + fn __lasx_xvsigncov_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsigncov.d"] + fn __lasx_xvsigncov_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfmadd.s"] + fn __lasx_xvfmadd_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32; + 
#[link_name = "llvm.loongarch.lasx.xvfmadd.d"] + fn __lasx_xvfmadd_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfmsub.s"] + fn __lasx_xvfmsub_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfmsub.d"] + fn __lasx_xvfmsub_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfnmadd.s"] + fn __lasx_xvfnmadd_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfnmadd.d"] + fn __lasx_xvfnmadd_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfnmsub.s"] + fn __lasx_xvfnmsub_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfnmsub.d"] + fn __lasx_xvfnmsub_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvftintrne.w.s"] + fn __lasx_xvftintrne_w_s(a: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrne.l.d"] + fn __lasx_xvftintrne_l_d(a: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrp.w.s"] + fn __lasx_xvftintrp_w_s(a: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrp.l.d"] + fn __lasx_xvftintrp_l_d(a: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrm.w.s"] + fn __lasx_xvftintrm_w_s(a: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrm.l.d"] + fn __lasx_xvftintrm_l_d(a: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftint.w.d"] + fn __lasx_xvftint_w_d(a: v4f64, b: v4f64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvffint.s.l"] + fn __lasx_xvffint_s_l(a: v4i64, b: v4i64) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvftintrz.w.d"] + fn __lasx_xvftintrz_w_d(a: v4f64, b: v4f64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrp.w.d"] + fn __lasx_xvftintrp_w_d(a: v4f64, b: v4f64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrm.w.d"] + fn __lasx_xvftintrm_w_d(a: v4f64, b: v4f64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftintrne.w.d"] + fn 
__lasx_xvftintrne_w_d(a: v4f64, b: v4f64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvftinth.l.s"] + fn __lasx_xvftinth_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintl.l.s"] + fn __lasx_xvftintl_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvffinth.d.w"] + fn __lasx_xvffinth_d_w(a: v8i32) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvffintl.d.w"] + fn __lasx_xvffintl_d_w(a: v8i32) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvftintrzh.l.s"] + fn __lasx_xvftintrzh_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrzl.l.s"] + fn __lasx_xvftintrzl_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrph.l.s"] + fn __lasx_xvftintrph_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrpl.l.s"] + fn __lasx_xvftintrpl_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrmh.l.s"] + fn __lasx_xvftintrmh_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrml.l.s"] + fn __lasx_xvftintrml_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrneh.l.s"] + fn __lasx_xvftintrneh_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvftintrnel.l.s"] + fn __lasx_xvftintrnel_l_s(a: v8f32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfrintrne.s"] + fn __lasx_xvfrintrne_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrintrne.d"] + fn __lasx_xvfrintrne_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrintrz.s"] + fn __lasx_xvfrintrz_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrintrz.d"] + fn __lasx_xvfrintrz_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrintrp.s"] + fn __lasx_xvfrintrp_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrintrp.d"] + fn __lasx_xvfrintrp_d(a: v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvfrintrm.s"] + fn __lasx_xvfrintrm_s(a: v8f32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvfrintrm.d"] + fn __lasx_xvfrintrm_d(a: 
v4f64) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvld"] + fn __lasx_xvld(a: *const i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvst"] + fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32); + #[link_name = "llvm.loongarch.lasx.xvstelm.b"] + fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lasx.xvstelm.h"] + fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lasx.xvstelm.w"] + fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lasx.xvstelm.d"] + fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lasx.xvinsve0.w"] + fn __lasx_xvinsve0_w(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvinsve0.d"] + fn __lasx_xvinsve0_d(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpickve.w"] + fn __lasx_xvpickve_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpickve.d"] + fn __lasx_xvpickve_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssrlrn.b.h"] + fn __lasx_xvssrlrn_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrlrn.h.w"] + fn __lasx_xvssrlrn_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrlrn.w.d"] + fn __lasx_xvssrlrn_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrln.b.h"] + fn __lasx_xvssrln_b_h(a: v16i16, b: v16i16) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrln.h.w"] + fn __lasx_xvssrln_h_w(a: v8i32, b: v8i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrln.w.d"] + fn __lasx_xvssrln_w_d(a: v4i64, b: v4i64) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvorn.v"] + fn __lasx_xvorn_v(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvldi"] + fn __lasx_xvldi(a: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvldx"] + fn __lasx_xvldx(a: *const i8, b: i64) -> v32i8; + #[link_name = 
"llvm.loongarch.lasx.xvstx"] + fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64); + #[link_name = "llvm.loongarch.lasx.xvextl.qu.du"] + fn __lasx_xvextl_qu_du(a: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvinsgr2vr.w"] + fn __lasx_xvinsgr2vr_w(a: v8i32, b: i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvinsgr2vr.d"] + fn __lasx_xvinsgr2vr_d(a: v4i64, b: i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvreplve0.b"] + fn __lasx_xvreplve0_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvreplve0.h"] + fn __lasx_xvreplve0_h(a: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvreplve0.w"] + fn __lasx_xvreplve0_w(a: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvreplve0.d"] + fn __lasx_xvreplve0_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvreplve0.q"] + fn __lasx_xvreplve0_q(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.vext2xv.h.b"] + fn __lasx_vext2xv_h_b(a: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.vext2xv.w.h"] + fn __lasx_vext2xv_w_h(a: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.vext2xv.d.w"] + fn __lasx_vext2xv_d_w(a: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.vext2xv.w.b"] + fn __lasx_vext2xv_w_b(a: v32i8) -> v8i32; + #[link_name = "llvm.loongarch.lasx.vext2xv.d.h"] + fn __lasx_vext2xv_d_h(a: v16i16) -> v4i64; + #[link_name = "llvm.loongarch.lasx.vext2xv.d.b"] + fn __lasx_vext2xv_d_b(a: v32i8) -> v4i64; + #[link_name = "llvm.loongarch.lasx.vext2xv.hu.bu"] + fn __lasx_vext2xv_hu_bu(a: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.vext2xv.wu.hu"] + fn __lasx_vext2xv_wu_hu(a: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.vext2xv.du.wu"] + fn __lasx_vext2xv_du_wu(a: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.vext2xv.wu.bu"] + fn __lasx_vext2xv_wu_bu(a: v32i8) -> v8i32; + #[link_name = "llvm.loongarch.lasx.vext2xv.du.hu"] + fn __lasx_vext2xv_du_hu(a: v16i16) -> v4i64; + #[link_name = 
"llvm.loongarch.lasx.vext2xv.du.bu"] + fn __lasx_vext2xv_du_bu(a: v32i8) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpermi.q"] + fn __lasx_xvpermi_q(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvpermi.d"] + fn __lasx_xvpermi_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvperm.w"] + fn __lasx_xvperm_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvldrepl.b"] + fn __lasx_xvldrepl_b(a: *const i8, b: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvldrepl.h"] + fn __lasx_xvldrepl_h(a: *const i8, b: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvldrepl.w"] + fn __lasx_xvldrepl_w(a: *const i8, b: i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvldrepl.d"] + fn __lasx_xvldrepl_d(a: *const i8, b: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvpickve2gr.w"] + fn __lasx_xvpickve2gr_w(a: v8i32, b: u32) -> i32; + #[link_name = "llvm.loongarch.lasx.xvpickve2gr.wu"] + fn __lasx_xvpickve2gr_wu(a: v8i32, b: u32) -> u32; + #[link_name = "llvm.loongarch.lasx.xvpickve2gr.d"] + fn __lasx_xvpickve2gr_d(a: v4i64, b: u32) -> i64; + #[link_name = "llvm.loongarch.lasx.xvpickve2gr.du"] + fn __lasx_xvpickve2gr_du(a: v4i64, b: u32) -> u64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.q.d"] + fn __lasx_xvaddwev_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.d.w"] + fn __lasx_xvaddwev_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.w.h"] + fn __lasx_xvaddwev_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvaddwev.h.b"] + fn __lasx_xvaddwev_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvaddwev.q.du"] + fn __lasx_xvaddwev_q_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.d.wu"] + fn __lasx_xvaddwev_d_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.w.hu"] + fn __lasx_xvaddwev_w_hu(a: v16u16, b: v16u16) -> v8i32; + 
#[link_name = "llvm.loongarch.lasx.xvaddwev.h.bu"] + fn __lasx_xvaddwev_h_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsubwev.q.d"] + fn __lasx_xvsubwev_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwev.d.w"] + fn __lasx_xvsubwev_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwev.w.h"] + fn __lasx_xvsubwev_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsubwev.h.b"] + fn __lasx_xvsubwev_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsubwev.q.du"] + fn __lasx_xvsubwev_q_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwev.d.wu"] + fn __lasx_xvsubwev_d_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwev.w.hu"] + fn __lasx_xvsubwev_w_hu(a: v16u16, b: v16u16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsubwev.h.bu"] + fn __lasx_xvsubwev_h_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmulwev.q.d"] + fn __lasx_xvmulwev_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwev.d.w"] + fn __lasx_xvmulwev_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwev.w.h"] + fn __lasx_xvmulwev_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmulwev.h.b"] + fn __lasx_xvmulwev_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmulwev.q.du"] + fn __lasx_xvmulwev_q_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwev.d.wu"] + fn __lasx_xvmulwev_d_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwev.w.hu"] + fn __lasx_xvmulwev_w_hu(a: v16u16, b: v16u16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmulwev.h.bu"] + fn __lasx_xvmulwev_h_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvaddwod.q.d"] + fn __lasx_xvaddwod_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = 
"llvm.loongarch.lasx.xvaddwod.d.w"] + fn __lasx_xvaddwod_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwod.w.h"] + fn __lasx_xvaddwod_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvaddwod.h.b"] + fn __lasx_xvaddwod_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvaddwod.q.du"] + fn __lasx_xvaddwod_q_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwod.d.wu"] + fn __lasx_xvaddwod_d_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwod.w.hu"] + fn __lasx_xvaddwod_w_hu(a: v16u16, b: v16u16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvaddwod.h.bu"] + fn __lasx_xvaddwod_h_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsubwod.q.d"] + fn __lasx_xvsubwod_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwod.d.w"] + fn __lasx_xvsubwod_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwod.w.h"] + fn __lasx_xvsubwod_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsubwod.h.b"] + fn __lasx_xvsubwod_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsubwod.q.du"] + fn __lasx_xvsubwod_q_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwod.d.wu"] + fn __lasx_xvsubwod_d_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsubwod.w.hu"] + fn __lasx_xvsubwod_w_hu(a: v16u16, b: v16u16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsubwod.h.bu"] + fn __lasx_xvsubwod_h_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmulwod.q.d"] + fn __lasx_xvmulwod_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwod.d.w"] + fn __lasx_xvmulwod_d_w(a: v8i32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwod.w.h"] + fn __lasx_xvmulwod_w_h(a: v16i16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmulwod.h.b"] + fn 
__lasx_xvmulwod_h_b(a: v32i8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmulwod.q.du"] + fn __lasx_xvmulwod_q_du(a: v4u64, b: v4u64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwod.d.wu"] + fn __lasx_xvmulwod_d_wu(a: v8u32, b: v8u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwod.w.hu"] + fn __lasx_xvmulwod_w_hu(a: v16u16, b: v16u16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmulwod.h.bu"] + fn __lasx_xvmulwod_h_bu(a: v32u8, b: v32u8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvaddwev.d.wu.w"] + fn __lasx_xvaddwev_d_wu_w(a: v8u32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.w.hu.h"] + fn __lasx_xvaddwev_w_hu_h(a: v16u16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvaddwev.h.bu.b"] + fn __lasx_xvaddwev_h_bu_b(a: v32u8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmulwev.d.wu.w"] + fn __lasx_xvmulwev_d_wu_w(a: v8u32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwev.w.hu.h"] + fn __lasx_xvmulwev_w_hu_h(a: v16u16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmulwev.h.bu.b"] + fn __lasx_xvmulwev_h_bu_b(a: v32u8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvaddwod.d.wu.w"] + fn __lasx_xvaddwod_d_wu_w(a: v8u32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwod.w.hu.h"] + fn __lasx_xvaddwod_w_hu_h(a: v16u16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvaddwod.h.bu.b"] + fn __lasx_xvaddwod_h_bu_b(a: v32u8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmulwod.d.wu.w"] + fn __lasx_xvmulwod_d_wu_w(a: v8u32, b: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwod.w.hu.h"] + fn __lasx_xvmulwod_w_hu_h(a: v16u16, b: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmulwod.h.bu.b"] + fn __lasx_xvmulwod_h_bu_b(a: v32u8, b: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvhaddw.q.d"] + fn __lasx_xvhaddw_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = 
"llvm.loongarch.lasx.xvhaddw.qu.du"] + fn __lasx_xvhaddw_qu_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvhsubw.q.d"] + fn __lasx_xvhsubw_q_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvhsubw.qu.du"] + fn __lasx_xvhsubw_qu_du(a: v4u64, b: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.q.d"] + fn __lasx_xvmaddwev_q_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.d.w"] + fn __lasx_xvmaddwev_d_w(a: v4i64, b: v8i32, c: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.w.h"] + fn __lasx_xvmaddwev_w_h(a: v8i32, b: v16i16, c: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.h.b"] + fn __lasx_xvmaddwev_h_b(a: v16i16, b: v32i8, c: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.q.du"] + fn __lasx_xvmaddwev_q_du(a: v4u64, b: v4u64, c: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.d.wu"] + fn __lasx_xvmaddwev_d_wu(a: v4u64, b: v8u32, c: v8u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.w.hu"] + fn __lasx_xvmaddwev_w_hu(a: v8u32, b: v16u16, c: v16u16) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.h.bu"] + fn __lasx_xvmaddwev_h_bu(a: v16u16, b: v32u8, c: v32u8) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.q.d"] + fn __lasx_xvmaddwod_q_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.d.w"] + fn __lasx_xvmaddwod_d_w(a: v4i64, b: v8i32, c: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.w.h"] + fn __lasx_xvmaddwod_w_h(a: v8i32, b: v16i16, c: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.h.b"] + fn __lasx_xvmaddwod_h_b(a: v16i16, b: v32i8, c: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.q.du"] + fn __lasx_xvmaddwod_q_du(a: v4u64, b: v4u64, c: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.d.wu"] + fn __lasx_xvmaddwod_d_wu(a: v4u64, b: v8u32, c: v8u32) -> v4u64; + 
#[link_name = "llvm.loongarch.lasx.xvmaddwod.w.hu"] + fn __lasx_xvmaddwod_w_hu(a: v8u32, b: v16u16, c: v16u16) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.h.bu"] + fn __lasx_xvmaddwod_h_bu(a: v16u16, b: v32u8, c: v32u8) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.q.du.d"] + fn __lasx_xvmaddwev_q_du_d(a: v4i64, b: v4u64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.d.wu.w"] + fn __lasx_xvmaddwev_d_wu_w(a: v4i64, b: v8u32, c: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.w.hu.h"] + fn __lasx_xvmaddwev_w_hu_h(a: v8i32, b: v16u16, c: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmaddwev.h.bu.b"] + fn __lasx_xvmaddwev_h_bu_b(a: v16i16, b: v32u8, c: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.q.du.d"] + fn __lasx_xvmaddwod_q_du_d(a: v4i64, b: v4u64, c: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.d.wu.w"] + fn __lasx_xvmaddwod_d_wu_w(a: v4i64, b: v8u32, c: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.w.hu.h"] + fn __lasx_xvmaddwod_w_hu_h(a: v8i32, b: v16u16, c: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvmaddwod.h.bu.b"] + fn __lasx_xvmaddwod_h_bu_b(a: v16i16, b: v32u8, c: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvrotr.b"] + fn __lasx_xvrotr_b(a: v32i8, b: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvrotr.h"] + fn __lasx_xvrotr_h(a: v16i16, b: v16i16) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvrotr.w"] + fn __lasx_xvrotr_w(a: v8i32, b: v8i32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvrotr.d"] + fn __lasx_xvrotr_d(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvadd.q"] + fn __lasx_xvadd_q(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsub.q"] + fn __lasx_xvsub_q(a: v4i64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvaddwev.q.du.d"] + fn __lasx_xvaddwev_q_du_d(a: v4u64, b: v4i64) -> v4i64; + #[link_name = 
"llvm.loongarch.lasx.xvaddwod.q.du.d"] + fn __lasx_xvaddwod_q_du_d(a: v4u64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwev.q.du.d"] + fn __lasx_xvmulwev_q_du_d(a: v4u64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmulwod.q.du.d"] + fn __lasx_xvmulwod_q_du_d(a: v4u64, b: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvmskgez.b"] + fn __lasx_xvmskgez_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvmsknz.b"] + fn __lasx_xvmsknz_b(a: v32i8) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvexth.h.b"] + fn __lasx_xvexth_h_b(a: v32i8) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvexth.w.h"] + fn __lasx_xvexth_w_h(a: v16i16) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvexth.d.w"] + fn __lasx_xvexth_d_w(a: v8i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvexth.q.d"] + fn __lasx_xvexth_q_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvexth.hu.bu"] + fn __lasx_xvexth_hu_bu(a: v32u8) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvexth.wu.hu"] + fn __lasx_xvexth_wu_hu(a: v16u16) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvexth.du.wu"] + fn __lasx_xvexth_du_wu(a: v8u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvexth.qu.du"] + fn __lasx_xvexth_qu_du(a: v4u64) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvrotri.b"] + fn __lasx_xvrotri_b(a: v32i8, b: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvrotri.h"] + fn __lasx_xvrotri_h(a: v16i16, b: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvrotri.w"] + fn __lasx_xvrotri_w(a: v8i32, b: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvrotri.d"] + fn __lasx_xvrotri_d(a: v4i64, b: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvextl.q.d"] + fn __lasx_xvextl_q_d(a: v4i64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrlni.b.h"] + fn __lasx_xvsrlni_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrlni.h.w"] + fn __lasx_xvsrlni_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + 
#[link_name = "llvm.loongarch.lasx.xvsrlni.w.d"] + fn __lasx_xvsrlni_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrlni.d.q"] + fn __lasx_xvsrlni_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrlrni.b.h"] + fn __lasx_xvsrlrni_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrlrni.h.w"] + fn __lasx_xvsrlrni_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrlrni.w.d"] + fn __lasx_xvsrlrni_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrlrni.d.q"] + fn __lasx_xvsrlrni_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssrlni.b.h"] + fn __lasx_xvssrlni_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrlni.h.w"] + fn __lasx_xvssrlni_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrlni.w.d"] + fn __lasx_xvssrlni_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrlni.d.q"] + fn __lasx_xvssrlni_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssrlni.bu.h"] + fn __lasx_xvssrlni_bu_h(a: v32u8, b: v32i8, c: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrlni.hu.w"] + fn __lasx_xvssrlni_hu_w(a: v16u16, b: v16i16, c: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrlni.wu.d"] + fn __lasx_xvssrlni_wu_d(a: v8u32, b: v8i32, c: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvssrlni.du.q"] + fn __lasx_xvssrlni_du_q(a: v4u64, b: v4i64, c: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.b.h"] + fn __lasx_xvssrlrni_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.h.w"] + fn __lasx_xvssrlrni_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.w.d"] + fn __lasx_xvssrlrni_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = 
"llvm.loongarch.lasx.xvssrlrni.d.q"] + fn __lasx_xvssrlrni_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.bu.h"] + fn __lasx_xvssrlrni_bu_h(a: v32u8, b: v32i8, c: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.hu.w"] + fn __lasx_xvssrlrni_hu_w(a: v16u16, b: v16i16, c: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.wu.d"] + fn __lasx_xvssrlrni_wu_d(a: v8u32, b: v8i32, c: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvssrlrni.du.q"] + fn __lasx_xvssrlrni_du_q(a: v4u64, b: v4i64, c: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvsrani.b.h"] + fn __lasx_xvsrani_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrani.h.w"] + fn __lasx_xvsrani_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrani.w.d"] + fn __lasx_xvsrani_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrani.d.q"] + fn __lasx_xvsrani_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvsrarni.b.h"] + fn __lasx_xvsrarni_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvsrarni.h.w"] + fn __lasx_xvsrarni_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvsrarni.w.d"] + fn __lasx_xvsrarni_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvsrarni.d.q"] + fn __lasx_xvsrarni_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssrani.b.h"] + fn __lasx_xvssrani_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrani.h.w"] + fn __lasx_xvssrani_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrani.w.d"] + fn __lasx_xvssrani_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrani.d.q"] + fn __lasx_xvssrani_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = 
"llvm.loongarch.lasx.xvssrani.bu.h"] + fn __lasx_xvssrani_bu_h(a: v32u8, b: v32i8, c: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrani.hu.w"] + fn __lasx_xvssrani_hu_w(a: v16u16, b: v16i16, c: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrani.wu.d"] + fn __lasx_xvssrani_wu_d(a: v8u32, b: v8i32, c: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvssrani.du.q"] + fn __lasx_xvssrani_du_q(a: v4u64, b: v4i64, c: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xvssrarni.b.h"] + fn __lasx_xvssrarni_b_h(a: v32i8, b: v32i8, c: u32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvssrarni.h.w"] + fn __lasx_xvssrarni_h_w(a: v16i16, b: v16i16, c: u32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvssrarni.w.d"] + fn __lasx_xvssrarni_w_d(a: v8i32, b: v8i32, c: u32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvssrarni.d.q"] + fn __lasx_xvssrarni_d_q(a: v4i64, b: v4i64, c: u32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvssrarni.bu.h"] + fn __lasx_xvssrarni_bu_h(a: v32u8, b: v32i8, c: u32) -> v32u8; + #[link_name = "llvm.loongarch.lasx.xvssrarni.hu.w"] + fn __lasx_xvssrarni_hu_w(a: v16u16, b: v16i16, c: u32) -> v16u16; + #[link_name = "llvm.loongarch.lasx.xvssrarni.wu.d"] + fn __lasx_xvssrarni_wu_d(a: v8u32, b: v8i32, c: u32) -> v8u32; + #[link_name = "llvm.loongarch.lasx.xvssrarni.du.q"] + fn __lasx_xvssrarni_du_q(a: v4u64, b: v4i64, c: u32) -> v4u64; + #[link_name = "llvm.loongarch.lasx.xbnz.b"] + fn __lasx_xbnz_b(a: v32u8) -> i32; + #[link_name = "llvm.loongarch.lasx.xbnz.d"] + fn __lasx_xbnz_d(a: v4u64) -> i32; + #[link_name = "llvm.loongarch.lasx.xbnz.h"] + fn __lasx_xbnz_h(a: v16u16) -> i32; + #[link_name = "llvm.loongarch.lasx.xbnz.v"] + fn __lasx_xbnz_v(a: v32u8) -> i32; + #[link_name = "llvm.loongarch.lasx.xbnz.w"] + fn __lasx_xbnz_w(a: v8u32) -> i32; + #[link_name = "llvm.loongarch.lasx.xbz.b"] + fn __lasx_xbz_b(a: v32u8) -> i32; + #[link_name = "llvm.loongarch.lasx.xbz.d"] + fn __lasx_xbz_d(a: v4u64) -> i32; + #[link_name = 
"llvm.loongarch.lasx.xbz.h"] + fn __lasx_xbz_h(a: v16u16) -> i32; + #[link_name = "llvm.loongarch.lasx.xbz.v"] + fn __lasx_xbz_v(a: v32u8) -> i32; + #[link_name = "llvm.loongarch.lasx.xbz.w"] + fn __lasx_xbz_w(a: v8u32) -> i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.caf.d"] + fn __lasx_xvfcmp_caf_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.caf.s"] + fn __lasx_xvfcmp_caf_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.ceq.d"] + fn __lasx_xvfcmp_ceq_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.ceq.s"] + fn __lasx_xvfcmp_ceq_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cle.d"] + fn __lasx_xvfcmp_cle_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cle.s"] + fn __lasx_xvfcmp_cle_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.clt.d"] + fn __lasx_xvfcmp_clt_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.clt.s"] + fn __lasx_xvfcmp_clt_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cne.d"] + fn __lasx_xvfcmp_cne_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cne.s"] + fn __lasx_xvfcmp_cne_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cor.d"] + fn __lasx_xvfcmp_cor_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cor.s"] + fn __lasx_xvfcmp_cor_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cueq.d"] + fn __lasx_xvfcmp_cueq_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cueq.s"] + fn __lasx_xvfcmp_cueq_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cule.d"] + fn __lasx_xvfcmp_cule_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cule.s"] + fn __lasx_xvfcmp_cule_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = 
"llvm.loongarch.lasx.xvfcmp.cult.d"] + fn __lasx_xvfcmp_cult_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cult.s"] + fn __lasx_xvfcmp_cult_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cun.d"] + fn __lasx_xvfcmp_cun_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cune.d"] + fn __lasx_xvfcmp_cune_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cune.s"] + fn __lasx_xvfcmp_cune_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.cun.s"] + fn __lasx_xvfcmp_cun_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.saf.d"] + fn __lasx_xvfcmp_saf_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.saf.s"] + fn __lasx_xvfcmp_saf_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.seq.d"] + fn __lasx_xvfcmp_seq_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.seq.s"] + fn __lasx_xvfcmp_seq_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sle.d"] + fn __lasx_xvfcmp_sle_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sle.s"] + fn __lasx_xvfcmp_sle_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.slt.d"] + fn __lasx_xvfcmp_slt_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.slt.s"] + fn __lasx_xvfcmp_slt_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sne.d"] + fn __lasx_xvfcmp_sne_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sne.s"] + fn __lasx_xvfcmp_sne_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sor.d"] + fn __lasx_xvfcmp_sor_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sor.s"] + fn __lasx_xvfcmp_sor_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sueq.d"] + fn __lasx_xvfcmp_sueq_d(a: 
v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sueq.s"] + fn __lasx_xvfcmp_sueq_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sule.d"] + fn __lasx_xvfcmp_sule_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sule.s"] + fn __lasx_xvfcmp_sule_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sult.d"] + fn __lasx_xvfcmp_sult_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sult.s"] + fn __lasx_xvfcmp_sult_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sun.d"] + fn __lasx_xvfcmp_sun_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sune.d"] + fn __lasx_xvfcmp_sune_d(a: v4f64, b: v4f64) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sune.s"] + fn __lasx_xvfcmp_sune_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvfcmp.sun.s"] + fn __lasx_xvfcmp_sun_s(a: v8f32, b: v8f32) -> v8i32; + #[link_name = "llvm.loongarch.lasx.xvpickve.d.f"] + fn __lasx_xvpickve_d_f(a: v4f64, b: u32) -> v4f64; + #[link_name = "llvm.loongarch.lasx.xvpickve.w.f"] + fn __lasx_xvpickve_w_f(a: v8f32, b: u32) -> v8f32; + #[link_name = "llvm.loongarch.lasx.xvrepli.b"] + fn __lasx_xvrepli_b(a: i32) -> v32i8; + #[link_name = "llvm.loongarch.lasx.xvrepli.d"] + fn __lasx_xvrepli_d(a: i32) -> v4i64; + #[link_name = "llvm.loongarch.lasx.xvrepli.h"] + fn __lasx_xvrepli_h(a: i32) -> v16i16; + #[link_name = "llvm.loongarch.lasx.xvrepli.w"] + fn __lasx_xvrepli_w(a: i32) -> v8i32; +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsll_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsll_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsll_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsll_h(a, b) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsll_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsll_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsll_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsll_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslli_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvslli_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslli_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvslli_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslli_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslli_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslli_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvslli_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsra_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsra_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsra_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsra_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsra_w(a: v8i32, b: v8i32) -> v8i32 { + 
__lasx_xvsra_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsra_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsra_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrai_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsrai_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrai_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrai_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrai_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrai_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrai_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrai_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrar_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsrar_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrar_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsrar_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrar_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsrar_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lasx_xvsrar_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsrar_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrari_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsrari_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrari_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrari_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrari_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrari_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrari_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrari_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrl_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsrl_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrl_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsrl_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrl_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsrl_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrl_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsrl_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrli_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsrli_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrli_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrli_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrli_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrli_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrli_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrli_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlr_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsrlr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlr_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsrlr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlr_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsrlr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlr_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsrlr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlri_b(a: v32i8) -> v32i8 { + 
static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsrlri_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlri_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrlri_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlri_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrlri_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlri_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrlri_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclr_b(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvbitclr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclr_h(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvbitclr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclr_w(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvbitclr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclr_d(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvbitclr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclri_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvbitclri_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclri_h(a: v16u16) -> v16u16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvbitclri_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclri_w(a: v8u32) -> v8u32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvbitclri_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitclri_d(a: v4u64) -> v4u64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvbitclri_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitset_b(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvbitset_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitset_h(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvbitset_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitset_w(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvbitset_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitset_d(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvbitset_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitseti_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvbitseti_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe 
fn lasx_xvbitseti_h(a: v16u16) -> v16u16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvbitseti_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitseti_w(a: v8u32) -> v8u32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvbitseti_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitseti_d(a: v4u64) -> v4u64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvbitseti_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrev_b(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvbitrev_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrev_h(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvbitrev_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrev_w(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvbitrev_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrev_d(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvbitrev_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrevi_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvbitrevi_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrevi_h(a: v16u16) -> v16u16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvbitrevi_h(a, IMM4) 
+} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrevi_w(a: v8u32) -> v8u32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvbitrevi_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitrevi_d(a: v4u64) -> v4u64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvbitrevi_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadd_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvadd_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadd_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvadd_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadd_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvadd_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadd_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvadd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddi_bu(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvaddi_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddi_hu(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvaddi_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lasx_xvaddi_wu(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvaddi_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddi_du(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvaddi_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsub_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsub_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsub_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsub_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsub_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsub_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsub_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsub_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubi_bu(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsubi_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubi_hu(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsubi_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubi_wu(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsubi_wu(a, IMM5) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubi_du(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsubi_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvmax_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvmax_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvmax_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmax_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_b(a: v32i8) -> v32i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmaxi_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_h(a: v16i16) -> v16i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmaxi_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_w(a: v8i32) -> v8i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmaxi_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe 
fn lasx_xvmaxi_d(a: v4i64) -> v4i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmaxi_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvmax_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvmax_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvmax_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmax_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvmax_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_bu(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmaxi_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_hu(a: v16u16) -> v16u16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmaxi_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_wu(a: v8u32) -> v8u32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmaxi_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaxi_du(a: v4u64) -> v4u64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmaxi_du(a, IMM5) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvmin_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvmin_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvmin_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmin_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_b(a: v32i8) -> v32i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmini_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_h(a: v16i16) -> v16i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmini_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_w(a: v8i32) -> v8i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmini_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_d(a: v4i64) -> v4i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvmini_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_bu(a: v32u8, b: 
v32u8) -> v32u8 { + __lasx_xvmin_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvmin_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvmin_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmin_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvmin_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_bu(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmini_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_hu(a: v16u16) -> v16u16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmini_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_wu(a: v8u32) -> v8u32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmini_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmini_du(a: v4u64) -> v4u64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvmini_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseq_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvseq_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lasx_xvseq_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvseq_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseq_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvseq_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseq_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvseq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseqi_b(a: v32i8) -> v32i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvseqi_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseqi_h(a: v16i16) -> v16i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvseqi_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseqi_w(a: v8i32) -> v8i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvseqi_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvseqi_d(a: v4i64) -> v4i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvseqi_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvslt_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvslt_h(a, b) +} + +#[inline] +#[target_feature(enable = 
"lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvslt_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvslt_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_b(a: v32i8) -> v32i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslti_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_h(a: v16i16) -> v16i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslti_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_w(a: v8i32) -> v8i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslti_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_d(a: v4i64) -> v4i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslti_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_bu(a: v32u8, b: v32u8) -> v32i8 { + __lasx_xvslt_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_hu(a: v16u16, b: v16u16) -> v16i16 { + __lasx_xvslt_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_wu(a: v8u32, b: v8u32) -> v8i32 { + 
__lasx_xvslt_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslt_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvslt_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_bu(a: v32u8) -> v32i8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslti_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_hu(a: v16u16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslti_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_wu(a: v8u32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslti_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslti_du(a: v4u64) -> v4i64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslti_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsle_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsle_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsle_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe 
fn lasx_xvsle_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsle_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_b(a: v32i8) -> v32i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslei_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_h(a: v16i16) -> v16i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslei_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_w(a: v8i32) -> v8i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslei_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_d(a: v4i64) -> v4i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lasx_xvslei_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_bu(a: v32u8, b: v32u8) -> v32i8 { + __lasx_xvsle_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_hu(a: v16u16, b: v16u16) -> v16i16 { + __lasx_xvsle_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_wu(a: v8u32, b: v8u32) -> v8i32 { + __lasx_xvsle_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsle_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvsle_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_bu(a: v32u8) -> v32i8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslei_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_hu(a: v16u16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslei_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_wu(a: v8u32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslei_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvslei_du(a: v4u64) -> v4i64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvslei_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsat_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsat_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsat_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_d(a: v4i64) -> v4i64 { + 
static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsat_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_bu(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsat_bu(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_hu(a: v16u16) -> v16u16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsat_hu(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_wu(a: v8u32) -> v8u32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsat_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsat_du(a: v4u64) -> v4u64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsat_du(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadda_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvadda_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadda_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvadda_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadda_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvadda_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadda_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvadda_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsadd_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsadd_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsadd_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsadd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvsadd_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvsadd_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvsadd_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsadd_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvsadd_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvavg_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvavg_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvavg_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvavg_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvavg_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvavg_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvavg_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavg_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvavg_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvavgr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvavgr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvavgr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvavgr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvavgr_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvavgr_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvavgr_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvavgr_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvavgr_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvssub_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvssub_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvssub_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvssub_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvssub_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvssub_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature 
= "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvssub_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssub_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvssub_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvabsd_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvabsd_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvabsd_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvabsd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvabsd_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvabsd_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvabsd_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvabsd_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvabsd_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmul_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvmul_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmul_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvmul_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmul_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvmul_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmul_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmul_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmadd_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8 { + __lasx_xvmadd_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmadd_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16 { + __lasx_xvmadd_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmadd_w(a: v8i32, b: v8i32, c: v8i32) -> v8i32 { + __lasx_xvmadd_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmadd_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64 { + __lasx_xvmadd_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmsub_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8 { + __lasx_xvmsub_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmsub_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16 { + __lasx_xvmsub_h(a, b, c) +} + 
+#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmsub_w(a: v8i32, b: v8i32, c: v8i32) -> v8i32 { + __lasx_xvmsub_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmsub_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64 { + __lasx_xvmsub_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvdiv_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvdiv_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvdiv_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvdiv_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvdiv_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvdiv_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvdiv_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvdiv_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvdiv_du(a, b) 
+} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvhaddw_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvhaddw_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvhaddw_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_hu_bu(a: v32u8, b: v32u8) -> v16u16 { + __lasx_xvhaddw_hu_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_wu_hu(a: v16u16, b: v16u16) -> v8u32 { + __lasx_xvhaddw_wu_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_du_wu(a: v8u32, b: v8u32) -> v4u64 { + __lasx_xvhaddw_du_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvhsubw_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvhsubw_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvhsubw_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lasx_xvhsubw_hu_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvhsubw_hu_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_wu_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvhsubw_wu_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_du_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvhsubw_du_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvmod_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvmod_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvmod_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmod_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvmod_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvmod_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmod_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvmod_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] 
+pub unsafe fn lasx_xvmod_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvmod_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepl128vei_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvrepl128vei_b(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepl128vei_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvrepl128vei_h(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepl128vei_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvrepl128vei_w(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepl128vei_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM1, 1); + __lasx_xvrepl128vei_d(a, IMM1) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickev_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvpickev_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickev_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvpickev_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickev_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvpickev_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickev_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvpickev_d(a, b) +} 
+ +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickod_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvpickod_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickod_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvpickod_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickod_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvpickod_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickod_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvpickod_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvh_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvilvh_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvh_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvilvh_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvh_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvilvh_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvh_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvilvh_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvl_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvilvl_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvl_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvilvl_h(a, 
b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvl_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvilvl_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvilvl_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvilvl_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackev_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvpackev_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackev_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvpackev_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackev_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvpackev_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackev_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvpackev_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackod_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvpackod_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackod_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvpackod_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackod_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvpackod_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpackod_d(a: v4i64, b: v4i64) -> v4i64 { + 
__lasx_xvpackod_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8 { + __lasx_xvshuf_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16 { + __lasx_xvshuf_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf_w(a: v8i32, b: v8i32, c: v8i32) -> v8i32 { + __lasx_xvshuf_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64 { + __lasx_xvshuf_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvand_v(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvand_v(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvandi_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvandi_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvor_v(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvor_v(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvori_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvori_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvnor_v(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvnor_v(a, b) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvnori_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvnori_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvxor_v(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvxor_v(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvxori_b(a: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvxori_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitsel_v(a: v32u8, b: v32u8, c: v32u8) -> v32u8 { + __lasx_xvbitsel_v(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbitseli_b(a: v32u8, b: v32u8) -> v32u8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvbitseli_b(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf4i_b(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvshuf4i_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf4i_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvshuf4i_h(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf4i_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvshuf4i_w(a, 
IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplgr2vr_b(a: i32) -> v32i8 { + __lasx_xvreplgr2vr_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplgr2vr_h(a: i32) -> v16i16 { + __lasx_xvreplgr2vr_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplgr2vr_w(a: i32) -> v8i32 { + __lasx_xvreplgr2vr_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplgr2vr_d(a: i64) -> v4i64 { + __lasx_xvreplgr2vr_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpcnt_b(a: v32i8) -> v32i8 { + __lasx_xvpcnt_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpcnt_h(a: v16i16) -> v16i16 { + __lasx_xvpcnt_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpcnt_w(a: v8i32) -> v8i32 { + __lasx_xvpcnt_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpcnt_d(a: v4i64) -> v4i64 { + __lasx_xvpcnt_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclo_b(a: v32i8) -> v32i8 { + __lasx_xvclo_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclo_h(a: v16i16) -> v16i16 { + __lasx_xvclo_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lasx_xvclo_w(a: v8i32) -> v8i32 { + __lasx_xvclo_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclo_d(a: v4i64) -> v4i64 { + __lasx_xvclo_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclz_b(a: v32i8) -> v32i8 { + __lasx_xvclz_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclz_h(a: v16i16) -> v16i16 { + __lasx_xvclz_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclz_w(a: v8i32) -> v8i32 { + __lasx_xvclz_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvclz_d(a: v4i64) -> v4i64 { + __lasx_xvclz_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfadd_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfadd_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfadd_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfadd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfsub_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfsub_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfsub_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfsub_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmul_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfmul_s(a, b) +} + 
+#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmul_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfmul_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfdiv_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfdiv_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfdiv_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfdiv_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcvt_h_s(a: v8f32, b: v8f32) -> v16i16 { + __lasx_xvfcvt_h_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcvt_s_d(a: v4f64, b: v4f64) -> v8f32 { + __lasx_xvfcvt_s_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmin_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfmin_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmin_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfmin_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmina_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfmina_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmina_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfmina_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmax_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfmax_s(a, b) +} + 
+#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmax_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfmax_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmaxa_s(a: v8f32, b: v8f32) -> v8f32 { + __lasx_xvfmaxa_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmaxa_d(a: v4f64, b: v4f64) -> v4f64 { + __lasx_xvfmaxa_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfclass_s(a: v8f32) -> v8i32 { + __lasx_xvfclass_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfclass_d(a: v4f64) -> v4i64 { + __lasx_xvfclass_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfsqrt_s(a: v8f32) -> v8f32 { + __lasx_xvfsqrt_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfsqrt_d(a: v4f64) -> v4f64 { + __lasx_xvfsqrt_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrecip_s(a: v8f32) -> v8f32 { + __lasx_xvfrecip_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrecip_d(a: v4f64) -> v4f64 { + __lasx_xvfrecip_d(a) +} + +#[inline] +#[target_feature(enable = "lasx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrecipe_s(a: v8f32) -> v8f32 { + __lasx_xvfrecipe_s(a) +} + +#[inline] +#[target_feature(enable = "lasx,frecipe")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrecipe_d(a: v4f64) -> v4f64 { + __lasx_xvfrecipe_d(a) +} + +#[inline] +#[target_feature(enable = "lasx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrsqrte_s(a: v8f32) -> v8f32 { + __lasx_xvfrsqrte_s(a) +} + +#[inline] +#[target_feature(enable = "lasx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrsqrte_d(a: v4f64) -> v4f64 { + __lasx_xvfrsqrte_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrint_s(a: v8f32) -> v8f32 { + __lasx_xvfrint_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrint_d(a: v4f64) -> v4f64 { + __lasx_xvfrint_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrsqrt_s(a: v8f32) -> v8f32 { + __lasx_xvfrsqrt_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrsqrt_d(a: v4f64) -> v4f64 { + __lasx_xvfrsqrt_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvflogb_s(a: v8f32) -> v8f32 { + __lasx_xvflogb_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvflogb_d(a: v4f64) -> v4f64 { + __lasx_xvflogb_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcvth_s_h(a: v16i16) -> v8f32 { + __lasx_xvfcvth_s_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lasx_xvfcvth_d_s(a: v8f32) -> v4f64 { + __lasx_xvfcvth_d_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcvtl_s_h(a: v16i16) -> v8f32 { + __lasx_xvfcvtl_s_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcvtl_d_s(a: v8f32) -> v4f64 { + __lasx_xvfcvtl_d_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftint_w_s(a: v8f32) -> v8i32 { + __lasx_xvftint_w_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftint_l_d(a: v4f64) -> v4i64 { + __lasx_xvftint_l_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftint_wu_s(a: v8f32) -> v8u32 { + __lasx_xvftint_wu_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftint_lu_d(a: v4f64) -> v4u64 { + __lasx_xvftint_lu_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrz_w_s(a: v8f32) -> v8i32 { + __lasx_xvftintrz_w_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrz_l_d(a: v4f64) -> v4i64 { + __lasx_xvftintrz_l_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrz_wu_s(a: v8f32) -> v8u32 { + __lasx_xvftintrz_wu_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrz_lu_d(a: v4f64) -> v4u64 { + __lasx_xvftintrz_lu_d(a) +} 
+ +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvffint_s_w(a: v8i32) -> v8f32 { + __lasx_xvffint_s_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvffint_d_l(a: v4i64) -> v4f64 { + __lasx_xvffint_d_l(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvffint_s_wu(a: v8u32) -> v8f32 { + __lasx_xvffint_s_wu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvffint_d_lu(a: v4u64) -> v4f64 { + __lasx_xvffint_d_lu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve_b(a: v32i8, b: i32) -> v32i8 { + __lasx_xvreplve_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve_h(a: v16i16, b: i32) -> v16i16 { + __lasx_xvreplve_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve_w(a: v8i32, b: i32) -> v8i32 { + __lasx_xvreplve_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve_d(a: v4i64, b: i32) -> v4i64 { + __lasx_xvreplve_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpermi_w(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvpermi_w(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvandn_v(a: v32u8, 
b: v32u8) -> v32u8 { + __lasx_xvandn_v(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvneg_b(a: v32i8) -> v32i8 { + __lasx_xvneg_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvneg_h(a: v16i16) -> v16i16 { + __lasx_xvneg_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvneg_w(a: v8i32) -> v8i32 { + __lasx_xvneg_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvneg_d(a: v4i64) -> v4i64 { + __lasx_xvneg_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvmuh_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvmuh_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvmuh_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmuh_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_bu(a: v32u8, b: v32u8) -> v32u8 { + __lasx_xvmuh_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_hu(a: v16u16, b: v16u16) -> v16u16 { + __lasx_xvmuh_hu(a, b) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_wu(a: v8u32, b: v8u32) -> v8u32 { + __lasx_xvmuh_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmuh_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvmuh_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsllwil_h_b(a: v32i8) -> v16i16 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsllwil_h_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsllwil_w_h(a: v16i16) -> v8i32 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsllwil_w_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsllwil_d_w(a: v8i32) -> v4i64 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsllwil_d_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsllwil_hu_bu(a: v32u8) -> v16u16 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvsllwil_hu_bu(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsllwil_wu_hu(a: v16u16) -> v8u32 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsllwil_wu_hu(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsllwil_du_wu(a: v8u32) -> v4u64 { + static_assert_uimm_bits!(IMM5, 5); + 
__lasx_xvsllwil_du_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsran_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvsran_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsran_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvsran_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsran_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvsran_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssran_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvssran_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssran_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvssran_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssran_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvssran_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssran_bu_h(a: v16u16, b: v16u16) -> v32u8 { + __lasx_xvssran_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssran_hu_w(a: v8u32, b: v8u32) -> v16u16 { + __lasx_xvssran_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssran_wu_d(a: v4u64, b: v4u64) -> v8u32 { + __lasx_xvssran_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub 
unsafe fn lasx_xvsrarn_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvsrarn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrarn_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvsrarn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrarn_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvsrarn_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarn_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvssrarn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarn_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvssrarn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarn_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvssrarn_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarn_bu_h(a: v16u16, b: v16u16) -> v32u8 { + __lasx_xvssrarn_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarn_hu_w(a: v8u32, b: v8u32) -> v16u16 { + __lasx_xvssrarn_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarn_wu_d(a: v4u64, b: v4u64) -> v8u32 { + __lasx_xvssrarn_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrln_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvsrln_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrln_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvsrln_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrln_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvsrln_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrln_bu_h(a: v16u16, b: v16u16) -> v32u8 { + __lasx_xvssrln_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrln_hu_w(a: v8u32, b: v8u32) -> v16u16 { + __lasx_xvssrln_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrln_wu_d(a: v4u64, b: v4u64) -> v8u32 { + __lasx_xvssrln_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrn_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvsrlrn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrn_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvsrlrn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrn_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvsrlrn_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrn_bu_h(a: v16u16, b: v16u16) -> v32u8 { + __lasx_xvssrlrn_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrn_hu_w(a: v8u32, b: v8u32) -> v16u16 { + 
__lasx_xvssrlrn_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrn_wu_d(a: v4u64, b: v4u64) -> v8u32 { + __lasx_xvssrlrn_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrstpi_b(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvfrstpi_b(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrstpi_h(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvfrstpi_h(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrstp_b(a: v32i8, b: v32i8, c: v32i8) -> v32i8 { + __lasx_xvfrstp_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrstp_h(a: v16i16, b: v16i16, c: v16i16) -> v16i16 { + __lasx_xvfrstp_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvshuf4i_d(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvshuf4i_d(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbsrl_v(a: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvbsrl_v(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvbsll_v(a: v32i8) -> v32i8 { + 
static_assert_uimm_bits!(IMM5, 5); + __lasx_xvbsll_v(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvextrins_b(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvextrins_b(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvextrins_h(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvextrins_h(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvextrins_w(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvextrins_w(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvextrins_d(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvextrins_d(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmskltz_b(a: v32i8) -> v32i8 { + __lasx_xvmskltz_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmskltz_h(a: v16i16) -> v16i16 { + __lasx_xvmskltz_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmskltz_w(a: v8i32) -> v8i32 { + __lasx_xvmskltz_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmskltz_d(a: v4i64) -> v4i64 { + __lasx_xvmskltz_d(a) +} + +#[inline] +#[target_feature(enable = 
"lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsigncov_b(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvsigncov_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsigncov_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvsigncov_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsigncov_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvsigncov_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsigncov_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsigncov_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmadd_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32 { + __lasx_xvfmadd_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmadd_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64 { + __lasx_xvfmadd_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmsub_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32 { + __lasx_xvfmsub_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfmsub_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64 { + __lasx_xvfmsub_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfnmadd_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32 { + __lasx_xvfnmadd_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfnmadd_d(a: v4f64, 
b: v4f64, c: v4f64) -> v4f64 { + __lasx_xvfnmadd_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfnmsub_s(a: v8f32, b: v8f32, c: v8f32) -> v8f32 { + __lasx_xvfnmsub_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfnmsub_d(a: v4f64, b: v4f64, c: v4f64) -> v4f64 { + __lasx_xvfnmsub_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrne_w_s(a: v8f32) -> v8i32 { + __lasx_xvftintrne_w_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrne_l_d(a: v4f64) -> v4i64 { + __lasx_xvftintrne_l_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrp_w_s(a: v8f32) -> v8i32 { + __lasx_xvftintrp_w_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrp_l_d(a: v4f64) -> v4i64 { + __lasx_xvftintrp_l_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrm_w_s(a: v8f32) -> v8i32 { + __lasx_xvftintrm_w_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrm_l_d(a: v4f64) -> v4i64 { + __lasx_xvftintrm_l_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftint_w_d(a: v4f64, b: v4f64) -> v8i32 { + __lasx_xvftint_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lasx_xvffint_s_l(a: v4i64, b: v4i64) -> v8f32 { + __lasx_xvffint_s_l(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrz_w_d(a: v4f64, b: v4f64) -> v8i32 { + __lasx_xvftintrz_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrp_w_d(a: v4f64, b: v4f64) -> v8i32 { + __lasx_xvftintrp_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrm_w_d(a: v4f64, b: v4f64) -> v8i32 { + __lasx_xvftintrm_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrne_w_d(a: v4f64, b: v4f64) -> v8i32 { + __lasx_xvftintrne_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftinth_l_s(a: v8f32) -> v4i64 { + __lasx_xvftinth_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintl_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintl_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvffinth_d_w(a: v8i32) -> v4f64 { + __lasx_xvffinth_d_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvffintl_d_w(a: v8i32) -> v4f64 { + __lasx_xvffintl_d_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrzh_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrzh_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub 
unsafe fn lasx_xvftintrzl_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrzl_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrph_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrph_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrpl_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrpl_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrmh_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrmh_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrml_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrml_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrneh_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrneh_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvftintrnel_l_s(a: v8f32) -> v4i64 { + __lasx_xvftintrnel_l_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrne_s(a: v8f32) -> v8f32 { + __lasx_xvfrintrne_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrne_d(a: v4f64) -> v4f64 { + __lasx_xvfrintrne_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrz_s(a: v8f32) -> v8f32 { + __lasx_xvfrintrz_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrz_d(a: v4f64) -> 
v4f64 { + __lasx_xvfrintrz_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrp_s(a: v8f32) -> v8f32 { + __lasx_xvfrintrp_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrp_d(a: v4f64) -> v4f64 { + __lasx_xvfrintrp_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrm_s(a: v8f32) -> v8f32 { + __lasx_xvfrintrm_s(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfrintrm_d(a: v4f64) -> v4f64 { + __lasx_xvfrintrm_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvld(mem_addr: *const i8) -> v32i8 { + static_assert_simm_bits!(IMM_S12, 12); + __lasx_xvld(mem_addr, IMM_S12) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvst(a: v32i8, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S12, 12); + __lasx_xvst(a, mem_addr, IMM_S12) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvstelm_b(a: v32i8, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvstelm_b(a, mem_addr, IMM_S8, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvstelm_h(a: v16i16, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM3, 3); + 
__lasx_xvstelm_h(a, mem_addr, IMM_S8, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvstelm_w(a: v8i32, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvstelm_w(a, mem_addr, IMM_S8, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvstelm_d(a: v4i64, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM1, 1); + __lasx_xvstelm_d(a, mem_addr, IMM_S8, IMM1) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvinsve0_w(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvinsve0_w(a, b, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvinsve0_d(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvinsve0_d(a, b, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvpickve_w(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvpickve_d(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrn_b_h(a: v16i16, b: v16i16) -> v32i8 { + 
__lasx_xvssrlrn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrn_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvssrlrn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrn_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvssrlrn_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrln_b_h(a: v16i16, b: v16i16) -> v32i8 { + __lasx_xvssrln_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrln_h_w(a: v8i32, b: v8i32) -> v16i16 { + __lasx_xvssrln_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrln_w_d(a: v4i64, b: v4i64) -> v8i32 { + __lasx_xvssrln_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvorn_v(a: v32i8, b: v32i8) -> v32i8 { + __lasx_xvorn_v(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvldi() -> v4i64 { + static_assert_simm_bits!(IMM_S13, 13); + __lasx_xvldi(IMM_S13) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvldx(mem_addr: *const i8, b: i64) -> v32i8 { + __lasx_xvldx(mem_addr, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvstx(a: v32i8, mem_addr: *mut i8, b: i64) { + __lasx_xvstx(a, mem_addr, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvextl_qu_du(a: v4u64) -> v4u64 { + __lasx_xvextl_qu_du(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvinsgr2vr_w(a: v8i32, b: i32) -> v8i32 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvinsgr2vr_w(a, b, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvinsgr2vr_d(a: v4i64, b: i64) -> v4i64 { + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvinsgr2vr_d(a, b, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve0_b(a: v32i8) -> v32i8 { + __lasx_xvreplve0_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve0_h(a: v16i16) -> v16i16 { + __lasx_xvreplve0_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve0_w(a: v8i32) -> v8i32 { + __lasx_xvreplve0_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve0_d(a: v4i64) -> v4i64 { + __lasx_xvreplve0_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvreplve0_q(a: v32i8) -> v32i8 { + __lasx_xvreplve0_q(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_h_b(a: v32i8) -> v16i16 { + __lasx_vext2xv_h_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_w_h(a: v16i16) -> v8i32 { 
+ __lasx_vext2xv_w_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_d_w(a: v8i32) -> v4i64 { + __lasx_vext2xv_d_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_w_b(a: v32i8) -> v8i32 { + __lasx_vext2xv_w_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_d_h(a: v16i16) -> v4i64 { + __lasx_vext2xv_d_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_d_b(a: v32i8) -> v4i64 { + __lasx_vext2xv_d_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_hu_bu(a: v32i8) -> v16i16 { + __lasx_vext2xv_hu_bu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_wu_hu(a: v16i16) -> v8i32 { + __lasx_vext2xv_wu_hu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_du_wu(a: v8i32) -> v4i64 { + __lasx_vext2xv_du_wu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_wu_bu(a: v32i8) -> v8i32 { + __lasx_vext2xv_wu_bu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_du_hu(a: v16i16) -> v4i64 { + __lasx_vext2xv_du_hu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_vext2xv_du_bu(a: v32i8) -> v4i64 { + __lasx_vext2xv_du_bu(a) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpermi_q(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvpermi_q(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpermi_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM8, 8); + __lasx_xvpermi_d(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvperm_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvperm_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvldrepl_b(mem_addr: *const i8) -> v32i8 { + static_assert_simm_bits!(IMM_S12, 12); + __lasx_xvldrepl_b(mem_addr, IMM_S12) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvldrepl_h(mem_addr: *const i8) -> v16i16 { + static_assert_simm_bits!(IMM_S11, 11); + __lasx_xvldrepl_h(mem_addr, IMM_S11) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvldrepl_w(mem_addr: *const i8) -> v8i32 { + static_assert_simm_bits!(IMM_S10, 10); + __lasx_xvldrepl_w(mem_addr, IMM_S10) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvldrepl_d(mem_addr: *const i8) -> v4i64 { + static_assert_simm_bits!(IMM_S9, 9); + __lasx_xvldrepl_d(mem_addr, IMM_S9) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve2gr_w(a: v8i32) -> i32 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvpickve2gr_w(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve2gr_wu(a: v8i32) -> u32 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvpickve2gr_wu(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve2gr_d(a: v4i64) -> i64 { + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvpickve2gr_d(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve2gr_du(a: v4i64) -> u64 { + static_assert_uimm_bits!(IMM2, 2); + __lasx_xvpickve2gr_du(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvaddwev_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvaddwev_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvaddwev_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvaddwev_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_q_du(a: v4u64, b: v4u64) -> v4i64 { + 
__lasx_xvaddwev_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_d_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvaddwev_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_w_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvaddwev_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_h_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvaddwev_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsubwev_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvsubwev_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvsubwev_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvsubwev_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_q_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvsubwev_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_d_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvsubwev_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lasx_xvsubwev_w_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvsubwev_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwev_h_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvsubwev_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmulwev_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvmulwev_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvmulwev_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvmulwev_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_q_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvmulwev_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_d_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvmulwev_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_w_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvmulwev_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_h_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvmulwev_h_bu(a, b) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvaddwod_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvaddwod_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvaddwod_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvaddwod_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_q_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvaddwod_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_d_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvaddwod_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_w_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvaddwod_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_h_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvaddwod_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsubwod_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lasx_xvsubwod_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvsubwod_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvsubwod_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvsubwod_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_q_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvsubwod_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_d_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvsubwod_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_w_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvsubwod_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsubwod_h_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvsubwod_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvmulwod_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_d_w(a: v8i32, b: v8i32) -> v4i64 { + __lasx_xvmulwod_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_w_h(a: v16i16, b: v16i16) -> v8i32 { + __lasx_xvmulwod_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_h_b(a: v32i8, b: v32i8) -> v16i16 { + __lasx_xvmulwod_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_q_du(a: v4u64, b: v4u64) -> v4i64 { + __lasx_xvmulwod_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_d_wu(a: v8u32, b: v8u32) -> v4i64 { + __lasx_xvmulwod_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_w_hu(a: v16u16, b: v16u16) -> v8i32 { + __lasx_xvmulwod_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_h_bu(a: v32u8, b: v32u8) -> v16i16 { + __lasx_xvmulwod_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_d_wu_w(a: v8u32, b: v8i32) -> v4i64 { + __lasx_xvaddwev_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_w_hu_h(a: v16u16, b: v16i16) -> v8i32 { + __lasx_xvaddwev_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_h_bu_b(a: v32u8, b: v32i8) -> v16i16 { + __lasx_xvaddwev_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_d_wu_w(a: v8u32, b: v8i32) -> v4i64 { + __lasx_xvmulwev_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_w_hu_h(a: 
v16u16, b: v16i16) -> v8i32 { + __lasx_xvmulwev_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_h_bu_b(a: v32u8, b: v32i8) -> v16i16 { + __lasx_xvmulwev_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_d_wu_w(a: v8u32, b: v8i32) -> v4i64 { + __lasx_xvaddwod_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_w_hu_h(a: v16u16, b: v16i16) -> v8i32 { + __lasx_xvaddwod_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_h_bu_b(a: v32u8, b: v32i8) -> v16i16 { + __lasx_xvaddwod_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_d_wu_w(a: v8u32, b: v8i32) -> v4i64 { + __lasx_xvmulwod_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_w_hu_h(a: v16u16, b: v16i16) -> v8i32 { + __lasx_xvmulwod_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_h_bu_b(a: v32u8, b: v32i8) -> v16i16 { + __lasx_xvmulwod_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvhaddw_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhaddw_qu_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvhaddw_qu_du(a, b) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_q_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvhsubw_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvhsubw_qu_du(a: v4u64, b: v4u64) -> v4u64 { + __lasx_xvhsubw_qu_du(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_q_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64 { + __lasx_xvmaddwev_q_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_d_w(a: v4i64, b: v8i32, c: v8i32) -> v4i64 { + __lasx_xvmaddwev_d_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_w_h(a: v8i32, b: v16i16, c: v16i16) -> v8i32 { + __lasx_xvmaddwev_w_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_h_b(a: v16i16, b: v32i8, c: v32i8) -> v16i16 { + __lasx_xvmaddwev_h_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_q_du(a: v4u64, b: v4u64, c: v4u64) -> v4u64 { + __lasx_xvmaddwev_q_du(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_d_wu(a: v4u64, b: v8u32, c: v8u32) -> v4u64 { + __lasx_xvmaddwev_d_wu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_w_hu(a: v8u32, b: v16u16, c: v16u16) -> v8u32 { + __lasx_xvmaddwev_w_hu(a, b, c) +} + +#[inline] +#[target_feature(enable = 
"lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_h_bu(a: v16u16, b: v32u8, c: v32u8) -> v16u16 { + __lasx_xvmaddwev_h_bu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_q_d(a: v4i64, b: v4i64, c: v4i64) -> v4i64 { + __lasx_xvmaddwod_q_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_d_w(a: v4i64, b: v8i32, c: v8i32) -> v4i64 { + __lasx_xvmaddwod_d_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_w_h(a: v8i32, b: v16i16, c: v16i16) -> v8i32 { + __lasx_xvmaddwod_w_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_h_b(a: v16i16, b: v32i8, c: v32i8) -> v16i16 { + __lasx_xvmaddwod_h_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_q_du(a: v4u64, b: v4u64, c: v4u64) -> v4u64 { + __lasx_xvmaddwod_q_du(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_d_wu(a: v4u64, b: v8u32, c: v8u32) -> v4u64 { + __lasx_xvmaddwod_d_wu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_w_hu(a: v8u32, b: v16u16, c: v16u16) -> v8u32 { + __lasx_xvmaddwod_w_hu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_h_bu(a: v16u16, b: v32u8, c: v32u8) -> v16u16 { + __lasx_xvmaddwod_h_bu(a, b, c) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_q_du_d(a: v4i64, b: v4u64, c: v4i64) -> v4i64 { + __lasx_xvmaddwev_q_du_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_d_wu_w(a: v4i64, b: v8u32, c: v8i32) -> v4i64 { + __lasx_xvmaddwev_d_wu_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_w_hu_h(a: v8i32, b: v16u16, c: v16i16) -> v8i32 { + __lasx_xvmaddwev_w_hu_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwev_h_bu_b(a: v16i16, b: v32u8, c: v32i8) -> v16i16 { + __lasx_xvmaddwev_h_bu_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_q_du_d(a: v4i64, b: v4u64, c: v4i64) -> v4i64 { + __lasx_xvmaddwod_q_du_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_d_wu_w(a: v4i64, b: v8u32, c: v8i32) -> v4i64 { + __lasx_xvmaddwod_d_wu_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_w_hu_h(a: v8i32, b: v16u16, c: v16i16) -> v8i32 { + __lasx_xvmaddwod_w_hu_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmaddwod_h_bu_b(a: v16i16, b: v32u8, c: v32i8) -> v16i16 { + __lasx_xvmaddwod_h_bu_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotr_b(a: v32i8, b: v32i8) -> v32i8 { + 
__lasx_xvrotr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotr_h(a: v16i16, b: v16i16) -> v16i16 { + __lasx_xvrotr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotr_w(a: v8i32, b: v8i32) -> v8i32 { + __lasx_xvrotr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotr_d(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvrotr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvadd_q(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvadd_q(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsub_q(a: v4i64, b: v4i64) -> v4i64 { + __lasx_xvsub_q(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwev_q_du_d(a: v4u64, b: v4i64) -> v4i64 { + __lasx_xvaddwev_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvaddwod_q_du_d(a: v4u64, b: v4i64) -> v4i64 { + __lasx_xvaddwod_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwev_q_du_d(a: v4u64, b: v4i64) -> v4i64 { + __lasx_xvmulwev_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmulwod_q_du_d(a: v4u64, b: v4i64) -> v4i64 { + __lasx_xvmulwod_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lasx_xvmskgez_b(a: v32i8) -> v32i8 { + __lasx_xvmskgez_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvmsknz_b(a: v32i8) -> v32i8 { + __lasx_xvmsknz_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_h_b(a: v32i8) -> v16i16 { + __lasx_xvexth_h_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_w_h(a: v16i16) -> v8i32 { + __lasx_xvexth_w_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_d_w(a: v8i32) -> v4i64 { + __lasx_xvexth_d_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_q_d(a: v4i64) -> v4i64 { + __lasx_xvexth_q_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_hu_bu(a: v32u8) -> v16u16 { + __lasx_xvexth_hu_bu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_wu_hu(a: v16u16) -> v8u32 { + __lasx_xvexth_wu_hu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_du_wu(a: v8u32) -> v4u64 { + __lasx_xvexth_du_wu(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvexth_qu_du(a: v4u64) -> v4u64 { + __lasx_xvexth_qu_du(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotri_b(a: v32i8) -> v32i8 { + 
static_assert_uimm_bits!(IMM3, 3); + __lasx_xvrotri_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotri_h(a: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvrotri_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotri_w(a: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvrotri_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrotri_d(a: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvrotri_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvextl_q_d(a: v4i64) -> v4i64 { + __lasx_xvextl_q_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlni_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrlni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlni_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrlni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlni_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrlni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlni_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvsrlni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrni_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrlrni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrni_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrlrni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrni_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrlrni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrlrni_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvsrlrni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrlni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrlni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrlni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrlni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_bu_h(a: v32u8, b: v32i8) -> v32u8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrlni_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_hu_w(a: v16u16, b: v16i16) -> v16u16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrlni_hu_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_wu_d(a: v8u32, b: v8i32) -> v8u32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrlni_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlni_du_q(a: v4u64, b: v4i64) -> v4u64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrlni_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrlrni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrlrni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrlrni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrlrni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_bu_h(a: v32u8, b: v32i8) -> v32u8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrlrni_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_hu_w(a: v16u16, b: v16i16) -> v16u16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrlrni_hu_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_wu_d(a: v8u32, b: v8i32) -> v8u32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrlrni_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrlrni_du_q(a: v4u64, b: v4i64) -> v4u64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrlrni_du_q(a, b, IMM7) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrani_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrani_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrani_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrani_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrani_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrani_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrani_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvsrani_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrarni_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvsrarni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrarni_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvsrarni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrarni_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvsrarni_w_d(a, b, IMM6) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvsrarni_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvsrarni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrani_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrani_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrani_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrani_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_bu_h(a: v32u8, b: v32i8) -> v32u8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrani_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_hu_w(a: v16u16, b: v16i16) -> v16u16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrani_hu_w(a, b, IMM5) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_wu_d(a: v8u32, b: v8i32) -> v8u32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrani_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrani_du_q(a: v4u64, b: v4i64) -> v4u64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrani_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_b_h(a: v32i8, b: v32i8) -> v32i8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrarni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_h_w(a: v16i16, b: v16i16) -> v16i16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrarni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_w_d(a: v8i32, b: v8i32) -> v8i32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrarni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_d_q(a: v4i64, b: v4i64) -> v4i64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrarni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_bu_h(a: v32u8, b: v32i8) -> v32u8 { + static_assert_uimm_bits!(IMM4, 4); + __lasx_xvssrarni_bu_h(a, b, IMM4) +} + 
+#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_hu_w(a: v16u16, b: v16i16) -> v16u16 { + static_assert_uimm_bits!(IMM5, 5); + __lasx_xvssrarni_hu_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_wu_d(a: v8u32, b: v8i32) -> v8u32 { + static_assert_uimm_bits!(IMM6, 6); + __lasx_xvssrarni_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvssrarni_du_q(a: v4u64, b: v4i64) -> v4u64 { + static_assert_uimm_bits!(IMM7, 7); + __lasx_xvssrarni_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbnz_b(a: v32u8) -> i32 { + __lasx_xbnz_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbnz_d(a: v4u64) -> i32 { + __lasx_xbnz_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbnz_h(a: v16u16) -> i32 { + __lasx_xbnz_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbnz_v(a: v32u8) -> i32 { + __lasx_xbnz_v(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbnz_w(a: v8u32) -> i32 { + __lasx_xbnz_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbz_b(a: v32u8) -> i32 { + __lasx_xbz_b(a) +} + +#[inline] +#[target_feature(enable = "lasx")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbz_d(a: v4u64) -> i32 { + __lasx_xbz_d(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbz_h(a: v16u16) -> i32 { + __lasx_xbz_h(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbz_v(a: v32u8) -> i32 { + __lasx_xbz_v(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xbz_w(a: v8u32) -> i32 { + __lasx_xbz_w(a) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_caf_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_caf_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_caf_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_caf_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_ceq_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_ceq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_ceq_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_ceq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cle_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cle_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cle_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cle_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lasx_xvfcmp_clt_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_clt_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_clt_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_clt_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cne_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cne_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cne_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cne_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cor_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cor_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cor_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cor_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cueq_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cueq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cueq_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cueq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cule_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cule_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cule_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cule_s(a, b) +} + +#[inline] 
+#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cult_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cult_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cult_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cult_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cun_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cun_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cune_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_cune_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cune_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cune_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_cun_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_cun_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_saf_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_saf_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_saf_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_saf_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_seq_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_seq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_seq_s(a: 
v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_seq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sle_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sle_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sle_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sle_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_slt_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_slt_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_slt_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_slt_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sne_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sne_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sne_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sne_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sor_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sor_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sor_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sor_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sueq_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sueq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sueq_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sueq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sule_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sule_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sule_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sule_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sult_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sult_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sult_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sult_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sun_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sun_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sune_d(a: v4f64, b: v4f64) -> v4i64 { + __lasx_xvfcmp_sune_d(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sune_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sune_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvfcmp_sun_s(a: v8f32, b: v8f32) -> v8i32 { + __lasx_xvfcmp_sun_s(a, b) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve_d_f(a: v4f64) -> v4f64 { + 
static_assert_uimm_bits!(IMM2, 2); + __lasx_xvpickve_d_f(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvpickve_w_f(a: v8f32) -> v8f32 { + static_assert_uimm_bits!(IMM3, 3); + __lasx_xvpickve_w_f(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepli_b() -> v32i8 { + static_assert_simm_bits!(IMM_S10, 10); + __lasx_xvrepli_b(IMM_S10) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepli_d() -> v4i64 { + static_assert_simm_bits!(IMM_S10, 10); + __lasx_xvrepli_d(IMM_S10) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepli_h() -> v16i16 { + static_assert_simm_bits!(IMM_S10, 10); + __lasx_xvrepli_h(IMM_S10) +} + +#[inline] +#[target_feature(enable = "lasx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lasx_xvrepli_w() -> v8i32 { + static_assert_simm_bits!(IMM_S10, 10); + __lasx_xvrepli_w(IMM_S10) +} diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs new file mode 100644 index 000000000000..c3a244e740e9 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs @@ -0,0 +1,21 @@ +//! 
LoongArch64 LASX intrinsics + +#![allow(non_camel_case_types)] + +#[rustfmt::skip] +mod types; + +#[rustfmt::skip] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub use self::types::*; + +#[rustfmt::skip] +mod generated; + +#[rustfmt::skip] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub use self::generated::*; + +#[rustfmt::skip] +#[cfg(test)] +mod tests; diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs new file mode 100644 index 000000000000..54771d7b5110 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs @@ -0,0 +1,14758 @@ +// This code is automatically generated. DO NOT MODIFY. +// See crates/stdarch-gen-loongarch/README.md + +use crate::{ + core_arch::{loongarch64::*, simd::*}, + mem::transmute, +}; +use stdarch_test::simd_test; + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsll_b() { + let a = i8x32::new( + -111, -98, 47, -106, -82, -72, -70, 0, 110, -61, -20, 36, 41, -103, 42, 95, 15, -11, + -25, -5, 40, -63, 56, -39, 43, 127, 86, 75, -48, -32, 72, 69, + ); + let b = i8x32::new( + 64, -127, -78, 84, -102, -98, 45, 43, -78, -108, 25, 29, -65, 91, 36, 33, 61, 47, 69, + -59, -10, 108, 121, -25, -125, 62, -69, 74, 121, -89, -57, 75, + ); + let r = i64x4::new( + 18015190406413457, + -4710544755986517832, + -9191829245651812128, + 2882304449461665880, + ); + + assert_eq!(r, transmute(lasx_xvsll_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsll_h() { + let a = i16x16::new( + 4856, -12188, 28154, -30840, -28949, 18688, -15524, 15161, 5118, 9078, -28997, 27522, + 32276, -26448, -5994, -10720, + ); + let b = i16x16::new( + -489, 29679, -21849, 9497, -19660, -26644, 7745, 5176, 4522, 9574, -4384, 20128, 7874, + -19019, -3312, -26556, + ); + let r = i64x4::new( + 1153199681048706048, + 4107430984994057904, + 7746911246556919808, + 7061899947028838480, + ); + 
+ assert_eq!(r, transmute(lasx_xvsll_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsll_w() { + let a = i32x8::new( + 1510216636, + 213576479, + 1189254660, + -1355467453, + 1294786218, + -1710122153, + -615586704, + -1571284743, + ); + let b = i32x8::new( + -529192780, + 352003269, + -770638911, + 706076772, + -1938691801, + -1503291372, + -471620902, + 769195345, + ); + let r = i64x4::new( + -7539760386422079488, + -913293731912406008, + -5372794352929123072, + 3598939055443673088, + ); + + assert_eq!(r, transmute(lasx_xvsll_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsll_d() { + let a = i64x4::new( + 5587460212497087617, + 8474749651444529729, + 1738438059605040390, + -4067680789859467618, + ); + let b = i64x4::new( + 6741938213225194797, + 5195523862780666814, + -3609057746391313602, + 4479859630248272682, + ); + let r = i64x4::new( + -8101940545267433472, + 4611686018427387904, + -9223372036854775808, + -289787284616642560, + ); + + assert_eq!(r, transmute(lasx_xvsll_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslli_b() { + let a = i8x32::new( + -94, -3, -119, 48, 100, -37, 40, -38, -29, -51, 88, 4, -25, -114, 55, 88, 100, 38, 83, + 104, -128, 126, -102, 105, 5, -72, 101, 124, 38, -108, 10, -44, + ); + let r = i64x4::new( + 7539145145172948104, + 6979515765458220172, + -6599752572338399088, + 5775955139904200724, + ); + + assert_eq!(r, transmute(lasx_xvslli_b::<2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslli_h() { + let a = i16x16::new( + -28940, -25950, 22837, -4210, -14698, -22498, 27809, 10311, -17231, 19306, 6966, 1632, + -29260, 23078, 2703, -10254, + ); + let r = i64x4::new( + -9223301665963114496, + -4611615647535693824, + 140739635855360, + -9223160928474759168, + ); + + assert_eq!(r, transmute(lasx_xvslli_h::<14>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvslli_w() { + let a = i32x8::new( + 1994019050, + -2143307169, + -1465670605, + -1894478348, + 307662278, + 836483069, + 412058602, + -1025645846, + ); + let r = i64x4::new( + 6845471437529022464, + -864691127599497216, + -216172778791895040, + -1585267064908546048, + ); + + assert_eq!(r, transmute(lasx_xvslli_w::<24>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslli_d() { + let a = i64x4::new( + 4336457422713836724, + 8560628373228459557, + 7599406461945619908, + -8194824695476258169, + ); + let r = i64x4::new( + -9223372036854775808, + -6917529027641081856, + -9223372036854775808, + -2305843009213693952, + ); + + assert_eq!(r, transmute(lasx_xvslli_d::<61>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsra_b() { + let a = i8x32::new( + 52, 91, -50, -85, -69, -95, -127, 8, 86, -4, -99, 72, 8, -14, 107, -97, -44, 105, 87, + -117, -90, 118, 127, -106, 77, -92, -40, -82, -12, -112, -67, -118, + ); + let b = i8x32::new( + 27, 13, -111, 16, -29, 45, -40, 67, -68, 121, -101, -38, 25, -121, 103, 74, 99, 16, + -21, 6, 56, -24, 30, -89, 114, -108, -46, 9, 2, 53, 100, -76, + ); + let r = i64x4::new( + 108647106216395270, + -1801159457985266171, + -71645659462473222, + -505532365968836077, + ); + + assert_eq!(r, transmute(lasx_xvsra_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsra_h() { + let a = i16x16::new( + -13251, -24270, -27793, -1924, -989, 12103, 27324, 24449, 18911, 19481, -8980, 16617, + 28550, -13690, -1971, 3939, + ); + let b = i16x16::new( + -21726, 27818, 27200, -20739, -19045, -6458, 30141, -312, -15113, -30000, 21700, 17092, + 14409, 3061, -14681, 20631, + ); + let r = i64x4::new( + -119365732601073, + 26740135684866047, + 292450088307458195, + 8725659825471543, + ); + + assert_eq!(r, transmute(lasx_xvsra_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsra_w() { + let a = i32x8::new( + -1962976084, + 
-1947195007, + -955995895, + -845185028, + 679708613, + -1609457592, + 2012287263, + -279940829, + ); + let b = i32x8::new( + 763303798, + 231194360, + 470062549, + -1292464267, + -359409273, + 1320465704, + -1970959884, + -137912049, + ); + let r = i64x4::new( + -498216206805, + -1730871820744, + -27002218866473201, + -36696200575105, + ); + + assert_eq!(r, transmute(lasx_xvsra_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsra_d() { + let a = i64x4::new( + 1051630801678824769, + -4354070504513252833, + -43346970620111970, + 8876173186758680051, + ); + let b = i64x4::new( + 3011489794605089083, + -9183865802690171879, + 1530248905177224378, + -4896156283978786540, + ); + let r = i64x4::new(1, -129761412875, -1, 8464978396185); + + assert_eq!(r, transmute(lasx_xvsra_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrai_b() { + let a = i8x32::new( + 46, 37, 112, -119, 96, -75, 53, -50, 100, 120, 90, 18, 32, 73, 63, 27, 73, 42, 111, + -33, 12, 3, 108, 70, -108, 97, 15, -88, -9, 32, -126, -58, + ); + let r = i64x4::new( + -287109943871995390, + 72906425621612294, + 289919230257005060, + -218421283493247239, + ); + + assert_eq!(r, transmute(lasx_xvsrai_b::<4>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrai_h() { + let a = i16x16::new( + -30922, -13998, -8176, -18755, 11883, -28383, 17428, 4209, 30936, -20707, -28809, + -5893, 6072, 26622, -29177, 17463, + ); + let r = i64x4::new(-281474976710658, 8589803520, -4295098367, 562941363552256); + + assert_eq!(r, transmute(lasx_xvsrai_h::<14>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrai_w() { + let a = i32x8::new( + -751445431, + 2057508448, + -2111778568, + -33537291, + -1895386689, + 499743663, + 521751715, + -784629424, + ); + let r = i64x4::new(68719476730, -16, 17179869169, -25769803773); + + assert_eq!(r, transmute(lasx_xvsrai_w::<27>(transmute(a)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrai_d() { + let a = i64x4::new( + -1330027126485395847, + 2853839147873904128, + -6472260273666122769, + -8461705224280067242, + ); + let r = i64x4::new(-2, 2, -6, -8); + + assert_eq!(r, transmute(lasx_xvsrai_d::<60>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrar_b() { + let a = i8x32::new( + -45, 43, -69, -26, -38, 7, -79, 41, -6, -94, 1, 62, -82, -97, -39, 124, -99, 0, -23, + 12, 74, 16, -39, -15, -15, 31, -87, -124, -112, -39, 102, 7, + ); + let b = i8x32::new( + 7, 68, -10, -95, -30, 74, -78, -17, -99, 98, 98, 80, -128, -62, 119, -13, 7, 92, -80, + 88, -70, -115, 81, 99, 110, 14, 7, -60, -89, -109, 97, 81, + ); + let r = i64x4::new( + 66431358477468416, + 1153177339669047552, + -77404437262827265, + 302862676776648704, + ); + + assert_eq!(r, transmute(lasx_xvsrar_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrar_h() { + let a = i16x16::new( + 9840, 12527, -16657, 1341, 1073, -31572, -646, 17766, -16172, -9625, -27578, -20296, + -9439, 19781, 4269, -7939, + ); + let b = i16x16::new( + 29495, 11395, -1796, 26363, 26559, -12537, -23906, 29853, -17327, 20486, -24193, 16816, + -26916, 11389, 8615, 25146, + ); + let r = i64x4::new( + 562932876181581, + 562954232201216, + -5712534652352536470, + -2251658079567874, + ); + + assert_eq!(r, transmute(lasx_xvsrar_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrar_w() { + let a = i32x8::new( + 1944832391, + -1034950307, + -1451047471, + 1427692017, + -938846690, + 1764815474, + -1610593481, + -198860459, + ); + let b = i32x8::new( + -1327964835, + 1934527229, + -13271412, + 1797333888, + 1389622833, + -155405641, + -1581591786, + 335424649, + ); + let r = i64x4::new( + -8589934588, + 6131890526069889068, + 906238092293, + -1668156707832192, + ); + + assert_eq!(r, transmute(lasx_xvsrar_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = 
"lasx")] +unsafe fn test_lasx_xvsrar_d() { + let a = i64x4::new( + 5484150993813900402, + 9102605893479197027, + -7628992365150862705, + 407230793930236127, + ); + let b = i64x4::new( + 5977319318978215334, + 4512528532199919670, + 6381392913686620354, + 5222959627777138290, + ); + let r = i64x4::new(19951225, 505, -1907248091287715676, 362); + + assert_eq!(r, transmute(lasx_xvsrar_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrari_b() { + let a = i8x32::new( + 109, 3, -113, -66, 80, 8, -16, -45, 106, 9, 96, 53, 102, 6, -51, -120, -121, -94, -127, + -109, 70, 112, 57, -43, -72, 63, -113, -113, 93, 124, -71, 81, + ); + let r = i64x4::new( + -360849773505150962, + -1010494010926825203, + -358302209459292943, + 790117907428411639, + ); + + assert_eq!(r, transmute(lasx_xvsrari_b::<3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrari_h() { + let a = i16x16::new( + 10070, -32733, -17965, 31244, -29243, 6071, -3241, 7927, -285, 21152, -3903, 3660, + 13839, -14765, -18197, -22466, + ); + let r = i64x4::new( + 34621125774278695, + 9007143421804430, + 4222060231655423, + -24488623625338826, + ); + + assert_eq!(r, transmute(lasx_xvsrari_h::<8>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrari_w() { + let a = i32x8::new( + -500433597, + -325248258, + -1000460213, + 209976326, + -903490350, + -314707005, + -503879914, + -356101505, + ); + let r = i64x4::new(-1, 4294967294, -2, -1); + + assert_eq!(r, transmute(lasx_xvsrari_w::<29>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrari_d() { + let a = i64x4::new( + -3633983878249405921, + 5383874963092799521, + -4872778697398942371, + -2386944079627506318, + ); + let r = i64x4::new(-3228, 4782, -4328, -2120); + + assert_eq!(r, transmute(lasx_xvsrari_d::<50>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrl_b() { + let a = i8x32::new( + -118, -38, -124, -54, 98, 
-128, 79, -36, 103, -128, -88, -49, -98, 60, 2, -59, -16, -4, + 27, 59, 105, 95, -37, -72, -110, 11, 75, 114, -49, 90, -21, -35, + ); + let b = i8x32::new( + 98, -9, -55, 119, -93, -49, 14, 102, 104, -92, 48, 65, 46, 102, -33, -36, -80, -60, -4, + 56, 90, -121, 20, -53, -94, -28, -92, 39, 83, -100, -7, 114, + ); + let r = i64x4::new( + 216455408162832674, + 864691138784135271, + 1660983950228656112, + 3996105849293766692, + ); + + assert_eq!(r, transmute(lasx_xvsrl_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrl_h() { + let a = i16x16::new( + 10972, 24562, -12521, 26207, -104, -22440, -71, 23995, 14056, -10640, 15949, -18599, + 29813, -7756, 7950, -20154, + ); + let b = i16x16::new( + 7336, 20691, 12756, -11763, -7124, -20665, 2106, -26250, -26129, 24711, -15979, 11749, + -21358, -26257, -4616, 7882, + ); + let r = i64x4::new( + 858654357979178, + 105271911894745103, + 412644454779584512, + 12385032119328029, + ); + + assert_eq!(r, transmute(lasx_xvsrl_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrl_w() { + let a = i32x8::new( + -1772037605, + -1212681339, + 176585315, + -732660743, + -1822623484, + 992734189, + 1682031435, + 1636125097, + ); + let b = i32x8::new( + -938134804, + -1078907146, + -307437339, + -1035019720, + 338751406, + 1059144383, + -1414917923, + -363001284, + ); + let r = i64x4::new(3152506611213, 910538585043, 150899, 25769803779); + + assert_eq!(r, transmute(lasx_xvsrl_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrl_d() { + let a = i64x4::new( + 6435451644778058510, + -9196847159082085602, + -5149048879671131155, + 1388424134264678769, + ); + let b = i64x4::new( + -6322302375543819270, + -4446153186867162446, + -4228232340343120478, + 228185722174108108, + ); + let r = i64x4::new(22, 8215, 774027732, 338970735904462); + + assert_eq!(r, transmute(lasx_xvsrl_d(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrli_b() { + let a = i8x32::new( + 66, -19, -15, 83, -53, -81, -93, -68, -103, 77, 25, 65, 20, 104, -81, 127, -82, -32, + -11, 48, -83, -94, -74, 5, -117, -34, -28, 19, 13, -40, 68, 51, + ); + let r = i64x4::new( + -4853842685553676990, + 9200686999942024601, + 411695280685441198, + 3694315145030590091, + ); + + assert_eq!(r, transmute(lasx_xvsrli_b::<0>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrli_h() { + let a = i16x16::new( + -5451, -9527, 6137, -13536, -13439, 10877, -29799, 719, -28662, 31471, 20011, 1521, + 1386, -27895, 10040, 24311, + ); + let r = i64x4::new(7036883009470493, 73014771737, 38655688722, 3096241924866048); + + assert_eq!(r, transmute(lasx_xvsrli_h::<11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrli_w() { + let a = i32x8::new( + -1988432857, + -1485450469, + -951392465, + -21616344, + 741104373, + -605174159, + -393417893, + 356142399, + ); + let r = i64x4::new( + 92058329040061, + 140028818776997, + 120903329388054, + 11669426172998, + ); + + assert_eq!(r, transmute(lasx_xvsrli_w::<17>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrli_d() { + let a = i64x4::new( + 8921700513621232732, + 1019177465435556626, + 2713436842570698733, + -3430716780195672879, + ); + let r = i64x4::new(16617962184, 1898365962, 5054169972, 27969530398); + + assert_eq!(r, transmute(lasx_xvsrli_d::<29>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlr_b() { + let a = i8x32::new( + -109, 126, -8, -44, -19, -72, -121, -116, 21, 24, -60, 73, 76, 95, -106, -89, 56, -82, + -93, 112, -38, -24, -39, -57, -106, -17, -14, 31, 116, 16, 47, 122, + ); + let b = i8x32::new( + 50, -60, 62, 57, -113, -30, -127, -21, -61, -84, -32, -113, -114, 1, 55, -73, 71, -95, + 8, -8, 28, 55, -59, -118, 89, 87, -10, 63, 2, 67, 25, 62, + ); + let r = i64x4::new( + 1316227579002488869, + 72391849897361923, + 
3604852287775921920, + 150872911094481483, + ); + + assert_eq!(r, transmute(lasx_xvsrlr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlr_h() { + let a = i16x16::new( + -18779, 7604, 13987, 29727, 8545, 14399, -23049, 5564, 17277, 27629, -24885, 8060, + -12999, 4495, 32293, -31802, + ); + let b = i16x16::new( + -19412, 3296, -29433, -25702, 19528, -23288, 18964, -13600, -11805, -27841, 14324, + 17650, -2, 18151, -24330, -10882, + ); + let r = i64x4::new( + 8163242974380043, + 1566138173559930913, + 567182991583938672, + 565118914199555, + ); + + assert_eq!(r, transmute(lasx_xvsrlr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlr_w() { + let a = i32x8::new( + -1025998507, + -796106787, + 2021600494, + 398315156, + 965338474, + -828271652, + -102077533, + -995359010, + ); + let b = i32x8::new( + -2089285463, + 264222581, + -1942623583, + -928385941, + -1125618647, + -149370823, + -1786649473, + -1080417791, + ); + let r = i64x4::new( + 7164011834433, + 835329200199287, + 442383516915, + 7085854838990307330, + ); + + assert_eq!(r, transmute(lasx_xvsrlr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlr_d() { + let a = i64x4::new( + 2027979514153200323, + 4238639346117886861, + -2310491845939102950, + -4959482478857813602, + ); + let b = i64x4::new( + 965489361978698802, + 4289858003677505067, + -4742704455438896809, + -8773295883299999969, + ); + let r = i64x4::new(1801, 481878, 1923591164085, 6280495597); + + assert_eq!(r, transmute(lasx_xvsrlr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlri_b() { + let a = i8x32::new( + -73, -25, 49, -12, -91, -46, 0, -44, 48, -66, 31, -39, 50, -103, -78, -38, -126, -47, + -3, 84, 54, 112, -106, -46, 71, 28, 47, 27, -56, -119, -101, -95, + ); + let r = i64x4::new( + 3819110935244323374, + 3975875884220952588, + 3829779379936769057, + 
2893318883870770962, + ); + + assert_eq!(r, transmute(lasx_xvsrlri_b::<2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlri_h() { + let a = i16x16::new( + 6309, -29611, -25831, -4246, 15159, 10847, 16953, 29221, 6201, 24789, -30798, -15953, + 15706, -1900, 10475, -5507, + ); + let r = i64x4::new( + 33777332217315340, + 16044215407804446, + 27303364801855500, + 32932658182619167, + ); + + assert_eq!(r, transmute(lasx_xvsrlri_h::<9>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlri_w() { + let a = i32x8::new( + 828273676, + -644812120, + -857187805, + -176164509, + 981336800, + 1382840349, + -1522792930, + -176015403, + ); + let r = i64x4::new(8589934592, 8589934594, 4294967296, 8589934593); + + assert_eq!(r, transmute(lasx_xvsrlri_w::<31>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlri_d() { + let a = i64x4::new( + -5793930330848080801, + 3293244781940700302, + 1069657060216154101, + -5794364669081104952, + ); + let r = i64x4::new( + 197700214732210481, + 51456949717823442, + 16713391565877408, + 197693428197319479, + ); + + assert_eq!(r, transmute(lasx_xvsrlri_d::<6>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclr_b() { + let a = u8x32::new( + 190, 161, 30, 161, 194, 88, 175, 219, 144, 202, 22, 193, 212, 153, 191, 196, 137, 221, + 106, 10, 16, 144, 31, 238, 61, 152, 213, 196, 195, 243, 50, 92, + ); + let b = u8x32::new( + 11, 9, 78, 66, 137, 176, 138, 254, 176, 67, 163, 134, 131, 97, 153, 72, 134, 128, 41, + 58, 184, 249, 6, 26, 185, 60, 185, 181, 44, 38, 89, 238, + ); + let r = i64x4::new( + -7229587192453094986, + -4270087733699493232, + -1576382945987863415, + 2031321085346416701, + ); + + assert_eq!(r, transmute(lasx_xvbitclr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclr_h() { + let a = u16x16::new( + 7799, 9627, 56384, 27998, 4661, 64335, 54264, 6382, 47409, 49178, 38272, 
57390, 35004, + 32388, 62552, 35760, + ); + let b = u16x16::new( + 5291, 30357, 59434, 46615, 64011, 9844, 17102, 63063, 12386, 31313, 20554, 38159, + 54802, 37529, 18767, 51367, + ); + let r = i64x4::new( + 7880974167965374071, + 1760507201925878325, + 6930636858734459185, + -8417099780160452424, + ); + + assert_eq!(r, transmute(lasx_xvbitclr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclr_w() { + let a = u32x8::new( + 4257127193, 1617538994, 1062231453, 1690763623, 2766967375, 2604092619, 3654495562, + 101565771, + ); + let b = u32x8::new( + 1233687892, 2875139141, 3243465390, 3012934629, 2446741029, 1858096423, 3334422766, + 437336695, + ); + let r = i64x4::new( + 6947276946051865369, + 7261774329674735005, + -7262251986338409905, + 436221668492520778, + ); + + assert_eq!(r, transmute(lasx_xvbitclr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclr_d() { + let a = u64x4::new( + 16927321994427904653, + 2683926075985226749, + 16958486450995068185, + 3668272799860716893, + ); + let b = u64x4::new( + 15133760811038045272, + 12911195625023626617, + 15656282835364509484, + 1632666566472745103, + ); + let r = i64x4::new( + -1519422079281646963, + 2683926075985226749, + -1488257622714483431, + 3668272799860684125, + ); + + assert_eq!(r, transmute(lasx_xvbitclr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclri_b() { + let a = u8x32::new( + 141, 68, 55, 244, 88, 222, 227, 17, 167, 11, 144, 254, 176, 224, 143, 139, 254, 1, 83, + 117, 181, 160, 142, 4, 179, 103, 107, 27, 186, 98, 203, 106, + ); + let r = i64x4::new( + 1271033348788520077, + -8390310899796145241, + 328376522984587710, + 3065582154070828979, + ); + + assert_eq!(r, transmute(lasx_xvbitclri_b::<6>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclri_h() { + let a = u16x16::new( + 38228, 2400, 61493, 22229, 35926, 42301, 55100, 57087, 
23321, 21128, 18634, 59029, + 56405, 24055, 11367, 27455, + ); + let r = i64x4::new( + 6257171367882429780, + -2378508372711469996, + -1831477648240911591, + 7727381349517352021, + ); + + assert_eq!(r, transmute(lasx_xvbitclri_h::<1>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclri_w() { + let a = u32x8::new( + 4093464829, 3397035519, 3710215001, 425447773, 2028980386, 1200168081, 1687167090, + 2988462494, + ); + let r = i64x4::new( + -8468273631661829891, + 1827284273827504985, + 542996640125929634, + -5611395396043530126, + ); + + assert_eq!(r, transmute(lasx_xvbitclri_w::<30>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitclri_d() { + let a = u64x4::new( + 11636830919927548139, + 10182450295979110848, + 14581196067604683625, + 18383675221698776393, + ); + let r = i64x4::new( + -6809983522526181141, + -8264364146474618432, + -3865618374849045655, + -63139220754952887, + ); + + assert_eq!(r, transmute(lasx_xvbitclri_d::<46>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitset_b() { + let a = u8x32::new( + 225, 92, 109, 112, 17, 10, 26, 83, 15, 81, 108, 14, 45, 110, 122, 43, 4, 150, 103, 97, + 111, 130, 134, 212, 62, 58, 9, 2, 56, 158, 26, 145, + ); + let b = u8x32::new( + 52, 116, 92, 53, 153, 232, 239, 116, 224, 124, 185, 146, 220, 6, 151, 66, 61, 170, 93, + 190, 38, 252, 85, 37, 106, 174, 206, 83, 194, 190, 144, 114, + ); + let r = i64x4::new( + 6024139629681007857, + 3457196872474448143, + -817805275247962588, + -7702318388235109826, + ); + + assert_eq!(r, transmute(lasx_xvbitset_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitset_h() { + let a = u16x16::new( + 17259, 49211, 15974, 6099, 8663, 62383, 26831, 38552, 3409, 2195, 20043, 5352, 3983, + 31516, 6274, 5947, + ); + let b = u16x16::new( + 53731, 18053, 52835, 11975, 35791, 12348, 45618, 26117, 33156, 26353, 49938, 43656, + 36487, 64856, 49663, 56384, + ); + let r 
= i64x4::new( + 1716784528350724971, + -7586198329949707817, + 1578597770746596689, + 1674099372676878223, + ); + + assert_eq!(r, transmute(lasx_xvbitset_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitset_w() { + let a = u32x8::new( + 2021234591, 3371814330, 3553513799, 494005311, 250094477, 2516669349, 1444421180, + 3141613342, + ); + let b = u32x8::new( + 3030677440, 3512547286, 2983366759, 1926382844, 3455887892, 2988190229, 2851051202, + 575886239, + ); + let r = i64x4::new( + -3964911796154165345, + 2121736658348822983, + -7628724325403057267, + -4953617511697867204, + ); + + assert_eq!(r, transmute(lasx_xvbitset_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitset_d() { + let a = u64x4::new( + 13787459408145721576, + 16595537902770630413, + 7409136402519495190, + 8641001130845153939, + ); + let b = u64x4::new( + 5192067677796360406, + 648800965073738257, + 18042109477292491586, + 15371630372089390212, + ); + let r = i64x4::new( + -4659284665559635736, + -1851206170938790131, + 7409136402519495190, + 8641001130845153939, + ); + + assert_eq!(r, transmute(lasx_xvbitset_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitseti_b() { + let a = u8x32::new( + 119, 80, 249, 199, 113, 106, 84, 111, 190, 194, 53, 9, 139, 230, 49, 32, 150, 255, 16, + 235, 219, 105, 54, 143, 119, 37, 74, 94, 47, 119, 97, 78, + ); + let r = i64x4::new( + -1165048079419059977, + -6867454469778062658, + -8091022549765259370, + -3539275497407339017, + ); + + assert_eq!(r, transmute(lasx_xvbitseti_b::<7>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitseti_h() { + let a = u16x16::new( + 3428, 49184, 29775, 38443, 2320, 51224, 40616, 46501, 26758, 21099, 57944, 43971, + 47859, 19503, 41964, 61802, + ); + let r = i64x4::new( + -5320030648396665500, + -5357666549029656304, + -6069759003260655482, + -1050847327214912781, + ); + + 
assert_eq!(r, transmute(lasx_xvbitseti_h::<13>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitseti_w() { + let a = u32x8::new( + 3638204102, 2069373672, 3681483208, 2380952857, 3881087295, 2378927021, 1601131765, + 3307909931, + ); + let r = i64x4::new( + 8887892248618505926, + -5914786406144738872, + -5923487305849065153, + -1933536090599238411, + ); + + assert_eq!(r, transmute(lasx_xvbitseti_w::<29>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitseti_d() { + let a = u64x4::new( + 9060047554002173201, + 464447178838056277, + 7020364402684265679, + 7640056937583456779, + ); + let r = i64x4::new( + 9060047554002173201, + 464447178838056277, + 7020364402684265679, + 7640056937583456779, + ); + + assert_eq!(r, transmute(lasx_xvbitseti_d::<17>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrev_b() { + let a = u8x32::new( + 86, 45, 120, 26, 67, 111, 181, 110, 186, 247, 233, 56, 217, 245, 220, 182, 112, 159, + 77, 122, 167, 75, 37, 185, 177, 18, 190, 215, 60, 13, 253, 99, + ); + let b = u8x32::new( + 147, 78, 169, 66, 243, 63, 20, 253, 87, 88, 137, 49, 21, 0, 154, 117, 112, 42, 28, 48, + 22, 139, 165, 183, 96, 228, 17, 98, 218, 192, 92, 92, + ); + let r = i64x4::new( + 5667198812028562782, + -7577037021778282950, + 4108764896531684209, + 8353346322052154032, + ); + + assert_eq!(r, transmute(lasx_xvbitrev_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrev_h() { + let a = u16x16::new( + 44834, 48985, 47421, 26123, 36975, 54201, 35400, 17963, 44073, 49622, 17677, 24094, + 34507, 53208, 48965, 4380, + ); + let b = u16x16::new( + 3119, 5355, 43390, 6709, 8036, 22161, 7944, 37786, 31676, 17612, 21999, 1550, 37643, + 51935, 23672, 51448, + ); + let r = i64x4::new( + 7362252059331604258, + 4768057775407992959, + 2170388733584915497, + 1161012008856358603, + ); + + assert_eq!(r, transmute(lasx_xvbitrev_h(transmute(a), transmute(b)))); +} 
+ +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrev_w() { + let a = u32x8::new( + 1780458127, 1583179777, 1403171735, 3038008548, 1551651469, 1192480700, 40883360, + 521408888, + ); + let b = u32x8::new( + 2551625282, 692446886, 1507542621, 1654251513, 25012964, 1671838513, 1315668038, + 3268446736, + ); + let r = i64x4::new( + 6799705642561938059, + -5254481525065206889, + 5121102659209417373, + 2239715596821320928, + ); + + assert_eq!(r, transmute(lasx_xvbitrev_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrev_d() { + let a = u64x4::new( + 3534178575908999157, + 3435592769216332161, + 6355029412175758040, + 10622443384676276507, + ); + let b = u64x4::new( + 765862270911233836, + 2594415241338312820, + 11114879593910781230, + 15091508809743360642, + ); + let r = i64x4::new( + 3534196168095043573, + 3440096368843702657, + 6355099780919935704, + -7824300689033275105, + ); + + assert_eq!(r, transmute(lasx_xvbitrev_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrevi_b() { + let a = u8x32::new( + 112, 47, 201, 157, 172, 239, 255, 219, 200, 1, 134, 120, 144, 4, 15, 114, 35, 84, 237, + 118, 244, 43, 132, 135, 32, 116, 216, 122, 83, 233, 95, 217, + ); + let r = i64x4::new( + -297290846994624688, + 5921992374835618280, + -6366950966577761277, + -468434338938596352, + ); + + assert_eq!(r, transmute(lasx_xvbitrevi_b::<5>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrevi_h() { + let a = u16x16::new( + 32769, 5307, 42421, 62367, 28539, 63062, 1989, 15130, 7026, 1542, 27332, 53533, 17199, + 28761, 1428, 12804, + ); + let r = i64x4::new( + -315342455509907455, + 3682272988378851195, + -2801974798966189198, + 4180481285432101679, + ); + + assert_eq!(r, transmute(lasx_xvbitrevi_h::<11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrevi_w() { + let a = u32x8::new( + 4260813560, 2237147704, 787609405, 
2632090994, 1944569031, 3636389111, 844354358, + 3691914548, + ); + let r = i64x4::new( + -4226581827093603592, + -2530313314094680259, + -7440257783990598457, + -7201777846932221130, + ); + + assert_eq!(r, transmute(lasx_xvbitrevi_w::<30>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitrevi_d() { + let a = u64x4::new( + 5820240183830393881, + 7908556960014755456, + 17094377170254219540, + 17105994065815884924, + ); + let r = i64x4::new( + 5820240183863948313, + 7908556959981201024, + -1352366903421777644, + -1340750007927221124, + ); + + assert_eq!(r, transmute(lasx_xvbitrevi_d::<25>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadd_b() { + let a = i8x32::new( + -63, 97, -109, 57, -109, 103, -19, 65, 57, -37, 32, 5, -97, -108, 12, -61, -91, 104, + -2, 65, -41, -85, -54, 104, 40, -13, 78, 80, 75, -33, -121, -67, + ); + let b = i8x32::new( + -32, -51, 9, 94, 98, 84, -101, -90, -24, -111, 104, -25, 112, -85, 87, -10, -90, -59, + 96, -43, -67, 16, -8, 83, 126, -13, 58, 116, 73, -90, 6, 67, + ); + let r = i64x4::new( + -1762952590630572383, + -5088153816373105631, + -4917161598430335669, + 39834845715162790, + ); + + assert_eq!(r, transmute(lasx_xvadd_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadd_h() { + let a = i16x16::new( + 19227, 23953, -4654, -5363, 31202, 4004, -2636, 15810, -18448, 29154, -23642, -23324, + 23716, 21938, -17499, -1447, + ); + let b = i16x16::new( + 10023, 12046, -30915, -30883, 29754, 22142, -11854, 5774, 8790, 19058, -32113, 4500, + 17933, 13821, 19847, 13830, + ); + let r = i64x4::new( + 8244530777499333186, + 6075575139936955932, + -5298442949366588858, + 3485514723534807729, + ); + + assert_eq!(r, transmute(lasx_xvadd_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadd_w() { + let a = i32x8::new( + 130061221, + 1238983557, + 1050069092, + -1831874224, + -377156607, + 1147824901, + 
-1862271997, + 91173942, + ); + let b = i32x8::new( + 683768234, + -1042445407, + -327184682, + -1513884019, + 347904368, + 886761024, + -1570339601, + 13462118, + ); + let r = i64x4::new( + 844124927480171855, + 4076821840425015098, + 8738480013042623857, + 449408456544649458, + ); + + assert_eq!(r, transmute(lasx_xvadd_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadd_d() { + let a = i64x4::new( + -3908230933439843201, + -2965012514388925511, + -336128270114892540, + -637330020659137335, + ); + let b = i64x4::new( + -7034299759176626990, + -361127056732231567, + 4052152376745196186, + -2695706064065117364, + ); + let r = i64x4::new( + 7504213381093081425, + -3326139571121157078, + 3716024106630303646, + -3333036084724254699, + ); + + assert_eq!(r, transmute(lasx_xvadd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddi_bu() { + let a = i8x32::new( + 97, -53, -62, 74, 99, 103, 85, -62, 12, -18, -65, 32, 19, -86, 65, -26, -98, 56, -9, + -49, 4, 57, -22, 9, 93, 38, 124, -2, -121, 70, 125, 21, + ); + let r = i64x4::new( + -4226511262663192988, + -1637994053855153905, + 931466702237612961, + 1765491911008659808, + ); + + assert_eq!(r, transmute(lasx_xvaddi_bu::<3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddi_hu() { + let a = i16x16::new( + 28186, 30980, -18298, 10584, -13771, -23924, -28546, 30222, -16145, -32706, -20261, + 19828, 22395, -2057, 5657, 15125, + ); + let r = i64x4::new( + 2979615520472788507, + 8507177098988603958, + 5581561774286553328, + 4257614802810591100, + ); + + assert_eq!(r, transmute(lasx_xvaddi_hu::<1>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddi_wu() { + let a = i32x8::new( + 832142867, + -97637134, + 470208227, + -904606685, + -2133615997, + -538764334, + 627855087, + 2056153787, + ); + let r = i64x4::new( + -419348219263615451, + -3885256050038354187, + -2313975115310458219, + 
8831113348648816385, + ); + + assert_eq!(r, transmute(lasx_xvaddi_wu::<18>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddi_du() { + let a = i64x4::new( + 2524418528961435407, + -8855335564236661523, + -6695152760024429972, + -4546559236496052098, + ); + let r = i64x4::new( + 2524418528961435431, + -8855335564236661499, + -6695152760024429948, + -4546559236496052074, + ); + + assert_eq!(r, transmute(lasx_xvaddi_du::<24>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsub_b() { + let a = i8x32::new( + 69, 68, 89, -122, -10, 4, 91, -20, -104, 41, -2, 28, -58, 89, 8, 71, 46, 82, -101, 51, + -88, -102, -124, -9, 40, -59, -102, -16, 3, 103, 85, -97, + ); + let b = i8x32::new( + -65, -118, -63, 106, 15, -103, -19, -85, -42, 55, -34, -9, 15, 86, 74, 4, -118, -124, + 43, 2, 17, -82, 112, -28, 76, -58, 103, -48, -26, 27, -97, 14, + ); + let r = i64x4::new( + 4714824500264876678, + 4881343131253011138, + 1374983920368537252, + -7947080804470620196, + ); + + assert_eq!(r, transmute(lasx_xvsub_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsub_h() { + let a = i16x16::new( + 13861, -12177, -9887, -27491, 3957, -5779, -6788, 4221, -12561, 4789, -8335, -24637, + 660, -11584, -22855, 31170, + ); + let b = i16x16::new( + -10247, 15942, -17883, -32294, -13460, -6485, 4553, 25005, -26816, -11045, 312, 22201, + 12797, -7932, -13605, -24793, + ); + let r = i64x4::new( + 1351958658151964204, + -5849943150155381751, + 5263263451968059311, + -2694318201466204009, + ); + + assert_eq!(r, transmute(lasx_xvsub_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsub_w() { + let a = i32x8::new( + 178703054, + -696864732, + 212849982, + -285846503, + -1117046518, + 705292054, + 739892078, + 504545429, + ); + let b = i32x8::new( + 1845948974, + 513755820, + -260175909, + -530928548, + 1413787975, + -1421495822, + 1424414367, + 1652017030, + ); + let r = 
i64x4::new( + -5199575676077746016, + 1052619368584826211, + 9134484374713436099, + -4928352995773315889, + ); + + assert_eq!(r, transmute(lasx_xvsub_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsub_d() { + let a = i64x4::new( + -7646834273082474631, + -3919573082038908840, + 1242665522125115913, + -7118090461806523548, + ); + let b = i64x4::new( + 8536740478963669238, + -5376035241109169794, + -2919045115911617717, + -5820964252152272230, + ); + let r = i64x4::new( + 2263169321663407747, + 1456462159070260954, + 4161710638036733630, + -1297126209654251318, + ); + + assert_eq!(r, transmute(lasx_xvsub_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubi_bu() { + let a = i8x32::new( + -110, 82, -20, -84, 15, -27, -19, -10, 74, -11, 10, -87, 103, -61, 21, -98, -92, -49, + 78, 102, -11, -49, -45, 65, 12, 93, 109, -99, -11, -82, -27, 98, + ); + let r = i64x4::new( + -1594036762305411707, + -7995940638099118019, + 3802941238546645655, + 6185872108420092159, + ); + + assert_eq!(r, transmute(lasx_xvsubi_bu::<13>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubi_hu() { + let a = i16x16::new( + 20553, 24028, -32247, -8607, 12622, -11323, -26896, -27740, -12003, -16731, 2560, + -6936, -6669, -11254, -12625, 5415, + ); + let r = i64x4::new( + -2424482502709784510, + -7809920247766568633, + -1954269795052498666, + 1522443898558080492, + ); + + assert_eq!(r, transmute(lasx_xvsubi_hu::<7>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubi_wu() { + let a = i32x8::new( + 755271012, + 658180721, + -240702681, + -573588257, + -869840064, + -1735073421, + 798270655, + 299197982, + ); + let r = i64x4::new( + 2826864560638821706, + -2463542912799528179, + -7452083707597862106, + 1285045436848317605, + ); + + assert_eq!(r, transmute(lasx_xvsubi_wu::<26>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubi_du() 
{ + let a = i64x4::new( + -6314492083383377124, + -2455352880818468995, + 4567295273188684508, + 4145748346670499022, + ); + let r = i64x4::new( + -6314492083383377136, + -2455352880818469007, + 4567295273188684496, + 4145748346670499010, + ); + + assert_eq!(r, transmute(lasx_xvsubi_du::<12>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_b() { + let a = i8x32::new( + 25, 6, 107, -17, 0, 0, -33, -126, -67, -110, -28, -71, 103, -104, 76, -67, -63, 109, + -111, 21, -117, 23, 0, 127, 97, 55, -124, -87, -49, -29, -50, 33, + ); + let b = i8x32::new( + -9, 89, -54, -48, -35, 107, -21, 85, -105, -19, -97, -119, 110, -49, -29, 38, 88, 38, + 43, 117, -99, -12, -56, 125, -117, 87, 98, -75, 64, 37, 116, 118, + ); + let r = i64x4::new( + 6191159764511840537, + 2759808746143411645, + 9151340407859932504, + 8535488153625188193, + ); + + assert_eq!(r, transmute(lasx_xvmax_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_h() { + let a = i16x16::new( + 30763, 3415, 26324, -7315, -21080, 18524, -4450, 24816, -15714, -28542, -635, -31873, + -26693, 15869, -3002, -24310, + ); + let b = i16x16::new( + -31234, 12467, 15235, -27825, 27576, -30308, 5780, 15439, 5332, -17912, 27099, -21207, + 26461, -8845, 28810, -15394, + ); + let r = i64x4::new( + -2058876393102280661, + 6985107848176626616, + -5969123438663035692, + -4332902052436023459, + ); + + assert_eq!(r, transmute(lasx_xvmax_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_w() { + let a = i32x8::new( + 1577861415, + 918171955, + -750433312, + 187580904, + 2059773788, + -1443991497, + -1216535607, + 1560471573, + ); + let b = i32x8::new( + -1945753238, + -891888859, + -78561680, + 1374400928, + -70918058, + 1356405224, + -371800255, + -244516818, + ); + let r = i64x4::new( + 3943518520407245095, + 5903007041568456304, + 5825716079263328092, + 6702174376295843649, + ); + + assert_eq!(r, 
transmute(lasx_xvmax_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_d() { + let a = i64x4::new( + -2766896964461117900, + -5078071472767258214, + 9065828085534222331, + -6500758532071144491, + ); + let b = i64x4::new( + -22138921162050098, + -8125932019434035875, + -7840786109368633952, + -880822478913123851, + ); + let r = i64x4::new( + -22138921162050098, + -5078071472767258214, + 9065828085534222331, + -880822478913123851, + ); + + assert_eq!(r, transmute(lasx_xvmax_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_b() { + let a = i8x32::new( + -125, -85, -100, -36, 78, -85, 8, -111, -4, 10, -124, -8, 85, 25, -92, 61, 61, -45, 68, + 58, -5, 10, 121, 74, -100, 75, 78, 36, -81, 0, 21, 82, + ); + let r = i64x4::new( + -790112015120730635, + 4464502462647438076, + 5366332505119323453, + 5914634738497113077, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_b::<-11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_h() { + let a = i16x16::new( + 10159, 11019, -527, 25779, 18814, -6803, -7822, -21020, 17899, -30211, -21703, -32203, + -17678, -31762, -12745, 15653, + ); + let r = i64x4::new( + 7256424853078222767, + -2814792717481602, + -2814792717482517, + 4406209242478280693, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_h::<-11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_w() { + let a = i32x8::new( + -1902781562, + -701262116, + 1050694797, + 1927374994, + 2034319488, + 1270402141, + 1507027857, + -2022667122, + ); + let r = i64x4::new( + 21474836485, + 8278012567408891021, + 5456335650397700224, + 22981864337, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_w::<5>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_d() { + let a = i64x4::new( + 1922310852027675403, + 1444112415686500862, + -2217486151251900264, + 2429249725865673045, + ); + let r = i64x4::new( + 1922310852027675403, 
+ 1444112415686500862, + -3, + 2429249725865673045, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_d::<-3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_bu() { + let a = u8x32::new( + 85, 114, 198, 232, 2, 92, 134, 60, 6, 73, 97, 135, 118, 147, 202, 24, 163, 26, 22, 241, + 100, 118, 187, 179, 231, 20, 8, 232, 203, 101, 192, 9, + ); + let b = u8x32::new( + 53, 108, 137, 217, 144, 216, 90, 50, 81, 196, 11, 85, 124, 110, 245, 183, 35, 166, 114, + 134, 174, 222, 3, 134, 149, 130, 39, 166, 182, 16, 44, 58, + ); + let r = i64x4::new( + 4361411406047113813, + -5191080832418069423, + -5495554077319059805, + 4233495576175936231, + ); + + assert_eq!(r, transmute(lasx_xvmax_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_hu() { + let a = u16x16::new( + 5749, 55167, 53819, 29245, 38403, 35505, 59653, 25124, 35403, 58917, 5938, 9735, 59292, + 13480, 10576, 54135, + ); + let b = u16x16::new( + 5035, 18828, 58275, 53640, 3989, 38318, 53531, 14719, 27606, 5401, 62928, 12836, 16867, + 7709, 62726, 59945, + ); + let r = i64x4::new( + -3348176030115359115, + 7072033525073876483, + 3613283078621203019, + -1573457187787184228, + ); + + assert_eq!(r, transmute(lasx_xvmax_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_wu() { + let a = u32x8::new( + 1479333943, 2676167483, 3836141683, 1561090643, 2383304043, 4050203265, 880499204, + 1213140090, + ); + let b = u32x8::new( + 3319622969, 1208019942, 2301441769, 3536726941, 665528183, 2671171581, 1912772755, + 2591579616, + ); + let r = i64x4::new( + -6952692252286292679, + -3256617523396288397, + -1051253505998826133, + -7315994376096540525, + ); + + assert_eq!(r, transmute(lasx_xvmax_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmax_du() { + let a = u64x4::new( + 15606303230109259264, + 8116571215893940866, + 8029178663488389518, + 1343606515742555302, + ); + let b 
= u64x4::new( + 12474736035319899163, + 7894892261694004420, + 3771675238777573447, + 5141420152487342561, + ); + let r = i64x4::new( + -2840440843600292352, + 8116571215893940866, + 8029178663488389518, + 5141420152487342561, + ); + + assert_eq!(r, transmute(lasx_xvmax_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_bu() { + let a = u8x32::new( + 5, 31, 107, 171, 93, 98, 60, 232, 147, 171, 189, 163, 227, 182, 246, 12, 186, 67, 84, + 153, 12, 95, 0, 34, 84, 166, 191, 25, 19, 211, 84, 138, + ); + let r = i64x4::new( + -1712385603860226294, + 934135061546904467, + 2452877454773339066, + -8478920119441971628, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_bu::<10>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_hu() { + let a = u16x16::new( + 48338, 4001, 46491, 35597, 23103, 58140, 58650, 37062, 44161, 23848, 12302, 18312, + 7294, 3406, 24569, 9169, + ); + let r = i64x4::new( + -8426879650153513774, + -8014466583217022401, + 5154422611776154753, + 2580949584734723198, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_hu::<15>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_wu() { + let a = u32x8::new( + 3721611043, 1077683923, 3718582126, 906645810, 3702930805, 3185396072, 3048402980, + 1473444340, + ); + let r = i64x4::new( + 4628617208431593251, + 3894014106724011886, + -4765572115959759499, + 6328395255824707620, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_wu::<12>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaxi_du() { + let a = u64x4::new( + 6545420797271239625, + 14656235662490779697, + 8085422797121321277, + 3280369825537805033, + ); + let r = i64x4::new( + 6545420797271239625, + -3790508411218771919, + 8085422797121321277, + 3280369825537805033, + ); + + assert_eq!(r, transmute(lasx_xvmaxi_du::<18>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_b() { + let a = i8x32::new( + 60, -51, 1, 
-10, 118, -28, -35, 82, -26, -121, -72, 104, 120, -114, -89, 101, -21, + -122, 65, -87, -82, 111, -120, 76, 3, -76, 9, 56, -41, -101, -3, 66, + ); + let b = i8x32::new( + -95, -1, -42, 28, 90, -13, 93, 39, -93, -126, -63, 119, -82, -11, -1, 28, 58, -54, 83, + -38, 50, 121, 99, -78, -10, 115, 116, 63, 20, -24, 81, -7, + ); + let r = i64x4::new( + 2872703216671706529, + 2064775833905037987, + -5582088942171093269, + -433018640497265418, + ); + + assert_eq!(r, transmute(lasx_xvmin_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_h() { + let a = i16x16::new( + -11212, 17053, 31831, -17088, -26082, -20339, -29027, -7113, -12378, 23981, -6343, + -15884, -7455, -31741, -26691, 26033, + ); + let b = i16x16::new( + -3990, -653, 31824, -8429, -13156, 20074, -32658, 26465, -31268, -28012, 12849, 11972, + -8106, 16341, 14932, -6230, + ); + let r = i64x4::new( + -4809707714740235212, + -2001990296446068194, + -4470694295613700644, + -1753422264687927210, + ); + + assert_eq!(r, transmute(lasx_xvmin_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_w() { + let a = i32x8::new( + 545076841, + 427733287, + -1694168270, + 454215425, + 1619909203, + 1120598019, + 1819961244, + -165320673, + ); + let b = i32x8::new( + 440778392, -880154888, 659189867, -948070867, 303440078, 2084920396, -670807717, + 1250241, + ); + let r = i64x4::new( + -3780236458933764456, + -4071933365454566606, + 4812931843870826702, + -710046880263550629, + ); + + assert_eq!(r, transmute(lasx_xvmin_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_d() { + let a = i64x4::new( + 2741334847700576739, + -5405962583790843561, + 8459180020282222757, + -1572925480949669194, + ); + let b = i64x4::new( + -5261141090878992044, + -1222006182046777526, + 4309148539181077305, + -3792381296290037631, + ); + let r = i64x4::new( + -5261141090878992044, + -5405962583790843561, + 
4309148539181077305, + -3792381296290037631, + ); + + assert_eq!(r, transmute(lasx_xvmin_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_b() { + let a = i8x32::new( + -85, 86, -102, -46, -93, 29, -46, 15, 36, -49, 80, -47, -57, 0, 17, 89, 60, 93, 100, + -34, 49, -3, -48, 22, -95, 29, -77, -48, 44, -92, -27, 74, + ); + let r = i64x4::new( + -1093547173093904213, + -1085102769184911376, + -1094109792127880976, + -1088282380739546975, + ); + + assert_eq!(r, transmute(lasx_xvmini_b::<-16>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_h() { + let a = i16x16::new( + 29579, 25294, -26291, 17601, 19548, -1571, -3670, -17609, 15721, 11767, 5051, -4718, + 14977, -104, -21933, 11733, + ); + let r = i64x4::new( + 2420355805741064, + -4956227148259196920, + -1327998905760612344, + 2439077560844296, + ); + + assert_eq!(r, transmute(lasx_xvmini_h::<8>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_w() { + let a = i32x8::new( + 938211063, + 1582718046, + -710671495, + -1169124073, + 71125607, + 1365032606, + -1290216030, + -736436725, + ); + let r = i64x4::new( + -64424509456, + -5021349654917020807, + -64424509456, + -3162971646443594334, + ); + + assert_eq!(r, transmute(lasx_xvmini_w::<-16>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_d() { + let a = i64x4::new( + 6621191429364538735, + 8224746792719035443, + 4688148425230961784, + 823273303261270164, + ); + let r = i64x4::new(-8, -8, -8, -8); + + assert_eq!(r, transmute(lasx_xvmini_d::<-8>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_bu() { + let a = u8x32::new( + 21, 215, 240, 12, 207, 254, 97, 176, 94, 73, 182, 18, 231, 216, 171, 39, 221, 31, 171, + 24, 170, 126, 78, 115, 189, 104, 30, 71, 73, 13, 173, 124, + ); + let b = u8x32::new( + 156, 34, 210, 157, 237, 204, 11, 176, 14, 3, 254, 148, 151, 143, 59, 162, 24, 238, 63, + 85, 169, 120, 
197, 108, 204, 8, 244, 238, 23, 109, 248, 6, + ); + let r = i64x4::new( + -5761286108645023211, + 2827011070121870094, + 7804307871931244312, + 481055128827070653, + ); + + assert_eq!(r, transmute(lasx_xvmin_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_hu() { + let a = u16x16::new( + 38440, 49714, 29557, 49236, 1896, 30340, 23067, 13106, 50372, 7988, 45184, 3030, 64318, + 11696, 24753, 38944, + ); + let b = u16x16::new( + 37782, 2130, 14692, 21829, 22760, 43371, 63045, 45289, 2584, 36405, 12186, 43636, 1930, + 62345, 57746, 16665, + ); + let r = i64x4::new( + 6144380368416052118, + 3689110118768838504, + 852921518428260888, + 4690886800975071114, + ); + + assert_eq!(r, transmute(lasx_xvmin_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_wu() { + let a = u32x8::new( + 2388959959, 3753576755, 2396056833, 1264941814, 1407811024, 4062547104, 3162258102, + 2894799861, + ); + let b = u32x8::new( + 1131111124, 1117231814, 2238242135, 3549614188, 791311618, 4010634425, 445826884, + 195885173, + ); + let r = i64x4::new( + 4798474104311866068, + 5432883724711157079, + -1221200381331475198, + 841320412252129092, + ); + + assert_eq!(r, transmute(lasx_xvmin_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmin_du() { + let a = u64x4::new( + 16262575865555500950, + 9397610354038464998, + 11047831233023881635, + 168959420679376173, + ); + let b = u64x4::new( + 5191113397333195233, + 15218861976244884079, + 15362510705177390571, + 3583188655927147541, + ); + let r = i64x4::new( + 5191113397333195233, + -9049133719671086618, + -7398912840685669981, + 168959420679376173, + ); + + assert_eq!(r, transmute(lasx_xvmin_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_bu() { + let a = u8x32::new( + 89, 194, 153, 118, 89, 237, 7, 106, 114, 216, 237, 232, 42, 35, 243, 48, 137, 126, 222, + 196, 191, 
34, 53, 34, 63, 196, 193, 56, 2, 174, 6, 34, + ); + let r = i64x4::new( + 1803437771371125017, + 1808504320951916825, + 1808504320951916825, + 1803156197610166553, + ); + + assert_eq!(r, transmute(lasx_xvmini_bu::<25>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_hu() { + let a = u16x16::new( + 22785, 53436, 15467, 7600, 19970, 32791, 46922, 27359, 3030, 22997, 38845, 6828, 50455, + 53714, 5069, 34493, + ); + let r = i64x4::new( + 7881419608817692, + 7881419608817692, + 7881419608817692, + 7881419608817692, + ); + + assert_eq!(r, transmute(lasx_xvmini_hu::<28>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_wu() { + let a = u32x8::new( + 2549040097, 380059779, 106274074, 1242619380, 2422816304, 2036217770, 2017469655, + 192110697, + ); + let r = i64x4::new(94489280534, 94489280534, 94489280534, 94489280534); + + assert_eq!(r, transmute(lasx_xvmini_wu::<22>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmini_du() { + let a = u64x4::new( + 2554982158549964334, + 5946824623239713063, + 1554570220268300262, + 16460909687025642884, + ); + let r = i64x4::new(18, 18, 18, 18); + + assert_eq!(r, transmute(lasx_xvmini_du::<18>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseq_b() { + let a = i8x32::new( + -76, -8, 108, 108, 76, 13, -20, -73, -55, 105, 67, -14, 50, 11, -128, 38, -48, 61, -45, + 0, -31, 68, 108, 17, 86, 59, -124, 71, 118, -60, -119, 53, + ); + let b = i8x32::new( + 67, 97, 92, 3, -94, -47, 103, 58, 78, 108, 121, -13, -27, -20, -58, -75, -64, 121, 8, + -31, 56, -8, -43, 119, 10, -100, 50, 122, 34, 124, -65, -92, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseq_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseq_h() { + let a = i16x16::new( + 5587, -19681, -31618, -9619, 10724, 19984, 15759, -19212, -10822, 2437, -7916, -32319, + 8472, 25354, -32596, 17629, + ); + 
let b = i16x16::new( + -14248, 23765, 17541, -22426, -2225, 29478, -18012, -13943, 12940, 20394, 19156, -4063, + -17913, -12088, 8465, -31204, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseq_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseq_w() { + let a = i32x8::new( + 884869290, + -2032301802, + 1693636022, + 1594721776, + 2082937065, + -1159093260, + -1590139557, + -1882875192, + ); + let b = i32x8::new( + 186525406, + -1399001207, + -1514443895, + -1051577172, + 1585652521, + 90050345, + -1674322849, + 2124996559, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseq_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseq_d() { + let a = i64x4::new( + 2669611874067870445, + 1365590924683817055, + 2596664035622609827, + -5919289436914592027, + ); + let b = i64x4::new( + -7435987568868960430, + -3618747286388594676, + 1852961913881539893, + 158448424073614869, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseqi_b() { + let a = i8x32::new( + 8, -28, 17, -71, 11, 26, -79, 95, 102, 106, -100, -83, 116, -105, -72, 60, -64, -39, + -65, -93, -52, 80, 126, 38, 46, 91, -15, 42, -119, -109, 10, 70, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseqi_b::<-14>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseqi_h() { + let a = i16x16::new( + 31558, 20053, 8868, 28957, 9939, -14167, 15718, -32625, 24920, 19118, 27698, -19776, + -15714, 14099, 21403, 13371, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseqi_h::<-8>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseqi_w() { + let a = i32x8::new( + 1596885720, + 1682548012, + -1583429372, + 1961831515, + -1312514367, + 263282180, + -1647205143, + 409452108, + 
); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseqi_w::<-11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvseqi_d() { + let a = i64x4::new( + 4860385404364618706, + -866096761684413508, + -6886759413716464738, + -1240694713477982808, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvseqi_d::<-2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_b() { + let a = i8x32::new( + 29, -4, -38, -40, -57, -127, 6, 23, -51, 12, 91, -49, 33, -64, 42, -82, 110, 44, -44, + -115, 78, -111, -13, -67, 97, -30, -44, 35, 108, 49, -20, -60, + ); + let b = i8x32::new( + 120, -26, -121, 12, 72, 65, -5, 75, 16, -1, 116, 18, -94, -26, -104, -66, -38, 101, + -92, 71, -74, 2, 17, -84, 102, 49, -4, -87, 30, -83, -9, -81, + ); + let r = i64x4::new( + -71776119077994241, + -71777214277943041, + 72056498804555520, + 71776119077994495, + ); + + assert_eq!(r, transmute(lasx_xvslt_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_h() { + let a = i16x16::new( + -26246, 12525, 27206, -1022, 22747, 18600, -9895, -30775, -29586, 24084, -27504, -8187, + -18487, 5560, 18096, -17473, + ); + let b = i16x16::new( + -25007, 1947, 11331, 32443, 1338, 4043, 6432, 22428, -5023, -29819, -32277, 19148, + -4421, 17327, -30689, 4545, + ); + let r = i64x4::new( + -281474976645121, + -4294967296, + -281474976645121, + -281470681743361, + ); + + assert_eq!(r, transmute(lasx_xvslt_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_w() { + let a = i32x8::new( + -343022897, + 2023876173, + 564434564, + 1237034632, + 563192717, + -1067626766, + 2022145749, + 1215921380, + ); + let b = i32x8::new( + -319278722, + -804141589, + -453029596, + -1367666903, + 1987558200, + 1387908488, + 705912447, + -1635535899, + ); + let r = i64x4::new(4294967295, 0, -1, 0); + + assert_eq!(r, transmute(lasx_xvslt_w(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_d() { + let a = i64x4::new( + 8053537017603706522, + 8148317798642968933, + 661692989904488737, + 5141151145278580641, + ); + let b = i64x4::new( + 6944929519578764358, + -3223671261003932077, + 8970791908210514994, + -3152991651421490245, + ); + let r = i64x4::new(0, 0, -1, 0); + + assert_eq!(r, transmute(lasx_xvslt_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_b() { + let a = i8x32::new( + -60, -44, 123, -31, 39, 115, -8, -17, 10, -6, 68, 82, -123, 86, -95, -108, -78, 45, 88, + -6, -82, 69, 96, 13, 79, 14, 43, -72, -35, 27, -30, 54, + ); + let r = i64x4::new( + -72057589759672321, + -280379760050176, + 1095216660735, + 71777218556067840, + ); + + assert_eq!(r, transmute(lasx_xvslti_b::<-16>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_h() { + let a = i16x16::new( + -5839, -18013, 17630, 18447, -5550, -28050, -30597, -14016, -985, -1930, 10497, -28472, + -15481, 29582, 19157, 5547, + ); + let r = i64x4::new(4294967295, -1, -281470681743361, 65535); + + assert_eq!(r, transmute(lasx_xvslti_h::<-4>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_w() { + let a = i32x8::new( + -1407512371, + -898959054, + 572699307, + 1642426185, + 797353241, + -259466597, + -1199389426, + -1398642331, + ); + let r = i64x4::new(-1, 0, -4294967296, -1); + + assert_eq!(r, transmute(lasx_xvslti_w::<-4>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_d() { + let a = i64x4::new( + -2819395691046139625, + 5088541563771000132, + 8992157267117868445, + 3707348005090466869, + ); + let r = i64x4::new(-1, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslti_d::<1>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_bu() { + let a = u8x32::new( + 25, 12, 175, 147, 216, 93, 84, 21, 98, 182, 199, 128, 107, 68, 249, 142, 59, 204, 118, + 136, 201, 
137, 11, 155, 238, 201, 130, 187, 247, 151, 109, 109, + ); + let b = u8x32::new( + 231, 122, 213, 181, 40, 150, 168, 103, 114, 67, 58, 96, 9, 131, 109, 87, 228, 98, 233, + 122, 32, 208, 212, 193, 69, 197, 199, 67, 125, 145, 103, 17, + ); + let r = i64x4::new(-1095216660481, 280375465083135, -1099494915841, 16711680); + + assert_eq!(r, transmute(lasx_xvslt_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_hu() { + let a = u16x16::new( + 52525, 2955, 54772, 12603, 44380, 34508, 12576, 61085, 25504, 9162, 5951, 6485, 30570, + 47057, 5871, 54003, + ); + let b = u16x16::new( + 40432, 50345, 37115, 20747, 38363, 42964, 2046, 26895, 7013, 23222, 19013, 43373, + 50793, 25948, 61295, 35633, + ); + let r = i64x4::new(-281470681808896, 4294901760, -65536, 281470681808895); + + assert_eq!(r, transmute(lasx_xvslt_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_wu() { + let a = u32x8::new( + 645248129, 159156202, 442053255, 3539240300, 2212555000, 3589590552, 594555403, + 303909752, + ); + let b = u32x8::new( + 3201000514, 1412178107, 2697992684, 4141300489, 840057459, 3810448458, 959312926, + 2834332590, + ); + let r = i64x4::new(-1, -1, -4294967296, -1); + + assert_eq!(r, transmute(lasx_xvslt_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslt_du() { + let a = u64x4::new( + 9001861276662418785, + 11243806946003621417, + 16522311710011399892, + 3265452243993188662, + ); + let b = u64x4::new( + 12075582354920739274, + 16153578604538879596, + 2722606569672017936, + 5142428655769651710, + ); + let r = i64x4::new(-1, -1, 0, -1); + + assert_eq!(r, transmute(lasx_xvslt_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_bu() { + let a = u8x32::new( + 68, 117, 2, 67, 233, 205, 12, 99, 127, 21, 171, 71, 18, 146, 167, 76, 141, 21, 234, + 150, 135, 213, 231, 122, 22, 117, 124, 46, 149, 74, 11, 213, + ); 
+ let r = i64x4::new(16711680, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslti_bu::<7>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_hu() { + let a = u16x16::new( + 45362, 8378, 15038, 64046, 51883, 25813, 52028, 8730, 1255, 3100, 9043, 37803, 61269, + 5418, 42755, 28604, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslti_hu::<13>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_wu() { + let a = u32x8::new( + 1740233903, 2267221026, 574370304, 3294215750, 3920854673, 2171367380, 3811836140, + 671324390, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslti_wu::<8>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslti_du() { + let a = u64x4::new( + 7794944984440982613, + 6781669147121119045, + 9839484777866727672, + 2217716842113203908, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslti_du::<2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_b() { + let a = i8x32::new( + -20, -44, 90, -101, -69, -3, -5, 99, -59, -13, 35, 125, 64, 21, 66, -2, 57, 4, 60, -35, + 57, 37, -74, 54, -55, -125, -28, 64, -60, -10, 111, 91, + ); + let b = i8x32::new( + -44, 127, 36, 48, 36, 79, 56, 54, -123, 29, -105, -117, -46, -9, -30, 97, 3, -5, -10, + 118, -64, -118, -31, -42, 120, -84, -77, 40, 69, -80, 104, 61, + ); + let r = i64x4::new( + 72057594021216000, + -72057594037862656, + 71776123339407360, + 1095216726015, + ); + + assert_eq!(r, transmute(lasx_xvsle_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_h() { + let a = i16x16::new( + -22122, -10270, -26549, -14589, 15764, 15351, -8429, 14898, -20819, -8483, -1055, + -5229, -21058, -26881, 1568, -1544, + ); + let b = i16x16::new( + 27196, -6538, 20190, -14481, 4568, 31469, -13818, -16230, -26411, 20205, -4192, -29119, + 11920, 25504, -19817, -370, + ); + let r = i64x4::new(-1, 
4294901760, 4294901760, -281470681743361); + + assert_eq!(r, transmute(lasx_xvsle_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_w() { + let a = i32x8::new( + -44465502, + -1482288791, + 1430386258, + -837657585, + -294092640, + -1581080100, + -558275350, + -217520013, + ); + let b = i32x8::new( + -251270550, + 1931207536, + -1348623461, + -961792969, + 845442346, + 1529991774, + -2079565201, + 2051352953, + ); + let r = i64x4::new(-4294967296, 0, -1, -4294967296); + + assert_eq!(r, transmute(lasx_xvsle_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_d() { + let a = i64x4::new( + -3700065874729391328, + 3324167660406962127, + -431069737981318264, + 4685397384184188250, + ); + let b = i64x4::new( + 3966484960661616600, + 2732585182508661538, + -1886887956095472452, + 3407078622354590260, + ); + let r = i64x4::new(-1, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvsle_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_b() { + let a = i8x32::new( + -34, 34, -112, 113, 77, 45, 109, -125, 31, -88, -1, -53, 72, 39, 39, -99, -47, -45, 4, + 17, -100, -96, 41, -62, -56, -88, 37, 8, 68, -53, 52, 61, + ); + let r = i64x4::new( + -72057594021216001, + -72057589759672576, + -71776123356119041, + 280375465148415, + ); + + assert_eq!(r, transmute(lasx_xvslei_b::<-14>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_h() { + let a = i16x16::new( + 11585, -30889, -24807, -28938, -11929, -7, -8205, -24769, -12225, -7956, -26751, 11963, + 30916, -25385, -28797, -6515, + ); + let r = i64x4::new(-65536, -4294901761, 281474976710655, -65536); + + assert_eq!(r, transmute(lasx_xvslei_h::<-15>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_w() { + let a = i32x8::new( + 98083171, -839282918, 950280284, 1423312628, -74628250, -400513137, 1893412843, + 1627152567, + ); + let r = 
i64x4::new(-4294967296, 0, -1, 0); + + assert_eq!(r, transmute(lasx_xvslei_w::<-3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_d() { + let a = i64x4::new( + -4859364474358523407, + 5515090293678524269, + -8825168226110066470, + -1006722941532041773, + ); + let r = i64x4::new(-1, 0, -1, -1); + + assert_eq!(r, transmute(lasx_xvslei_d::<6>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_bu() { + let a = u8x32::new( + 158, 49, 59, 206, 238, 37, 129, 237, 128, 170, 238, 175, 10, 110, 43, 210, 223, 144, + 115, 87, 183, 177, 226, 216, 74, 40, 36, 142, 76, 48, 213, 148, + ); + let b = u8x32::new( + 10, 235, 145, 113, 48, 119, 124, 22, 154, 225, 240, 6, 37, 126, 38, 233, 129, 30, 90, + 103, 109, 14, 51, 10, 128, 242, 103, 199, 215, 228, 164, 115, + ); + let r = i64x4::new( + 280375481859840, + -71776123339407361, + 4278190080, + 281474976710655, + ); + + assert_eq!(r, transmute(lasx_xvsle_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_hu() { + let a = u16x16::new( + 61722, 23067, 57576, 43934, 56523, 22563, 45126, 9681, 5860, 62938, 40464, 22653, + 53470, 26636, 64060, 22853, + ); + let b = u16x16::new( + 61426, 33539, 62959, 2501, 21021, 20564, 64705, 12707, 6875, 56968, 45402, 15505, + 50807, 25207, 42588, 21407, + ); + let r = i64x4::new(281474976645120, -4294967296, 281470681808895, 0); + + assert_eq!(r, transmute(lasx_xvsle_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsle_wu() { + let a = u32x8::new( + 3492865309, 1162904456, 1212423957, 2856547492, 4084218464, 1751333879, 3162347846, + 990759844, + ); + let b = u32x8::new( + 525215252, 3081836083, 3319970808, 3111004663, 2712599486, 1206390980, 1598064821, + 440769207, + ); + let r = i64x4::new(-4294967296, -1, 0, 0); + + assert_eq!(r, transmute(lasx_xvsle_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvsle_du() { + let a = u64x4::new( + 2621502387249005267, + 2893454517032185854, + 7681654086665024795, + 5020934994941644473, + ); + let b = u64x4::new( + 2069393685367888462, + 16283420533139074356, + 5426371663235070936, + 6959847307032735963, + ); + let r = i64x4::new(0, -1, 0, -1); + + assert_eq!(r, transmute(lasx_xvsle_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_bu() { + let a = u8x32::new( + 31, 26, 96, 32, 50, 17, 14, 211, 51, 145, 198, 89, 217, 16, 184, 197, 220, 224, 23, + 208, 243, 188, 17, 240, 237, 207, 250, 185, 88, 127, 104, 96, + ); + let r = i64x4::new(72056494526365440, 280375465082880, 71776119077928960, 0); + + assert_eq!(r, transmute(lasx_xvslei_bu::<29>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_hu() { + let a = u16x16::new( + 43587, 14195, 3048, 63749, 62756, 59029, 53861, 44436, 63820, 31431, 3098, 39702, + 37252, 60430, 367, 9201, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslei_hu::<30>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_wu() { + let a = u32x8::new( + 2210674294, 4169142079, 3945251466, 1311516675, 2977874622, 3173129893, 3425645958, + 2905333026, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslei_wu::<31>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvslei_du() { + let a = u64x4::new( + 16014799523010103844, + 8709196257349731516, + 16077124464953821716, + 14402865276083654462, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvslei_du::<5>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_b() { + let a = i8x32::new( + 72, 84, 50, -112, -54, -10, 114, 37, -37, -9, 56, -1, -39, -51, 16, 88, -107, -47, -66, + -81, 83, 50, -69, 103, -46, 17, 121, 43, 8, -121, -113, 27, + ); + let r = i64x4::new( + 2698490476611392584, + 6345798211138549723, + 
7474623341563662741, + 1985954429852520914, + ); + + assert_eq!(r, transmute(lasx_xvsat_b::<7>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_h() { + let a = i16x16::new( + -22224, 6834, -23483, -28336, -15236, 8349, -30647, -16818, -27867, 17449, -7303, + -20496, -3398, 17074, -14188, 16934, + ); + let r = i64x4::new( + -1152657621547749376, + -1152657621547749376, + -1152657621547749376, + 1152903912689234618, + ); + + assert_eq!(r, transmute(lasx_xvsat_h::<12>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_w() { + let a = i32x8::new( + 970917085, + -759322255, + -332118787, + 127481445, + -925804081, + -2116293410, + 240264455, + -1921693726, + ); + let r = i64x4::new(-34359738361, 34359738360, -30064771080, -34359738361); + + assert_eq!(r, transmute(lasx_xvsat_w::<3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_d() { + let a = i64x4::new( + -7987623316798584571, + -7247559336295709650, + -5048248303955768218, + 6102033771404793023, + ); + let r = i64x4::new( + -7987623316798584571, + -7247559336295709650, + -5048248303955768218, + 6102033771404793023, + ); + + assert_eq!(r, transmute(lasx_xvsat_d::<63>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_bu() { + let a = u8x32::new( + 25, 84, 86, 237, 15, 25, 247, 37, 97, 77, 124, 211, 71, 31, 112, 78, 71, 3, 68, 103, + 56, 251, 164, 254, 198, 72, 14, 7, 154, 42, 226, 35, + ); + let r = i64x4::new( + 2683891456212418329, + 4557395704426741567, + 4557430858734043967, + 2539795165049929535, + ); + + assert_eq!(r, transmute(lasx_xvsat_bu::<5>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_hu() { + let a = u16x16::new( + 50818, 7191, 19885, 24886, 23947, 902, 63438, 16327, 21304, 41986, 6658, 26825, 35878, + 54181, 37442, 24336, + ); + let r = i64x4::new( + 1970354902204423, + 1970354902204423, + 1970354902204423, + 1970354902204423, + ); + + assert_eq!(r, 
transmute(lasx_xvsat_hu::<2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_wu() { + let a = u32x8::new( + 2643833778, 2163840459, 3648859312, 2300494776, 1210790323, 4241633778, 1830707970, + 1058612721, + ); + let r = i64x4::new(270582939711, 270582939711, 270582939711, 270582939711); + + assert_eq!(r, transmute(lasx_xvsat_wu::<5>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsat_du() { + let a = u64x4::new( + 8558995131692178872, + 17439570087619166841, + 9621706324971219491, + 6096695286958361953, + ); + let r = i64x4::new(8796093022207, 8796093022207, 8796093022207, 8796093022207); + + assert_eq!(r, transmute(lasx_xvsat_du::<42>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadda_b() { + let a = i8x32::new( + 25, 59, -110, -62, -36, -22, 27, -104, 32, 127, 92, 19, -127, -111, 2, 41, 37, 108, + -111, 108, -101, 89, -53, -16, 87, -111, 66, 68, 95, -47, 125, 105, + ); + let b = i8x32::new( + -121, 110, -17, 74, 16, -33, -80, 48, -69, 114, 9, -63, -38, 6, -82, -112, -105, 5, 61, + 119, 9, -72, 69, -21, 109, -14, -103, 72, -126, 41, -34, 60, + ); + let r = i64x4::new( + -7463811258668570222, + -7398158934950416027, + 2700648424200237454, + -6512388827583513148, + ); + + assert_eq!(r, transmute(lasx_xvadda_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadda_h() { + let a = i16x16::new( + -7007, 10506, -11262, 28686, 22120, 22431, 1054, -2239, -28418, 24459, -8927, -15512, + 9064, 22935, 26563, 2466, + ); + let b = i16x16::new( + -1992, -19568, 12795, -27246, 14193, 19953, -3803, -27680, 2139, 30064, -7379, -12284, + 5720, -19123, 21658, -12768, + ); + let r = i64x4::new( + -2703182350329961689, + 8421470691639987673, + 7823948489959372637, + 4288196905584441792, + ); + + assert_eq!(r, transmute(lasx_xvadda_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadda_w() { + let a = i32x8::new( + 
1265529071, + -1075977129, + -583802219, + -13912299, + -172400466, + -972042514, + -260823873, + -1620748450, + ); + let b = i32x8::new( + 489335551, 1611173717, -476611840, -751628752, -192801793, 1467389657, -374333972, + 35803655, + ); + let r = i64x4::new( + -6905519068965954578, + 3287973778850882155, + -7969462678089069741, + 7114837115730115925, + ); + + assert_eq!(r, transmute(lasx_xvadda_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadda_d() { + let a = i64x4::new( + 7814609303075513348, + -7772522798724755627, + -1147865382247844592, + -7562711493144146696, + ); + let b = i64x4::new( + 3766721551761496817, + 8105329332137326997, + -9194637465570314907, + 7351062589763608413, + ); + let r = i64x4::new( + -6865413218872541451, + -2568891942847468992, + -8104241225891392117, + -3532969990801796507, + ); + + assert_eq!(r, transmute(lasx_xvadda_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_b() { + let a = i8x32::new( + 95, 7, -14, -94, -86, -102, -123, 76, -40, 78, -16, 71, -122, 75, 8, -59, 43, 71, -16, + -38, -67, -40, 97, 101, -45, -28, -58, -99, 48, -111, -128, 118, + ); + let b = i8x32::new( + -86, 59, 75, 107, -90, -1, 114, 4, -60, 20, -8, -67, 58, 47, 100, 122, -75, -106, -118, + -95, -44, 22, 76, 54, 90, 108, 113, 21, -92, -53, 125, -70, + ); + let r = i64x4::new( + 5834300617538748937, + 4570162687008858780, + 9187324073552698848, + 3530119333939728429, + ); + + assert_eq!(r, transmute(lasx_xvsadd_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_h() { + let a = i16x16::new( + 21287, 1075, 1515, 13634, 27666, -29218, 10797, -29531, -16877, -31125, 29749, 23913, + -6583, -15233, 14925, 1745, + ); + let b = i16x16::new( + 9900, -26262, -15712, 25834, 18751, -9376, 8538, -1589, -21802, 18049, 18837, -21370, + -11718, 2110, -13829, -19996, + ); + let r = i64x4::new( + 9223311063848417747, + -8759418229895430145, 
+ 715931602406637568, + -5137195089227040637, + ); + + assert_eq!(r, transmute(lasx_xvsadd_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_w() { + let a = i32x8::new( + 192209429, + -2001895259, + 1526351324, + 940020268, + -971929246, + -265649149, + 126711930, + 1060927451, + ); + let b = i32x8::new( + -1362410074, + 17289452, + 1453224925, + -157303455, + -1002635563, + -153598928, + 1744530306, + 450932350, + ); + let r = i64x4::new( + -8523817033391921221, + 3361743116011831295, + -1800656777305487305, + 6493388403303310332, + ); + + assert_eq!(r, transmute(lasx_xvsadd_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_d() { + let a = i64x4::new( + 7784983044177669725, + 8101097656675707195, + 5701949277844824642, + -9115087610184891150, + ); + let b = i64x4::new( + -7435730805386005247, + -2620412598612541303, + -7972576523543653821, + 7444842305858583495, + ); + let r = i64x4::new( + 349252238791664478, + 5480685058063165892, + -2270627245698829179, + -1670245304326307655, + ); + + assert_eq!(r, transmute(lasx_xvsadd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_bu() { + let a = u8x32::new( + 25, 97, 235, 222, 176, 210, 161, 94, 48, 209, 231, 48, 45, 90, 187, 6, 29, 48, 193, + 158, 240, 147, 240, 248, 228, 195, 131, 114, 9, 239, 172, 211, + ); + let b = u8x32::new( + 156, 230, 197, 50, 226, 217, 198, 2, 133, 7, 31, 251, 185, 83, 103, 173, 4, 107, 100, + 3, 81, 209, 161, 88, 169, 211, 90, 7, 158, 153, 112, 221, + ); + let r = i64x4::new( + 6989586621679009717, + -5476467414210193227, + -1577084127, + -380207497217, + ); + + assert_eq!(r, transmute(lasx_xvsadd_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_hu() { + let a = u16x16::new( + 18927, 31835, 27291, 15842, 30595, 45554, 31277, 2570, 50726, 18451, 33555, 31286, + 37571, 1090, 50630, 36004, + ); + let b = 
u16x16::new( + 51573, 3134, 27346, 11433, 45605, 6834, 26138, 61459, 26540, 3859, 63747, 9497, 47455, + 22235, 55919, 64188, + ); + let r = i64x4::new( + 7677464656203087871, + -423936190922293249, + -6967068626374950913, + -2766274561, + ); + + assert_eq!(r, transmute(lasx_xvsadd_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_wu() { + let a = u32x8::new( + 2641259570, 2413939116, 2244295016, 1265788506, 4032439236, 2078944785, 2529147076, + 1095977188, + ); + let b = u32x8::new( + 1074491620, 785068578, 441575896, 2827260071, 654541549, 2711155200, 2667914280, + 1025335263, + ); + let r = i64x4::new( + -4707110644611425002, + -867234291869342912, + -1, + 9110967605937569791, + ); + + assert_eq!(r, transmute(lasx_xvsadd_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsadd_du() { + let a = u64x4::new( + 14430626347567901108, + 8966103699466030320, + 15088600594909856287, + 4617508821066205697, + ); + let b = u64x4::new( + 9949819222347987503, + 1797352673890553460, + 93407820607851767, + 16329185982288463052, + ); + let r = i64x4::new(-1, -7683287700352967836, -3264735658191843562, -1); + + assert_eq!(r, transmute(lasx_xvsadd_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_b() { + let a = i8x32::new( + 1, -7, 51, 121, -46, 91, 117, 56, -128, -103, 77, -124, 47, -81, 71, -97, 9, -22, -45, + 81, 64, -36, 18, -57, 53, -23, -56, -113, 55, -76, -98, -89, + ); + let b = i8x32::new( + 116, 40, 94, 32, 108, -83, -72, 62, 118, 3, 75, 51, -64, 117, 106, -76, 98, 102, -74, + 83, -104, -25, 103, 87, -99, -120, 40, -83, -51, 73, 88, 19, + ); + let r = i64x4::new( + 4257595030195671098, + -6244220027603726597, + 1098000814288676917, + -2451086284962613015, + ); + + assert_eq!(r, transmute(lasx_xvavg_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_h() { + let a = i16x16::new( + 22420, 2514, 
-1496, 12197, 18773, 25141, -11922, 14759, 28272, 9957, -8329, -18095, + 14119, 4453, 29447, -17743, + ); + let b = i16x16::new( + -23665, 8821, -12487, 30493, -29228, -14701, 16266, 5372, 21222, 5396, -495, -4093, + 8979, 15419, 24369, -25475, + ); + let r = i64x4::new( + 6008334822825786769, + 2833054969603877780, + -3122420865543937877, + -6082277202109387491, + ); + + assert_eq!(r, transmute(lasx_xvavg_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_w() { + let a = i32x8::new( + 264220248, + 1806183666, + -744175589, + 1149257464, + 649257353, + 1343192175, + -1646288099, + 1777956369, + ); + let b = i32x8::new( + -490550100, + 1650015069, + 602037366, + -115507354, + -1351815309, + -919786860, + 1796894888, + -1823377644, + ); + let r = i64x4::new( + 7422130269685104002, + 2219961461567099464, + 909255992234993790, + -97541447405991454, + ); + + assert_eq!(r, transmute(lasx_xvavg_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_d() { + let a = i64x4::new( + -5353831456328489109, + 1116026769917166857, + -6482325223661420741, + 4644114914180465662, + ); + let b = i64x4::new( + -8278784043739101899, + 8898944017823987194, + 162737312931734425, + -3156875890654220898, + ); + let r = i64x4::new( + -6816307750033795504, + 5007485393870577025, + -3159793955364843158, + 743619511763122382, + ); + + assert_eq!(r, transmute(lasx_xvavg_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_bu() { + let a = u8x32::new( + 222, 174, 254, 188, 116, 111, 1, 67, 236, 108, 184, 99, 34, 41, 62, 74, 228, 117, 143, + 190, 202, 68, 177, 5, 102, 26, 144, 229, 66, 185, 137, 73, + ); + let b = u8x32::new( + 9, 86, 55, 74, 146, 206, 99, 36, 206, 46, 174, 95, 25, 21, 140, 91, 99, 120, 100, 243, + 231, 197, 230, 158, 188, 38, 162, 58, 130, 77, 72, 87, + ); + let r = i64x4::new( + 3689185332455703155, + 5937185894811520477, + 5893950604224067235, + 
5794025379951354001, + ); + + assert_eq!(r, transmute(lasx_xvavg_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_hu() { + let a = u16x16::new( + 59347, 14794, 56762, 36383, 41235, 53425, 15726, 15850, 6947, 17893, 10811, 18470, + 35860, 14001, 21530, 58912, + ); + let b = u16x16::new( + 45476, 48517, 33041, 8160, 7865, 37717, 29068, 45168, 12673, 29576, 21, 26212, 20245, + 43416, 16626, 44166, + ); + let r = i64x4::new( + 6268922056724171963, + 8587616261834498022, + 6288455717791082066, + -3939723307751543404, + ); + + assert_eq!(r, transmute(lasx_xvavg_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_wu() { + let a = u32x8::new( + 1600834277, 4196831994, 2108873255, 518030497, 3166298163, 3812054340, 3824732684, + 1900211486, + ); + let b = u32x8::new( + 1499894424, 568816404, 3212845718, 500610814, 585554707, 2609103780, 7570780, 977655961, + ); + let r = i64x4::new( + -8212592065336791362, + 2187515559063158366, + -4657412007911203421, + 6180173283312674740, + ); + + assert_eq!(r, transmute(lasx_xvavg_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavg_du() { + let a = u64x4::new( + 16716089796022894912, + 10136836254171396504, + 5055029870739857077, + 1722276628667681589, + ); + let b = u64x4::new( + 2981839357822260236, + 4395528145348260085, + 9124113278861486873, + 17073319773492299474, + ); + let r = i64x4::new( + -8597779496786974042, + 7266182199759828294, + 7089571574800671975, + -9048945872629561085, + ); + + assert_eq!(r, transmute(lasx_xvavg_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_b() { + let a = i8x32::new( + 70, 49, 125, -63, -42, -19, 98, -71, -39, -43, 62, -91, -109, -76, -2, 73, -82, -26, + 31, -13, -19, 61, -64, -122, -66, -36, 15, 102, 72, 18, -9, -30, + ); + let b = i8x32::new( + -101, 91, 109, 12, 107, -108, -99, 124, -72, -12, -23, -93, 0, -21, 
-65, 51, -90, -9, + 94, -109, -17, -42, -4, 45, -18, 41, 13, 6, 79, 39, 60, -14, + ); + let r = i64x4::new( + 1945767390385358577, + 4530569318912812489, + -2675689108017254486, + -1577916506278329386, + ); + + assert_eq!(r, transmute(lasx_xvavgr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_h() { + let a = i16x16::new( + -23160, -26916, 22577, 3623, -22521, -16865, 13203, 26275, -20646, 12156, -26885, + -1419, -20243, 28347, -3617, -21473, + ); + let b = i16x16::new( + 23255, 16173, -15467, -21396, 14626, -27747, 22216, -25899, 14208, 23641, 23787, 27175, + -6255, -22851, -20976, 28894, + ); + let r = i64x4::new( + -2501171370499178448, + 52993362325598357, + 3625109573325288301, + 1044782302812228671, + ); + + assert_eq!(r, transmute(lasx_xvavgr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_w() { + let a = i32x8::new( + -500594887, + -775813621, + -892322315, + -1910111140, + 573941213, + 1978372579, + 765765621, + 1237953660, + ); + let b = i32x8::new( + -541556784, + 538719952, + -1163583489, + 56482881, + -978953184, + -804071754, + 1958602350, + 1082613894, + ); + let r = i64x4::new( + -509154771300449403, + -3980636370258710790, + 2521791825760354559, + 4983380877656540978, + ); + + assert_eq!(r, transmute(lasx_xvavgr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_d() { + let a = i64x4::new( + -560846199430459987, + -6913595054902211026, + 1018627982636790344, + -4796205388927403814, + ); + let b = i64x4::new( + -1503583177859445318, + 2269985815924150324, + 8892159546918356586, + 5254840197509918769, + ); + let r = i64x4::new( + -1032214688644952652, + -2321804619489030351, + 4955393764777573465, + 229317404291257478, + ); + + assert_eq!(r, transmute(lasx_xvavgr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_bu() { + let a = u8x32::new( + 173, 186, 248, 144, 
15, 66, 150, 226, 30, 14, 68, 38, 255, 233, 148, 172, 133, 29, 57, + 83, 110, 70, 253, 31, 175, 67, 167, 162, 54, 221, 53, 188, + ); + let b = u8x32::new( + 73, 42, 164, 127, 251, 107, 243, 43, 224, 179, 219, 9, 103, 205, 153, 157, 108, 89, 40, + 102, 99, 142, 142, 155, 155, 170, 95, 233, 116, 68, 9, 47, + ); + let r = i64x4::new( + -8663422077139783045, + -6514496773710388865, + 6757205291683625849, + 8511681618342279077, + ); + + assert_eq!(r, transmute(lasx_xvavgr_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_hu() { + let a = u16x16::new( + 748, 52495, 35014, 19986, 51280, 1137, 33343, 41113, 44125, 44938, 39033, 4840, 8926, + 20195, 61480, 38149, + ); + let b = u16x16::new( + 49450, 21694, 20295, 62811, 50314, 20597, 51590, 51120, 20909, 7005, 34026, 24886, + 1353, 12358, 20971, 58564, + ); + let r = i64x4::new( + -6793842733113449973, + -5465780177655839123, + 4183719475707936517, + -4835281559523879916, + ); + + assert_eq!(r, transmute(lasx_xvavgr_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_wu() { + let a = u32x8::new( + 725985028, 2564620547, 4042355808, 1169637821, 2193709333, 848280370, 2882464312, + 222274907, + ); + let b = u32x8::new( + 3005308642, 568881719, 1868204939, 3839859286, 1155339100, 2594656893, 1645672275, + 936913519, + ); + let r = i64x4::new( + 6729144879071593203, + -7688930946620981258, + 7393651477204383289, + 2489338192049926342, + ); + + assert_eq!(r, transmute(lasx_xvavgr_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvavgr_du() { + let a = u64x4::new( + 2554728288465437854, + 11449711494451353492, + 3273645684131385521, + 10253723919691993285, + ); + let b = u64x4::new( + 7302091036247388883, + 15155026503610587821, + 2157260177986334855, + 2575722548058380647, + ); + let r = i64x4::new( + 4928409662356413369, + -5144375074678580959, + 2715452931058860188, + 6414723233875186966, + ); 
+ + assert_eq!(r, transmute(lasx_xvavgr_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_b() { + let a = i8x32::new( + 73, 33, 105, 6, 76, 22, -108, 53, 0, 81, 98, 121, -77, 54, 85, 86, 22, 5, -91, 107, + -24, 31, -120, 60, -115, 78, 110, 39, -112, 112, -39, 29, + ); + let b = i8x32::new( + -83, -99, 27, 4, -80, 31, 26, -29, -50, 39, -93, 6, 26, 105, -109, -36, 65, -14, -120, + 103, -50, -109, -38, -78, -38, 70, -79, -27, 61, 12, 39, 93, + ); + let r = i64x4::new( + 5945023633000660863, + 8826999853620865586, + 9200430838479393749, + -4561472970538678093, + ); + + assert_eq!(r, transmute(lasx_xvssub_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_h() { + let a = i16x16::new( + 30107, 16338, 20726, 3737, -28092, -2792, 11304, -3451, 32157, 18332, 16586, 2662, + 17942, -23482, 23033, -833, + ); + let b = i16x16::new( + -212, 2969, -3923, -10268, -14795, -2019, 863, -28427, -5609, 18395, -17614, -2870, + -1551, 14381, 1242, -29426, + ); + let r = i64x4::new( + 3942162916357797487, + 7030163866323241999, + 1557260308647608319, + 8048307602867637285, + ); + + assert_eq!(r, transmute(lasx_xvssub_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_w() { + let a = i32x8::new( + -638701442, + 124032353, + -1177957330, + 1822772002, + -624208464, + -690157477, + -752614768, + 1017525230, + ); + let b = i32x8::new( + 932721978, + -1730383729, + 2006657743, + -1118024603, + 1361667737, + -932072815, + -1709865093, + -66403119, + ); + let r = i64x4::new( + 7964656428089998148, + 9223372034707292160, + 1039018467419877143, + 4655436811119524629, + ); + + assert_eq!(r, transmute(lasx_xvssub_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_d() { + let a = i64x4::new( + 8715609043439660533, + 6520891714816295946, + -9200207215764087611, + -4552769804861861814, + ); + let b = i64x4::new( + 
8369052152539855925, + 2070139234200116232, + -8565613288638792421, + 6969198225778950763, + ); + let r = i64x4::new( + 346556890899804608, + 4450752480616179714, + -634593927125295190, + -9223372036854775808, + ); + + assert_eq!(r, transmute(lasx_xvssub_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_bu() { + let a = u8x32::new( + 194, 170, 115, 69, 137, 47, 83, 232, 208, 7, 239, 24, 252, 237, 181, 153, 99, 109, 110, + 137, 12, 246, 132, 6, 201, 93, 177, 189, 98, 6, 85, 252, + ); + let b = u8x32::new( + 192, 185, 64, 8, 157, 119, 247, 72, 81, 33, 0, 242, 154, 190, 235, 167, 199, 215, 118, + 14, 79, 208, 68, 149, 8, 111, 58, 97, 85, 219, 178, 240, + ); + let r = i64x4::new( + -6917529026614329342, + 52097968963711, + 18056182014935040, + 864691185841012929, + ); + + assert_eq!(r, transmute(lasx_xvssub_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_hu() { + let a = u16x16::new( + 32377, 48753, 23359, 60048, 51933, 60261, 16706, 5683, 42654, 19286, 27115, 5230, + 25323, 3004, 59060, 28377, + ); + let b = u16x16::new( + 20524, 46292, 39370, 44869, 11104, 28817, 18216, 21295, 15477, 23627, 5697, 53043, + 24168, 62463, 15113, 55444, + ); + let r = i64x4::new( + 4272508671652343373, + 2060754813, + 91989609572905, + 188750927758467, + ); + + assert_eq!(r, transmute(lasx_xvssub_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_wu() { + let a = u32x8::new( + 1657277873, 1330142084, 2851707029, 329302965, 4012116382, 3796717712, 1394210702, + 3853566063, + ); + let b = u32x8::new( + 3002534878, 3166207065, 1567450925, 39925211, 2740035937, 1015422746, 235666751, + 2928176588, + ); + let r = i64x4::new( + 0, + 1242867990904189288, + -6501173152938039235, + 3974517532346153551, + ); + + assert_eq!(r, transmute(lasx_xvssub_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssub_du() { + let 
a = u64x4::new( + 15530474406792892207, + 11041265010582297193, + 12958884950634485683, + 10554031950250935627, + ); + let b = u64x4::new( + 14455090273467103742, + 13018023957546859856, + 4721944463560386324, + 13428322516292168868, + ); + let r = i64x4::new(1075384133325788465, 0, 8236940487074099359, 0); + + assert_eq!(r, transmute(lasx_xvssub_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_b() { + let a = i8x32::new( + 77, 34, -55, -6, -27, 106, -19, 107, 7, -43, -15, 64, 88, -60, 98, 5, 123, -72, -69, + -120, -106, -29, -62, 112, -78, -24, 105, -79, 74, 24, -122, -33, + ); + let b = i8x32::new( + 70, -55, 105, 62, 94, -15, 120, -122, -62, 75, -50, -61, -74, -125, 109, 53, -51, -35, + -29, -26, 66, 19, -98, 51, 50, 111, 106, 64, 24, 86, -114, -90, + ); + let r = i64x4::new( + -1906296455511910137, + 3461932904704341573, + 4405699852347385262, + 4109603046844106624, + ); + + assert_eq!(r, transmute(lasx_xvabsd_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_h() { + let a = i16x16::new( + -3523, -20106, 11040, 6484, 22611, -2497, 28408, 18680, 14501, -17999, -17051, 5091, + 17047, -23076, 3361, 4856, + ); + let b = i16x16::new( + 15765, 31104, 9632, 30835, -6611, 20000, -27189, 15641, 6191, 28248, 28092, 28462, + -4315, -1294, -14727, 24445, + ); + let r = i64x4::new( + 6854203208551254872, + 855641242994831910, + 6578545571444236406, + 5513891007581016946, + ); + + assert_eq!(r, transmute(lasx_xvabsd_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_w() { + let a = i32x8::new( + -516201776, + -1265475612, + -789611388, + -170081681, + 903632669, + -211238418, + -1863976799, + 639146993, + ); + let b = i32x8::new( + 1884052123, + -78957215, + 260861474, + -2114421033, + -1460646598, + -1379633816, + 1900992494, + -2022565365, + ); + let r = i64x4::new( + 5096057713617598411, + 8350873930216305054, + 5018220025571183075, 
+ -7014776540975538355, + ); + + assert_eq!(r, transmute(lasx_xvabsd_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_d() { + let a = i64x4::new( + -391271937360884965, + -20808483467978826, + 2531375025191050735, + -2026665653248710281, + ); + let b = i64x4::new( + 716104320672255601, + -518451966573772136, + 3032418447389694341, + -6748971658539956270, + ); + let r = i64x4::new( + 1107376258033140566, + 497643483105793310, + 501043422198643606, + 4722306005291245989, + ); + + assert_eq!(r, transmute(lasx_xvabsd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_bu() { + let a = u8x32::new( + 167, 63, 182, 73, 179, 226, 126, 48, 51, 89, 114, 98, 233, 151, 164, 141, 121, 82, 125, + 131, 94, 231, 83, 187, 111, 196, 18, 11, 152, 164, 19, 164, + ); + let b = u8x32::new( + 204, 191, 64, 88, 65, 66, 113, 230, 140, 89, 240, 41, 98, 215, 60, 243, 232, 132, 39, + 170, 30, 165, 206, 56, 230, 91, 235, 13, 185, 191, 68, 138, + ); + let r = i64x4::new( + -5328426372363288539, + 7379218938975879257, + -8972504989300280721, + 1887319547440621943, + ); + + assert_eq!(r, transmute(lasx_xvabsd_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_hu() { + let a = u16x16::new( + 3423, 48528, 56740, 39409, 50360, 13926, 57000, 4567, 4452, 31543, 58373, 9298, 48132, + 51688, 31647, 52056, + ); + let b = u16x16::new( + 4223, 51844, 62479, 1974, 39743, 1068, 23170, 3816, 24418, 43609, 63727, 13263, 6596, + 17773, 11934, 45434, + ); + let r = i64x4::new( + -7909703671511514336, + 211533007095998841, + 1116071278703431166, + 1864011964690965056, + ); + + assert_eq!(r, transmute(lasx_xvabsd_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_wu() { + let a = u32x8::new( + 596511673, 1656018177, 862222472, 3855869253, 1555502903, 50646434, 688234186, + 2814498786, + ); + let b = u32x8::new( + 2976814235, 
296937998, 3274139740, 128554952, 227946291, 3566260080, 3443244200, + 2459204000, + ); + let r = i64x4::new( + 5837204923827128546, + -2438051046589534252, + -3347298437440673788, + 1525979489064328670, + ); + + assert_eq!(r, transmute(lasx_xvabsd_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvabsd_du() { + let a = u64x4::new( + 12734602602054551239, + 14766664927105746582, + 15860998294904895250, + 6219187986984895141, + ); + let b = u64x4::new( + 14337911389010813068, + 18082222857282413983, + 12137634856997955567, + 8346674176989823087, + ); + let r = i64x4::new( + 1603308786956261829, + 3315557930176667401, + 3723363437906939683, + 2127486190004927946, + ); + + assert_eq!(r, transmute(lasx_xvabsd_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmul_b() { + let a = i8x32::new( + 79, -96, -64, -1, -115, -89, -42, 81, 83, -94, 126, -51, 60, -90, -52, 65, 113, 30, + -64, -32, -115, 18, -120, -103, 68, -52, -106, 124, -90, 23, 39, 46, + ); + let b = i8x32::new( + -85, 53, -41, 89, -85, -87, -95, 98, 86, 91, 64, 121, -108, 74, 124, 103, 27, -110, 66, + -68, -29, -83, -3, -62, 124, 30, -91, 77, -28, 116, -27, 64, + ); + let r = i64x4::new( + 186405908484464837, + 2869070799329859298, + -979486707244065557, + -9159357540886189840, + ); + + assert_eq!(r, transmute(lasx_xvmul_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmul_h() { + let a = i16x16::new( + -4021, 8043, -7726, -25122, -30015, -30658, -18708, -10900, 3772, -3578, -17492, + -13851, -17265, 32476, -4087, 27743, + ); + let b = i16x16::new( + -2689, -26491, 4625, 17707, 7226, 23738, 2364, -25740, 1919, 17707, 29523, -15101, + -9498, -8760, 352, -20751, + ); + let r = i64x4::new( + 6506226959995370549, + 1796983875076656058, + -7588815217799040188, + -7534790044979024262, + ); + + assert_eq!(r, transmute(lasx_xvmul_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = 
"lasx")] +unsafe fn test_lasx_xvmul_w() { + let a = i32x8::new( + 1226983252, + 1810325729, + -263694346, + -895831021, + -666287351, + 1386398263, + -1628946240, + -76075817, + ); + let b = i32x8::new( + 268813984, + 1729713250, + -1600000134, + 160164970, + 1783576517, + -2129626845, + 307974730, + -511240490, + ); + let r = i64x4::new( + 3987350480567897216, + -909423805039995588, + 1476209283271918829, + 1142495638330554240, + ); + + assert_eq!(r, transmute(lasx_xvmul_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmul_d() { + let a = i64x4::new( + 7081580607883685997, + 8110222974893566630, + -8608830426521534350, + 590950945391337126, + ); + let b = i64x4::new( + 5261749457268646376, + -3861654047048473926, + 2264171061650339978, + -2049567854949213368, + ); + let r = i64x4::new( + -9157092306373316664, + -1248560416451753828, + 7374339937678077300, + -3668010491661410128, + ); + + assert_eq!(r, transmute(lasx_xvmul_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmadd_b() { + let a = i8x32::new( + -80, 6, -31, 32, -90, -72, 112, 83, 57, 119, -115, 85, -124, 56, 112, 8, 55, -29, -86, + -43, -88, 94, 98, -85, 111, -93, -82, 53, 79, -43, 14, -67, + ); + let b = i8x32::new( + 86, -88, -20, -70, -85, 89, -29, -112, -123, -89, 29, 42, -11, -125, -93, -49, -27, -7, + 99, 68, 125, -84, -21, -114, 79, -118, 99, -23, 69, 9, -20, -112, + ); + let c = i8x32::new( + -63, 26, 78, 67, 81, 21, 10, -51, 114, -15, 89, -83, 83, -69, -105, -86, 92, 63, -57, + -19, 3, 118, -24, 53, 17, 70, 49, 96, -75, -120, -92, -112, + ); + let r = i64x4::new( + -6679394867387754874, + 9121453853276024435, + 1250494502005582467, + -4810234623069954130, + ); + + assert_eq!( + r, + transmute(lasx_xvmadd_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmadd_h() { + let a = i16x16::new( + -18216, 6658, -25854, 27669, 16377, -14455, 1886, -6575, 31234, 
14625, 26195, -12640, + 24030, -29160, 29917, -29533, + ); + let b = i16x16::new( + -3405, 23202, -23415, -21889, -9055, -26344, -21723, -29614, -15925, -27403, -3911, + -6313, 18640, 2098, 7776, 25873, + ); + let c = i16x16::new( + -28853, -6876, -18951, -29568, 17346, 756, -1848, -28084, -18031, -29179, -17665, 5467, + -7564, -24294, -5418, -17877, + ); + let r = i64x4::new( + 2275867314736517193, + 5956455014341383419, + 3282447490748182781, + -2270208808738554850, + ); + + assert_eq!( + r, + transmute(lasx_xvmadd_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmadd_w() { + let a = i32x8::new( + 631333548, + -711233206, + -373490054, + -1088004305, + 1976762993, + -1387656422, + -955329396, + -154134074, + ); + let b = i32x8::new( + -1871585382, + 1805289828, + -855267305, + -1685758538, + 1205523204, + -199185288, + 1115810744, + -1091019827, + ); + let c = i32x8::new( + -1280005623, + 719575493, + -616783227, + 1851306944, + 1226448706, + -1988503778, + 998289127, + -1282400946, + ); + let r = i64x4::new( + -2474464942478687466, + 1027640603165319277, + 8552064293631354233, + 4842015271998822292, + ); + + assert_eq!( + r, + transmute(lasx_xvmadd_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmadd_d() { + let a = i64x4::new( + -8550317712350613337, + 8202606384933985240, + 5087434227784990050, + -1267807070683885625, + ); + let b = i64x4::new( + 802127189675314302, + 3753081308686166762, + -8729512035384580104, + -6163460252766523953, + ); + let c = i64x4::new( + 9117516500379534748, + 7040045067230881407, + -6924119543016236368, + -3601551888108100797, + ); + let r = i64x4::new( + 74735811180856175, + -6992817346463866386, + -821701661344765982, + -5913164195617334796, + ); + + assert_eq!( + r, + transmute(lasx_xvmadd_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmsub_b() { + 
let a = i8x32::new( + 41, 66, 49, 41, -31, 101, 127, 22, -98, 62, 39, -62, -91, 97, 100, 46, 4, 17, 71, 25, + 127, 34, 34, -64, 56, -11, 109, -98, 39, -34, -124, -56, + ); + let b = i8x32::new( + -126, 107, 108, -102, -4, -15, -17, -100, 43, 106, -14, -106, -108, 12, 54, 116, -15, + -102, 74, 95, -5, -115, 63, 100, -47, -1, 43, -111, 18, -6, -33, -59, + ); + let c = i8x32::new( + -12, -61, 80, 77, 76, 74, -19, -82, 43, -87, 110, -104, 33, -78, -99, -79, 24, -83, -6, + 122, -25, -80, -114, 88, 127, -19, 122, -59, 54, 43, 103, 122, + ); + let r = i64x4::new( + 1025900500437025089, + -412631794493733787, + 6931094814234771308, + -1816111343100501367, + ); + + assert_eq!( + r, + transmute(lasx_xvmsub_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmsub_h() { + let a = i16x16::new( + 26038, 237, 16351, -25337, -23596, 9950, 32416, -11130, -4158, -30128, 4774, -23969, + 18009, 9294, -3126, -30265, + ); + let b = i16x16::new( + -31480, 9797, -14893, 24037, 11613, 4212, 22821, 26358, -744, -21778, -26335, 25179, + -6708, -1235, -24224, 19814, + ); + let c = i16x16::new( + -26405, -560, -18771, -10193, -26133, 18220, 11977, 15766, 19965, 5097, 6382, -14160, + 17216, 29647, -20172, -31904, + ); + let r = i64x4::new( + 2881334304583833566, + -2133902947871987083, + -1454770464836380918, + 5874888860683169625, + ); + + assert_eq!( + r, + transmute(lasx_xvmsub_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmsub_w() { + let a = i32x8::new( + -1934260879, + 1181160590, + -1986745, + -225146926, + 599588188, + 1708212146, + -1981989107, + 1701829445, + ); + let b = i32x8::new( + -763566835, + 214100032, + -67293570, + 1596390731, + -1705509662, + -1061894423, + -18782985, + 1095295438, + ); + let c = i32x8::new( + 333156491, + -310224012, + -1373786280, + 699045355, + 681377550, + -1946631976, + 1564749118, + 996805551, + ); + let r = i64x4::new( + 
362284194097715042, + -5652196781102231049, + 243945460745636608, + -6224637193866223557, + ); + + assert_eq!( + r, + transmute(lasx_xvmsub_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmsub_d() { + let a = i64x4::new( + -3841665993514658557, + 6022894223412086471, + -8518556207745298564, + -1430476343179717412, + ); + let b = i64x4::new( + 7897629235985733517, + 228540188827833305, + -8463927364436887671, + -8371521766374880332, + ); + let c = i64x4::new( + -4481659901844799958, + -4869069543228428543, + -327735423889799522, + -3356219160756661306, + ); + let r = i64x4::new( + 7809193441161400801, + 2981175878869326830, + 2247972583277073134, + -8100971496301761628, + ); + + assert_eq!( + r, + transmute(lasx_xvmsub_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_b() { + let a = i8x32::new( + 2, 48, -45, 96, 6, -14, 2, -26, -29, 13, -116, -94, -82, 97, -85, 21, -74, -3, -122, + -75, -114, -79, -14, -42, -40, -66, 107, 72, 117, -23, 55, 11, + ); + let b = i8x32::new( + -113, -102, -25, 23, 113, -81, -87, 61, -8, 115, 14, -87, -39, -62, -33, 117, -111, + 123, 30, 85, -119, -89, 37, 68, 93, 36, 94, 79, -50, 110, -128, -128, + ); + let r = i64x4::new(67174400, 843334041468931, 16515072, 1090921824000); + + assert_eq!(r, transmute(lasx_xvdiv_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_h() { + let a = i16x16::new( + -12734, -9855, -5625, -19685, -5760, 20073, -4828, 32152, -17118, -23694, 12801, + -32702, -21927, 29064, -255, 24493, + ); + let b = i16x16::new( + 5202, -19363, -28050, 14286, -31733, 14009, 1475, 5279, -16963, -26208, -32414, 583, + -21866, -8394, -11158, -24288, + ); + let r = i64x4::new( + -281474976645122, + 1970311952138240, + -15762598695796735, + -281470681939967, + ); + + assert_eq!(r, transmute(lasx_xvdiv_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = 
"lasx")] +unsafe fn test_lasx_xvdiv_w() { + let a = i32x8::new( + -1639036870, + 1679737548, + -1853446119, + 1425169187, + 709689254, + 1564169372, + -368472440, + 754854064, + ); + let b = i32x8::new( + 809279458, + -211299601, + 1005342056, + 1721341232, + -194511872, + 199704853, + -196761589, + -1316660885, + ); + let r = i64x4::new(-25769803778, 4294967295, 34359738365, 1); + + assert_eq!(r, transmute(lasx_xvdiv_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_d() { + let a = i64x4::new( + -7822845930831810797, + 4993735058150674767, + 7948083854887733828, + -5125159230108645154, + ); + let b = i64x4::new( + 2343656432981471704, + -7268480484218017416, + -2152977508876073544, + -6907442353788163718, + ); + let r = i64x4::new(-3, 0, -3, 0); + + assert_eq!(r, transmute(lasx_xvdiv_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_bu() { + let a = u8x32::new( + 40, 120, 155, 70, 202, 73, 51, 248, 122, 27, 98, 122, 31, 221, 63, 177, 129, 222, 159, + 41, 95, 74, 144, 15, 252, 14, 101, 220, 155, 209, 168, 214, + ); + let b = u8x32::new( + 105, 3, 186, 90, 103, 16, 157, 200, 195, 15, 101, 16, 92, 118, 205, 221, 131, 139, 234, + 115, 14, 110, 40, 173, 4, 100, 228, 49, 164, 68, 238, 100, + ); + let r = i64x4::new( + 72061996379416576, + 1099629068544, + 844450699936000, + 144118486677848127, + ); + + assert_eq!(r, transmute(lasx_xvdiv_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_hu() { + let a = u16x16::new( + 50698, 15156, 21232, 20163, 45596, 12286, 58595, 95, 55092, 17141, 32523, 54385, 48523, + 48676, 43699, 52279, + ); + let b = u16x16::new( + 11498, 6508, 15832, 27488, 24369, 64684, 6317, 20994, 2748, 14521, 46887, 35685, 40979, + 25137, 94, 32966, + ); + let r = i64x4::new(4295098372, 38654705665, 281474976776212, 283467841601537); + + assert_eq!(r, transmute(lasx_xvdiv_hu(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_wu() { + let a = u32x8::new( + 2271275962, 1878803191, 1899241851, 435455463, 2545672438, 1798262264, 2100509405, + 2360750144, + ); + let b = u32x8::new( + 4032427811, 1883431317, 1741576561, 2070639342, 54934516, 2950464411, 621309259, + 1280987465, + ); + let r = i64x4::new(0, 1, 46, 4294967299); + + assert_eq!(r, transmute(lasx_xvdiv_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvdiv_du() { + let a = u64x4::new( + 275328165009035219, + 4227696010240224586, + 8090530403053432892, + 18434063998903182990, + ); + let b = u64x4::new( + 5339394187150320758, + 10250881649499684594, + 7311272300344996355, + 2859467035949281895, + ); + let r = i64x4::new(0, 0, 1, 6); + + assert_eq!(r, transmute(lasx_xvdiv_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_h_b() { + let a = i8x32::new( + -5, 56, 50, 120, 77, -103, 42, -127, 8, 14, 21, 38, 52, -56, 89, 77, 35, -121, 96, + -122, -68, 11, 79, -97, 3, 75, -125, 100, -38, 16, 97, -27, + ); + let b = i8x32::new( + 111, -97, -90, 28, -46, -48, -5, -21, -82, -34, 99, 31, -37, -82, 19, -57, -101, 13, + 47, 8, 125, 38, 118, -109, -122, -71, 47, -65, -74, -3, -41, 82, + ); + let r = i64x4::new( + -36873861897256793, + 27302673318019004, + 5911562916593442, + -18859072538017839, + ); + + assert_eq!(r, transmute(lasx_xvhaddw_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_w_h() { + let a = i16x16::new( + 503, 16837, 17816, -5134, -2110, 16197, 4755, 25985, 3954, -31560, 16582, 19389, + -15163, 24197, -23773, -18386, + ); + let b = i16x16::new( + -23093, -2745, 8695, 3948, 29248, 22668, 15341, -17908, 18023, -1280, 5749, -6270, + 2684, 12529, 9865, -12718, + ); + let r = i64x4::new( + 15298673502096, + 177493818519941, + 107971182840607, + -36597416302335, + ); + + assert_eq!(r, transmute(lasx_xvhaddw_w_h(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_d_w() { + let a = i32x8::new( + 1750963922, + 584909082, + 1421536823, + -1912125255, + -1415675154, + -950003373, + 85319168, + -762670446, + ); + let b = i32x8::new( + 459045461, + -2028594364, + 1976546319, + -755242326, + -53664060, + 861552329, + 642848731, + -407580162, + ); + let r = i64x4::new(1043954543, 64421064, -1003667433, -119821715); + + assert_eq!(r, transmute(lasx_xvhaddw_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_hu_bu() { + let a = u8x32::new( + 38, 74, 29, 69, 140, 185, 4, 140, 17, 27, 252, 79, 243, 186, 145, 220, 13, 122, 179, + 16, 98, 184, 199, 160, 74, 126, 80, 155, 7, 140, 148, 161, + ); + let b = u8x32::new( + 133, 115, 144, 226, 30, 38, 232, 188, 154, 67, 7, 165, 19, 149, 99, 178, 168, 65, 209, + 54, 133, 14, 77, 82, 70, 34, 115, 197, 56, 192, 38, 122, + ); + let r = i64x4::new( + 104709614768292047, + 89791398044631221, + 66710930999804194, + 56014362196705476, + ); + + assert_eq!(r, transmute(lasx_xvhaddw_hu_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_wu_hu() { + let a = u16x16::new( + 63778, 40631, 16392, 22225, 8863, 7513, 8207, 22318, 52096, 47974, 5062, 54405, 51728, + 26552, 52537, 29064, + ); + let b = u16x16::new( + 13712, 64264, 56403, 59007, 46671, 35207, 62888, 11353, 49037, 2930, 56459, 32449, + 28370, 14428, 62265, 12050, + ); + let r = i64x4::new( + 337704688604231, + 365956983477160, + 476157254400755, + 392255068231306, + ); + + assert_eq!(r, transmute(lasx_xvhaddw_wu_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_du_wu() { + let a = u32x8::new( + 3700951359, 1340423021, 2816770908, 613522875, 1598890202, 536370888, 825435814, + 1465472531, + ); + let b = u32x8::new( + 1643146315, 730247298, 3900765507, 744547675, 1943326068, 179507092, 214959309, + 1444692790, + ); + let r = 
i64x4::new(2983569336, 4514288382, 2479696956, 1680431840); + + assert_eq!(r, transmute(lasx_xvhaddw_du_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_h_b() { + let a = i8x32::new( + -110, 85, -53, -96, -5, 14, -71, 50, -128, -83, 57, -86, 65, 24, 32, -119, 59, -41, + -85, 22, -67, -124, -126, -18, 54, -36, 103, 81, 116, -79, -55, -52, + ); + let b = i8x32::new( + -15, -92, 68, 76, -101, -42, -21, -32, -36, 23, -114, -76, 40, 19, 111, -124, -29, + -110, -123, -123, 24, 35, 126, 25, -14, 6, -91, 78, 49, -69, 27, -22, + ); + let r = i64x4::new( + 19985221551915108, + -64457838384316463, + -40251557315215372, + -21955597927907350, + ); + + assert_eq!(r, transmute(lasx_xvhsubw_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_w_h() { + let a = i16x16::new( + 32475, -17580, 4965, -21648, -16988, -15947, 18483, -27381, -26195, 19027, 19784, + -13358, -6180, 27442, 23283, 1155, + ); + let b = i16x16::new( + 7640, 26084, 32525, 1062, -7851, 17013, -8159, 21593, 32263, -22862, 17816, 30577, + -11674, 14875, 26487, -22021, + ); + let r = i64x4::new( + -232666968384132, + -82553566404512, + -133887015531444, + -108800111503156, + ); + + assert_eq!(r, transmute(lasx_xvhsubw_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_d_w() { + let a = i32x8::new( + 1120555405, 606416783, 1862962829, 65716515, -720291245, 1995296165, 1877873639, + 383778576, + ); + let b = i32x8::new( + -2142481365, + -2015795383, + 110862808, + 1067722925, + 1036379333, + 1746215780, + -901547317, + -304263170, + ); + let r = i64x4::new(2748898148, -45146293, 958916832, 1285325893); + + assert_eq!(r, transmute(lasx_xvhsubw_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_hu_bu() { + let a = u8x32::new( + 113, 29, 201, 242, 134, 250, 176, 112, 14, 192, 71, 63, 59, 39, 230, 197, 232, 110, 2, + 134, 244, 44, 
110, 200, 209, 99, 15, 169, 39, 126, 139, 207, + ); + let b = u8x32::new( + 235, 233, 194, 214, 34, 190, 122, 157, 241, 119, 67, 242, 183, 26, 163, 208, 6, 32, + 249, 49, 62, 56, 64, 107, 68, 140, 184, 157, 27, 232, 174, 226, + ); + let r = i64x4::new( + -2813822050959566, + 9851010004352975, + 38561998787379304, + 9289103727198239, + ); + + assert_eq!(r, transmute(lasx_xvhsubw_hu_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_wu_hu() { + let a = u16x16::new( + 24627, 1925, 40631, 41120, 48598, 56441, 57360, 63413, 60803, 9134, 1910, 34890, 8361, + 20497, 16343, 44260, + ); + let b = u16x16::new( + 63771, 7054, 62761, 8243, 13185, 3930, 52006, 48295, 37094, 2357, 31496, 1199, 13321, + 56020, 36805, 30263, + ); + let r = i64x4::new( + -92943092347286, + 48992691988728, + 14581413941960, + 32018981198856, + ); + + assert_eq!(r, transmute(lasx_xvhsubw_wu_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_du_wu() { + let a = u32x8::new( + 1851655538, 2991049929, 4109504012, 1371213815, 2264711690, 1359668665, 2742473455, + 1279993359, + ); + let b = u32x8::new( + 4047783060, 556492643, 3984363807, 4250070195, 975052988, 1299555592, 2868269900, + 2929723348, + ); + let r = i64x4::new(-1056733131, -2613149992, 384615677, -1588276541); + + assert_eq!(r, transmute(lasx_xvhsubw_du_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_b() { + let a = i8x32::new( + -14, -64, -81, 32, -14, -85, 120, 64, 95, 126, -11, 38, 2, -53, 40, 54, -35, 41, 58, + -60, 86, -9, 57, -11, 34, -17, -81, 89, -55, 25, 84, -101, + ); + let b = i8x32::new( + -98, -114, 25, 100, -111, 71, 35, 63, -23, 3, 93, -41, -3, -48, 91, 95, 98, 92, -113, + -82, -81, 121, -35, 73, -83, -95, 75, 65, 26, 60, -124, -5, + ); + let r = i64x4::new( + 76546840437899506, + 3902645063778631683, + -786169480790529571, + -48385121157714142, + ); + + assert_eq!(r, 
transmute(lasx_xvmod_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_h() { + let a = i16x16::new( + 13568, -26495, 27958, 11226, -17868, -9288, -10627, -29659, -16286, -27756, 22645, + -14990, 1109, 782, 5976, -13268, + ); + let b = i16x16::new( + 22907, -30762, -26890, -2623, -3889, -8952, 27558, -27225, -1007, -2649, -19000, -1212, + 3583, -14136, -1124, 6289, + ); + let r = i64x4::new( + 206607222489298176, + -684874256681470216, + -125522180245094574, + -194216204870745003, + ); + + assert_eq!(r, transmute(lasx_xvmod_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_w() { + let a = i32x8::new( + 1309045772, + -1137265851, + -1474148809, + -826641461, + 517262391, + -454945903, + -2059227752, + 1033836629, + ); + let b = i32x8::new( + 1742453362, -859625876, 711512169, 963835525, 1823286802, 1062091570, 1215420851, + -845753957, + ); + let r = i64x4::new( + -1192454611378211828, + -3550398036268816631, + -1953977774316925897, + 807808928635455307, + ); + + assert_eq!(r, transmute(lasx_xvmod_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_d() { + let a = i64x4::new( + 1878041523555568774, + 1556025246870009445, + 8042729508142516845, + -3048989907394276239, + ); + let b = i64x4::new( + 4139731099187900579, + -5256541293724606275, + -289001035147795771, + -6358290177153594057, + ); + let r = i64x4::new( + 1878041523555568774, + 1556025246870009445, + 239701559152031028, + -3048989907394276239, + ); + + assert_eq!(r, transmute(lasx_xvmod_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_bu() { + let a = u8x32::new( + 124, 195, 23, 51, 29, 150, 162, 114, 37, 233, 71, 130, 185, 243, 82, 178, 55, 114, 198, + 194, 51, 128, 183, 135, 254, 147, 93, 254, 157, 231, 225, 75, + ); + let b = u8x32::new( + 4, 234, 86, 5, 151, 127, 208, 171, 229, 154, 21, 203, 87, 142, 153, 152, 109, 75, 195, 
+ 182, 135, 251, 242, 45, 15, 229, 168, 223, 89, 83, 178, 220, + ); + let r = i64x4::new( + 8260190079890735872, + 1896689493177028389, + 51650877471270711, + 5417620637589803790, + ); + + assert_eq!(r, transmute(lasx_xvmod_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_hu() { + let a = u16x16::new( + 59302, 64062, 17665, 34634, 39674, 40771, 56476, 39054, 20128, 46806, 28975, 5092, + 32039, 65514, 52991, 10995, + ); + let b = u16x16::new( + 30365, 10559, 8088, 37622, 54157, 864, 21095, 43558, 39181, 49555, 45853, 63130, 49482, + 1077, 5568, 1505, + ); + let r = i64x4::new( + -8698133335059959543, + -7453958975338079494, + 1433395031155560096, + 129490854556368167, + ); + + assert_eq!(r, transmute(lasx_xvmod_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_wu() { + let a = u32x8::new( + 2536195964, 1025991305, 145727133, 1179968501, 2535376324, 2624321769, 500804646, + 3445505165, + ); + let b = u32x8::new( + 4283722185, 726568518, 2648066980, 2591107739, 3836915245, 1768721904, 1082904228, + 128214904, + ); + let r = i64x4::new( + 1286011080378369916, + 5067926122250870429, + 3674773441172391364, + 480682694340619302, + ); + + assert_eq!(r, transmute(lasx_xvmod_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmod_du() { + let a = u64x4::new( + 3050922509882516945, + 14221067967600195195, + 8310753426098198776, + 150087784552479859, + ); + let b = u64x4::new( + 9108987739022803721, + 14892726191598876390, + 10175125705243076843, + 8880022576671073801, + ); + let r = i64x4::new( + 3050922509882516945, + -4225676106109356421, + 8310753426098198776, + 150087784552479859, + ); + + assert_eq!(r, transmute(lasx_xvmod_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepl128vei_b() { + let a = i8x32::new( + 14, 7, 83, 99, -72, -90, 66, -53, 33, 27, -21, 110, -96, -58, -96, 54, -73, 74, -33, 
+ 51, -15, -108, -39, 124, 124, -74, -17, -17, -41, 84, 46, -73, + ); + let r = i64x4::new( + 2387225703656530209, + 2387225703656530209, + 8970181431921507452, + 8970181431921507452, + ); + + assert_eq!(r, transmute(lasx_xvrepl128vei_b::<8>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepl128vei_h() { + let a = i16x16::new( + 2674, -3702, -21458, 12674, 26270, 949, -26647, 9913, 30933, 30654, -32697, -13873, + 16165, -5608, 18102, -20233, + ); + let r = i64x4::new( + 3567468290076979586, + 3567468290076979586, + -3904680457625679409, + -3904680457625679409, + ); + + assert_eq!(r, transmute(lasx_xvrepl128vei_h::<3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepl128vei_w() { + let a = i32x8::new( + -64196701, + 1709481199, + -1911955655, + 1777845271, + 1233260806, + -309058551, + -557473503, + -1179212061, + ); + let r = i64x4::new( + 7342165844541349103, + 7342165844541349103, + -1327396365108239351, + -1327396365108239351, + ); + + assert_eq!(r, transmute(lasx_xvrepl128vei_w::<1>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepl128vei_d() { + let a = i64x4::new( + 5505097689447100650, + -5456987454315761481, + 4427502889722976813, + 8082072270131265608, + ); + let r = i64x4::new( + 5505097689447100650, + 5505097689447100650, + 4427502889722976813, + 4427502889722976813, + ); + + assert_eq!(r, transmute(lasx_xvrepl128vei_d::<0>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickev_b() { + let a = i8x32::new( + 68, 32, 62, -48, -57, 81, -17, -49, 89, 83, 84, -17, -84, 27, 125, 34, 45, 22, -76, + -126, -58, -15, 52, 46, -101, -120, -128, -63, 125, -119, 62, -25, + ); + let b = i8x32::new( + -18, 6, -55, 4, 74, 5, 59, 34, 92, 70, 29, -38, 91, 22, 15, 54, 5, -31, -103, -121, + -83, 48, -87, -100, 69, 89, -111, -61, 66, 85, 5, 122, + ); + let r = i64x4::new( + 1106510415418542574, + 9055705695986859588, + 379025047038040325, + 4502896606534087725, 
+ ); + + assert_eq!(r, transmute(lasx_xvpickev_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickev_h() { + let a = i16x16::new( + 13779, -9769, -21673, -32164, -29136, -24643, -35, -10237, -15874, -1630, -366, -22027, + -18176, 10211, -7522, 20788, + ); + let b = i16x16::new( + 16573, -27194, 21452, -4952, 10891, -6280, -31016, -14088, -21903, -8934, 20641, 23162, + -12223, 6236, -15855, -20126, + ); + let r = i64x4::new( + -8730181099762990915, + -9695284500679213, + -4462556776803227023, + -2117051360895385090, + ); + + assert_eq!(r, transmute(lasx_xvpickev_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickev_w() { + let a = i32x8::new( + -946752951, + -207147822, + -193366329, + -1481453777, + -750923229, + -575660669, + -1037215364, + 1221718353, + ); + let b = i32x8::new( + -1468110932, + -1007107613, + 1371137124, + 1715394094, + -920814431, + 907354058, + 597912747, + 1796030124, + ); + let r = i64x4::new( + 5888989108738353068, + -830502055854362039, + 2568015697600674977, + -4454806063744691677, + ); + + assert_eq!(r, transmute(lasx_xvpickev_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickev_d() { + let a = i64x4::new( + -94428288198650872, + 4107006669052123351, + 1952973857169882715, + -3468095864189526981, + ); + let b = i64x4::new( + -2104254403922616194, + -5215534061403539132, + 4917599455110663395, + -3171208575864229825, + ); + let r = i64x4::new( + -2104254403922616194, + -94428288198650872, + 4917599455110663395, + 1952973857169882715, + ); + + assert_eq!(r, transmute(lasx_xvpickev_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickod_b() { + let a = i8x32::new( + -56, -8, -6, -10, 108, -8, 122, 120, -75, -26, -47, 2, -35, -87, -61, 70, -24, -48, + 125, 19, -66, 42, -2, -49, -94, -84, -63, 74, -45, -54, -120, 56, + ); + let b = i8x32::new( + -65, -120, -46, -90, 
-108, -41, -28, -32, -125, -114, -59, 122, -3, 76, -67, -50, -59, + -94, 83, 122, -100, 12, -81, -57, 6, 29, 6, 85, -94, -36, -30, -43, + ); + let r = i64x4::new( + -3581352849590212984, + 5091604042614372088, + -3036458462372660574, + 4092165317489988560, + ); + + assert_eq!(r, transmute(lasx_xvpickod_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickod_h() { + let a = i16x16::new( + -31000, 26625, -24749, -26219, 27675, -16099, 12139, 4936, 17198, 8639, 15258, 14842, + -6785, 3344, 2053, 21006, + ); + let b = i16x16::new( + -1278, -30287, -424, 21484, 7821, 21393, 23139, -7886, 2473, 16757, -29424, 14324, + 15035, 18736, -9314, 7772, + ); + let r = i64x4::new( + -2219619782696859215, + 1389572817918715905, + 2187703990441230709, + 5912677724127371711, + ); + + assert_eq!(r, transmute(lasx_xvpickod_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickod_w() { + let a = i32x8::new( + 2143199982, + -991627533, + 1630737785, + -175139906, + -976073052, + -1793301951, + -834831207, + 3306425, + ); + let b = i32x8::new( + 1564508527, + 626529718, + 264606833, + -1943354886, + 1166719003, + -869473680, + 1896581238, + -1078061273, + ); + let r = i64x4::new( + -8346645679265278538, + -752220165191174413, + -4630237907193634192, + 14200989743342145, + ); + + assert_eq!(r, transmute(lasx_xvpickod_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickod_d() { + let a = i64x4::new( + 4767160600123418734, + 8001080746285135394, + -2817760190229042067, + 3923084493864153244, + ); + let b = i64x4::new( + -3317389585990069371, + 8793937455278562227, + 7703929803523851571, + 5524330706927878132, + ); + let r = i64x4::new( + 8793937455278562227, + 8001080746285135394, + 5524330706927878132, + 3923084493864153244, + ); + + assert_eq!(r, transmute(lasx_xvpickod_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvilvh_b() { + let a = i8x32::new( + -72, 73, -43, 126, -52, 83, 85, -79, -99, 67, 27, 28, 39, -21, -74, -30, 61, 83, 80, + -18, 48, 18, 55, 82, 107, -26, -7, 17, 91, -87, 97, 84, + ); + let b = i8x32::new( + -3, -33, -12, -52, 73, 87, -102, -3, -114, -95, -78, 65, -102, 36, 40, 102, 102, 115, + 48, -41, 109, -110, -6, 9, -8, 86, 119, -37, 25, 96, 23, 62, + ); + let r = i64x4::new( + 2035938959000968590, + -2132817086653388902, + 1286896411905256440, + 6070396101995813657, + ); + + assert_eq!(r, transmute(lasx_xvilvh_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvh_h() { + let a = i16x16::new( + -28753, 23947, 10110, -8166, 18168, -1619, 12029, 10309, 22060, -11658, 8123, 22354, + 23552, 27450, -16412, 24672, + ); + let b = i16x16::new( + -31442, 23864, 15251, -12304, -23752, -1685, -10720, 21446, 19318, 27618, 10892, -9393, + -29179, 13870, 16716, 10233, + ); + let r = i64x4::new( + -455433748147035336, + 2901817645567170080, + 7726547683447442949, + 6944594579025051980, + ); + + assert_eq!(r, transmute(lasx_xvilvh_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvh_w() { + let a = i32x8::new( + 678797694, + -1852295486, + -632882964, + -375269950, + 1655683337, + 562516909, + -759600517, + 595568887, + ); + let b = i32x8::new( + -2114925053, + 1623015448, + -398485927, + -271020427, + -284878929, + -1558239614, + -902548533, + 1778292534, + ); + let r = i64x4::new( + -2718211628679063975, + -1611772158397608331, + -3262459375147273269, + 2557948893958412086, + ); + + assert_eq!(r, transmute(lasx_xvilvh_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvh_d() { + let a = i64x4::new( + -5521345585808929096, + -2494281556296927351, + 2989419257337371241, + -1576924492614617443, + ); + let b = i64x4::new( + -7666029279891695247, + -1067545656448973211, + 7271996920619620214, + -3924745280397255469, + ); + let r = 
i64x4::new( + -1067545656448973211, + -2494281556296927351, + -3924745280397255469, + -1576924492614617443, + ); + + assert_eq!(r, transmute(lasx_xvilvh_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvl_b() { + let a = i8x32::new( + -79, -60, -80, 23, 8, 83, -52, -72, 18, 98, 69, -81, -15, -95, 68, -38, 108, -9, -95, + 110, 63, -24, -106, -24, 78, -109, 117, 10, 36, 13, -9, -70, + ); + let b = i8x32::new( + -4, -37, -54, -19, 91, 52, 111, -6, 23, 24, 50, 18, 58, 109, 35, -89, -55, -31, 21, + -28, 76, 16, -53, -16, 73, 97, -99, 70, 75, -124, 75, 70, + ); + let r = i64x4::new( + 1724228617285382652, + -5117553248043792293, + 7990688754587233481, + -1661662459983806644, + ); + + assert_eq!(r, transmute(lasx_xvilvl_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvl_h() { + let a = i16x16::new( + 16116, 7715, 3432, 24398, -2759, -24490, -19436, 8863, -24282, 23416, -26870, -3179, + -23599, -9862, 20524, 10277, + ); + let b = i16x16::new( + -29120, 15023, -2814, 7040, -19198, -5516, 30715, 18311, -1346, 32030, -17709, -30250, + 21978, 26007, -6093, 28687, + ); + let r = i64x4::new( + 2171643969672613440, + 6867456718581331202, + 6591155625162898110, + -894657396213105965, + ); + + assert_eq!(r, transmute(lasx_xvilvl_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvl_w() { + let a = i32x8::new( + 1489997232, + 1342252220, + 136381167, + 288285197, + -1772559171, + 1615944068, + 1604328217, + -70958228, + ); + let b = i32x8::new( + -794555105, + 44816804, + 2089609888, + 313909292, + 2017363432, + -1414750261, + 1773836405, + 138829633, + ); + let r = i64x4::new( + 6399489386070936863, + 5764929387928213924, + -7613083667652508184, + 6940426927105417163, + ); + + assert_eq!(r, transmute(lasx_xvilvl_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvilvl_d() { + let a = i64x4::new( + 
2785967349713819381, + 4295622653064831557, + -2688716944239585727, + 1495201372757695383, + ); + let b = i64x4::new( + -6882080563044023861, + 8040350606767129885, + 9211364387423765025, + -7760991016985753125, + ); + let r = i64x4::new( + -6882080563044023861, + 2785967349713819381, + 9211364387423765025, + -2688716944239585727, + ); + + assert_eq!(r, transmute(lasx_xvilvl_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackev_b() { + let a = i8x32::new( + 34, -14, -37, 93, 107, -43, -84, 47, -2, 72, -44, -4, -21, -45, 91, 44, -67, 47, 78, + -88, -77, 54, -48, -4, -115, 28, 45, -112, -16, -93, -125, 86, + ); + let b = i8x32::new( + 45, -46, 115, 63, -60, -89, 34, 1, -32, 96, -41, -112, 72, 24, 68, 64, 65, -60, 104, + -83, -54, 125, -86, 98, -18, -128, 68, -66, -17, 92, 8, 64, + ); + let r = i64x4::new( + -6043149256738266579, + 6576640053908864736, + -3410716086299476671, + -9004682544879989266, + ); + + assert_eq!(r, transmute(lasx_xvpackev_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackev_h() { + let a = i16x16::new( + -31926, 14925, 3993, -25807, -28395, 26414, 8241, 24589, -2983, -24679, 19318, 9614, + 10323, 27545, -18762, -18536, + ); + let b = i16x16::new( + -7985, 4641, -22978, 7805, 3248, 14824, -30918, 8002, 2172, -19190, -6029, 4840, 24125, + 16864, 9543, -919, + ); + let r = i64x4::new( + 1124112369426555087, + 2319783968684444848, + 5437789184814811260, + -5280992525495869891, + ); + + assert_eq!(r, transmute(lasx_xvpackev_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackev_w() { + let a = i32x8::new( + -332151772, + 1303690878, + 1282065842, + -1700272560, + -443102472, + 2142454870, + 78857966, + -1548128347, + ); + let b = i32x8::new( + -804493639, + 452785364, + -1917157806, + -914796730, + -2002581887, + -390090579, + 927546388, + 154785025, + ); + let r = i64x4::new( + -1426580994557974855, + 
5506430865086512722, + -1903110623724370303, + 338692385926626324, + ); + + assert_eq!(r, transmute(lasx_xvpackev_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackev_d() { + let a = i64x4::new( + 6553071732696091666, + 6908931613033995721, + -3601691172781761847, + -4565881074922016381, + ); + let b = i64x4::new( + -4424638855877852796, + -3616236802390284562, + -8253892234265412575, + 6668303162003192752, + ); + let r = i64x4::new( + -4424638855877852796, + 6553071732696091666, + -8253892234265412575, + -3601691172781761847, + ); + + assert_eq!(r, transmute(lasx_xvpackev_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackod_b() { + let a = i8x32::new( + 62, -60, -127, 84, -107, -106, -66, -119, -110, 28, 57, 97, 19, 34, -37, -7, -42, -117, + 104, -27, 81, 106, -19, 80, -20, 127, -104, 54, -37, 108, -37, 51, + ); + let b = i8x32::new( + -126, 96, -65, -4, 53, 69, -10, -33, 102, 21, -35, 115, -63, 15, -13, -3, 25, 100, 22, + -95, -81, 17, -18, 101, -67, -115, 82, 4, 123, -94, 98, 91, + ); + let r = i64x4::new( + -8511919546184186784, + -433152539702911979, + 5793153120781568868, + 3700670962761760653, + ); + + assert_eq!(r, transmute(lasx_xvpackod_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackod_h() { + let a = i16x16::new( + -15659, -944, 746, -2159, -14115, 32333, 7687, 7300, 16484, -5418, 17483, -23753, + -11433, 8096, 6365, -19623, + ); + let b = i16x16::new( + -16063, 24227, 15870, -31985, -14423, 10575, -5597, -29174, 8408, 3527, 9997, 27250, + 16855, -32478, -12854, 24292, + ); + let r = i64x4::new( + -607560370037432669, + 2054923505707592015, + -6685758080009499193, + -5523279134117035742, + ); + + assert_eq!(r, transmute(lasx_xvpackod_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackod_w() { + let a = i32x8::new( + -842203551, + -1271389188, + -2068525802, + -1822181077, 
+ -986051686, + -837897746, + 37690010, + -1697819510, + ); + let b = i32x8::new( + 224471764, + -768842241, + -1859806928, + 1498474664, + -223957810, + 2079941216, + -338745357, + -2090020855, + ); + let r = i64x4::new( + -5460574979421870593, + -7826208131606583128, + -3598743414382173600, + -7292079267755798519, + ); + + assert_eq!(r, transmute(lasx_xvpackod_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpackod_d() { + let a = i64x4::new( + -7495668983396862169, + 8274812346114337628, + 4379006400301575850, + -8628096693516187272, + ); + let b = i64x4::new( + -8614497367106654999, + -7004520942966577002, + 5232114663469258860, + 5306174777811604017, + ); + let r = i64x4::new( + -7004520942966577002, + 8274812346114337628, + 5306174777811604017, + -8628096693516187272, + ); + + assert_eq!(r, transmute(lasx_xvpackod_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf_b() { + let a = i8x32::new( + 39, -115, -21, 29, -109, -123, 49, 7, 120, 96, 121, 123, -87, 122, -27, 5, -103, -90, + -93, 98, -37, -100, 93, 27, -86, 15, -22, -80, -5, -16, 124, 124, + ); + let b = i8x32::new( + -102, 106, 26, -77, 48, 65, 21, -98, 122, -73, 124, -79, 94, 69, 52, -84, -21, -99, + -41, 63, -91, 26, -63, 44, -37, -5, -99, 53, -126, -109, -61, -55, + ); + let c = i8x32::new( + 0, 27, 12, 22, 17, 20, 12, 27, 24, 7, 29, 9, 30, 3, 21, 25, 25, 15, 16, 11, 11, 12, 9, + 11, 29, 16, 7, 30, 18, 12, 8, 10, + ); + let r = i64x4::new( + 8889704949103885210, + 6955162998750748280, + 3889845868208703759, + -7071915151180654096, + ); + + assert_eq!( + r, + transmute(lasx_xvshuf_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf_h() { + let a = i16x16::new(14, 0, 11, 10, 2, 6, 5, 1, 3, 12, 4, 7, 10, 8, 10, 4); + let b = i16x16::new( + -21254, 15426, -9904, -9348, 19843, 4700, -18790, 16378, -12463, 13093, 1534, -947, + -22603, -31524, -24301, -13577, 
+ ); + let c = i16x16::new( + -6824, -21705, 6609, -73, 752, 8612, -13615, 29408, 31778, -1056, 20474, 23005, -10590, + 8605, -3153, 16014, + ); + let r = i64x4::new( + -2787486839872112998, + -6109377377843734063, + 4507776271131171293, + -2980813411407821314, + ); + + assert_eq!( + r, + transmute(lasx_xvshuf_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf_w() { + let a = i32x8::new(6, 4, 1, 5, 3, 0, 3, 2); + let b = i32x8::new( + 112260284, + 143215906, + -519532509, + 2126848278, + -1874926296, + 888441697, + -716493665, + -1989603791, + ); + let c = i32x8::new( + 174486498, + 1186503117, + -1753459384, + 1078106035, + -2055158107, + 2071085725, + 1120609144, + -109951450, + ); + let r = i64x4::new( + 482154252195106851, + 615107633723513293, + -8826836853489252826, + 4812979629263570470, + ); + + assert_eq!( + r, + transmute(lasx_xvshuf_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf_d() { + let a = i64x4::new(0, 1, 2, 3); + let b = i64x4::new( + -4818789571452434899, + -1419914372991806078, + -1036924962456047190, + 5694315469710360861, + ); + let c = i64x4::new( + 6580926913588532380, + -6246203397488305553, + -6030997396381573391, + -9089767205636240503, + ); + let r = i64x4::new( + 6580926913588532380, + -6246203397488305553, + -1036924962456047190, + 5694315469710360861, + ); + + assert_eq!( + r, + transmute(lasx_xvshuf_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvand_v() { + let a = u8x32::new( + 90, 203, 15, 155, 63, 105, 53, 48, 190, 209, 178, 76, 210, 20, 95, 140, 100, 15, 124, + 254, 188, 84, 233, 191, 139, 236, 35, 122, 198, 9, 3, 147, + ); + let b = u8x32::new( + 213, 245, 251, 19, 199, 6, 225, 234, 198, 129, 17, 8, 53, 155, 124, 177, 193, 194, 146, + 194, 233, 18, 7, 81, 49, 91, 33, 177, 131, 65, 221, 245, + ); + let r = i64x4::new( + 2315131713829454160, 
+ -9197458677956574842, + 1225278890617864768, + -7998109804568426495, + ); + + assert_eq!(r, transmute(lasx_xvand_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvandi_b() { + let a = u8x32::new( + 76, 191, 179, 169, 134, 148, 220, 33, 48, 114, 218, 175, 149, 53, 89, 64, 173, 218, + 209, 46, 131, 153, 196, 101, 69, 5, 138, 207, 219, 29, 3, 11, + ); + let r = i64x4::new( + 2381282727478636300, + 2573978984653344, + 2667266788571548205, + 793492300495455493, + ); + + assert_eq!(r, transmute(lasx_xvandi_b::<47>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvor_v() { + let a = u8x32::new( + 125, 60, 243, 199, 224, 172, 254, 103, 105, 229, 245, 138, 160, 89, 141, 68, 218, 162, + 229, 242, 225, 91, 142, 124, 4, 158, 13, 29, 31, 24, 19, 236, + ); + let b = u8x32::new( + 61, 24, 19, 82, 93, 44, 145, 86, 125, 230, 60, 205, 17, 204, 228, 220, 145, 189, 138, + 34, 184, 52, 178, 93, 142, 223, 59, 0, 197, 149, 61, 209, + ); + let r = i64x4::new( + 8646820015824387197, + -2527120060116506755, + 9060820211815399387, + -198266276987019378, + ); + + assert_eq!(r, transmute(lasx_xvor_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvori_b() { + let a = u8x32::new( + 224, 64, 88, 211, 150, 151, 191, 121, 45, 29, 78, 44, 95, 182, 208, 27, 245, 89, 219, + 195, 171, 1, 240, 194, 102, 138, 54, 60, 40, 239, 106, 1, + ); + let r = i64x4::new( + 9079248013888353524, + 9220265364544191869, + -651766303824052747, + 8466485259632311926, + ); + + assert_eq!(r, transmute(lasx_xvori_b::<116>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvnor_v() { + let a = u8x32::new( + 76, 54, 61, 63, 251, 146, 243, 33, 217, 111, 210, 198, 26, 170, 74, 175, 96, 81, 208, + 187, 214, 194, 59, 158, 142, 191, 224, 234, 79, 178, 30, 115, + ); + let b = u8x32::new( + 188, 24, 29, 204, 122, 22, 58, 38, 82, 168, 2, 213, 73, 48, 85, 251, 211, 186, 195, 15, + 123, 225, 156, 253, 
77, 213, 172, 132, 177, 163, 80, 23, + ); + let r = i64x4::new( + -2881062395696725757, + 45112567624699940, + 18045185911686156, + -8601510250130767824, + ); + + assert_eq!(r, transmute(lasx_xvnor_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvnori_b() { + let a = u8x32::new( + 111, 178, 133, 23, 105, 149, 64, 248, 248, 8, 96, 98, 70, 20, 213, 175, 56, 216, 223, + 118, 46, 113, 0, 12, 209, 39, 73, 77, 16, 194, 218, 171, + ); + let r = i64x4::new( + 440871273092500496, + 5767503740212762118, + 5935197095815284294, + 6053994920729270286, + ); + + assert_eq!(r, transmute(lasx_xvnori_b::<161>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvxor_v() { + let a = u8x32::new( + 126, 139, 80, 168, 116, 128, 183, 120, 15, 152, 183, 62, 51, 179, 32, 150, 207, 108, + 88, 207, 22, 73, 189, 112, 204, 236, 216, 24, 10, 70, 249, 168, + ); + let b = u8x32::new( + 3, 89, 57, 121, 152, 63, 89, 15, 254, 77, 130, 223, 192, 140, 229, 207, 202, 154, 208, + 62, 3, 30, 110, 85, 8, 137, 208, 97, 40, 65, 148, 234, + ); + let r = i64x4::new( + 8642055758817120893, + 6468646756475590129, + 2725617951247496709, + 4786489823605581252, + ); + + assert_eq!(r, transmute(lasx_xvxor_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvxori_b() { + let a = u8x32::new( + 36, 245, 58, 172, 188, 20, 51, 56, 127, 7, 39, 87, 209, 54, 137, 206, 217, 81, 137, 48, + 141, 135, 84, 138, 252, 157, 45, 234, 89, 34, 196, 168, + ); + let r = i64x4::new( + -8394526022023166313, + 9023671463178450124, + 4172361022876344938, + 1979210996964535887, + ); + + assert_eq!(r, transmute(lasx_xvxori_b::<179>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitsel_v() { + let a = u8x32::new( + 69, 119, 244, 57, 103, 127, 203, 220, 144, 88, 221, 99, 13, 153, 253, 10, 8, 78, 153, + 186, 144, 233, 66, 26, 137, 170, 201, 216, 251, 59, 188, 201, + ); + let b = u8x32::new( + 58, 118, 243, 153, 
246, 176, 29, 116, 177, 226, 235, 9, 57, 218, 185, 77, 171, 107, + 162, 224, 75, 59, 187, 183, 56, 33, 90, 30, 188, 49, 190, 107, + ); + let c = u8x32::new( + 8, 253, 144, 97, 31, 113, 95, 153, 184, 212, 7, 183, 120, 52, 43, 202, 55, 34, 46, 82, + 88, 35, 171, 65, 101, 142, 107, 208, 15, 137, 143, 201, + ); + let r = i64x4::new( + 6097098147492034125, + 5259528428215584944, + 2011960906681118251, + 5313741768184438952, + ); + + assert_eq!( + r, + transmute(lasx_xvbitsel_v(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbitseli_b() { + let a = u8x32::new( + 178, 71, 136, 149, 190, 92, 86, 87, 135, 81, 18, 106, 61, 240, 71, 242, 187, 166, 218, + 183, 12, 80, 244, 242, 232, 140, 161, 227, 35, 23, 225, 97, + ); + let b = u8x32::new( + 173, 155, 189, 0, 17, 102, 85, 215, 175, 177, 175, 162, 203, 4, 46, 80, 41, 131, 12, + 130, 254, 191, 191, 230, 198, 211, 197, 37, 29, 13, 108, 138, + ); + let r = i64x4::new( + -7776240335059051363, + -8057901949774876500, + -7737254534663338600, + -8463358690923847794, + ); + + assert_eq!( + r, + transmute(lasx_xvbitseli_b::<156>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf4i_b() { + let a = i8x32::new( + 108, -102, 33, -112, -6, -76, 115, -16, 40, -100, -76, 37, -61, -55, -102, 17, 25, 99, + 89, -78, 55, -35, 116, 64, 75, 14, -106, 67, -49, 18, -91, -41, + ); + let r = i64x4::new( + -5408624464691684710, + -3958160729736635236, + -2503757449887849629, + 1357573681433480718, + ); + + assert_eq!(r, transmute(lasx_xvshuf4i_b::<117>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf4i_h() { + let a = i16x16::new( + -6971, -14860, 30437, 17998, 739, 5931, -29626, 13221, 14940, -31006, -17153, -20574, + 19219, 15653, -6222, 26534, + ); + let r = i64x4::new( + -4182640851919387148, + 1669484871499978539, + -8727220014624373022, + 4406041774853078309, + ); + + assert_eq!(r, 
transmute(lasx_xvshuf4i_h::<125>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf4i_w() { + let a = i32x8::new( + -1698591186, + -189845668, + 1075366445, + -1020663141, + -48015581, + 913540401, + -1408537529, + 218710667, + ); + let r = i64x4::new( + 4618663713566149165, + -7295393590547476946, + -6049622619357221817, + -206225345846487261, + ); + + assert_eq!(r, transmute(lasx_xvshuf4i_w::<10>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplgr2vr_b() { + let r = i64x4::new( + 8463800222054970741, + 8463800222054970741, + 8463800222054970741, + 8463800222054970741, + ); + + assert_eq!(r, transmute(lasx_xvreplgr2vr_b(-139770763))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplgr2vr_h() { + let r = i64x4::new( + -1100020993973555013, + -1100020993973555013, + -1100020993973555013, + -1100020993973555013, + ); + + assert_eq!(r, transmute(lasx_xvreplgr2vr_h(-111546181))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplgr2vr_w() { + let r = i64x4::new( + -8112237653938959659, + -8112237653938959659, + -8112237653938959659, + -8112237653938959659, + ); + + assert_eq!(r, transmute(lasx_xvreplgr2vr_w(-1888777515))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplgr2vr_d() { + let r = i64x4::new( + -1472556476011894783, + -1472556476011894783, + -1472556476011894783, + -1472556476011894783, + ); + + assert_eq!(r, transmute(lasx_xvreplgr2vr_d(-1472556476011894783))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpcnt_b() { + let a = i8x32::new( + -78, -95, 2, -80, -45, 8, -113, 34, -100, -34, 69, 126, -9, -4, -51, 89, -32, 120, 99, + 84, 74, -26, -84, 118, -104, -104, -2, -10, 56, 17, 66, 116, + ); + let r = i64x4::new( + 145523683996271364, + 289644378270664196, + 361419380590117891, + 288795538114413315, + ); + + assert_eq!(r, transmute(lasx_xvpcnt_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpcnt_h() { + let a = 
i16x16::new( + 11626, 5283, -7476, -20299, -21862, -7933, -26579, 26723, -24113, 8952, 15751, -20804, + 3834, 23833, -21664, 23370, + ); + let r = i64x4::new( + 2251834173816840, + 1970354902138888, + 2814788422270985, + 2251829878980617, + ); + + assert_eq!(r, transmute(lasx_xvpcnt_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpcnt_w() { + let a = i32x8::new( + 769725316, + 1329443403, + 3455051, + -1024015807, + 1113804345, + 533788195, + 1478448269, + 663132689, + ); + let r = i64x4::new(77309411341, 60129542155, 73014444046, 55834574863); + + assert_eq!(r, transmute(lasx_xvpcnt_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpcnt_d() { + let a = i64x4::new( + -1195667126994002745, + 574485287218873120, + 4359670550805993357, + -166544779870738672, + ); + let r = i64x4::new(33, 31, 29, 33); + + assert_eq!(r, transmute(lasx_xvpcnt_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclo_b() { + let a = i8x32::new( + -87, -42, 123, 30, -64, -61, 45, 65, 116, 65, 36, 53, -53, 107, 76, 11, -15, -38, -46, + 88, -114, -107, 55, 53, -61, -70, -103, -62, 21, -29, 40, 95, + ); + let r = i64x4::new(2207613190657, 8589934592, 1103806726660, 3298568503554); + + assert_eq!(r, transmute(lasx_xvclo_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclo_h() { + let a = i16x16::new( + -4880, 19940, -15012, -1377, -9664, 29017, 15571, -20185, -11621, 32665, -31110, 32554, + -31842, 20391, -23474, -18820, + ); + let r = i64x4::new( + 1407383473487875, + 281474976710658, + 4294967298, + 281479271677953, + ); + + assert_eq!(r, transmute(lasx_xvclo_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclo_w() { + let a = i32x8::new( + -472837395, + -2135587215, + -2000467762, + 411236038, + -1457849736, + 1672236706, + -1251091450, + -777023005, + ); + let r = i64x4::new(4294967299, 1, 1, 8589934593); + + assert_eq!(r, 
transmute(lasx_xvclo_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclo_d() { + let a = i64x4::new( + -2662002076602604283, + 1069611961163112747, + -5322946916564324351, + 7672935739349466106, + ); + let r = i64x4::new(2, 0, 1, 0); + + assert_eq!(r, transmute(lasx_xvclo_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclz_b() { + let a = i8x32::new( + 48, -6, 70, -124, -16, -25, -31, -91, -16, -19, -117, -25, -17, 92, 40, 116, -123, 91, + 22, -73, 100, 103, -72, 27, 14, -67, 118, 82, 90, 31, -83, -15, + ); + let r = i64x4::new(65538, 72621643502977024, 216173885920575744, 3302846693380); + + assert_eq!(r, transmute(lasx_xvclz_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclz_h() { + let a = i16x16::new( + -11088, -2624, 9587, 10227, -21358, -32061, -32593, 20863, -13412, -5184, -28388, + 12581, 27368, 29494, 2214, -12445, + ); + let r = i64x4::new( + 562958543355904, + 281474976710656, + 562949953421312, + 17179934721, + ); + + assert_eq!(r, transmute(lasx_xvclz_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclz_w() { + let a = i32x8::new( + -1816955803, + 631623303, + -844798554, + -571080345, + 439698339, + -377278351, + -2011143491, + 1645796965, + ); + let r = i64x4::new(8589934592, 0, 3, 4294967296); + + assert_eq!(r, transmute(lasx_xvclz_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvclz_d() { + let a = i64x4::new( + -3450263516250458188, + -4779789731770767580, + -2256592148267722054, + 4713387490250241941, + ); + let r = i64x4::new(0, 0, 0, 1); + + assert_eq!(r, transmute(lasx_xvclz_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfadd_s() { + let a = u32x8::new( + 1058561863, 1064952952, 1049344074, 1062702316, 1057792746, 1062620339, 1060506486, + 1055219670, + ); + let b = u32x8::new( + 1058369685, 1062538381, 1060953918, 1045575432, 1041469388, 993916160, 1061165480, + 
1040806504, + ); + let r = i64x4::new( + 4604781644817557486, + 4577360739647446450, + 4564128465094280925, + 4545553165339792015, + ); + + assert_eq!(r, transmute(lasx_xvfadd_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfadd_d() { + let a = u64x4::new( + 4604104186982846811, + 4594101328742252424, + 4601686809902104562, + 4591010495556540480, + ); + let b = u64x4::new( + 4599295489329742538, + 4597621922535438280, + 4568770145289685248, + 4606509170156045614, + ); + let r = i64x4::new( + 4606916121688765120, + 4600365225266215848, + 4601738557736193412, + 4607242424158867483, + ); + + assert_eq!(r, transmute(lasx_xvfadd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfsub_s() { + let a = u32x8::new( + 1051284612, 1063062529, 1065074933, 1061303845, 1040445544, 1065277127, 1050456038, + 1028474080, + ); + let b = u32x8::new( + 1061323418, 1047742504, 1041252032, 1046362676, 1058536139, 1062234929, 1060266892, + 1051059318, + ); + let r = i64x4::new( + 4548699359865974960, + 4542627446496145733, + 4483806600207662434, + -4716328899074058446, + ); + + assert_eq!(r, transmute(lasx_xvfsub_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfsub_d() { + let a = u64x4::new( + 4600171060344923522, + 4605546915627674696, + 4592595361373027936, + 4605218827740699453, + ); + let b = u64x4::new( + 4605618236610286151, + 4595024973508085836, + 4603596942845220543, + 4598338803059870948, + ); + let r = i64x4::new( + -4621313823233868020, + 4604082677323287093, + -4620839705514447386, + 4602885236169716939, + ); + + assert_eq!(r, transmute(lasx_xvfsub_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmul_s() { + let a = u32x8::new( + 1052320864, 1047132356, 1062268100, 1046708728, 1041045324, 1063314176, 1059310073, + 1049796536, + ); + let b = u32x8::new( + 1064358048, 1061515003, 1057528231, 1058432998, 
1063900744, 1052241494, 1052600868, + 1042517172, + ); + let r = i64x4::new( + 4482332724193798395, + 4469165660137518684, + 4513050635226112077, + 4412217640780718091, + ); + + assert_eq!(r, transmute(lasx_xvfmul_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmul_d() { + let a = u64x4::new( + 4606629864418855094, + 4605003539487786257, + 4590479879446676128, + 4606513106899913084, + ); + let b = u64x4::new( + 4605920112889960858, + 4598179153756612874, + 4606290518673084028, + 4605164664361830142, + ); + let r = i64x4::new( + 4605444995749970010, + 4596002305251241714, + 4589904028032657573, + 4604645288864682176, + ); + + assert_eq!(r, transmute(lasx_xvfmul_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfdiv_s() { + let a = u32x8::new( + 1057794250, 1042162504, 1058563973, 1059452123, 1050358290, 1044764232, 1058075458, + 1044755920, + ); + let b = u32x8::new( + 1059441919, 1061487805, 1048043892, 1042438684, 1061822186, 1057796721, 1060121466, + 1051587390, + ); + let r = i64x4::new( + 4489379395443175003, + 4648514715526194553, + 4518231675762938086, + 4544549637634302505, + ); + + assert_eq!(r, transmute(lasx_xvfdiv_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfdiv_d() { + let a = u64x4::new( + 4599185246498765334, + 4599944651523203368, + 4605116834688397287, + 4604853047950220214, + ); + let b = u64x4::new( + 4564176709757936128, + 4602766877113246240, + 4596205261335386636, + 4603651841724508284, + ); + let r = i64x4::new( + 4641804750140101849, + 4604327948136618660, + 4616067223277414565, + 4608170208670026319, + ); + + assert_eq!(r, transmute(lasx_xvfdiv_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcvt_h_s() { + let a = u32x8::new( + 1058469229, 1050453282, 1035903176, 1054073088, 1063294292, 1008492480, 1057298766, + 1061246000, + ); + let b = u32x8::new( + 1023462464, 
1060058935, 1063991271, 1051666694, 1026891648, 1059128978, 1040948004, + 1063761400, + ); + let r = i64x4::new( + 3853176214889572358, + 3935915130522777784, + 4268902673740736937, + 4182498428240214789, + ); + + assert_eq!(r, transmute(lasx_xvfcvt_h_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcvt_s_d() { + let a = u64x4::new( + 4597447768952621592, + 4604521658660448767, + 4602704275491810917, + 4598917842979840742, + ); + let b = u64x4::new( + 4599553378754216492, + 4584512794443142976, + 4602292684825622938, + 4600582838384043714, + ); + let r = i64x4::new( + 4394300226931207022, + 4554371141198369562, + 4522860581064345217, + 4509540616169896248, + ); + + assert_eq!(r, transmute(lasx_xvfcvt_s_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmin_s() { + let a = u32x8::new( + 1055713836, 1054644052, 1049275150, 1057289061, 1061461229, 1041818012, 1060715063, + 1040785036, + ); + let b = u32x8::new( + 1048823100, 1053139848, 1065067350, 1058425698, 1057910475, 1058359832, 1051231814, + 1042813160, + ); + let r = i64x4::new( + 4523201206323234108, + 4541021940462824206, + 4474574290981646027, + 4470137692837414470, + ); + + assert_eq!(r, transmute(lasx_xvfmin_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmin_d() { + let a = u64x4::new( + 4594570070884899116, + 4601942383326036568, + 4603863714261060635, + 4604069842204647079, + ); + let b = u64x4::new( + 4597923907797027300, + 4602734374246572404, + 4583371218452703040, + 4596668800324369880, + ); + let r = i64x4::new( + 4594570070884899116, + 4601942383326036568, + 4583371218452703040, + 4596668800324369880, + ); + + assert_eq!(r, transmute(lasx_xvfmin_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmina_s() { + let a = u32x8::new( + 1051583574, 1048334100, 1008901056, 1048010844, 1058048126, 1046481300, 1034708664, + 1062424645, + 
); + let b = u32x8::new( + 1057050977, 1054905968, 1057610003, 1058883162, 1036134312, 1020267520, 1059621961, + 1062129138, + ); + let r = i64x4::new( + 4502560675833177174, + 4501172301842258880, + 4382015632607160232, + 4561809912873379512, + ); + + assert_eq!(r, transmute(lasx_xvfmina_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmina_d() { + let a = u64x4::new( + 4600343614636459278, + 4586078532026713744, + 4605522001302794605, + 4604680104437291828, + ); + let b = u64x4::new( + 4606967369913508220, + 4606214846243616482, + 4587216688083732016, + 4597161583916257152, + ); + let r = i64x4::new( + 4600343614636459278, + 4586078532026713744, + 4587216688083732016, + 4597161583916257152, + ); + + assert_eq!(r, transmute(lasx_xvfmina_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmax_s() { + let a = u32x8::new( + 1040557328, 1056374346, 1061211328, 1043258760, 1036675480, 1065222105, 1042177632, + 1023489024, + ); + let b = u32x8::new( + 1030428272, 1047669536, 1035741736, 1064496616, 1062615049, 1064308633, 1058514955, + 1065140306, + ); + let r = i64x4::new( + 4537093269443945744, + 4571978153483881664, + 4575094105013893129, + 4574742780979947531, + ); + + assert_eq!(r, transmute(lasx_xvfmax_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmax_d() { + let a = u64x4::new( + 4598455083545818248, + 4600184556479215682, + 4605785336194907924, + 4595051938027720488, + ); + let b = u64x4::new( + 4598044308154343000, + 4602111953345143140, + 4606540384570465960, + 4602928137069840177, + ); + let r = i64x4::new( + 4598455083545818248, + 4602111953345143140, + 4606540384570465960, + 4602928137069840177, + ); + + assert_eq!(r, transmute(lasx_xvfmax_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmaxa_s() { + let a = u32x8::new( + 1029731152, 1046633312, 1057699093, 1057848545, 1056015154, 
1053369950, 1043177732, + 1054203026, + ); + let b = u32x8::new( + 1056523808, 1057137213, 1057627244, 1053365006, 1056989330, 1060333719, 1061877148, + 1001482496, + ); + let r = i64x4::new( + 4540369758276109856, + 4543424905953883413, + 4554098647008043154, + 4527767521076114844, + ); + + assert_eq!(r, transmute(lasx_xvfmaxa_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmaxa_d() { + let a = u64x4::new( + 4607057953546777183, + 4598029803916303580, + 4606768199731078735, + 4577576246859464512, + ); + let b = u64x4::new( + 4602769751297399272, + 4606575139730018588, + 4600779924965638822, + 4596362093665607644, + ); + let r = i64x4::new( + 4607057953546777183, + 4606575139730018588, + 4606768199731078735, + 4596362093665607644, + ); + + assert_eq!(r, transmute(lasx_xvfmaxa_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfclass_s() { + let a = u32x8::new( + 1055311824, 1052041740, 1046016912, 1053948390, 1064758783, 1058940353, 1054333862, + 1048790772, + ); + let r = i64x4::new(549755814016, 549755814016, 549755814016, 549755814016); + + assert_eq!(r, transmute(lasx_xvfclass_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfclass_d() { + let a = u64x4::new( + 4601866312729243692, + 4603727160924846294, + 4581175864218244800, + 4596173124127472804, + ); + let r = i64x4::new(128, 128, 128, 128); + + assert_eq!(r, transmute(lasx_xvfclass_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfsqrt_s() { + let a = u32x8::new( + 1065040686, 1045332480, 1058748054, 1041454996, 1045312756, 1048325884, 1051863384, + 1061201844, + ); + let r = i64x4::new( + 4532289266943630008, + 4522237574588618202, + 4539089286789972523, + 4566109703441416989, + ); + + assert_eq!(r, transmute(lasx_xvfsqrt_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfsqrt_d() { + let a = u64x4::new( + 4604266936093488453, + 
4603635094556032126, + 4604345755115950647, + 4595358066919885688, + ); + let r = i64x4::new( + 4605582601319773315, + 4605187935290824484, + 4605630368329407402, + 4601138545884238765, + ); + + assert_eq!(r, transmute(lasx_xvfsqrt_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrecip_s() { + let a = u32x8::new( + 1060913758, 1057137592, 1056500078, 1053365486, 1052072368, 1058849416, 1061191779, + 1061827646, + ); + let r = i64x4::new( + 4610230120071696079, + 4621525987145000223, + 4598466002793312350, + 4585242601638738136, + ); + + assert_eq!(r, transmute(lasx_xvfrecip_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrecip_d() { + let a = u64x4::new( + 4600818034032403792, + 4605811415521276862, + 4603750608638111426, + 4602783159858591242, + ); + let r = i64x4::new( + 4612858666853570563, + 4607990995462358858, + 4609954512138978824, + 4611482062367896141, + ); + + assert_eq!(r, transmute(lasx_xvfrecip_d(transmute(a)))); +} + +#[simd_test(enable = "lasx,frecipe")] +unsafe fn test_lasx_xvfrecipe_s() { + let a = u32x8::new( + 1061538089, 1009467584, 1043164316, 1030910448, 1059062619, 1048927856, 1064915194, + 1028524176, + ); + let r = i64x4::new( + 4809660548434472067, + 4721787188318892829, + 4644815739361740708, + 4728509413412007938, + ); + + assert_eq!(r, transmute(lasx_xvfrecipe_s(transmute(a)))); +} + +#[simd_test(enable = "lasx,frecipe")] +unsafe fn test_lasx_xvfrecipe_d() { + let a = u64x4::new( + 4599514006383746620, + 4607114589130093485, + 4603063439897885463, + 4602774413388259784, + ); + let r = i64x4::new( + 4614125529786744832, + 4607216711966392320, + 4610977572161847296, + 4611499011256352768, + ); + + assert_eq!(r, transmute(lasx_xvfrecipe_d(transmute(a)))); +} + +#[simd_test(enable = "lasx,frecipe")] +unsafe fn test_lasx_xvfrsqrte_s() { + let a = u32x8::new( + 1042369896, 1033402040, 1063640659, 1061099374, 1064617699, 1050687308, 1049602990, + 1047907124, + ); + let r = 
i64x4::new( + 4641680627989561881, + 4581330281566770462, + 4604034110053345047, + 4612427253546066334, + ); + + assert_eq!(r, transmute(lasx_xvfrsqrte_s(transmute(a)))); +} + +#[simd_test(enable = "lasx,frecipe")] +unsafe fn test_lasx_xvfrsqrte_d() { + let a = u64x4::new( + 4601640737224225970, + 4602882853441572005, + 4594899837086694432, + 4596019513190087348, + ); + let r = i64x4::new( + 4609450077243572224, + 4608908592999825408, + 4612828109287194624, + 4612346183891812352, + ); + + assert_eq!(r, transmute(lasx_xvfrsqrte_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrint_s() { + let a = u32x8::new( + 1043178464, 1038460040, 1061848728, 1058680620, 1058193187, 1046712064, 1061839389, + 1062791786, + ); + let r = i64x4::new(0, 4575657222473777152, 1065353216, 4575657222473777152); + + assert_eq!(r, transmute(lasx_xvfrint_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrint_d() { + let a = u64x4::new( + 4602995275079155807, + 4605303966018459675, + 4604656441302899118, + 4598894354395850360, + ); + let r = i64x4::new( + 4607182418800017408, + 4607182418800017408, + 4607182418800017408, + 0, + ); + + assert_eq!(r, transmute(lasx_xvfrint_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrsqrt_s() { + let a = u32x8::new( + 1061523868, 1058283912, 1058667997, 1055761106, 1039496312, 1051937612, 1064817002, + 1028487648, + ); + let r = i64x4::new( + 4586992255349404714, + 4592512950478375290, + 4600512219702681066, + 4651901116840286347, + ); + + assert_eq!(r, transmute(lasx_xvfrsqrt_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrsqrt_d() { + let a = u64x4::new( + 4605274633765138187, + 4606739923803408012, + 4600049100582648664, + 4595639907624537812, + ); + let r = i64x4::new( + 4607751568495560074, + 4607297292863467031, + 4610247933797877877, + 4612495411087822923, + ); + + assert_eq!(r, transmute(lasx_xvfrsqrt_d(transmute(a)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvflogb_s() { + let a = u32x8::new( + 1060538931, 1046083924, 1058790721, 1059749771, 1051275772, 1063729353, 1063250692, + 1040020680, + ); + let r = i64x4::new( + -4593671616705069056, + -4647714812233515008, + -4647714812225126400, + -4575657218195587072, + ); + + assert_eq!(r, transmute(lasx_xvflogb_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvflogb_d() { + let a = u64x4::new( + 4595455049368719724, + 4604388813668624941, + 4600944141083734502, + 4606323839843915451, + ); + let r = i64x4::new( + -4609434218613702656, + -4616189618054758400, + -4611686018427387904, + -4616189618054758400, + ); + + assert_eq!(r, transmute(lasx_xvflogb_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcvth_s_h() { + let a = i16x16::new( + 1011, -3094, -23967, -2302, -29675, 24707, 31603, 27606, -10030, -23722, -4960, 8886, + 4716, -14999, -10137, 25474, + ); + let r = i64x4::new( + 4904525550435082240, + 5006525043206676480, + -4562955662106198016, + 4931511963987271680, + ); + + assert_eq!(r, transmute(lasx_xvfcvth_s_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcvth_d_s() { + let a = u32x8::new( + 1060080295, 1063430965, 1058931094, 1057151472, 1062318208, 1041069740, 1040628608, + 1062563894, + ); + let r = i64x4::new( + 4603734568304902144, + 4602779141018746880, + 4593908495954214912, + 4605684912954015744, + ); + + assert_eq!(r, transmute(lasx_xvfcvth_d_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcvtl_s_h() { + let a = i16x16::new( + -18572, -3633, 26136, -30442, 5487, 21033, 2005, -18343, 32598, 19034, -13880, 19435, + 17289, 6097, -12500, -28967, + ); + let r = i64x4::new( + -4163050086719389696, + -5106307920098557952, + 4704924606608883712, + 4719033540912152576, + ); + + assert_eq!(r, transmute(lasx_xvfcvtl_s_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvfcvtl_d_s() { + let a = u32x8::new( + 1059008236, 1026243936, 1059912059, 1060873661, 1059957992, 1049687936, 1054458174, + 1049339368, + ); + let r = i64x4::new( + 4603775983600795648, + 4586185783978754048, + 4604285879970693120, + 4598772185639682048, + ); + + assert_eq!(r, transmute(lasx_xvfcvtl_d_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftint_w_s() { + let a = u32x8::new( + 1052778524, 1039011152, 1033877208, 1049693252, 1062408118, 1030474672, 1042423356, + 1038564616, + ); + let r = i64x4::new(0, 0, 1, 0); + + assert_eq!(r, transmute(lasx_xvftint_w_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftint_l_d() { + let a = u64x4::new( + 4592491724896152048, + 4600509745735788044, + 4603560565683465563, + 4606886496010904906, + ); + let r = i64x4::new(0, 0, 1, 1); + + assert_eq!(r, transmute(lasx_xvftint_l_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftint_wu_s() { + let a = u32x8::new( + 1063402225, 1023548352, 1060204123, 1061208993, 1059244058, 1039466608, 1058287960, + 1058024007, + ); + let r = i64x4::new(1, 4294967297, 1, 4294967297); + + assert_eq!(r, transmute(lasx_xvftint_wu_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftint_lu_d() { + let a = u64x4::new( + 4601437466420634120, + 4585269234107004032, + 4602560385055197892, + 4595388119831910552, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftint_lu_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrz_w_s() { + let a = u32x8::new( + 1045143016, 1048815390, 1047014848, 1055489924, 1060619700, 1055895842, 1061091259, + 1052720902, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrz_w_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrz_l_d() { + let a = u64x4::new( + 4603359584605772664, + 4597259202045947564, + 4606604696181460379, + 
4590200021857252112, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrz_l_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrz_wu_s() { + let a = u32x8::new( + 1063820452, 1055661474, 1056124138, 1058294578, 1014656512, 1017634272, 1061863649, + 1032276584, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrz_wu_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrz_lu_d() { + let a = u64x4::new( + 4593109369482747112, + 4606352005652581516, + 4604267331764801794, + 4603828603416455704, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrz_lu_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffint_s_w() { + let a = i32x8::new( + -1936685818, + -292241542, + -386041592, + -1489663378, + 1127778163, + -365070454, + -1830468239, + 1453047639, + ); + let r = i64x4::new( + -3635713297473937674, + -3552894890528992200, + -3625938366378905329, + 5669248528000103797, + ); + + assert_eq!(r, transmute(lasx_xvffint_s_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffint_d_l() { + let a = i64x4::new( + -3627358051950006798, + 3291026422392521824, + 9114456262655749128, + -101300809730113961, + ); + let r = i64x4::new( + -4338888956717313783, + 4883826182423482562, + 4890802832263617419, + -4362160337941248997, + ); + + assert_eq!(r, transmute(lasx_xvffint_d_l(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffint_s_wu() { + let a = u32x8::new( + 1942522276, 3012872942, 4057450175, 3500418877, 3140467966, 1802049055, 2479355692, + 3991791589, + ); + let r = i64x4::new( + 5707068753731621139, + 5715248415876700103, + 5680959067724132285, + 5723492283472660471, + ); + + assert_eq!(r, transmute(lasx_xvffint_s_wu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffint_d_lu() { + let a = u64x4::new( + 10285239871254038779, + 
10585860489684064217, + 15302850682570301194, + 12001223008770454391, + ); + let r = i64x4::new( + 4891427685477873921, + 4891574472889216707, + 4893877690756836940, + 4892265567869239358, + ); + + assert_eq!(r, transmute(lasx_xvffint_d_lu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve_b() { + let a = i8x32::new( + -75, -65, 124, 6, 28, -41, 60, 12, -41, 91, 81, -114, 54, 98, -78, -94, 13, 26, 36, + 112, -41, -74, -94, -71, 43, 54, 17, 60, -27, -89, 98, -78, + ); + let r = i64x4::new( + -2893606913523066921, + -2893606913523066921, + -5280832617179597130, + -5280832617179597130, + ); + + assert_eq!(r, transmute(lasx_xvreplve_b(transmute(a), 5))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve_h() { + let a = i16x16::new( + 10589, 16925, 2072, 2556, -20735, 27162, -30076, -21408, 26095, 24700, 11691, -31646, + 14016, 23092, 1827, 2108, + ); + let r = i64x4::new( + 719461018576357884, + 719461018576357884, + -8907411554322709406, + -8907411554322709406, + ); + + assert_eq!(r, transmute(lasx_xvreplve_h(transmute(a), -5))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve_w() { + let a = i32x8::new( + -1943637254, + 265328695, + 1624811313, + -907897952, + 733901407, + -598309268, + -2022404353, + -945690723, + ); + let r = i64x4::new( + 1139578067980687415, + 1139578067980687415, + -2569718735257041300, + -2569718735257041300, + ); + + assert_eq!(r, transmute(lasx_xvreplve_w(transmute(a), 1))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve_d() { + let a = i64x4::new( + 8108160509866679259, + 2816226171091081324, + -7945890434069746992, + 7527726914374549897, + ); + let r = i64x4::new( + 8108160509866679259, + 8108160509866679259, + -7945890434069746992, + -7945890434069746992, + ); + + assert_eq!(r, transmute(lasx_xvreplve_d(transmute(a), -6))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpermi_w() { + let a = i32x8::new( + 1434116256, + 1142162281, + 
-1871700525, + -394957889, + 382419347, + -785097055, + -1928161383, + -401992430, + ); + let b = i32x8::new( + -1595257764, + 1089333930, + -235320537, + -1276032758, + -803245169, + -82420548, + -1649409266, + 665022456, + ); + let r = i64x4::new( + -1010694009402824022, + -1696331215410035863, + -7084158850976817988, + -1726544336579699039, + ); + + assert_eq!( + r, + transmute(lasx_xvpermi_w::<217>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvandn_v() { + let a = u8x32::new( + 174, 130, 100, 230, 117, 190, 128, 90, 135, 70, 67, 190, 102, 177, 131, 213, 116, 200, + 40, 62, 198, 99, 109, 141, 122, 251, 83, 215, 87, 248, 140, 29, + ); + let b = u8x32::new( + 146, 145, 124, 157, 66, 158, 147, 40, 44, 251, 68, 171, 189, 227, 212, 251, 56, 131, + 99, 225, 136, 245, 154, 179, 245, 155, 220, 217, 4, 18, 19, 17, + ); + let r = i64x4::new( + 2311191042782138640, + 3050136072551184680, + 3644137813819196168, + 5350223724150917, + ); + + assert_eq!(r, transmute(lasx_xvandn_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvneg_b() { + let a = i8x32::new( + -41, -111, 119, -69, 55, 67, 126, -127, -123, 59, 34, -93, -12, -33, -35, -11, 89, -72, + -52, 6, 106, 79, 77, 58, -123, -99, 44, 27, 96, -32, -57, 75, + ); + let r = i64x4::new( + 9188114861941944105, + 802521495600285051, + -4128761171367671641, + -5388239603749330053, + ); + + assert_eq!(r, transmute(lasx_xvneg_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvneg_h() { + let a = i16x16::new( + -4516, 26216, -27554, -11408, 20653, 18328, -4198, -15292, 23460, 9679, -8566, 23542, + -2503, 31678, 9261, -19575, + ); + let r = i64x4::new( + 3211184880420917668, + 4304333377225928531, + -6626447107371719588, + 5510114370614593991, + ); + + assert_eq!(r, transmute(lasx_xvneg_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvneg_w() { + let a = i32x8::new( + 740574678, + 2076342027, 
+ 968647939, + 130194259, + 1872650231, + -1690505081, + -594724042, + 1453048102, + ); + let r = i64x4::new( + -8917821097720956374, + -559180081205634307, + 7260664039039148041, + -6240794077010148150, + ); + + assert_eq!(r, transmute(lasx_xvneg_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvneg_d() { + let a = i64x4::new( + -5535082554430398173, + 7802847596802572188, + -4410306127860279470, + 906750919774206543, + ); + let r = i64x4::new( + 5535082554430398173, + -7802847596802572188, + 4410306127860279470, + -906750919774206543, + ); + + assert_eq!(r, transmute(lasx_xvneg_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_b() { + let a = i8x32::new( + -1, 124, 20, 57, 41, 122, 83, 77, 119, 119, 127, 45, 107, 51, 67, 89, 59, 88, 71, -124, + 62, 101, -53, -37, 2, 102, 69, 72, -83, 115, -102, 5, + ); + let b = i8x32::new( + 108, -29, 45, -93, 78, -21, 19, 10, 52, 107, 104, 75, 31, -9, -27, 72, -68, -20, -102, + 95, 106, 38, -79, -7, 42, -112, -7, -41, 40, 124, 115, 91, + ); + let r = i64x4::new( + 218131067805364735, + 1871524972886962456, + 76576697723648496, + 131228860074087168, + ); + + assert_eq!(r, transmute(lasx_xvmuh_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_h() { + let a = i16x16::new( + 16678, 11413, -27848, -7978, -31217, -4869, 11843, 2166, -13263, -23440, 16372, 27675, + 23654, 25588, -21093, 1464, + ); + let b = i16x16::new( + -20021, -19612, 828, 8516, 14133, -5487, -7596, 26880, -23795, 2896, 7031, 19513, + -6376, 6003, -19930, -2328, + ); + let r = i64x4::new( + -291609583629571048, + 250225357332407731, + 2319361354285454031, + -14890625691814142, + ); + + assert_eq!(r, transmute(lasx_xvmuh_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_w() { + let a = i32x8::new( + -833159784, + -1689012066, + 1138643536, + 1201390084, + -1615224698, + -984104182, + -991848752, + -18112020, + ); + 
let b = i32x8::new( + -846972528, + -848270332, + -2071563046, + -1685604813, + 2085038950, + 696813713, + 2076492369, + -867396671, + ); + let r = i64x4::new( + 1432738825969009962, + -2025068907290430086, + -685737288172100118, + 15710306989437773, + ); + + assert_eq!(r, transmute(lasx_xvmuh_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_d() { + let a = i64x4::new( + -3091297468664313081, + -4254143725647386536, + -6994439148056979459, + 878201001794537760, + ); + let b = i64x4::new( + -2819683255232823594, + 272893378245750433, + -2696341058713804350, + 5752544304986593708, + ); + let r = i64x4::new( + 472521311864415951, + -62934014165103622, + 1022369767923424239, + 273863514955286020, + ); + + assert_eq!(r, transmute(lasx_xvmuh_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_bu() { + let a = u8x32::new( + 252, 82, 157, 236, 123, 56, 117, 92, 87, 103, 53, 123, 55, 40, 186, 21, 199, 125, 151, + 2, 152, 104, 145, 142, 138, 222, 115, 99, 79, 43, 91, 11, + ); + let b = u8x32::new( + 106, 138, 241, 29, 35, 19, 100, 212, 48, 52, 216, 195, 63, 32, 226, 9, 68, 212, 1, 104, + 22, 101, 248, 114, 169, 245, 173, 78, 68, 135, 101, 145, + ); + let r = i64x4::new( + 5489047988046343272, + 46167451136431120, + 4579080056940291892, + 442221464076014683, + ); + + assert_eq!(r, transmute(lasx_xvmuh_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_hu() { + let a = u16x16::new( + 63486, 10379, 4610, 59627, 39525, 8192, 13999, 30090, 39838, 4996, 62860, 23112, 32783, + 45419, 34018, 15191, + ); + let b = u16x16::new( + 50083, 9034, 31705, 24116, 14858, 32357, 59501, 26719, 43788, 29210, 55002, 25980, + 7566, 49006, 61645, 1668, + ); + let r = i64x4::new( + 6175852041879338372, + 3452908124314018560, + 2579100322063607801, + 108786773599653576, + ); + + assert_eq!(r, transmute(lasx_xvmuh_hu(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_wu() { + let a = u32x8::new( + 604375860, 434631772, 87186606, 1568632560, 3782451787, 1385975439, 3741892279, + 2636678075, + ); + let b = u32x8::new( + 2866028752, 733937737, 283660427, 1865216280, 1246451636, 3799448094, 3234768261, + 1243610100, + ); + let r = i64x4::new( + 318992658905816335, + 2925838984554208529, + 5265941737799272199, + 3278999485098399815, + ); + + assert_eq!(r, transmute(lasx_xvmuh_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmuh_du() { + let a = u64x4::new( + 9309142847278954140, + 11105915746381107654, + 776831405492317725, + 7350193390691079752, + ); + let b = u64x4::new( + 6484084708453170899, + 12483776948923073243, + 16553528344993857967, + 3939779038690448735, + ); + let r = i64x4::new( + 3272191045945883120, + 7515894102360886861, + 697104087242456940, + 1569823798457591419, + ); + + assert_eq!(r, transmute(lasx_xvmuh_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsllwil_h_b() { + let a = i8x32::new( + -80, -11, 36, -88, -14, -123, -124, -15, -73, 95, -109, 108, -41, -128, 74, 81, 42, 54, + -105, 1, -17, -78, 85, 63, -18, 22, -37, 78, -116, -76, -104, -80, + ); + let r = i64x4::new( + -396314289023943936, + -67281036482904288, + 4777859115647648, + 283732621893107440, + ); + + assert_eq!(r, transmute(lasx_xvsllwil_h_b::<4>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsllwil_w_h() { + let a = i16x16::new( + -26490, -6081, 17297, 4860, -12591, -12327, -8532, -26767, -1364, 8756, 17192, -2170, + -9517, -24859, -20497, 19179, + ); + let r = i64x4::new( + -53489037427331072, + 42749012123355136, + 77018594794627072, + -19087521822982144, + ); + + assert_eq!(r, transmute(lasx_xvsllwil_w_h::<11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsllwil_d_w() { + let a = i32x8::new( + -279919227, + 1520692612, + 58332548, + 
-1055411175, + -1879666532, + -1328702681, + 2013268804, + 1780320808, + ); + let r = i64x4::new( + -4586196615168, + 24915027755008, + -30796456460288, + -21769464725504, + ); + + assert_eq!(r, transmute(lasx_xvsllwil_d_w::<14>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsllwil_hu_bu() { + let a = u8x32::new( + 166, 242, 65, 29, 16, 173, 110, 19, 218, 174, 141, 254, 161, 96, 39, 227, 221, 101, + 204, 143, 26, 87, 89, 20, 72, 61, 5, 44, 62, 179, 22, 150, + ); + let r = i64x4::new( + 261217712426980544, + 171151904487768576, + 1288057531186289568, + 180156217344131904, + ); + + assert_eq!(r, transmute(lasx_xvsllwil_hu_bu::<5>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsllwil_wu_hu() { + let a = u16x16::new( + 28185, 27375, 29501, 18099, 10709, 55262, 57183, 25962, 46284, 59737, 9967, 49646, + 20816, 18431, 34014, 61614, + ); + let r = i64x4::new( + 1926344372325335040, + 1273603901354885120, + 4203617671699431424, + 3493526673607606272, + ); + + assert_eq!(r, transmute(lasx_xvsllwil_wu_hu::<14>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsllwil_du_wu() { + let a = u32x8::new( + 3871859378, 2804433615, 2931671754, 4116141862, 2330569940, 549563545, 2423689534, + 763790591, + ); + let r = i64x4::new( + 1039344337701306368, + 752809416264253440, + 625607604583792640, + 147522340803051520, + ); + + assert_eq!(r, transmute(lasx_xvsllwil_du_wu::<28>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsran_b_h() { + let a = i16x16::new( + -3209, -6235, 10611, -108, -9326, 31718, 21536, 23681, -6783, -12443, -19057, 16054, + 30697, -5640, -15815, -16666, + ); + let b = i16x16::new( + -29110, -4589, 15031, -23437, 23404, 22985, -4128, -14921, 3799, -12876, -14071, + -20170, -30663, -21093, 2493, -19963, + ); + let r = i64x4::new(-5107013816536599300, 0, -576745268203292981, 0); + + assert_eq!(r, transmute(lasx_xvsran_b_h(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsran_h_w() { + let a = i32x8::new( + 596228330, + -1214659999, + 1365164495, + -1509876796, + 191976733, + 887390545, + 1777692712, + -916491986, + ); + let b = i32x8::new( + 325990384, + 675640582, + 253768478, + -874708050, + -1204136396, + 185722351, + -1391425532, + -614583871, + ); + let r = i64x4::new(-7492863874014043255, 0, -5145548381371170633, 0); + + assert_eq!(r, transmute(lasx_xvsran_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsran_w_d() { + let a = i64x4::new( + 8440735619768910515, + 3831375747389155813, + -7157949860071951471, + 8075321479849390902, + ); + let b = i64x4::new( + -4836402813541090096, + -5722420231286296070, + -8822340179414145626, + 7458838578211487240, + ); + let r = i64x4::new(58054624080, 0, 1863787881113495402, 0); + + assert_eq!(r, transmute(lasx_xvsran_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssran_b_h() { + let a = i16x16::new( + 27446, 31312, 14232, -17034, -2200, 9528, 17283, 22858, -16583, -20644, -19786, -30210, + -15134, -5982, 7374, -10469, + ); + let b = i16x16::new( + 32393, 13397, -26656, -25817, -11729, -3876, 5367, 32237, -5363, 14821, 8454, -2793, + 30922, -19145, -25237, 355, + ); + let r = i64x4::new(179865806513864501, 0, -9222296776751415043, 0); + + assert_eq!(r, transmute(lasx_xvssran_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssran_h_w() { + let a = i32x8::new( + 1069406291, + -421683701, + -1805581192, + 775037443, + 2123240059, + 1014398272, + -968236564, + 1181957260, + ); + let b = i32x8::new( + -313676516, + 794950557, + -1459200584, + -1233298689, + 310419478, + 2115419690, + 370441503, + 353523551, + ); + let r = i64x4::new(281015415144451, 0, 281472829161978, 0); + + assert_eq!(r, transmute(lasx_xvssran_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvssran_w_d() { + let a = i64x4::new( + -3959032103812617007, + -6999276452061988148, + 4785867104307053316, + -5846301556546422840, + ); + let b = i64x4::new( + -9038176721428294357, + -7430682151090141786, + 3023804747709575069, + -4263412213075666259, + ); + let r = i64x4::new(-109363692856335914, 0, -713658208354305, 0); + + assert_eq!(r, transmute(lasx_xvssran_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssran_bu_h() { + let a = u16x16::new( + 15557, 60840, 1956, 59995, 38025, 11411, 47465, 2661, 64580, 57024, 5440, 30131, 5746, + 43753, 23484, 38540, + ); + let b = u16x16::new( + 22970, 29096, 60132, 33800, 43597, 36861, 5794, 9818, 31709, 42253, 40665, 26755, + 45611, 14534, 22385, 24914, + ); + let r = i64x4::new(144116287595479055, 0, 71776131929997312, 0); + + assert_eq!(r, transmute(lasx_xvssran_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssran_hu_w() { + let a = u32x8::new( + 2082097075, 1270167653, 972125472, 2358850873, 720341052, 2316145162, 1290262192, + 3046238320, + ); + let b = u32x8::new( + 2086901452, 208185378, 3688640302, 858280348, 2470849871, 2168901411, 1405490695, + 3256489998, + ); + let r = i64x4::new(254837589540863, 0, 281470681765343, 0); + + assert_eq!(r, transmute(lasx_xvssran_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssran_wu_d() { + let a = u64x4::new( + 12808251596834061909, + 18221436405775299246, + 16388143564854988150, + 17532454272773126756, + ); + let b = u64x4::new( + 5233973111979334474, + 11067258236306167045, + 5186189126720253469, + 15129384477845142857, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvssran_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarn_b_h() { + let a = i16x16::new( + 13316, 16982, 17373, -4234, 12579, 29238, 26519, -27768, 29243, -28641, -6034, -30599, + 7597, 
22800, -24346, -21360, + ); + let b = i16x16::new( + 2182, -26731, -7280, -21775, 13607, -10194, -26196, 2085, 14341, 30747, 19786, -15409, + 13019, 31558, 333, -15416, + ); + let r = i64x4::new(-7204067930850651184, 0, -5909457163402939758, 0); + + assert_eq!(r, transmute(lasx_xvsrarn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarn_h_w() { + let a = i32x8::new( + 1424546002, + -1218125754, + 2040047341, + -1355580190, + 957370543, + -1800756932, + -244296865, + -324211997, + ); + let b = i32x8::new( + -873611939, + -646116137, + -2104124404, + 269272004, + -873453569, + -222623147, + -1684845205, + 1120133990, + ); + let r = i64x4::new(4021320339558432771, 0, -5499970420202995712, 0); + + assert_eq!(r, transmute(lasx_xvsrarn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarn_w_d() { + let a = i64x4::new( + 8313795273655551715, + -4571575745587141829, + 3452416880072805381, + -3498451052052081526, + ); + let b = i64x4::new( + 1902594917407971969, + 7038774598204297904, + 1354840157561429239, + 9153650925323248775, + ); + let r = i64x4::new(-69752906595470, 0, -7240468610764767136, 0); + + assert_eq!(r, transmute(lasx_xvsrarn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarn_b_h() { + let a = i16x16::new( + 30268, -30574, -1837, 13767, -29475, -25587, -27160, 25225, 4600, 30417, 28, -6434, + -6579, 16114, -5281, -15339, + ); + let b = i16x16::new( + -29433, 6019, 25218, 19636, -20124, 25723, 21788, 20831, 32007, 16431, -14025, 1630, + -8234, 9749, 12924, 11326, + ); + let r = i64x4::new(142413695971000447, 0, -141179869986524, 0); + + assert_eq!(r, transmute(lasx_xvssrarn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarn_h_w() { + let a = i32x8::new( + 170943894, + -1558232070, + 1056252926, + -626239215, + -1035289292, + -1714887456, + 869374752, + 1218167748, + ); 
+ let b = i32x8::new( + -541237538, + -280182861, + 655685335, + 1285042104, + -1042547864, + -1616713045, + 901223026, + -913984956, + ); + let r = i64x4::new(-10414028872220672, 0, 9223104806137135104, 0); + + assert_eq!(r, transmute(lasx_xvssrarn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarn_w_d() { + let a = i64x4::new( + -7095223716985142210, + -1864464750390939278, + 3939082291268576295, + 652125571964745491, + ); + let b = i64x4::new( + -3290989318705091519, + -1709619047887212993, + 6583279263353400787, + -8657326507673774559, + ); + let r = i64x4::new(2147483648, 0, 326062786704572415, 0); + + assert_eq!(r, transmute(lasx_xvssrarn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarn_bu_h() { + let a = u16x16::new( + 26678, 38033, 32719, 23307, 55563, 49876, 43497, 48918, 15082, 47368, 47490, 13865, + 14066, 28158, 29325, 39432, + ); + let b = u16x16::new( + 14063, 62353, 26936, 63778, 59375, 39648, 62782, 47347, 52496, 47247, 21846, 59427, + 51935, 24463, 38090, 55890, + ); + let r = i64x4::new(4286578689, 0, 8163878114427135, 0); + + assert_eq!(r, transmute(lasx_xvssrarn_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarn_hu_w() { + let a = u32x8::new( + 2720431924, 4147079016, 3167137960, 1370790237, 4041948877, 3496440502, 1072767482, + 2933895593, + ); + let b = u32x8::new( + 747428871, 338187819, 2081920183, 3557659142, 2646673999, 138734404, 3410962197, + 3574237192, + ); + let r = i64x4::new(-281474976710656, 0, 2199023255552, 0); + + assert_eq!(r, transmute(lasx_xvssrarn_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarn_wu_d() { + let a = u64x4::new( + 6490501207978917237, + 8209259321665773339, + 14187940483119607818, + 18034167934937299566, + ); + let b = u64x4::new( + 16181569100899671009, + 7894668117654109960, + 16341906792341189640, + 
4752425178296070145, + ); + let r = i64x4::new(-3539373509, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvssrarn_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrln_b_h() { + let a = i16x16::new( + -8859, -11711, 4363, -9439, -25357, 1884, 29173, -24389, 21528, -30451, -30750, -2629, + -22379, -10965, 22026, 4187, + ); + let b = i16x16::new( + 21400, -30654, 29959, 14320, 6060, -24401, -522, -8436, 27927, -10967, 11921, 19837, + 3224, 2334, 27694, -1779, + ); + let r = i64x4::new(776589499955319005, 0, 285495199351976, 0); + + assert_eq!(r, transmute(lasx_xvsrln_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrln_h_w() { + let a = i32x8::new( + -741337180, + -1087033752, + 1206017450, + -177254878, + -1655113328, + -889941782, + -267978430, + 1844637616, + ); + let b = i32x8::new( + 196728630, + -568667475, + -273820408, + -1204576979, + -639636375, + 889717098, + 93317070, + -1535736032, + ); + let r = i64x4::new(-6090306652816735409, 0, -1175228277373752196, 0); + + assert_eq!(r, transmute(lasx_xvsrln_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrln_w_d() { + let a = i64x4::new( + -9145728687467639594, + 8409501532987558867, + 4702360266572413762, + -3159959081500746646, + ); + let b = i64x4::new( + 8658043654634750665, + -5736940948870912859, + -8385798465328465883, + -3467766742630042131, + ); + let r = i64x4::new(262796920316080678, 0, 1866060245111069, 0); + + assert_eq!(r, transmute(lasx_xvsrln_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrln_bu_h() { + let a = u16x16::new( + 11222, 49369, 51083, 11755, 50527, 33895, 45751, 48397, 60912, 8893, 53498, 37814, + 34588, 16791, 58737, 47927, + ); + let b = u16x16::new( + 44696, 19424, 49640, 20286, 46891, 46704, 50673, 49527, 19154, 6152, 25954, 33988, + 37143, 16014, 63839, 56839, + ); + let r = i64x4::new(-996419305685, 
0, -71773920038018305, 0); + + assert_eq!(r, transmute(lasx_xvssrln_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrln_hu_w() { + let a = u32x8::new( + 2345037823, 2695836952, 4130802340, 2404297034, 295813801, 2039155670, 3495629229, + 1556296817, + ); + let b = u32x8::new( + 294807188, 58363281, 19412242, 562851868, 1581507437, 3738447960, 1843096024, 195940565, + ); + let r = i64x4::new(2319476961249468, 0, 208855326080470286, 0); + + assert_eq!(r, transmute(lasx_xvssrln_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrln_wu_d() { + let a = u64x4::new( + 1202535702403380748, + 15707874870216391550, + 13668879554311196884, + 12302928023198114227, + ); + let b = u64x4::new( + 1500625420116916625, + 18438653662202195541, + 12192242821332678016, + 6891738943843097628, + ); + let r = i64x4::new(-1, 0, -1, 0); + + assert_eq!(r, transmute(lasx_xvssrln_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrn_b_h() { + let a = i16x16::new( + -12342, 30454, 25730, 6015, 26316, -10548, -7973, -11903, 14548, -7939, 27317, -22987, + -25067, -26999, 30994, -21757, + ); + let b = i16x16::new( + 31424, 29919, 27640, 2377, -27671, 6812, -24773, -17881, -24476, -13065, 24935, 4284, + 4227, 20246, -28660, -22488, + ); + let r = i64x4::new(-6693460433276960310, 0, -6122543899663285619, 0); + + assert_eq!(r, transmute(lasx_xvsrlrn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrn_h_w() { + let a = i32x8::new( + 48275673, + 2044228048, + 2011304917, + 727641203, + 711821092, + 1084745670, + -1100065176, + 1918073576, + ); + let b = i32x8::new( + -609574414, + 559467902, + -1150013148, + -2027938157, + -294433871, + -690493396, + 1585922176, + 1450222536, + ); + let r = i64x4::new(390723813551243448, 0, 6015496732136052023, 0); + + assert_eq!(r, transmute(lasx_xvsrlrn_h_w(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrn_w_d() { + let a = i64x4::new( + -2014408193554501338, + -6765353383424633305, + 5967977535334656496, + 3402886661353956602, + ); + let b = i64x4::new( + 5950007641993014960, + 2150696278963909567, + -4878722002685010440, + 7186750387494925249, + ); + let r = i64x4::new(4295025675, 0, -3281590872273059757, 0); + + assert_eq!(r, transmute(lasx_xvsrlrn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrn_bu_h() { + let a = u16x16::new( + 4000, 26692, 55377, 5068, 29863, 20111, 65511, 27422, 7702, 63753, 34415, 139, 25413, + 7385, 60703, 6991, + ); + let b = u16x16::new( + 60293, 44656, 25351, 5858, 32033, 34410, 41111, 15552, 22567, 60279, 27841, 635, 63102, + 61738, 21315, 12439, + ); + let r = i64x4::new(-258385232527491, 0, 4034951496335359804, 0); + + assert_eq!(r, transmute(lasx_xvssrlrn_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrn_hu_w() { + let a = u32x8::new( + 1512713352, 3525452897, 3680819492, 4269631286, 1077814176, 4243464555, 472893356, + 2300045605, + ); + let b = u32x8::new( + 677817847, 3453937427, 172488718, 1972766946, 1046876255, 486725940, 1920931524, + 3626282368, + ); + let r = i64x4::new(-3854303052, 0, -4029743103, 0); + + assert_eq!(r, transmute(lasx_xvssrlrn_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrn_wu_d() { + let a = u64x4::new( + 4599848732973711922, + 15463958724268349352, + 4237045593978887151, + 9203743234400791071, + ); + let b = u64x4::new( + 15971018346755767904, + 235976279705162838, + 15093271767346221587, + 12421981949945891560, + ); + let r = i64x4::new(-3223981555, 0, 35952127557763071, 0); + + assert_eq!(r, transmute(lasx_xvssrlrn_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrstpi_b() { + let a = i8x32::new( + -16, -22, -111, -51, 76, 
5, -7, -91, 99, -21, 88, -22, 39, 49, 5, -92, 64, -124, 62, + 98, 108, -72, 96, -71, 50, 121, -20, -59, 69, 86, -45, -4, + ); + let b = i8x32::new( + 34, 105, -73, 60, 0, 99, -75, -90, -92, -86, 97, 72, 28, -72, 89, 120, 9, -116, 91, 83, + -104, 9, -13, -69, -74, 11, 0, -65, -1, -29, -117, -97, + ); + let r = i64x4::new( + -6487147960825943312, + -6627837229100635390, + -5088864803284417472, + -228744298392422143, + ); + + assert_eq!( + r, + transmute(lasx_xvfrstpi_b::<24>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrstpi_h() { + let a = i16x16::new( + 20931, 3906, -9803, -1590, 13500, -5932, 24528, -5092, 5805, 13930, 18709, -29274, + -4438, -28349, -16792, -12293, + ); + let b = i16x16::new( + 25543, -11013, -16650, -29925, 4461, 18433, 13374, 9428, 26865, -4164, -13533, -10962, + -8190, -12396, 472, 9930, + ); + let r = i64x4::new( + -447545208418971197, + -1433165230546602820, + -8239898463019854163, + -3459962532381069654, + ); + + assert_eq!( + r, + transmute(lasx_xvfrstpi_h::<10>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrstp_b() { + let a = i8x32::new( + -104, -22, 61, 22, 9, -98, -4, 16, 115, -71, 58, 60, -74, 82, 83, 120, 120, -76, 92, + -20, 37, 35, -57, -10, 47, -90, -97, -3, 27, -117, 77, 75, + ); + let b = i8x32::new( + 29, 125, -59, -37, -90, 2, -50, -85, -72, 9, 38, 58, -122, 62, 66, -25, 27, 108, -84, + 1, -6, 9, -62, 80, 77, 16, 68, 121, -110, -117, -33, 90, + ); + let c = i8x32::new( + 122, -19, -9, 106, -21, 115, -78, 36, -91, -76, 31, -109, -81, -42, 64, 54, -42, 104, + -10, 41, 36, -38, 119, 49, -46, 79, -83, 96, -51, 113, -126, 105, + ); + let r = i64x4::new( + 1224026960602983064, + 8670364650262673779, + -719974344639597448, + 5426146078386791983, + ); + + assert_eq!( + r, + transmute(lasx_xvfrstp_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrstp_h() { + let a = i16x16::new( + 
-9233, 24063, -20305, -23399, -22605, 11453, -986, -31974, 19489, -22401, -5866, + -32108, -8271, 27096, -1449, -1571, + ); + let b = i16x16::new( + -27552, -7496, 14541, 20848, -24250, -18305, -23029, -15273, -2721, -22998, 32468, + 11610, -23627, -30946, 1373, -6292, + ); + let c = i16x16::new( + -14010, 12802, 15942, 32257, 32320, 28150, 20653, -9131, 4498, -8203, 4826, 11234, + -20272, 17945, -15074, 28179, + ); + let r = i64x4::new( + -6586038712809825297, + -8999880904595888205, + -9037598549398827999, + -441921935067521103, + ); + + assert_eq!( + r, + transmute(lasx_xvfrstp_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvshuf4i_d() { + let a = i64x4::new( + -8852874241090285557, + -6166977094442369600, + 3546810114463111685, + 2862787957781039790, + ); + let b = i64x4::new( + 7077230945960720129, + -5857643695380455375, + -8499609572374301387, + 9199878426816461564, + ); + let r = i64x4::new( + -5857643695380455375, + -8852874241090285557, + 9199878426816461564, + 3546810114463111685, + ); + + assert_eq!( + r, + transmute(lasx_xvshuf4i_d::<115>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbsrl_v() { + let a = i8x32::new( + 79, 63, 116, -13, 32, -126, 102, -10, -64, 71, -81, -118, -128, -14, 21, 13, 75, 38, 6, + 30, -2, 62, 83, 84, 37, -74, -123, 97, -18, -91, -74, 122, + ); + let r = i64x4::new( + -691722414719746225, + 942926330900465600, + 6076269583399265867, + 8842437361645499941, + ); + + assert_eq!(r, transmute(lasx_xvbsrl_v::<0>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvbsll_v() { + let a = i8x32::new( + -101, -112, 50, 67, 51, 4, 101, -35, 34, 44, 17, -5, -113, 12, 52, 63, -61, 11, -55, + 12, -55, 6, -98, -116, -104, -58, -93, -35, -18, 109, -49, 69, + ); + let r = i64x4::new( + -2493582200462471013, + 4554278935710477346, + -8314200401506661437, + 5030360181484275352, + ); + + assert_eq!(r, 
transmute(lasx_xvbsll_v::<0>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvextrins_b() { + let a = i8x32::new( + 17, -80, 64, 44, -72, 82, -2, 38, -55, -73, 25, 31, 4, -29, -17, -48, 104, -21, -34, + -20, -21, 70, -35, 46, 99, -119, -21, 1, -57, -91, -18, 20, + ); + let b = i8x32::new( + -77, -46, -33, 123, 16, 123, -111, 58, 36, -70, 57, -6, -59, 45, -77, -82, -98, -91, + -44, -27, -123, 108, -117, 80, 118, -39, -48, -95, 85, -53, 92, 73, + ); + let r = i64x4::new( + 2809773906502660113, + -3391242387545540663, + 3376932729242184552, + 1508325199364983139, + ); + + assert_eq!( + r, + transmute(lasx_xvextrins_b::<69>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvextrins_h() { + let a = i16x16::new( + -10446, -20013, -2609, -3677, 25411, -15077, 11399, 31407, -25336, 8187, 17545, 4284, + 14539, -25105, -16568, -899, + ); + let b = i16x16::new( + -17598, -13358, 1810, -11305, -19139, 20824, 10197, 16587, 27552, -14288, 10157, + -25428, -25392, -10580, -28041, 20313, + ); + let r = i64x4::new( + 2870470609909045042, + 8840333555190686531, + -7892764466205713144, + -252835685454628661, + ); + + assert_eq!( + r, + transmute(lasx_xvextrins_h::<190>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvextrins_w() { + let a = i32x8::new( + 538640697, + -1247440870, + 2006632382, + -1215324238, + -1411224161, + -1343292937, + -407107379, + -1849972197, + ); + let b = i32x8::new( + 1928001842, + 817819193, + -1886180706, + -2057556111, + -1558391607, + 1824082297, + -341759024, + 147045346, + ); + let r = i64x4::new( + -5357717739525968327, + -5219777854239488066, + -5769399231538706055, + -7945570080736409395, + ); + + assert_eq!( + r, + transmute(lasx_xvextrins_w::<133>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvextrins_d() { + let a = i64x4::new( + -7415577103741432638, + 9028147385060226899, + 
3806483413885303329, + -8139040440396540849, + ); + let b = i64x4::new( + -7025567873801693340, + 8074885789654734557, + -9150208635842546941, + -6790202101278745327, + ); + let r = i64x4::new( + -7415577103741432638, + -7025567873801693340, + 3806483413885303329, + -9150208635842546941, + ); + + assert_eq!( + r, + transmute(lasx_xvextrins_d::<210>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmskltz_b() { + let a = i8x32::new( + 123, 97, -46, 106, -84, -121, 69, 50, 76, -32, -42, 117, -89, 121, 85, 101, 103, 26, + -117, 20, -90, 44, 126, -128, -120, 12, -28, -18, 45, 77, 45, -59, + ); + let r = i64x4::new(5684, 0, 36244, 0); + + assert_eq!(r, transmute(lasx_xvmskltz_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmskltz_h() { + let a = i16x16::new( + -9300, 15427, 23501, 8110, 29557, -8385, -18123, -869, 19048, 30280, 32130, 6792, 3533, + -19264, -7144, 21429, + ); + let r = i64x4::new(225, 0, 96, 0); + + assert_eq!(r, transmute(lasx_xvmskltz_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmskltz_w() { + let a = i32x8::new( + -1225647162, + 786607282, + -476336095, + -591696091, + 1992561919, + -832745020, + 1971757146, + -1595190261, + ); + let r = i64x4::new(13, 0, 10, 0); + + assert_eq!(r, transmute(lasx_xvmskltz_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmskltz_d() { + let a = i64x4::new( + 1070935900765754723, + 8590124656098588796, + 2469446778159209649, + 5778474674811894997, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvmskltz_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsigncov_b() { + let a = i8x32::new( + 88, -3, -96, 121, 86, -94, 40, 5, -55, -8, 84, 31, -93, -72, -28, 58, -87, 56, 8, 94, + 97, -72, 116, 71, 73, -21, -109, 123, 81, 125, 24, -23, + ); + let b = i8x32::new( + 92, -37, 80, 100, 79, -105, -24, 16, -113, -66, -48, 32, 107, 11, -100, -43, 7, 
99, 24, + 38, 84, -40, 55, -73, -112, 84, 59, -88, -102, 83, -65, 87, + ); + let r = i64x4::new( + 1218339488916317532, + -3070059025110384015, + -5244678899168156679, + -6215157037026399088, + ); + + assert_eq!(r, transmute(lasx_xvsigncov_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsigncov_h() { + let a = i16x16::new( + 14096, 7677, -14561, -21692, 19661, -15938, 19461, 3041, -31532, 19690, -2669, -20964, + -23817, -21867, 16694, -15396, + ); + let b = i16x16::new( + -15034, -7726, 181, 30057, -22414, -21472, 21361, 4765, -12995, -32566, 7068, -18429, + -22953, -7497, 14762, -10184, + ); + let r = i64x4::new( + -8460012673615870650, + 1341320010229917810, + 5187553466109276867, + 2866604565619890601, + ); + + assert_eq!(r, transmute(lasx_xvsigncov_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsigncov_w() { + let a = i32x8::new( + -1256172687, + 1338321047, + 354406336, + -462763275, + 187721986, + -940691165, + -1179299422, + -1424929206, + ); + let b = i32x8::new( + -118338197, + 331139357, + 644951541, + -1931633026, + -3454036, + -520396646, + 1909538523, + 41991994, + ); + let r = i64x4::new( + 1422232708851806869, + 8296300675188469237, + 2235086579809602476, + -180354238538399451, + ); + + assert_eq!(r, transmute(lasx_xvsigncov_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsigncov_d() { + let a = i64x4::new( + 3750427451628106019, + -1382697069711266350, + -503292598450220754, + -2919664281580184898, + ); + let b = i64x4::new( + -1642478899758371170, + 4653675866380276086, + -6612106063359352920, + -293290471183495768, + ); + let r = i64x4::new( + -1642478899758371170, + -4653675866380276086, + 6612106063359352920, + 293290471183495768, + ); + + assert_eq!(r, transmute(lasx_xvsigncov_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmadd_s() { + let a = u32x8::new( + 1062320727, 
1052840336, 1056978973, 1021320864, 1047491708, 1057181752, 1065099904, + 1057641824, + ); + let b = u32x8::new( + 1031536608, 1056182872, 1060915258, 1049713234, 1050950720, 1059791774, 1059318083, + 1051234082, + ); + let c = u32x8::new( + 1061252634, 1060194113, 1034936984, 1061661636, 1060064922, 1006614016, 1059417135, + 1050039034, + ); + let r = i64x4::new( + 4566451999453631823, + 4560361667101758314, + 4518113787508851321, + 4535521032267853298, + ); + + assert_eq!( + r, + transmute(lasx_xvfmadd_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmadd_d() { + let a = u64x4::new( + 4602842753634531585, + 4595402401334175048, + 4601214875019142940, + 4604030967498454410, + ); + let b = u64x4::new( + 4598948128295145186, + 4601733706721520294, + 4603769303486824150, + 4604117155996961650, + ); + let c = u64x4::new( + 4580452284864657312, + 4600663302047027414, + 4606609389472923777, + 4596161355449103520, + ); + let r = i64x4::new( + 4595235980529776159, + 4602058356150948088, + 4608067122875931060, + 4603786516863404306, + ); + + assert_eq!( + r, + transmute(lasx_xvfmadd_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmsub_s() { + let a = u32x8::new( + 1053706718, 1064190592, 1065194002, 1049204796, 1058065270, 1054990514, 1052198782, + 1061344475, + ); + let b = u32x8::new( + 1052072326, 1062946662, 1062413428, 1054564788, 1064477491, 1062331484, 1058685254, + 1048115308, + ); + let c = u32x8::new( + 1051545776, 1052538894, 1034162080, 1012676672, 1042769032, 1060397176, 1036487208, + 1047947488, + ); + let r = i64x4::new( + 4529410253708099330, + 4454144102220210572, + -4706385850068449532, + -4799792193244875572, + ); + + assert_eq!( + r, + transmute(lasx_xvfmsub_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfmsub_d() { + let a = u64x4::new( + 4600920645370262278, + 
4606351881217070920, + 4605318237650453082, + 4606278590304909259, + ); + let b = u64x4::new( + 4587150424227513280, + 4605394922115166652, + 4600659107885415374, + 4603309679459912257, + ); + let c = u64x4::new( + 4599568550479871818, + 4607122878168983077, + 4594751414351299244, + 4606268515473003992, + ); + let r = i64x4::new( + -4624155064942819898, + -4624913073348173037, + 4594667261719455656, + -4622752308912416305, + ); + + assert_eq!( + r, + transmute(lasx_xvfmsub_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfnmadd_s() { + let a = u32x8::new( + 1039663832, 1061072453, 1059429769, 1055008244, 1064943875, 1031669664, 1057273263, + 1059384715, + ); + let b = u32x8::new( + 1048864374, 1058998841, 1057533884, 1058902812, 1062707313, 1041334952, 1042897040, + 1049077472, + ); + let c = u32x8::new( + 1059665677, 1057796240, 1060649005, 1032551792, 1054598086, 1052603136, 1052306030, + 1040847308, + ); + let r = i64x4::new( + -4647271481419416743, + -4706804117592845625, + -4701205915483756606, + -4711770517136945317, + ); + + assert_eq!( + r, + transmute(lasx_xvfnmadd_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfnmadd_d() { + let a = u64x4::new( + 4604608697786889945, + 4602612366462296312, + 4601635234875928748, + 4605244074506891174, + ); + let b = u64x4::new( + 4589783027170388200, + 4605787546878420832, + 4591185942485517728, + 4604114400983891746, + ); + let c = u64x4::new( + 4606499207929193159, + 4602090155238640016, + 4605981237511158859, + 4603473909221104351, + ); + let r = i64x4::new( + -4616415827217001188, + -4617209466841496233, + -4617030428660783542, + -4615713336403701073, + ); + + assert_eq!( + r, + transmute(lasx_xvfnmadd_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfnmsub_s() { + let a = u32x8::new( + 1064224098, 1059043256, 1061588698, 1059572349, 
1061959798, 1042453224, 1036562968, + 1056461556, + ); + let b = u32x8::new( + 1061205590, 1049560178, 1059192066, 1061005027, 1054917726, 1061034231, 1058796762, + 1061794461, + ); + let c = u32x8::new( + 1025067264, 1063481799, 1058824148, 1061822410, 1057397992, 1059256144, 1059389703, + 1052234474, + ); + let r = i64x4::new( + 4555061808459295114, + 4511379579414633985, + 4540975425961318277, + -4846656492652873586, + ); + + assert_eq!( + r, + transmute(lasx_xvfnmsub_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfnmsub_d() { + let a = u64x4::new( + 4585643461608569024, + 4605011746261589541, + 4602843862374894962, + 4596919096453581616, + ); + let b = u64x4::new( + 4603616678040017345, + 4599749349009999872, + 4603258706135001603, + 4603783118222515934, + ); + let c = u64x4::new( + 4605444602262387771, + 4593682097024038340, + 4599004459823205548, + 4595599337151422272, + ); + let r = i64x4::new( + 4605237590347011909, + -4629492016214849095, + 4570217977506301115, + 4586582751878211231, + ); + + assert_eq!( + r, + transmute(lasx_xvfnmsub_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrne_w_s() { + let a = u32x8::new( + 1064249874, 1024076480, 1048811302, 1045498088, 1062853975, 1050962974, 1062155621, + 1062916560, + ); + let r = i64x4::new(1, 0, 1, 4294967297); + + assert_eq!(r, transmute(lasx_xvftintrne_w_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrne_l_d() { + let a = u64x4::new( + 4591358556337662184, + 4604590073262881231, + 4606169601365380521, + 4596710878897869904, + ); + let r = i64x4::new(0, 1, 1, 0); + + assert_eq!(r, transmute(lasx_xvftintrne_l_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrp_w_s() { + let a = u32x8::new( + 1036136200, 1059809120, 1051167120, 1057100667, 1042968648, 1063707411, 1063195788, + 1061888439, + ); + let r = 
i64x4::new(4294967297, 4294967297, 4294967297, 4294967297); + + assert_eq!(r, transmute(lasx_xvftintrp_w_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrp_l_d() { + let a = u64x4::new( + 4585505041718488768, + 4601087510575360504, + 4599806583262831052, + 4595165936320641380, + ); + let r = i64x4::new(1, 1, 1, 1); + + assert_eq!(r, transmute(lasx_xvftintrp_l_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrm_w_s() { + let a = u32x8::new( + 1057789434, 1054177120, 1060875884, 1015620960, 1056089726, 1050746790, 1022621568, + 1056386214, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrm_w_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrm_l_d() { + let a = u64x4::new( + 4603222821759326038, + 4603232821889844771, + 4606305215983768062, + 4597476035020392948, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrm_l_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftint_w_d() { + let a = u64x4::new( + 4590993770331821784, + 4601838197892262822, + 4578381772647210176, + 4602974423286505396, + ); + let b = u64x4::new( + 4598764447835256340, + 4585609299219476064, + 4605520309365062132, + 4604323432136071446, + ); + let r = i64x4::new(0, 0, 4294967297, 4294967296); + + assert_eq!(r, transmute(lasx_xvftint_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffint_s_l() { + let a = i64x4::new( + -4594969696763236122, + -6690984686308779928, + 4592510749553568480, + -8490928078748263946, + ); + let b = i64x4::new( + 7654740714754719601, + 4897940113865969438, + 5957877121068211806, + -7012236593339611923, + ); + let r = i64x4::new( + 6811678997581428276, + -2397684876741504398, + -2395175097567191741, + -2383622820954443903, + ); + + assert_eq!(r, transmute(lasx_xvffint_s_l(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] 
+unsafe fn test_lasx_xvftintrz_w_d() { + let a = u64x4::new( + 4596886727296090208, + 4602058111141126830, + 4582692816602031424, + 4600921050551730962, + ); + let b = u64x4::new( + 4594050684390877628, + 4605818316975650567, + 4606490477487570572, + 4599704434038566766, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrz_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrp_w_d() { + let a = u64x4::new( + 4589404978031986168, + 4606941481982333029, + 4594924203912769356, + 4597184562267174648, + ); + let b = u64x4::new( + 4604805957576412467, + 4605348751714663856, + 4603064242276236026, + 4597541345541924472, + ); + let r = i64x4::new(4294967297, 4294967297, 4294967297, 4294967297); + + assert_eq!(r, transmute(lasx_xvftintrp_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrm_w_d() { + let a = u64x4::new( + 4606666486099429909, + 4601456430561276036, + 4591400719822715992, + 4601150269438174040, + ); + let b = u64x4::new( + 4601898131328640396, + 4603752803994862807, + 4602971578268526784, + 4607166074459830797, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrm_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrne_w_d() { + let a = u64x4::new( + 4603578020825687150, + 4602331063342270938, + 4607074154698712999, + 4606049262608662240, + ); + let b = u64x4::new( + 4604303573618654118, + 4605305650790770757, + 4594624155139674016, + 4597424226611516804, + ); + let r = i64x4::new(4294967297, 1, 0, 4294967297); + + assert_eq!( + r, + transmute(lasx_xvftintrne_w_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftinth_l_s() { + let a = u32x8::new( + 1060793948, 1047845056, 1008256256, 1062225417, 1052160478, 1061682279, 1017836000, + 1061679812, + ); + let r = i64x4::new(0, 1, 0, 1); + + assert_eq!(r, 
transmute(lasx_xvftinth_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintl_l_s() { + let a = u32x8::new( + 1049069272, 1055517436, 1058463365, 1060600954, 1053028452, 1058398899, 1062375625, + 1064635140, + ); + let r = i64x4::new(0, 0, 0, 1); + + assert_eq!(r, transmute(lasx_xvftintl_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffinth_d_w() { + let a = i32x8::new( + -158173087, + -27800957, + 1158068870, + 278371207, + 106487733, + -1801338365, + -1891310322, + -527557220, + ); + let r = i64x4::new( + 4742644100887478272, + 4733449902607040512, + -4477652498412208128, + -4485741486683455488, + ); + + assert_eq!(r, transmute(lasx_xvffinth_d_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvffintl_d_w() { + let a = i32x8::new( + -1977997193, + -1979528264, + 836984862, + -201390618, + 1072540196, + -288815065, + -387961600, + -174426466, + ); + let r = i64x4::new( + -4477288907322425344, + -4477282485545205760, + 4742280327634878464, + -4489746915386195968, + ); + + assert_eq!(r, transmute(lasx_xvffintl_d_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrzh_l_s() { + let a = u32x8::new( + 1056351604, 1063464564, 1064583750, 1057296352, 1041896748, 1045603520, 1056628952, + 1057862380, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrzh_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrzl_l_s() { + let a = u32x8::new( + 1037928632, 1054629686, 1054996640, 1060820265, 1056507210, 1065161891, 1061180536, + 1053528304, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrzl_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrph_l_s() { + let a = u32x8::new( + 1059417377, 1040833844, 1045894588, 1063338397, 1056670958, 1064221427, 1042275464, + 1040737828, + ); + let r = i64x4::new(1, 1, 1, 1); + + assert_eq!(r, 
transmute(lasx_xvftintrph_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrpl_l_s() { + let a = u32x8::new( + 1050993336, 1043212320, 1055353974, 1052104546, 1049173258, 1052001038, 1062670733, + 1064792601, + ); + let r = i64x4::new(1, 1, 1, 1); + + assert_eq!(r, transmute(lasx_xvftintrpl_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrmh_l_s() { + let a = u32x8::new( + 1050100898, 1059826813, 1064587005, 1060468211, 1054982654, 1058930731, 1048352436, + 1059136196, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrmh_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrml_l_s() { + let a = u32x8::new( + 1064932806, 1062327525, 1041996288, 1056298428, 1055943822, 1051470160, 1059582897, + 1054164774, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvftintrml_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrneh_l_s() { + let a = u32x8::new( + 1064823377, 1059036914, 1061655628, 1036637816, 1061056914, 1057581036, 1048480136, + 1057425421, + ); + let r = i64x4::new(1, 0, 0, 1); + + assert_eq!(r, transmute(lasx_xvftintrneh_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvftintrnel_l_s() { + let a = u32x8::new( + 1051117486, 1064733813, 1057650292, 1054601720, 1060065354, 1042171252, 1055495904, + 1060965253, + ); + let r = i64x4::new(0, 1, 1, 0); + + assert_eq!(r, transmute(lasx_xvftintrnel_l_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrne_s() { + let a = u32x8::new( + 1042191636, 1057149553, 1054208692, 1059070307, 1043946500, 1058368204, 1065187361, + 1055502338, + ); + let r = i64x4::new( + 4575657221408423936, + 4575657221408423936, + 4575657221408423936, + 1065353216, + ); + + assert_eq!(r, transmute(lasx_xvfrintrne_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvfrintrne_d() { + let a = u64x4::new( + 4595948761324680740, + 4599917619990044612, + 4603982357523822254, + 4602664966963180606, + ); + let r = i64x4::new(0, 0, 4607182418800017408, 0); + + assert_eq!(r, transmute(lasx_xvfrintrne_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrz_s() { + let a = u32x8::new( + 1058076241, 1061463006, 1057120056, 1053378848, 1048357040, 1060603738, 1014341632, + 1064059317, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfrintrz_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrz_d() { + let a = u64x4::new( + 4601618692275492658, + 4600007493587145094, + 4605876890989719085, + 4600499427656278116, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfrintrz_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrp_s() { + let a = u32x8::new( + 1061637682, 1060303004, 1048139028, 1064254459, 1060496485, 1063015260, 1050062098, + 1060031891, + ); + let r = i64x4::new( + 4575657222473777152, + 4575657222473777152, + 4575657222473777152, + 4575657222473777152, + ); + + assert_eq!(r, transmute(lasx_xvfrintrp_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrp_d() { + let a = u64x4::new( + 4596277205079353652, + 4602920367780564368, + 4605931026619472063, + 4600342272679781386, + ); + let r = i64x4::new( + 4607182418800017408, + 4607182418800017408, + 4607182418800017408, + 4607182418800017408, + ); + + assert_eq!(r, transmute(lasx_xvfrintrp_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrm_s() { + let a = u32x8::new( + 1052396158, 1055096688, 1056860582, 1050315636, 1062873063, 1057089721, 1060819485, + 1031018704, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfrintrm_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfrintrm_d() { + let a = u64x4::new( + 
4593814259274657568, + 4602367426014166064, + 4595326936223928604, + 4605375676692406871, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfrintrm_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvld() { + let a: [i8; 32] = [ + 86, 26, -5, 19, -6, -100, -44, 108, -106, 70, -118, 126, 31, -112, -39, -11, -120, -25, + -62, -45, 43, 83, 3, -116, 87, -28, -69, -91, -68, -126, -96, -88, + ]; + let r = i64x4::new( + 7842065449049856598, + -731394999529617770, + -8357745035768043640, + -6295888532317936553, + ); + + assert_eq!(r, transmute(lasx_xvld::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvst() { + let a = i8x32::new( + 88, 98, -23, 115, 114, -11, 37, 91, -109, 37, -83, 109, -95, -96, -38, 5, -13, 112, + 113, -80, 90, -37, -112, -76, 57, -113, -52, -109, -125, -124, -52, -18, + ); + let mut o: [i8; 32] = [ + 52, -18, -107, -17, 53, 34, 71, -16, 7, -75, -38, -105, -114, 37, 36, 62, -91, 104, 87, + 85, 74, -94, -53, -98, -77, -7, -17, 107, -9, -78, -64, -68, + ]; + let r = i64x4::new( + 6567925503509488216, + 421826130302805395, + -5435603567682424589, + -1239470096778490055, + ); + + lasx_xvst::<0>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvstelm_b() { + let a = i8x32::new( + -5, -21, 65, 59, 32, 48, -6, 103, 97, 7, 43, -113, -102, 30, -32, -75, 71, 80, 71, -83, + 73, -113, -77, 110, -111, -85, 8, 101, -41, 127, -20, 92, + ); + let mut o: [i8; 32] = [ + -29, -20, -68, -24, 64, 3, -46, 0, -51, -114, 2, 12, 120, -127, -52, 114, -102, -91, + -118, 57, 124, 0, -68, -77, -33, 18, -124, -23, -108, 127, -65, -18, + ]; + let r = i64x4::new( + 59113322426723335, + 8272128968170311373, + -5495516911757515366, + -1243134694581333281, + ); + + lasx_xvstelm_b::<0, 9>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvstelm_h() { + let a = i16x16::new( 
+ -11648, -19047, -15513, 1973, 24885, -9476, 7637, 28480, 13018, 7333, -12654, 16215, + 26055, 26861, -1163, 20219, + ); + let mut o: [i8; 32] = [ + 23, 88, -111, 29, 32, 115, 1, -69, 82, 35, 2, 27, 44, -48, 117, -60, 88, 72, 106, -42, + 73, 79, 56, -63, 58, 55, -84, -49, 124, 26, -123, 64, + ]; + let r = i64x4::new( + -4971565931868119595, + -4290294182150266030, + -4523778647145166760, + 4649151313692342074, + ); + + lasx_xvstelm_h::<0, 6>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvstelm_w() { + let a = i32x8::new( + -1636077495, + -1913212378, + 402520069, + 1598923340, + -615956201, + -719313542, + -1002278595, + -1955360887, + ); + let mut o: [i8; 32] = [ + -111, 55, 4, 18, 52, 121, -113, 36, -50, 17, -101, 124, -119, -45, -16, 64, 57, -59, + -31, 29, -24, 92, 56, -72, 60, 90, 23, -26, -15, -40, -18, 75, + ]; + let r = i64x4::new( + 2634457572879213132, + 4679472600292463054, + -5172282020031511239, + 5471549130760739388, + ); + + lasx_xvstelm_w::<0, 3>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvstelm_d() { + let a = i64x4::new( + -7526664681033668234, + 9215683190885160466, + -7392730922884510993, + 8273081902285331784, + ); + let mut o: [i8; 32] = [ + -19, -84, 7, -70, 72, -73, -100, -123, 14, -16, 82, 9, -66, -78, -112, -3, 124, 110, + 103, -66, -1, 109, 69, 70, 103, 8, -6, 99, -125, -94, 100, -56, + ]; + let r = i64x4::new( + -7526664681033668234, + -175443856197488626, + 5063574301226528380, + -4006899083251152793, + ); + + lasx_xvstelm_d::<0, 0>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvinsve0_w() { + let a = i32x8::new( + -1106154721, + 634412656, + -1100544436, + -1769767887, + -1012647261, + 2136829593, + 1072879419, + -1993022923, + ); + let b = i32x8::new( + -2041359214, + -474600924, + 276373021, + 687517976, + 
-1931658504, + 392817806, + -1316466623, + 736368242, + ); + let r = i64x4::new( + 2724781612877310751, + -7601095192981600692, + -8767571060235945309, + -8559968273390446789, + ); + + assert_eq!( + r, + transmute(lasx_xvinsve0_w::<5>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvinsve0_d() { + let a = i64x4::new( + -3740248607430046939, + 1767794107206960110, + -9137064168958473066, + -7852825851844941424, + ); + let b = i64x4::new( + 431855113748835185, + 3288039304988384340, + -5708126726787922006, + 4289161164888851504, + ); + let r = i64x4::new( + -3740248607430046939, + 1767794107206960110, + -9137064168958473066, + 431855113748835185, + ); + + assert_eq!( + r, + transmute(lasx_xvinsve0_d::<3>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve_w() { + let a = i32x8::new( + -1564826515, + -458927896, + 1138467779, + 1659848021, + -885088458, + -737326650, + -47750787, + -414548426, + ); + let r = i64x4::new(1138467779, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvpickve_w::<2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve_d() { + let a = i64x4::new( + 8402618222187512066, + -7057900739934826301, + -6839567064019939265, + 8714541331515896284, + ); + let r = i64x4::new(8402618222187512066, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvpickve_d::<0>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrn_b_h() { + let a = i16x16::new( + -798, 1398, -623, -4797, -18857, 26443, 16384, -16263, 21881, -27973, -23498, -9777, + 26657, -16754, 19690, 951, + ); + let b = i16x16::new( + -3568, 18618, 18284, -20348, 30931, -13978, -28022, 30586, 8502, -29737, 27777, 2457, + -24560, 7519, 9137, 13151, + ); + let r = i64x4::new(3463408299017240959, 0, 35748968851799935, 0); + + assert_eq!(r, transmute(lasx_xvssrlrn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvssrlrn_h_w() { + let a = i32x8::new( + -709437285, + 1569944173, + 840839991, + 1276120983, + -1380474679, + 1717565103, + 1662438257, + 41628460, + ); + let b = i32x8::new( + 1222449199, + -859865335, + -1646420307, + 2051326847, + -1328302771, + -2115559725, + 275103578, + 95546356, + ); + let r = i64x4::new(422210317549567, 0, 11259106657337343, 0); + + assert_eq!(r, transmute(lasx_xvssrlrn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrn_w_d() { + let a = i64x4::new( + 6389812745870818755, + 8763001741694997752, + -1562866978917178065, + 9133752987191586761, + ); + let b = i64x4::new( + -7467566672980641247, + -2330366242646492110, + 7828472137399229278, + 5811058912891800907, + ); + let r = i64x4::new(33428474336875, 0, 9223372034707292159, 0); + + assert_eq!(r, transmute(lasx_xvssrlrn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrln_b_h() { + let a = i16x16::new( + 1623, -14920, 1170, 12351, -25346, 8330, 32675, 4619, -31613, -16397, 9976, -5234, + 20684, 31015, -27130, 426, + ); + let b = i16x16::new( + 20578, -6736, -13719, -3491, 28139, 17968, -30166, 24185, -29828, 6212, 17476, 15478, + -21520, -14119, -3397, 14549, + ); + let r = i64x4::new(657383790217428863, 0, 941881790371430152, 0); + + assert_eq!(r, transmute(lasx_xvssrln_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrln_h_w() { + let a = i32x8::new( + -1842464126, + -1331342000, + -1187112242, + 453446042, + 960156121, + -1968872136, + -603223901, + -1134334019, + ); + let b = i32x8::new( + -592357508, + 969628508, + 2062627988, + -1366484086, + -1901031633, + 1742501272, + -1277076789, + 2022930291, + ); + let r = i64x4::new(9223103287866884105, 0, 1696871892814295669, 0); + + assert_eq!(r, transmute(lasx_xvssrln_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrln_w_d() { + let a = 
i64x4::new( + 6056280160463852946, + 3937140140114293823, + -6849002485680852776, + 8030598250493987596, + ); + let b = i64x4::new( + 7030461610430840286, + 3499193251729970464, + 1325445643267409553, + -1126160333119085812, + ); + let r = i64x4::new(3937140138060021759, 0, 9223372034707292159, 0); + + assert_eq!(r, transmute(lasx_xvssrln_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvorn_v() { + let a = i8x32::new( + -112, -60, -62, -15, 46, 34, 52, -37, 122, -78, -19, 95, -80, -17, -47, -38, 49, -4, + -92, -111, 17, 38, 13, -58, -51, -39, -94, -58, -123, -32, 27, -12, + ); + let b = i8x32::new( + 79, -128, 107, 13, 36, -50, 69, -31, 63, 17, -79, 95, -58, 12, 0, 94, -33, -112, -46, + 80, 57, 78, 40, 71, -44, 127, 1, 41, -79, -109, -55, 5, + ); + let r = i64x4::new( + -2324363183275966544, + -288230676800471302, + -81144131007676623, + -126121887133672977, + ); + + assert_eq!(r, transmute(lasx_xvorn_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvldi() { + let r = i64x4::new( + -1679332213128, + -1679332213128, + -1679332213128, + -1679332213128, + ); + + assert_eq!(r, transmute(lasx_xvldi::<2680>())); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvldx() { + let a: [i8; 32] = [ + 108, -99, 50, 65, 4, -113, -105, 42, 11, 14, 121, -66, -35, -37, -126, -77, -17, 83, + -77, 28, -33, -105, -107, 20, 119, 103, 51, 7, -108, 37, -15, -93, + ]; + let r = i64x4::new( + 3069078919512759660, + -5511601248518205941, + 1483258636803462127, + -6633479458433833097, + ); + + assert_eq!(r, transmute(lasx_xvldx(a.as_ptr(), 0))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvstx() { + let a = i8x32::new( + -124, -113, -93, 99, -114, 45, -113, 30, 80, -29, 126, 12, -88, -106, -117, -12, 63, + -56, -65, -120, -128, -93, -97, 117, -23, 30, -14, -37, 30, -3, 60, -58, + ); + let mut o: [i8; 32] = [ + 31, -103, -100, 104, 70, 123, -86, -93, -10, 88, 2, 88, 45, -4, 120, -23, 
-4, 71, -56, + 100, 122, -46, 113, 113, -106, -127, -49, 31, -4, -85, 85, -37, + ]; + let r = i64x4::new( + 2202028832387731332, + -825400458184039600, + 8475672796179974207, + -4162173646616256791, + ); + + lasx_xvstx(transmute(a), o.as_mut_ptr(), 0); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvextl_qu_du() { + let a = u64x4::new( + 13363392893058409879, + 13062266778638186908, + 4121325568380818738, + 16525525054189099432, + ); + let r = i64x4::new(-5083351180651141737, 0, 4121325568380818738, 0); + + assert_eq!(r, transmute(lasx_xvextl_qu_du(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvinsgr2vr_w() { + let a = i32x8::new( + 37894851, + 6792754, + -1258538001, + -1755752185, + 45667801, + 270850755, + -1397420984, + -643296765, + ); + let r = i64x4::new( + 29174656317668035, + -7540898211419112465, + 1163295138520418131, + -2762938564400051128, + ); + + assert_eq!( + r, + transmute(lasx_xvinsgr2vr_w::<4>(transmute(a), -596457645)) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvinsgr2vr_d() { + let a = i64x4::new( + -8759780246633869569, + 7376911929131157332, + 8748197595361481626, + 15419583081814202, + ); + let r = i64x4::new( + -8759780246633869569, + 7376911929131157332, + 8748197595361481626, + -1262509914, + ); + + assert_eq!( + r, + transmute(lasx_xvinsgr2vr_d::<3>(transmute(a), -1262509914)) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve0_b() { + let a = i8x32::new( + 48, -8, -123, 35, -50, -64, 25, -100, -19, -112, 93, 46, -80, 59, 28, 42, -47, -52, 18, + -55, 50, -48, -25, -127, 97, 19, 71, -24, -71, -21, -114, -110, + ); + let r = i64x4::new( + 3472328296227680304, + 3472328296227680304, + 3472328296227680304, + 3472328296227680304, + ); + + assert_eq!(r, transmute(lasx_xvreplve0_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve0_h() { + let a = i16x16::new( + 412, 15338, 12582, -13132, -4679, 11713, 
23076, 26826, 14471, -7190, 2282, 29936, + 25689, 11463, -14855, 18183, + ); + let r = i64x4::new( + 115969459958317468, + 115969459958317468, + 115969459958317468, + 115969459958317468, + ); + + assert_eq!(r, transmute(lasx_xvreplve0_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve0_w() { + let a = i32x8::new( + 1243734577, + 1718395406, + -1635863561, + 863207308, + 71140354, + 1238191531, + -785900261, + -1886172704, + ); + let r = i64x4::new( + 5341799334363128369, + 5341799334363128369, + 5341799334363128369, + 5341799334363128369, + ); + + assert_eq!(r, transmute(lasx_xvreplve0_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve0_d() { + let a = i64x4::new( + -7669512117913941619, + 3607794435492173678, + 6416911432565038933, + 7089802970627232981, + ); + let r = i64x4::new( + -7669512117913941619, + -7669512117913941619, + -7669512117913941619, + -7669512117913941619, + ); + + assert_eq!(r, transmute(lasx_xvreplve0_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvreplve0_q() { + let a = i8x32::new( + 38, -64, -93, 68, 35, 91, 48, -77, 11, -127, -113, -96, -101, 2, -106, -104, 66, 3, + -45, 82, 95, 100, -99, 112, -127, 125, 100, 20, 17, -9, 77, -6, + ); + let r = i64x4::new( + -5534823735004774362, + -7451765666000961269, + -5534823735004774362, + -7451765666000961269, + ); + + assert_eq!(r, transmute(lasx_xvreplve0_q(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_h_b() { + let a = i8x32::new( + -114, -31, -50, -82, -63, -45, 61, -97, -121, 119, 25, 112, 43, 80, 70, 86, -80, 101, + 109, -126, 58, 103, 8, -108, 124, -29, 93, -96, 26, -11, -63, 58, + ); + let r = i64x4::new( + -22799683568926834, + -27302806455844927, + 31525304773640071, + 24207148650070059, + ); + + assert_eq!(r, transmute(lasx_vext2xv_h_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_w_h() { + let a = i16x16::new( + 24818, 
30826, -26283, -18137, -18647, -30298, 9378, -8000, 3374, -6396, 3703, 19569, + 25155, 17959, 16236, 26635, + ); + let r = i64x4::new( + 132396661891314, + -77893526906539, + -130124624185559, + -34359738358622, + ); + + assert_eq!(r, transmute(lasx_vext2xv_w_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_d_w() { + let a = i32x8::new( + -585251458, + -2113345963, + -1846838006, + -474453663, + -1394782646, + 229470412, + 1572845627, + -904846098, + ); + let r = i64x4::new(-585251458, -2113345963, -1846838006, -474453663); + + assert_eq!(r, transmute(lasx_vext2xv_d_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_w_b() { + let a = i8x32::new( + 36, -56, 126, -123, -107, 6, 4, -114, -114, 112, -98, -14, 4, -112, 83, -33, 94, -20, + -123, 85, -34, -65, -73, -33, -84, -29, 9, 42, -76, -59, -84, -18, + ); + let r = i64x4::new(-240518168540, -528280977282, 30064770965, -489626271740); + + assert_eq!(r, transmute(lasx_vext2xv_w_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_d_h() { + let a = i16x16::new( + 28568, -25911, 12053, -2728, -19449, -11747, -4351, 8975, -18854, 29749, -13852, 32702, + 6750, 21089, -15985, 20408, + ); + let r = i64x4::new(28568, -25911, 12053, -2728); + + assert_eq!(r, transmute(lasx_vext2xv_d_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_d_b() { + let a = i8x32::new( + 18, 112, -36, -67, -20, 76, -103, -91, -114, 14, -121, 115, 35, -36, -123, 13, -107, + -52, 82, 36, 90, 43, -21, 13, -61, -84, 21, -59, 59, -116, -79, -65, + ); + let r = i64x4::new(18, 112, -36, -67); + + assert_eq!(r, transmute(lasx_vext2xv_d_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_hu_bu() { + let a = i8x32::new( + 38, -47, -21, -14, 36, 120, -8, -12, 76, 36, 42, 41, -54, 103, 93, 60, -6, -1, 68, -86, + 49, 60, 6, -17, -118, -56, -71, 7, 1, 79, 68, 95, + ); + let r = i64x4::new( + 
68117953694990374, + 68680959477153828, + 11540654436122700, + 16888898041348298, + ); + + assert_eq!(r, transmute(lasx_vext2xv_hu_bu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_wu_hu() { + let a = i16x16::new( + -31465, -19962, 4074, 27214, -1117, 19026, -8469, -13109, 19316, 5127, 15001, -32657, + 4699, 24472, 1480, -18381, + ); + let r = i64x4::new( + 195738839581975, + 116883239997418, + 81716047838115, + 225172250484459, + ); + + assert_eq!(r, transmute(lasx_vext2xv_wu_hu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_du_wu() { + let a = i32x8::new( + -267466250, + -936328606, + -1799333696, + 1035808674, + -2072455456, + 239819000, + 1616827243, + 740798354, + ); + let r = i64x4::new(4027501046, 3358638690, 2495633600, 1035808674); + + assert_eq!(r, transmute(lasx_vext2xv_du_wu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_wu_bu() { + let a = i8x32::new( + 54, -26, 32, 112, -121, 62, -95, -28, -103, -110, -103, 110, 127, -48, 101, -81, 35, + -54, -116, 14, -97, 97, -45, 85, -18, 126, 31, 115, -59, 10, -16, -71, + ); + let r = i64x4::new(987842478134, 481036337184, 266287972487, 979252543649); + + assert_eq!(r, transmute(lasx_vext2xv_wu_bu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_du_hu() { + let a = i16x16::new( + -4235, -24126, -30181, 19598, -24220, 19618, -8899, 20393, 31336, -6256, 3392, -18554, + -31864, -32356, -15170, 18814, + ); + let r = i64x4::new(61301, 41410, 35355, 19598); + + assert_eq!(r, transmute(lasx_vext2xv_du_hu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_vext2xv_du_bu() { + let a = i8x32::new( + 69, 25, 36, -52, -55, 23, -66, 10, 23, 74, 121, 113, 82, 22, 49, -96, -124, 46, -78, + 72, -37, 113, 126, -115, 79, -105, -39, -110, -96, 77, -54, -35, + ); + let r = i64x4::new(69, 25, 36, 204); + + assert_eq!(r, transmute(lasx_vext2xv_du_bu(transmute(a)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpermi_q() { + let a = i8x32::new( + 53, 32, -81, -96, 38, -39, 42, -111, -82, -104, -58, 101, 92, -89, -77, 71, -121, -110, + -125, -48, 97, 91, 90, -120, 44, -98, -107, 3, -85, 64, -45, -14, + ); + let b = i8x32::new( + 23, -5, 51, 85, 46, -5, -102, 2, -73, -121, 18, -2, 113, -122, -117, -20, -47, 84, 117, + -17, -21, -78, -91, 69, 6, 34, -115, 73, -21, 9, -36, 92, + ); + let r = i64x4::new( + 5018614086178788561, + 6691234052521665030, + -8621478060979154297, + -949343993201320404, + ); + + assert_eq!( + r, + transmute(lasx_xvpermi_q::<49>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpermi_d() { + let a = i64x4::new( + 539162827834580224, + 7362188367992869351, + 1609032298240495217, + 1788653247091024267, + ); + let r = i64x4::new( + 7362188367992869351, + 1609032298240495217, + 539162827834580224, + 1609032298240495217, + ); + + assert_eq!(r, transmute(lasx_xvpermi_d::<137>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvperm_w() { + let a = i32x8::new( + -708303872, + -376964930, + -1808535729, + -2054828055, + 71139817, + -306901690, + -1914618818, + -1977032311, + ); + let b = i32x8::new( + 1288050919, 621948080, 1756136778, 1515604090, 408174564, 1809111645, 451808315, + 1595060072, + ); + let r = i64x4::new( + -3042141963552235127, + -7767601807216087217, + -1318132721565990423, + -3042141963630030871, + ); + + assert_eq!(r, transmute(lasx_xvperm_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvldrepl_b() { + let a: [i8; 32] = [ + -37, -75, -9, 68, 120, 101, -40, 41, -16, -103, 89, 95, 83, 50, -109, 30, 72, -8, 21, + -41, -5, -67, -60, -85, 111, 105, 122, -69, -33, -5, 118, -114, + ]; + let r = i64x4::new( + -2604246222170760229, + -2604246222170760229, + -2604246222170760229, + -2604246222170760229, + ); + + assert_eq!(r, transmute(lasx_xvldrepl_b::<0>(a.as_ptr()))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvldrepl_h() { + let a: [i8; 32] = [ + 9, 11, -106, 72, -118, -25, 63, -96, -91, -77, -71, 41, -74, -21, -12, 79, -78, -66, + -20, -66, 5, -116, -88, 0, 7, -59, 7, 36, -83, -122, -42, -71, + ]; + let r = i64x4::new( + 795178942675356425, + 795178942675356425, + 795178942675356425, + 795178942675356425, + ); + + assert_eq!(r, transmute(lasx_xvldrepl_h::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvldrepl_w() { + let a: [i8; 32] = [ + 42, 19, -74, -120, -24, 115, 114, 79, 108, 51, 109, 64, -123, 115, 4, 60, -127, 78, + -103, 44, 28, 14, 75, 19, 126, 86, -22, -55, -66, 32, -11, 112, + ]; + let r = i64x4::new( + -8595661765386824918, + -8595661765386824918, + -8595661765386824918, + -8595661765386824918, + ); + + assert_eq!(r, transmute(lasx_xvldrepl_w::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvldrepl_d() { + let a: [i8; 32] = [ + -58, -81, 9, -23, -6, 105, 110, 81, 123, -99, -71, 23, 21, 18, 21, -94, 123, 120, -87, + -27, 43, 83, 12, -68, 80, 26, 14, 64, 61, 4, -104, -45, + ]; + let r = i64x4::new( + 5867743890882801606, + 5867743890882801606, + 5867743890882801606, + 5867743890882801606, + ); + + assert_eq!(r, transmute(lasx_xvldrepl_d::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve2gr_w() { + let a = i32x8::new( + -171617667, + 1234499290, + -496270783, + 916647463, + 1367768596, + -1156952470, + 172419522, + -1633257882, + ); + let r: i32 = 1367768596; + + assert_eq!(r, transmute(lasx_xvpickve2gr_w::<4>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve2gr_wu() { + let a = i32x8::new( + -547854042, + 1057749415, + -1081569551, + -1895010720, + -1615052351, + -472405371, + 1482004122, + -1099972589, + ); + let r: u32 = 3194994707; + + assert_eq!(r, transmute(lasx_xvpickve2gr_wu::<7>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve2gr_d() { + let 
a = i64x4::new( + 5494820280860382649, + -235896250341393106, + 6739870851682505277, + -2213972721378902369, + ); + let r: i64 = 6739870851682505277; + + assert_eq!(r, transmute(lasx_xvpickve2gr_d::<2>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve2gr_du() { + let a = i64x4::new( + -3274379179178335548, + -1748909263142723978, + -4272175049937479582, + -8920910898336101981, + ); + let r: u64 = 9525833175373449635; + + assert_eq!(r, transmute(lasx_xvpickve2gr_du::<3>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_q_d() { + let a = i64x4::new( + -1487944422194570539, + 6635250509470966842, + -5056614467208325955, + -6125778217946781600, + ); + let b = i64x4::new( + -5984805769944216142, + 5786714665975619996, + -2702111374414975767, + -5035182099645850808, + ); + let r = i64x4::new(-7472750192138786681, -1, -7758725841623301722, -1); + + assert_eq!(r, transmute(lasx_xvaddwev_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_d_w() { + let a = i32x8::new( + 675098803, + -75093512, + -81250247, + -121202336, + -1671001294, + -285443775, + 1247275542, + 1556903730, + ); + let b = i32x8::new( + -60118452, + 780831551, + -1865678894, + -1327225627, + -1638401313, + 1476017431, + -1866352749, + -523966227, + ); + let r = i64x4::new(614980351, -1946929141, -3309402607, -619077207); + + assert_eq!(r, transmute(lasx_xvaddwev_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_w_h() { + let a = i16x16::new( + 22608, -32211, 15906, -27286, -31014, -22869, -2185, 30553, 0, 12445, 343, -20393, + -7421, 12619, -32283, 25803, + ); + let b = i16x16::new( + -922, 25119, -27975, 3966, 7351, -30447, -29386, 20153, -8260, -10355, 15526, -17976, + 30119, 32034, -21917, 30756, + ); + let r = i64x4::new( + -51835960273738, + -135592117558383, + 68161130979260, + -232787227420502, + ); + + assert_eq!(r, 
transmute(lasx_xvaddwev_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_h_b() { + let a = i8x32::new( + 101, 34, 41, -107, -36, -117, 4, -53, -1, -113, 85, 83, 24, -54, -19, -128, 34, 37, + -45, 11, -78, -60, -13, 10, -97, -34, -128, 8, 88, 107, 65, -45, + ); + let b = i8x32::new( + -117, -119, -45, -12, -81, 85, -5, -43, 118, 117, 123, -107, 55, -109, 18, 96, -89, + -92, -16, -107, 64, 123, 12, -1, 110, 18, -96, 77, -60, -100, -102, -47, + ); + let r = i64x4::new( + -498216402960, + -281135660662667, + -55838507063, + -10414449598922739, + ); + + assert_eq!(r, transmute(lasx_xvaddwev_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_q_du() { + let a = u64x4::new( + 10116771403081209132, + 4409447541453417390, + 898338891308675373, + 2921491360808722992, + ); + let b = u64x4::new( + 13196093984731278668, + 13568223424734996564, + 18446645167103959087, + 1830481894073719508, + ); + let r = i64x4::new(4866121314102936184, 1, 898239984703082844, 1); + + assert_eq!(r, transmute(lasx_xvaddwev_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_d_wu() { + let a = u32x8::new( + 1198556156, 4098846235, 136525854, 1406990253, 2217403106, 390213570, 1993119836, + 1839111140, + ); + let b = u32x8::new( + 2802853372, 1144229232, 3262242038, 3483335391, 3804489865, 583269177, 2356229233, + 699141534, + ); + let r = i64x4::new(4001409528, 3398767892, 6021892971, 4349349069); + + assert_eq!(r, transmute(lasx_xvaddwev_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_w_hu() { + let a = u16x16::new( + 6322, 31121, 27313, 37809, 33019, 46908, 8254, 44176, 58710, 48196, 24711, 20406, + 18042, 38301, 32766, 13444, + ); + let b = u16x16::new( + 14794, 51570, 1750, 49106, 762, 47300, 64778, 26934, 42322, 39382, 42708, 58300, 788, + 59906, 54890, 41392, + ); + let r = i64x4::new( + 
124824634544764, + 313670051595253, + 289562400230056, + 376479653317006, + ); + + assert_eq!(r, transmute(lasx_xvaddwev_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_h_bu() { + let a = u8x32::new( + 161, 193, 11, 51, 139, 70, 76, 148, 89, 35, 229, 97, 137, 39, 176, 219, 87, 90, 7, 151, + 124, 135, 127, 143, 231, 76, 225, 208, 193, 51, 197, 27, + ); + let b = u8x32::new( + 60, 218, 230, 194, 245, 20, 179, 100, 21, 163, 236, 184, 84, 87, 122, 61, 25, 209, 185, + 207, 241, 56, 216, 245, 230, 103, 251, 152, 157, 115, 48, 190, + ); + let r = i64x4::new( + 71777768344453341, + 83880492278022254, + 96547484687401072, + 68962872563859917, + ); + + assert_eq!(r, transmute(lasx_xvaddwev_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_q_d() { + let a = i64x4::new( + -7742993219420546326, + -101212755683599810, + -6868163898247798277, + -8375244535493076926, + ); + let b = i64x4::new( + 2520168195081268699, + 9108054891736382097, + 6081995959065773172, + -7633503910634037993, + ); + let r = i64x4::new(8183582659207736591, -1, 5496584216395980167, -1); + + assert_eq!(r, transmute(lasx_xvsubwev_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_d_w() { + let a = i32x8::new( + -331902539, + -410274173, + 61822184, + -21356706, + -1286351195, + 1770474991, + -682957064, + -1751781451, + ); + let b = i32x8::new( + 1613863191, + 982997422, + -1638727663, + -849407734, + -68285193, + 822007285, + 144325628, + 1766216748, + ); + let r = i64x4::new(-1945765730, 1700549847, -1218066002, -827282692); + + assert_eq!(r, transmute(lasx_xvsubwev_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_w_h() { + let a = i16x16::new( + 28743, 20624, 20703, 30472, -4294, 10753, -24932, 2990, 15363, 6155, 32468, -23754, + -2447, 26852, 22688, -14794, + ); + let b = i16x16::new( + 23978, -18333, 
-16768, 15041, 16101, -22819, -5374, -14505, -14490, -28486, 31912, + -14640, 9360, -7613, -27955, 24096, + ); + let r = i64x4::new( + 160936719553181, + -83996675428267, + 2388001846429, + 217514323726817, + ); + + assert_eq!(r, transmute(lasx_xvsubwev_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_h_b() { + let a = i8x32::new( + -15, -3, 45, 48, -83, -44, 39, -105, -84, -28, 100, 105, 92, -27, -25, -10, -66, 81, + -107, 86, -125, 111, 23, -60, -67, -7, -53, 26, 114, -11, -82, -3, + ); + let b = i8x32::new( + -3, -39, 34, -41, 12, -46, 111, -59, 120, -86, -90, -16, -80, 110, 115, -3, 124, 93, + -42, 74, 52, 126, -65, 28, 109, 69, -64, 67, -69, -62, -61, 39, + ); + let r = i64x4::new( + -19985131367563276, + -39405757992599756, + 25050517008809794, + -5910188531122352, + ); + + assert_eq!(r, transmute(lasx_xvsubwev_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_q_du() { + let a = u64x4::new( + 4097334132097570986, + 3004224617145960419, + 6567223884870023457, + 342771278501784235, + ); + let b = u64x4::new( + 11278175901218237219, + 17453302179390276683, + 10469031865427428464, + 13567003215182256574, + ); + let r = i64x4::new(-7180841769120666233, -1, -3901807980557405007, -1); + + assert_eq!(r, transmute(lasx_xvsubwev_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_d_wu() { + let a = u32x8::new( + 1172933923, 3561590261, 603333963, 754041205, 663327014, 1707091866, 2563659074, + 2321081680, + ); + let b = u32x8::new( + 3703975407, 3067249102, 1688677432, 1970014868, 2563703919, 3474073919, 962829505, + 706481691, + ); + let r = i64x4::new(-2531041484, -1085343469, -1900376905, 1600829569); + + assert_eq!(r, transmute(lasx_xvsubwev_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_w_hu() { + let a = u16x16::new( + 59679, 17198, 28545, 44644, 31522, 
21827, 19256, 56166, 8797, 57585, 50535, 47800, + 56204, 43584, 6516, 57953, + ); + let b = u16x16::new( + 12708, 41280, 57347, 58871, 47516, 27619, 53764, 58057, 32314, 65212, 64025, 62782, + 47743, 20389, 33764, 7173, + ); + let r = i64x4::new( + -123703648012421, + -148206436499066, + -57934813879261, + -117029268872947, + ); + + assert_eq!(r, transmute(lasx_xvsubwev_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwev_h_bu() { + let a = u8x32::new( + 56, 244, 182, 253, 193, 214, 55, 239, 186, 251, 78, 32, 93, 2, 4, 132, 53, 6, 173, 35, + 84, 227, 58, 79, 196, 41, 163, 128, 246, 219, 120, 87, + ); + let b = u8x32::new( + 90, 193, 215, 114, 199, 50, 46, 90, 225, 253, 111, 26, 28, 238, 131, 245, 47, 87, 30, + 95, 33, 50, 192, 132, 14, 240, 47, 254, 29, 155, 145, 45, + ); + let r = i64x4::new( + 2814728290172894, + -35747038576508967, + -37717427826524154, + -7035942402260810, + ); + + assert_eq!(r, transmute(lasx_xvsubwev_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_q_d() { + let a = i64x4::new( + -683494492458261228, + -5241422472417437680, + 6650370058493421125, + 4779596395103551457, + ); + let b = i64x4::new( + -1623383963768224463, + 6756255500546970238, + -7555682488592816357, + -7648860611106928873, + ); + let r = i64x4::new( + 5539873801618144468, + 60150126978886031, + 3692294931598396487, + -2723954123981949807, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_d_w() { + let a = i32x8::new( + 2140792624, + 1544321576, + 1549060875, + -630248052, + -1129263074, + -73878937, + 521128826, + 22556670, + ); + let b = i32x8::new( + -346749156, + 1202859377, + 1486656968, + 370617591, + 1270867102, + -810144613, + 1735249190, + -1555085961, + ); + let r = i64x4::new( + -742318035543025344, + 2302922143674927000, + -1435143290249991548, + 904288373202150940, + 
); + + assert_eq!(r, transmute(lasx_xvmulwev_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_w_h() { + let a = i16x16::new( + 14750, -29841, -17709, -8196, 31466, 7862, -25367, -12539, 9353, 10914, -12320, -17148, + -6831, -498, 2288, 29204, + ); + let b = i16x16::new( + -12026, 22388, -5312, 184, 18130, -7473, -25877, 31312, -9813, 24876, 26780, -7436, + -15441, 11581, -22259, 14954, + ); + let r = i64x4::new( + 404028471005501364, + 2819310417355001844, + -1417036837779175293, + -218736636965849761, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_h_b() { + let a = i8x32::new( + -32, 93, 5, -3, -61, -113, 57, 15, -19, 95, 84, 13, 85, -84, 23, 37, -74, -33, -40, 52, + 9, -63, 21, 55, 68, -20, -70, -53, 117, 50, -31, 80, + ); + let b = i8x32::new( + 7, 32, 85, -70, -87, -72, -87, 1, 26, -19, -128, 116, -6, -98, -11, -79, -19, 4, 90, + 47, 88, 112, -37, -100, -119, -82, 7, 77, -62, 76, 61, -120, + ); + let r = i64x4::new( + -1395811616088785120, + -70933880974017006, + -218702651231042178, + -532018857412992924, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_q_du() { + let a = u64x4::new( + 3072820657428859233, + 11609640493721306675, + 12008349959063387869, + 5948138397283294636, + ); + let b = u64x4::new( + 10527245875383164815, + 7916669328935928828, + 3031495739290315758, + 13060234924687571269, + ); + let r = i64x4::new( + -1534093344768443345, + 1753606948871441014, + -1876472381986713482, + 1973424773030267173, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_d_wu() { + let a = u32x8::new( + 2949007290, 703271383, 711423165, 1456866992, 3752229871, 2536591346, 2389736494, + 3966991514, + ); + let b = u32x8::new( 
+ 196315048, 1279932854, 2296087324, 1350671471, 2200714021, 3470805434, 130970026, + 3503786742, + ); + let r = i64x4::new( + 578934507688699920, + 1633489711156460460, + 8257584887124721291, + 312983850752328844, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_w_hu() { + let a = u16x16::new( + 47934, 48824, 8863, 27185, 38746, 3540, 44988, 31735, 10219, 30176, 19749, 47625, 9605, + 42752, 51816, 20943, + ); + let b = u16x16::new( + 1352, 35948, 33502, 40543, 34675, 10670, 35261, 56591, 28340, 28503, 7709, 11425, + 35242, 32021, 61306, 37078, + ); + let r = i64x4::new( + 1275297019994103664, + 6813200545333146478, + 653887472362785596, + -4803214827614038190, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_h_bu() { + let a = u8x32::new( + 181, 7, 169, 169, 172, 103, 102, 36, 203, 92, 62, 74, 182, 211, 40, 13, 241, 11, 168, + 240, 139, 224, 217, 76, 58, 133, 28, 147, 22, 142, 180, 136, + ); + let b = u8x32::new( + 247, 29, 191, 188, 209, 191, 193, 157, 228, 251, 166, 237, 216, 180, 183, 151, 51, 82, + 28, 3, 146, 77, 65, 127, 70, 150, 194, 49, 235, 0, 88, 29, + ); + let r = i64x4::new( + 5541270789125811875, + 2060565673950885068, + 3970291708878401539, + 4458585836433706972, + ); + + assert_eq!(r, transmute(lasx_xvmulwev_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_q_d() { + let a = i64x4::new( + -4400532975246140561, + 6103963578734860361, + 6538041862964443552, + 9150349465675238484, + ); + let b = i64x4::new( + 8731574776501689511, + 8529056615916614298, + -5177328656834536965, + -8950246356268516094, + ); + let r = i64x4::new(-3813723879058076957, 0, 200103109406722390, 0); + + assert_eq!(r, transmute(lasx_xvaddwod_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvaddwod_d_w() { + let a = i32x8::new( + 107177346, + 1165229099, + -1855482949, + -1506158220, + -530530472, + -1932018412, + 1027697605, + -653089829, + ); + let b = i32x8::new( + 605852783, + 1977495085, + 71767549, + -1079077108, + -1117877219, + 1146297949, + -89842401, + 1580029832, + ); + let r = i64x4::new(3142724184, -2585235328, -785720463, 926940003); + + assert_eq!(r, transmute(lasx_xvaddwod_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_w_h() { + let a = i16x16::new( + 8333, 3159, -8340, 2860, -10086, -10705, -22151, 9693, -10758, 24078, -6146, -22105, + -9685, -11464, 1434, -10313, + ); + let b = i16x16::new( + 24703, 26602, -11086, -20999, -31901, 27136, 3427, -26885, 13303, 12337, 32133, 9869, + 13049, -11935, 7268, -24263, + ); + let r = i64x4::new( + -77906411752383, + -73839077736401, + -52553219797441, + -148498494282599, + ); + + assert_eq!(r, transmute(lasx_xvaddwod_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_h_b() { + let a = i8x32::new( + 84, -26, 37, -73, 68, -16, -46, 83, -36, 80, -20, 61, 84, -41, 48, 23, 117, 43, -82, + -1, -6, -5, -88, -59, -24, 126, -122, -29, -30, 41, 88, -82, + ); + let b = i8x32::new( + 101, -60, -48, 109, 26, -30, -114, -67, 36, -33, -1, -26, 102, 46, 10, -96, 122, -84, + 121, -64, 14, -41, -110, -120, 7, -54, 69, -95, 24, -112, -75, 47, + ); + let r = i64x4::new( + 4784877038010282, + -20547651822747601, + -50102739132219433, + -9570449863999416, + ); + + assert_eq!(r, transmute(lasx_xvaddwod_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_q_du() { + let a = u64x4::new( + 5678527968265482955, + 15561833412025074700, + 6604122729549136851, + 2064090124976043119, + ); + let b = u64x4::new( + 17348958871868652420, + 3636555885647953059, + 13556112850172780139, + 15106752613120000479, + ); + let r = i64x4::new(751645223963476143, 1, 
-1275901335613508018, 0); + + assert_eq!(r, transmute(lasx_xvaddwod_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_d_wu() { + let a = u32x8::new( + 1981196003, 503742005, 890731178, 1132725820, 1082789967, 1773388022, 3687035574, + 2761826754, + ); + let b = u32x8::new( + 239559029, 4254142036, 2675411124, 540730773, 3579454499, 389539593, 2282534290, + 2381309647, + ); + let r = i64x4::new(4757884041, 1673456593, 2162927615, 5143136401); + + assert_eq!(r, transmute(lasx_xvaddwod_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_w_hu() { + let a = u16x16::new( + 2281, 18176, 25719, 13571, 60992, 4744, 29330, 13668, 8334, 51018, 34330, 25476, 39478, + 10512, 18653, 36146, + ); + let b = u16x16::new( + 12509, 23819, 52059, 39413, 59587, 22877, 24693, 50088, 16716, 29478, 46962, 20510, + 63245, 56365, 48918, 21693, + ); + let r = i64x4::new( + 227564547253259, + 273829934951397, + 197508366154352, + 248416613500221, + ); + + assert_eq!(r, transmute(lasx_xvaddwod_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_h_bu() { + let a = u8x32::new( + 60, 80, 117, 71, 182, 90, 20, 252, 34, 80, 102, 107, 49, 1, 75, 51, 175, 113, 29, 130, + 107, 245, 172, 220, 129, 144, 11, 136, 248, 112, 109, 250, + ); + let b = u8x32::new( + 138, 100, 21, 101, 14, 54, 118, 39, 31, 118, 184, 186, 69, 89, 154, 138, 240, 210, 94, + 39, 11, 71, 157, 238, 181, 78, 88, 102, 165, 50, 235, 48, + ); + let r = i64x4::new( + 81909836709363892, + 53199157164572870, + 128916896554221891, + 83880238860075230, + ); + + assert_eq!(r, transmute(lasx_xvaddwod_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_q_d() { + let a = i64x4::new( + -3945435774433072696, + -5580639112190912700, + -8147998114407044390, + -4275535762638580926, + ); + let b = i64x4::new( + 4407006886911950173, + 
-7345495209927165189, + -2920599937444079395, + 6487551432709971357, + ); + let r = i64x4::new(1764856097736252489, 0, 7683656878360999333, -1); + + assert_eq!(r, transmute(lasx_xvsubwod_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_d_w() { + let a = i32x8::new( + 1480945437, + -383133422, + -450202465, + -1667474532, + 425467038, + 483856367, + 397851792, + 2047398851, + ); + let b = i32x8::new( + -1994579383, + 576791476, + -807849214, + -1675047435, + 1888930513, + -1622703443, + 1826948151, + -1929022406, + ); + let r = i64x4::new(-959924898, 7572903, 2106559810, 3976421257); + + assert_eq!(r, transmute(lasx_xvsubwod_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_w_h() { + let a = i16x16::new( + 17856, 7337, -32600, -17170, 20316, -23074, 3419, 31841, -19556, 25126, 32449, -4845, + -4101, -15325, -15552, -29507, + ); + let b = i16x16::new( + -5321, -4306, 7409, -32016, -5351, 21871, 12529, 25151, -16361, 17466, 24705, 14901, + -30601, 20878, 16678, -25393, + ); + let r = i64x4::new( + 63763084488059, + 28737626132591, + -84808424219156, + -17665200524651, + ); + + assert_eq!(r, transmute(lasx_xvsubwod_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_h_b() { + let a = i8x32::new( + 18, -21, -84, 117, -114, 12, 106, -85, -51, -119, -70, -63, 118, -92, 124, 114, -40, + -12, 116, 97, 61, 0, 121, 33, 123, 85, 26, -89, 30, 99, 21, 25, + ); + let b = i8x32::new( + 23, 122, -99, -17, -36, -51, -64, 99, 20, -7, 85, 1, 65, -15, -45, 43, -82, 77, 103, + 57, -10, 27, 105, -78, 78, 69, 75, 65, 94, -116, 22, 39, + ); + let r = i64x4::new( + -51791125122973839, + 20265871901523856, + 31525081430163367, + -3939721971105776, + ); + + assert_eq!(r, transmute(lasx_xvsubwod_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_q_du() { + let a = u64x4::new( + 
14173893774454482457, + 3810444305251451895, + 11573438380633440776, + 14010021571042449665, + ); + let b = u64x4::new( + 3850106411190823856, + 9879970351878579373, + 18286343935048656427, + 15814090293156005950, + ); + let r = i64x4::new(-6069526046627127478, -1, -1804068722113556285, -1); + + assert_eq!(r, transmute(lasx_xvsubwod_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_d_wu() { + let a = u32x8::new( + 3407590693, 1202785013, 1220235957, 847407948, 1753366487, 1588252312, 949725107, + 660365194, + ); + let b = u32x8::new( + 3894489434, 440627342, 2074663244, 1619627426, 1047192238, 3243399158, 5736380, + 2062766786, + ); + let r = i64x4::new(762157671, -772219478, -1655146846, -1402401592); + + assert_eq!(r, transmute(lasx_xvsubwod_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_w_hu() { + let a = u16x16::new( + 5666, 61402, 18774, 63704, 5634, 763, 10164, 61056, 3316, 2644, 36526, 37166, 39369, + 62637, 25134, 63401, + ); + let b = u16x16::new( + 42490, 58823, 51099, 26297, 14231, 33107, 29618, 35846, 40233, 15170, 7280, 21532, + 43600, 42150, 29384, 25015, + ); + let r = i64x4::new( + 160661841644051, + 108280420467112, + 67151813660434, + 164866614644743, + ); + + assert_eq!(r, transmute(lasx_xvsubwod_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsubwod_h_bu() { + let a = u8x32::new( + 52, 64, 145, 201, 179, 240, 245, 105, 232, 134, 159, 238, 112, 26, 116, 151, 98, 187, + 75, 8, 123, 231, 244, 249, 2, 61, 252, 18, 221, 229, 97, 180, + ); + let b = u8x32::new( + 161, 161, 97, 228, 198, 212, 5, 77, 243, 42, 221, 12, 112, 20, 43, 195, 186, 156, 232, + 81, 76, 136, 175, 151, 238, 192, 18, 14, 227, 58, 213, 181, + ); + let r = i64x4::new( + 7881423900245919, + -12384873190653860, + 27584960029720607, + -280740536975491, + ); + + assert_eq!(r, transmute(lasx_xvsubwod_h_bu(transmute(a), transmute(b)))); 
+} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_q_d() { + let a = i64x4::new( + -4810434630060465465, + 4688732257687902806, + -4456839103181700987, + -8917453762606400882, + ); + let b = i64x4::new( + 6208173123158669961, + -127816522776177372, + 1052866109299034740, + 233879409784875239, + ); + let r = i64x4::new( + -5178962405540445672, + -32487980047399636, + -4213378220890601950, + -113061080830775254, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_d_w() { + let a = i32x8::new( + -2055655783, + -830862243, + -847861086, + -336854390, + -1217543653, + -1512465773, + -1029760180, + 696500116, + ); + let b = i32x8::new( + 1867516505, + -867512649, + 533129786, + 1783687399, + -1192533976, + 1399910380, + -1289839662, + -1915471625, + ); + let r = i64x4::new( + 720783505379011707, + -600842930740831610, + -2117316535017423740, + -1334126209007208500, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_w_h() { + let a = i16x16::new( + -11721, 24971, -11669, 16270, -6825, 11583, 26517, -2001, -9346, -14979, 6799, -913, + 32665, 19801, 21245, 3779, + ); + let b = i16x16::new( + -22224, -12256, 16952, -4627, -11217, 527, 18001, -14755, -27194, 17253, -12454, + -27169, 32549, 32431, 24685, 20780, + ); + let r = i64x4::new( + -323330674561769120, + 126807857153516721, + 106537943419101521, + 337273560374881751, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_h_b() { + let a = i8x32::new( + 95, 23, -127, -44, -50, -2, -107, -94, 28, -90, 111, -51, -6, 84, -14, 63, 28, 31, + -120, 33, -68, -22, 49, 85, -42, 36, -99, -60, 119, -39, 55, -81, + ); + let b = i8x32::new( + -76, -123, 85, -8, 61, 68, -54, 35, 75, 25, -10, 41, -88, 30, 106, 13, -47, 51, 14, 52, + -61, 
53, -114, -91, -69, 3, -27, -105, -56, 89, -97, 35, + ); + let r = i64x4::new( + -925771782493768461, + 230538833401607990, + -2176932477699619283, + -797714991416606612, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_q_du() { + let a = u64x4::new( + 7091632338891003648, + 3739044658401562681, + 17715177360220060439, + 15881729055260995184, + ); + let b = u64x4::new( + 3957896596496566926, + 14072319404382751448, + 8435476695188152907, + 13452684919273724788, + ); + let r = i64x4::new( + 6176011447065373208, + 2852374949748893805, + 5535184026733238976, + -6864651532066967840, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_d_wu() { + let a = u32x8::new( + 2766740249, 1667577703, 3569036313, 1579235215, 3396253061, 2456107502, 1991409426, + 75424938, + ); + let b = u32x8::new( + 3618661585, 2352411935, 3028582487, 1023986068, 3092028317, 3835802450, 3486468402, + 2263667528, + ); + let r = i64x4::new( + 3922829691077085305, + 1617114858254984620, + -9025600900074571716, + 170736982952013264, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_w_hu() { + let a = u16x16::new( + 55236, 28771, 53988, 52341, 33854, 22292, 10394, 61333, 4522, 48545, 32239, 37616, + 60335, 27122, 32053, 14922, + ); + let b = u16x16::new( + 64490, 59642, 2029, 25643, 55072, 32592, 44282, 23992, 17266, 4336, 3878, 44058, 48161, + 63520, 51113, 10126, + ); + let r = i64x4::new( + 5764620336637638830, + 6320050114866848320, + 7117988002098042608, + 648970298882764352, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_h_bu() { + let a = u8x32::new( + 34, 239, 30, 169, 91, 195, 107, 97, 212, 207, 110, 
55, 238, 210, 149, 21, 238, 150, 4, + 49, 158, 137, 81, 246, 145, 164, 238, 229, 151, 250, 105, 19, + ); + let b = u8x32::new( + 109, 186, 165, 193, 216, 121, 71, 232, 9, 233, 215, 188, 234, 112, 250, 183, 159, 61, + 140, 67, 64, 225, 148, 142, 58, 178, 120, 106, 37, 216, 186, 161, + ); + let r = i64x4::new( + 6334414217787583910, + 1081809353807543399, + -8614127794670853186, + 861263883582730760, + ); + + assert_eq!(r, transmute(lasx_xvmulwod_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_d_wu_w() { + let a = u32x8::new( + 1465537318, 1382340624, 1603365560, 1355400303, 145165353, 3595116789, 4194509835, + 314900647, + ); + let b = i32x8::new( + -2079155596, + -637150629, + -1781445929, + -2000249885, + 1523945572, + -1514431741, + -1149336021, + 1501805778, + ); + let r = i64x4::new(-613618278, -178080369, 1669110925, 3045173814); + + assert_eq!( + r, + transmute(lasx_xvaddwev_d_wu_w(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_w_hu_h() { + let a = u16x16::new( + 748, 28718, 22726, 4135, 23777, 12746, 33222, 13229, 5619, 33293, 48512, 19489, 24736, + 5690, 53405, 55687, + ); + let b = i16x16::new( + 8622, -30951, -14339, -27770, -7815, -8146, 31809, -9126, -16637, 3437, 23015, 376, + -964, 9550, -5336, -25533, + ); + let r = i64x4::new( + 36021890720922, + 279306018242138, + 307210420737270, + 206454782975196, + ); + + assert_eq!( + r, + transmute(lasx_xvaddwev_w_hu_h(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_h_bu_b() { + let a = u8x32::new( + 88, 218, 182, 176, 220, 158, 136, 109, 143, 78, 151, 35, 3, 38, 106, 192, 31, 178, 127, + 52, 28, 247, 210, 133, 22, 228, 225, 177, 65, 2, 28, 171, + ); + let b = i8x32::new( + -1, 67, 111, 96, 125, 14, -82, -67, -93, -127, 85, -72, 20, -47, 83, -13, -87, -111, + 27, -75, 125, 39, 93, 89, 25, 66, -76, -14, -52, -50, 43, -81, + ); + let r = i64x4::new( + 
15201130525294679, + 53198869398028338, + 85287575083483080, + 19984779190796335, + ); + + assert_eq!( + r, + transmute(lasx_xvaddwev_h_bu_b(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_d_wu_w() { + let a = u32x8::new( + 1117566668, 2171866262, 3863150800, 2917715295, 3911708395, 1228484642, 2321269874, + 4261467450, + ); + let b = i32x8::new( + 298065186, + 1000727430, + -1974818719, + -2115019739, + 1124007321, + 786270369, + -898501534, + 600072896, + ); + let r = i64x4::new( + 333107716764820248, + -7629022514159825200, + 4396788873597159795, + -2085664542616986716, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwev_d_wu_w(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_w_hu_h() { + let a = u16x16::new( + 22502, 13622, 44730, 46411, 64382, 64178, 62884, 38859, 27367, 39034, 18915, 47916, + 24716, 55834, 5119, 58864, + ); + let b = i16x16::new( + 21292, -10920, 292, 28750, -26856, 28754, -1172, -21835, 20852, -32278, -12338, 25813, + -10142, -19321, -22247, 30137, + ); + let r = i64x4::new( + 56097255526935944, + -316539293307705904, + -1002330561839921236, + -489121149480921704, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwev_w_hu_h(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_h_bu_b() { + let a = u8x32::new( + 64, 87, 43, 223, 59, 110, 8, 116, 204, 242, 108, 218, 63, 128, 143, 210, 147, 184, 202, + 200, 78, 84, 158, 241, 147, 241, 17, 99, 53, 113, 83, 131, + ); + let b = i8x32::new( + 59, 34, 117, 84, 8, -46, -24, -51, 38, -14, -14, 47, -52, 32, -19, -121, 65, 44, 108, + -40, -89, 15, -31, 88, -51, 75, 71, -50, -15, -77, -11, -98, + ); + let r = i64x4::new( + -54041167974166848, + -764500102863118776, + -1378412775185308333, + -256708593179958601, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwev_h_bu_b(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvaddwod_d_wu_w() { + let a = u32x8::new( + 2842977577, 726151833, 3624948328, 3635170403, 2399571401, 2980175388, 1959530649, + 2789073224, + ); + let b = i32x8::new( + 1477701582, + -1440126406, + -1077662088, + 60551123, + 287903770, + -1406443306, + 1729475940, + 1185250387, + ); + let r = i64x4::new(-713974573, 3695721526, 1573732082, 3974323611); + + assert_eq!( + r, + transmute(lasx_xvaddwod_d_wu_w(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_w_hu_h() { + let a = u16x16::new( + 15858, 62454, 8143, 63292, 12915, 37488, 58571, 3762, 9835, 37317, 31941, 1155, 43404, + 17532, 22889, 49328, + ); + let b = i16x16::new( + -10821, -16732, 3696, -6656, 20270, 19108, -9737, 3921, -19713, 14465, -4985, 8060, + 19692, -13193, -8849, 8523, + ); + let r = i64x4::new( + 243249767821978, + 32998233791764, + 39578123684422, + 248468153045235, + ); + + assert_eq!( + r, + transmute(lasx_xvaddwod_w_hu_h(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_h_bu_b() { + let a = u8x32::new( + 207, 56, 245, 126, 208, 205, 19, 229, 182, 28, 85, 188, 132, 80, 149, 101, 93, 95, 56, + 213, 181, 220, 90, 139, 206, 87, 97, 213, 245, 152, 219, 209, + ); + let b = i8x32::new( + 30, -46, -91, 101, 47, -13, 3, -11, -106, 65, 62, 83, 92, -28, -71, 122, 15, -84, -19, + -97, -128, -82, 28, -105, 111, -73, 119, -25, 7, 76, 54, 72, + ); + let r = i64x4::new( + 61362369571520522, + 62769143162536029, + 9570741921251339, + 79095447720558606, + ); + + assert_eq!( + r, + transmute(lasx_xvaddwod_h_bu_b(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_d_wu_w() { + let a = u32x8::new( + 3988094295, 3678296912, 2524886697, 507830363, 60676336, 2042142864, 911246321, + 2627081751, + ); + let b = i32x8::new( + -1423964992, + -300941917, + -1300830690, + 301547719, + -728801849, + 1812067428, + -1853372246, + 1459690332, + ); + let r = i64x4::new( 
+ -1106953723992460304, + 153135087601591997, + 3700500567177033792, + 3834725833308331332, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwod_d_wu_w(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_w_hu_h() { + let a = u16x16::new( + 22867, 24578, 38420, 43680, 56323, 53684, 33271, 54214, 382, 37378, 51385, 11786, 9873, + 685, 59607, 7054, + ); + let b = i16x16::new( + 14263, 1867, -4762, 7093, 9219, 14229, 23256, -2657, -24665, -648, 14592, -26979, + 12560, 28471, -30607, 30723, + ); + let r = i64x4::new( + 1330676388419350166, + -618675426746189372, + -1365690048421401872, + 930805492797249067, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwod_w_hu_h(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_h_bu_b() { + let a = u8x32::new( + 106, 63, 35, 106, 240, 140, 62, 226, 24, 172, 209, 236, 201, 120, 85, 107, 133, 48, + 166, 220, 124, 12, 206, 73, 77, 93, 122, 44, 170, 245, 79, 125, + ); + let b = i8x32::new( + 49, -59, 51, -69, -83, 90, 118, 66, -127, -31, -92, -123, 22, -96, 127, -91, 103, 27, + 111, -67, 79, 32, 36, 51, -18, -108, -123, -57, -30, 14, -66, -118, + ); + let r = i64x4::new( + 4198534873019773307, + -2740489848885548244, + 1047932990890181904, + -4151741170613692220, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwod_h_bu_b(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_q_d() { + let a = i64x4::new( + 7195063412416833019, + -7198414538777237107, + 3618874101468146190, + 5075453792844537994, + ); + let b = i64x4::new( + -4177888634615683669, + 159708792916303045, + -493012886919538920, + -3327952250593224264, + ); + let r = i64x4::new(7070440900316630840, -1, 4582440905924999074, 0); + + assert_eq!(r, transmute(lasx_xvhaddw_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhaddw_qu_du() { + let a = u64x4::new( + 14174115972304041760, + 
11184692435390355059, + 6036753630285484734, + 16987794702390801127, + ); + let b = u64x4::new( + 919078441558396978, + 520168700921507198, + 13672733098019829533, + 11854214779067813220, + ); + let r = i64x4::new(-6342973196760799579, 0, -6232960347008472572, 1); + + assert_eq!(r, transmute(lasx_xvhaddw_qu_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_q_d() { + let a = i64x4::new( + 671584889846600733, + 8179701147067091777, + 8820752382384406910, + -8816577614727005023, + ); + let b = i64x4::new( + 2862152648469207935, + 4714581857093657849, + 3474818266521795377, + -2843283552126606269, + ); + let r = i64x4::new(5317548498597883842, 0, 6155348192460751216, -1); + + assert_eq!(r, transmute(lasx_xvhsubw_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvhsubw_qu_du() { + let a = u64x4::new( + 15891261469744917624, + 6124172835044839452, + 13470444488722494141, + 514760401991858000, + ); + let b = u64x4::new( + 6113118953514320833, + 14909065838985392334, + 1730613981074135290, + 11653977149369645375, + ); + let r = i64x4::new(11053881530518619, 0, -1215853579082277290, -1); + + assert_eq!(r, transmute(lasx_xvhsubw_qu_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_q_d() { + let a = i64x4::new( + 6851852253375557634, + -687859074247996461, + -2847020890783636723, + -3396011480229435207, + ); + let b = i64x4::new( + 4881265308617523092, + -6946920457192015262, + 2620975855235645060, + -3109202070840153061, + ); + let c = i64x4::new( + 8576064979838144125, + 4734381367362523796, + 1223742651533162362, + -6069819910741619678, + ); + let r = i64x4::new( + -8703171595748273338, + 1581487120574302805, + 942353693594667509, + -3222137980934690913, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_q_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_d_w() { + 
let a = i64x4::new( + 4283476221971713520, + 5997311160552489534, + -7461538125080812198, + 584666845411625444, + ); + let b = i32x8::new( + -1699017988, + -1597461813, + 1949179714, + -22329469, + -25282868, + -1833476595, + -712935020, + -1228584225, + ); + let c = i32x8::new( + 1933742369, + -902774021, + 1152039469, + -966950160, + -2014121439, + -847909444, + 205263209, + 533619002, + ); + let r = i64x4::new( + 998013152882979948, + 8242843123254621400, + -7410615358602605146, + 438327515397946264, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_d_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_w_h() { + let a = i32x8::new( + -497197979, + 2128466895, + 1827806706, + -1515704287, + 1900959403, + -10679846, + 1566686168, + -747997169, + ); + let b = i16x16::new( + 13631, 27024, -7774, -32582, 29199, 15396, -401, -17852, 10337, 15890, -26044, 11510, + 10732, 3619, 18520, -7838, + ); + let c = i16x16::new( + 24759, -9415, -26783, -18619, 13757, -17352, 16725, -25610, 14981, 21116, 23650, + -18473, 13862, 20053, 3522, -18723, + ); + let r = i64x4::new( + -8410788748874544018, + -6538705505380766203, + -2691314320519116016, + -2932473655038329632, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_w_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_h_b() { + let a = i16x16::new( + -2623, -5568, -5250, 8004, 12247, 20872, 32727, 17906, -11062, -13097, -29604, 32623, + -13541, 1792, -32288, 28892, + ); + let b = i8x32::new( + -8, 40, -69, 8, -104, 45, -81, 60, -52, -13, -3, -37, 77, 20, 76, -82, -102, 112, 71, + -10, -62, 75, 112, 96, 49, -67, 98, 67, -118, -51, -77, 67, + ); + let c = i8x32::new( + -40, 23, 23, 75, -24, -86, -52, -98, 74, -106, -3, -8, -40, 43, 31, -7, -120, -68, + -122, -119, 103, 59, 49, -2, -77, 113, 119, 80, 101, -6, 116, 33, + ); + let r = i64x4::new( + 3438767965960271617, + 5703373312375201999, + 
-7719324334317042534, + 5618332147678887006, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_h_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_q_du() { + let a = u64x4::new( + 11023906961007219829, + 13619495672295375563, + 7572980537071490433, + 10145709682911964133, + ); + let b = u64x4::new( + 1145103061481704635, + 2210139848484195129, + 8860436254952346498, + 12573896192036293152, + ); + let c = u64x4::new( + 17650249419725637273, + 9888846271395867734, + 14715851951823475494, + 14739680783109267384, + ); + let r = i64x4::new( + -6602489221663665608, + -3731588670586723767, + 4220731810419531981, + -1232639684640242354, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_q_du( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_d_wu() { + let a = u64x4::new( + 8055198384779363938, + 9925260815913558465, + 6835430604549063591, + 15441192025398831710, + ); + let b = u32x8::new( + 1867493599, 3245935582, 1629087126, 1061202312, 3389402698, 3034357496, 1394979327, + 2925040328, + ); + let c = u32x8::new( + 1765089209, 2899492783, 2529172711, 2742597877, 1149322351, 3557681406, 3462656435, + 2152082771, + ); + let r = i64x4::new( + -7095252889458714487, + -4401240554875374565, + -7715797191809385027, + 1824782095017799339, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_d_wu( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_w_hu() { + let a = u32x8::new( + 4068171594, 513777862, 1662628135, 150786756, 3404482708, 1100545508, 1296617840, + 2568385675, + ); + let b = u16x16::new( + 9976, 32227, 62018, 53049, 21882, 59596, 30529, 48620, 19006, 49187, 50174, 12259, + 3616, 50420, 60433, 40578, + ); + let c = u16x16::new( + 34105, 44006, 33269, 34929, 41783, 55207, 10361, 3583, 20219, 63815, 58487, 18415, + 9646, 27639, 14059, 7949, + ); + let r = 
i64x4::new( + -7378378399913155454, + 2006169455487925341, + -1116240736353519778, + -3766489066592466128, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_w_hu( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_h_bu() { + let a = u16x16::new( + 54677, 20231, 5485, 25733, 3289, 32970, 11379, 23649, 29852, 32207, 10148, 12942, + 13168, 40138, 12570, 48782, + ); + let b = u8x32::new( + 83, 24, 36, 206, 232, 251, 52, 50, 21, 26, 144, 30, 118, 81, 232, 118, 197, 143, 213, + 244, 155, 125, 186, 64, 225, 178, 192, 14, 230, 216, 201, 105, + ); + let c = u8x32::new( + 66, 75, 68, 238, 158, 103, 71, 149, 162, 2, 116, 125, 70, 2, 36, 29, 7, 16, 38, 243, + 166, 196, 122, 253, 77, 64, 67, 156, 8, 203, 49, 225, + ); + let r = i64x4::new( + 8282582185414224635, + 9007565081835870755, + -8416510656124192257, + -1943522820234774755, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_h_bu( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_q_d() { + let a = i64x4::new( + 9157238656205642393, + -8031082356106754985, + -4372970903210999763, + -8400782536501424126, + ); + let b = i64x4::new( + -2947828926389048030, + 286858961466620958, + -7198913950768528345, + -4558524846284502477, + ); + let c = i64x4::new( + -8966978539573787816, + 5965781064088812819, + 6785842876481166596, + 8957716835940181125, + ); + let r = i64x4::new( + 8877886904970852051, + -7938310550223636451, + -7469536578939176788, + 7832347329765892515, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_q_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_d_w() { + let a = i64x4::new( + 3501241531332783035, + 968696574349111989, + 1223338638204507697, + 5231578199334978816, + ); + let b = i32x8::new( + 1210545902, + 706290701, + -1971714524, + 2103465668, + -305785715, + -218897263, + 280223963, + 
-838568119, + ); + let c = i32x8::new( + -949605894, + -1724400178, + 172821226, + 2123929230, + -909785648, + 1230257751, + 620207705, + 1402502047, + ); + let r = i64x4::new( + 2283313720808638257, + 5436308790915787629, + 954038583726072184, + 4055484695888539223, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_d_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_w_h() { + let a = i32x8::new( + -598204125, + -531177195, + 1076911560, + 259752194, + -1069455958, + -916568789, + -1193369377, + 1159492541, + ); + let b = i16x16::new( + 10650, -5211, -12808, -28115, -27527, 6937, -16741, 16285, -6142, -7067, -10826, -6660, + -22889, -25629, -3527, -6119, + ); + let c = i16x16::new( + 16852, -6030, -13801, 9261, 24273, 26563, 11733, -28445, 25099, 14402, -23168, -31577, + 25012, 1004, -19731, -30323, + ); + let r = i64x4::new( + -3399682261363746659, + -873916884449488685, + -3033389236009017420, + 5776898425431129891, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_w_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_h_b() { + let a = i16x16::new( + -2829, -7831, 1134, 23799, 31864, -8205, -20884, 2782, -724, -8414, 10611, 31362, + 15971, -25563, 3175, -6328, + ); + let b = i8x32::new( + 112, 116, -120, 74, -42, 25, 1, 19, 51, 102, -40, -73, -28, 14, -45, -57, -17, -77, + -111, -98, -9, 114, -32, -69, 45, -122, -65, 56, -78, 21, 111, -19, + ); + let c = i8x32::new( + 59, 63, -124, -50, -52, 12, 38, 62, 77, -127, 76, -78, 64, -80, -5, 28, 110, -44, -100, + 45, -43, 62, 66, 112, -49, -120, 123, -18, 34, -119, -20, -74, + ); + let r = i64x4::new( + 7030406655824433535, + 334016295025592798, + 6652455533761006184, + -1385416929418315885, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_h_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_q_du() { + let a = 
u64x4::new( + 1898209592653721751, + 10926860906964806867, + 18361012878168580252, + 14644115162811948975, + ); + let b = u64x4::new( + 1945372576834807415, + 5117230234174825110, + 14390591298317442216, + 9089518245930555118, + ); + let c = u64x4::new( + 17504435078500289086, + 15243444480193333955, + 7810225885258468877, + 13257884975254190749, + ); + let r = i64x4::new( + 1757588433868711129, + -3291266200231780782, + 95766916559772818, + 2730111333315477935, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_q_du( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_d_wu() { + let a = u64x4::new( + 2715769757208659525, + 2216806074012029777, + 6525838187075271506, + 15876394068735907698, + ); + let b = u32x8::new( + 3928005420, 3020795031, 3881759315, 3226709793, 1296481505, 1362116053, 1131484424, + 3814393787, + ); + let c = u32x8::new( + 2745998525, 4219603367, 1735962907, 3082063756, 2410634838, 3360953922, 2094521244, + 1329875844, + ); + let r = i64x4::new( + -2984417432676422714, + -6285012695561959331, + -7342896596084770244, + 2502320151861337310, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_d_wu( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_w_hu() { + let a = u32x8::new( + 2005770472, 418747954, 1467912967, 68663314, 284343496, 1733214400, 2615496661, + 3890476135, + ); + let b = u16x16::new( + 58498, 2430, 4588, 20804, 7171, 26934, 39619, 36043, 59802, 43896, 1388, 64198, 49922, + 4660, 8826, 1254, + ); + let c = u16x16::new( + 17893, 61614, 2263, 35439, 2530, 16965, 34585, 18123, 54862, 61539, 38281, 59547, + 42561, 50393, 65080, 29977, + ); + let r = i64x4::new( + 4965072004097651852, + 3100410633753647765, + 5416148797706570800, + -1575823510936973847, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_w_hu( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable 
= "lasx")] +unsafe fn test_lasx_xvmaddwod_h_bu() { + let a = u16x16::new( + 36194, 9930, 14883, 39417, 2438, 15023, 58620, 33090, 16572, 36810, 21479, 35773, + 33259, 56285, 62068, 46564, + ); + let b = u8x32::new( + 34, 125, 103, 16, 211, 122, 70, 50, 215, 127, 193, 64, 67, 238, 249, 121, 154, 248, 31, + 26, 187, 25, 188, 191, 248, 214, 207, 40, 155, 190, 91, 127, + ); + let c = u8x32::new( + 67, 32, 89, 53, 76, 235, 37, 230, 178, 122, 2, 56, 126, 94, 210, 6, 69, 2, 54, 188, 23, + 253, 185, 113, 97, 190, 149, 34, 20, 7, 214, 32, + ); + let r = i64x4::new( + -4114695625116050174, + -8928319877028035060, + -2302345889489730900, + -4195956656687996737, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_h_bu( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_q_du_d() { + let a = i64x4::new( + 7904206285198314726, + -1225358394899025904, + 5806604712820367446, + -4659173034171397511, + ); + let b = u64x4::new( + 6100446525668817642, + 10688882673264876757, + 1423085255226033079, + 13938405669196411480, + ); + let c = i64x4::new( + -8389902415543029131, + -8632894406175228839, + 2642929561135509190, + -3267299416902109004, + ); + let r = i64x4::new( + 6198441987982339544, + -3999948362488217274, + 7993947555517161952, + -4455282630855544942, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_q_du_d( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_d_wu_w() { + let a = i64x4::new( + -3676091534899840180, + -2004073272115093645, + 5676581346203765904, + 8270698864684440208, + ); + let b = u32x8::new( + 397399052, 3551436848, 2738656943, 743389966, 3499899009, 2260562895, 1875038063, + 133906470, + ); + let c = i32x8::new( + 512699397, + -586471006, + -81269365, + -1769533728, + 2120410562, + -2111545843, + -1045820519, + -2113967596, + ); + let r = i64x4::new( + -3472345280571068536, + -2226642182825544840, + 
-5348939902888852654, + 6309745584493025511, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_d_wu_w( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_w_hu_h() { + let a = i32x8::new( + -1456024465, + 1292205813, + -1759432335, + -548381486, + 1089611198, + 478189353, + -1368461698, + -1240728243, + ); + let b = u16x16::new( + 65391, 50824, 24841, 10069, 30833, 20379, 53070, 4097, 15307, 38738, 30453, 47989, + 55589, 23759, 34121, 44875, + ); + let c = i16x16::new( + -28613, 8390, 29884, 18408, -17696, -3658, 16755, 18613, 24281, -18, -26803, 16674, + -7826, -21398, -12825, 21830, + ); + let r = i64x4::new( + 8738343996720489220, + 1463752189638585937, + -1451881076969873711, + -7208372751461990044, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_w_hu_h( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwev_h_bu_b() { + let a = i16x16::new( + -2899, -28885, 21233, 25414, 18986, 27436, 5272, 11999, -21932, -7709, -1809, -22022, + 19152, 6809, 3926, 23920, + ); + let b = u8x32::new( + 60, 166, 243, 60, 101, 145, 58, 139, 11, 119, 37, 242, 205, 208, 21, 14, 69, 216, 114, + 226, 255, 0, 96, 241, 247, 89, 59, 46, 160, 208, 252, 246, + ); + let c = i8x32::new( + -37, 73, -80, 20, 87, 34, -43, -125, -37, -126, -19, -52, -10, 38, -55, -26, 4, 79, + -59, 3, -48, -97, 73, -126, -122, 84, -108, 90, -73, 123, 53, 51, + ); + let r = i64x4::new( + 6451535402254461953, + 3052328487586973843, + -4225844162003621016, + -7954234670014147302, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwev_h_bu_b( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_q_du_d() { + let a = i64x4::new( + 5040785179692297413, + -5698968703706500445, + -731068043920228861, + 3965235820245190976, + ); + let b = u64x4::new( + 10854493275645220911, + 16138982903185851834, + 
5339741244155318123, + 14666659343881516356, + ); + let c = i64x4::new( + 3608705967944035653, + 2602681461334264776, + 2583771862194956886, + -8807004962159335926, + ); + let r = i64x4::new( + 2112387857800094741, + -3421893061573111304, + -7025246865660777813, + -3037048219893810668, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_q_du_d( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_d_wu_w() { + let a = i64x4::new( + -6548782426860122444, + -5512378810555054389, + -8313251399158871596, + -2631108805874731030, + ); + let b = u32x8::new( + 3411181446, 4063156506, 4162056821, 1798829201, 223212533, 2591023005, 958942780, + 723906610, + ); + let c = i32x8::new( + -1601726534, + -337872632, + 396528058, + 691753867, + 2049925652, + -947032016, + -1272465465, + -802105137, + ); + let r = i64x4::new( + -7921611809770266236, + -4268031754690784122, + 7679710934623151940, + -3211758016463986600, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_d_wu_w( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_w_hu_h() { + let a = i32x8::new( + 29411709, -487241679, -445814375, -898026796, 1702472835, 1332407325, 428234819, + 36330620, + ); + let b = u16x16::new( + 6115, 7084, 54578, 41741, 10808, 9353, 62741, 13372, 25833, 45511, 2751, 162, 49362, + 49913, 10572, 63054, + ); + let c = i16x16::new( + -4084, 19702, -31174, 24313, 27489, -17948, -4193, -6492, -20772, 11511, 15075, -18053, + -30409, 25187, -9190, -5069, + ); + let r = i64x4::new( + 2266055901231345861, + -4229846225082649443, + 5710084886827853700, + -1216721738864979826, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_w_hu_h( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmaddwod_h_bu_b() { + let a = i16x16::new( + -31362, 19310, 4398, 21644, -18947, -19503, 21298, 6464, -22249, 24001, 
29448, 11657, + -25193, -16348, 5631, 18801, + ); + let b = u8x32::new( + 255, 169, 91, 69, 97, 249, 150, 91, 30, 132, 219, 186, 87, 159, 227, 164, 250, 45, 9, + 167, 101, 32, 191, 101, 124, 84, 2, 10, 146, 179, 65, 134, + ); + let c = i8x32::new( + -69, 4, -26, 80, -124, 33, 78, -58, -13, -100, 88, -23, 70, 18, 48, -30, -81, 4, 29, + -53, 118, 123, 7, 51, 27, 62, -41, -75, 114, 101, -44, 93, + ); + let r = i64x4::new( + 4606673651486328866, + 434701133187613293, + 4731174792733829579, + 8799854033754305007, + ); + + assert_eq!( + r, + transmute(lasx_xvmaddwod_h_bu_b( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotr_b() { + let a = i8x32::new( + -76, -66, -50, 116, 83, -40, -66, 16, 118, -125, 54, 31, 77, -105, -66, 96, 81, -86, + -10, 31, -90, 37, 33, -20, 68, -9, -69, -76, -120, 95, 49, -94, + ); + let b = i8x32::new( + 91, -91, -119, -120, 66, -54, 8, -3, -118, -6, -52, -20, 13, 106, -107, -104, -59, -50, + 31, 106, -25, -35, 115, 62, -31, 120, 59, -89, 7, 35, -100, -87, + ); + let r = i64x4::new( + -9169831505165814378, + 6986742644414341277, + -5538256227715405174, + 5842271601646106402, + ); + + assert_eq!(r, transmute(lasx_xvrotr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotr_h() { + let a = i16x16::new( + -391, -26680, -19180, 8374, -10657, 16157, 18976, -9288, -10450, 9732, 26117, 31925, + 20483, -14847, -1605, 8796, + ); + let b = i16x16::new( + -24978, -7031, 20444, 9930, -18507, -2797, 10351, -20863, 2342, -7299, 397, -8738, + -6411, 11173, 25086, -9162, + ); + let r = i64x4::new( + 3280961714933987815, + 7916365250426044082, + -948799184442377380, + 8109266518466894464, + ); + + assert_eq!(r, transmute(lasx_xvrotr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotr_w() { + let a = i32x8::new( + 807443288, 659305929, 215715568, 461653638, 1156975794, -140043152, 572930522, + -305210344, + 
); + let b = i32x8::new( + 425095120, 2007398487, 1779876326, 867842254, -355714240, 1021676577, 2008058921, + -149962463, + ); + let r = i64x4::new( + -7463711091125112800, + 1880373866945277499, + 8922631659077373106, + 8567937817891640092, + ); + + assert_eq!(r, transmute(lasx_xvrotr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotr_d() { + let a = i64x4::new( + 1798291896688439472, + -8678294225084614636, + -3360425612013625394, + 6141382649032010789, + ); + let b = i64x4::new( + -4687895735595482806, + 7366925603772764024, + 113747709542135138, + -4369447114926223278, + ); + let r = i64x4::new( + 3172290282099188988, + -8034032776515152761, + 8319107233083774893, + 4254025119287920211, + ); + + assert_eq!(r, transmute(lasx_xvrotr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvadd_q() { + let a = i64x4::new( + -2609166907920397576, + 4277631384595295751, + -6908798269010317006, + 5982715628809494048, + ); + let b = i64x4::new( + -8390221664220170851, + 5630840603034329774, + -482468290988389688, + -4276184844647827597, + ); + let r = i64x4::new( + 7447355501568983189, + -8538272086079926090, + -7391266559998706694, + 1706530784161666452, + ); + + assert_eq!(r, transmute(lasx_xvadd_q(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsub_q() { + let a = i64x4::new( + 5635628360514667431, + 8563800808356171400, + -8195308523117763518, + 3653510787018366900, + ); + let b = i64x4::new( + 2471979813421155001, + 4980523206404219656, + 5227116936323454967, + 2410762289023585517, + ); + let r = i64x4::new( + 3163648547093512430, + 3583277601951951744, + 5024318614268333131, + 1242748497994781383, + ); + + assert_eq!(r, transmute(lasx_xvsub_q(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwev_q_du_d() { + let a = u64x4::new( + 11512774700636858764, + 3877920437650491653, + 5348767768447622976, + 
10610828160678410847, + ); + let b = i64x4::new( + 4538357695196601706, + 962354258063947537, + 461386020283085419, + -3214659782190620189, + ); + let r = i64x4::new(-2395611677876091146, 0, 5810153788730708395, 0); + + assert_eq!( + r, + transmute(lasx_xvaddwev_q_du_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvaddwod_q_du_d() { + let a = u64x4::new( + 2811249209376266688, + 65866753992142741, + 10134352057937866409, + 17378632901315704999, + ); + let b = i64x4::new( + 771717384571916075, + -6276542900978063061, + -782791668238120654, + -4337892955900394734, + ); + let r = i64x4::new(-6210676146985920320, -1, -5406004128294241351, 0); + + assert_eq!( + r, + transmute(lasx_xvaddwod_q_du_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwev_q_du_d() { + let a = u64x4::new( + 1631079386587456680, + 4565265601922112419, + 5351621054404189773, + 12518175210587903555, + ); + let b = i64x4::new( + 7907402685955854803, + -6034016436240875818, + -1692667855436677787, + 857071248435905820, + ); + let r = i64x4::new( + -9215090926608146824, + 699180379527824028, + 8322461491295210849, + -491063186927300825, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwev_q_du_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmulwod_q_du_d() { + let a = u64x4::new( + 16516519389168658270, + 11550123424719201061, + 18023411584703351911, + 5733925898426927381, + ); + let b = i64x4::new( + -1630542181497141953, + -8299748862195853267, + -3768558747736596235, + -8223031783298003100, + ); + let r = i64x4::new( + 8208983644526863745, + -5196750351687252927, + 2416926856050984756, + -2556020440107861891, + ); + + assert_eq!( + r, + transmute(lasx_xvmulwod_q_du_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmskgez_b() { + let a = i8x32::new( + 3, -116, -122, 1, -82, 30, 73, 60, 22, 102, -51, -22, 59, 125, -61, 
-78, 89, 25, 31, + 107, 111, 27, -119, -90, 119, 49, -86, -82, 1, -113, -8, -40, + ); + let r = i64x4::new(13289, 0, 4927, 0); + + assert_eq!(r, transmute(lasx_xvmskgez_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvmsknz_b() { + let a = i8x32::new( + 52, -33, -37, -47, -126, -26, -42, -37, -96, 90, -32, 25, 62, -95, 114, 53, -88, -66, + -49, -31, -126, -89, -92, 127, -113, -43, 41, 40, -79, 108, -63, -57, + ); + let r = i64x4::new(65535, 0, 65535, 0); + + assert_eq!(r, transmute(lasx_xvmsknz_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_h_b() { + let a = i8x32::new( + 86, 82, -64, 55, 99, -98, 18, 55, 53, -101, -88, -23, 101, -32, -7, -69, -92, 77, 92, + -110, 99, 46, 88, -36, 84, 42, 42, -1, -24, -95, -48, -7, + ); + let r = i64x4::new( + -6192823156408267, + -19140324188225435, + -281294585331628, + -1689051729887256, + ); + + assert_eq!(r, transmute(lasx_xvexth_h_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_w_h() { + let a = i16x16::new( + -22892, -26139, 11053, 11772, -13928, 20772, 16551, -20590, -10608, 9266, 29842, + -10111, -3519, 29175, 10737, -27281, + ); + let r = i64x4::new( + 89219355625880, + -88433376608089, + 125309965824577, + -117171002791439, + ); + + assert_eq!(r, transmute(lasx_xvexth_w_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_d_w() { + let a = i32x8::new( + -825627036, + -1996938691, + 78514216, + -1063299454, + 257564527, + -138481584, + -1487536177, + 1875317589, + ); + let r = i64x4::new(78514216, -1063299454, -1487536177, 1875317589); + + assert_eq!(r, transmute(lasx_xvexth_d_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_q_d() { + let a = i64x4::new( + 5979507577341197552, + 5196480214883180720, + -8000060569264941491, + 7776492634988202392, + ); + let r = i64x4::new(5196480214883180720, 0, 7776492634988202392, 0); + + assert_eq!(r, 
transmute(lasx_xvexth_q_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_hu_bu() { + let a = u8x32::new( + 47, 59, 186, 7, 161, 218, 234, 101, 186, 179, 42, 250, 253, 76, 169, 142, 127, 7, 4, + 56, 123, 5, 152, 53, 224, 98, 177, 197, 49, 13, 16, 40, + ); + let r = i64x4::new( + 70368924578021562, + 39970172547367165, + 55451330627633376, + 11259067788754993, + ); + + assert_eq!(r, transmute(lasx_xvexth_hu_bu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_wu_hu() { + let a = u16x16::new( + 11201, 3109, 64518, 58951, 32582, 32792, 2605, 46256, 28808, 30095, 54960, 26138, + 39952, 56608, 20537, 49215, + ); + let r = i64x4::new( + 140840567603014, + 198668007246381, + 243129508731920, + 211376815493177, + ); + + assert_eq!(r, transmute(lasx_xvexth_wu_hu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_du_wu() { + let a = u32x8::new( + 1580507769, 1550554068, 3486710391, 717721410, 434913819, 742461632, 1954296323, + 1406265475, + ); + let r = i64x4::new(3486710391, 717721410, 1954296323, 1406265475); + + assert_eq!(r, transmute(lasx_xvexth_du_wu(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvexth_qu_du() { + let a = u64x4::new( + 15671254659731561180, + 6305760528044738869, + 3619266805555730982, + 3857202168052068182, + ); + let r = i64x4::new(6305760528044738869, 0, 3857202168052068182, 0); + + assert_eq!(r, transmute(lasx_xvexth_qu_du(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotri_b() { + let a = i8x32::new( + 37, 16, -44, -97, 31, 23, 58, -46, 3, -22, 31, -79, 59, -102, -113, 89, -12, 97, -16, + -83, -69, -115, 127, -110, -107, -36, -16, -51, 26, 48, -58, -4, + ); + let r = i64x4::new( + 3288597436994224466, + -7640170181100982736, + 3024123976131483215, + -3500418816657076903, + ); + + assert_eq!(r, transmute(lasx_xvrotri_b::<4>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvrotri_h() { + let a = i16x16::new( + 8999, -7250, -4236, 2845, 21265, -24726, -14769, -11915, -12193, 28179, 16866, -23983, + -11259, 31467, -30522, 8490, + ); + let r = i64x4::new( + 1601837713137157710, + -6707112604456344030, + 4945941697313284287, + 4779464405959485451, + ); + + assert_eq!(r, transmute(lasx_xvrotri_h::<15>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotri_w() { + let a = i32x8::new( + 1273906952, + 1323123989, + -1657206810, + -758313569, + 30529353, + -1084318195, + 470709136, + -1831448763, + ); + let r = i64x4::new( + -6725603050124640824, + -5477967444476451040, + -4487859208269579718, + -1679179889808014898, + ); + + assert_eq!(r, transmute(lasx_xvrotri_w::<11>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrotri_d() { + let a = i64x4::new( + -6269890993217399490, + 4900582678319344510, + 4744796290155065976, + 7326839228001128846, + ); + let r = i64x4::new( + 1530846727385147611, + 3134017167653815720, + -5586642937907364280, + -7958311692822812825, + ); + + assert_eq!(r, transmute(lasx_xvrotri_d::<16>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvextl_q_d() { + let a = i64x4::new( + -4167783494125842132, + -8818287186975390348, + 7476993593286219399, + 362651956781912161, + ); + let r = i64x4::new(-4167783494125842132, -1, 7476993593286219399, 0); + + assert_eq!(r, transmute(lasx_xvextl_q_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlni_b_h() { + let a = i8x32::new( + -122, -57, 103, 68, 81, 117, 10, -11, 85, 78, 51, -68, 17, 5, 57, 15, 82, -13, -58, 32, + -126, -109, -28, -108, -90, -102, -13, -26, 80, 87, 44, 12, + ); + let b = i8x32::new( + 107, 49, -98, -36, -98, 81, 126, -15, 96, 112, 83, 75, 70, 12, -92, -96, 119, -26, -75, + 9, -68, 107, 80, 126, -58, 38, -112, 85, 36, -27, 17, -109, + ); + let r = i64x4::new( + 775944073576565014, + -913733859716807048, + 3554001380194360167, + 
-4434515480828965835, + ); + + assert_eq!( + r, + transmute(lasx_xvsrlni_b_h::<4>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlni_h_w() { + let a = i16x16::new( + 7707, -22772, -29741, -9919, -14059, 17567, -31900, -30801, -21839, 26160, 23241, + -17751, 11400, 21178, -10087, -1621, + ); + let b = i16x16::new( + 26329, -6694, -20485, 30132, 26844, -6674, 8539, 29251, -25304, -9125, -8199, 29075, + 25395, -30076, -29212, -25696, + ); + let r = i64x4::new( + 8233677356103165402, + -8669635304329468148, + -7232628700111184805, + -456179975298914768, + ); + + assert_eq!( + r, + transmute(lasx_xvsrlni_h_w::<16>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlni_w_d() { + let a = i32x8::new( + -406185034, + -895467686, + -717037773, + -469050531, + -1539233593, + -1778247886, + -1546187185, + -2026338244, + ); + let b = i32x8::new( + -446056064, + -1691954961, + -981213165, + -458936270, + -1860231155, + 2056121344, + 1905674092, + 45485615, + ); + let r = i64x4::new( + 2975767411517832185, + 195580534167951033, + -5943753303447109596, + -3593292886058873687, + ); + + assert_eq!( + r, + transmute(lasx_xvsrlni_w_d::<26>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlni_d_q() { + let a = i64x4::new( + 7597626924148193039, + 8987414085353164021, + -3181901883582161412, + -5484978136186304133, + ); + let b = i64x4::new( + 3950632511964740415, + 1415609115522181708, + 3151552885247761103, + -4372710870967542224, + ); + let r = i64x4::new(5149955, 32696021, 51201034, 47154629); + + assert_eq!( + r, + transmute(lasx_xvsrlni_d_q::<102>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrni_b_h() { + let a = i8x32::new( + 34, -7, -78, 100, -21, -1, 17, 9, -61, -37, -34, -101, 35, -116, 122, -18, -81, -45, + 109, -42, 100, -92, -112, -23, -31, 5, -113, 35, 49, 53, 114, -92, + ); + let b = 
i8x32::new( + 112, 121, 80, 76, -7, 100, 61, 66, 108, 0, 80, -24, -2, 119, -19, 70, -14, -70, -100, + -17, -108, -13, -85, 119, -8, -115, -56, -35, 14, -83, -84, 17, + ); + let r = i64x4::new( + 5150121261709741177, + -1257457727085451783, + 1345976567149686971, + -6614340865598630188, + ); + + assert_eq!( + r, + transmute(lasx_xvsrlrni_b_h::<8>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrni_h_w() { + let a = i16x16::new( + 27342, -1239, 27928, 29682, 26896, -14508, -15889, 28618, 8114, -5723, 6531, 16489, + 9888, 9809, 24468, -17705, + ); + let b = i16x16::new( + -4757, 26542, -29532, 16718, -14266, 32474, -3741, 20715, -3284, 22232, -12159, 12153, + 9095, 12312, -9885, 15691, + ); + let r = i64x4::new( + 6884832036274927467, + 6201354748313553750, + 6830765589305542553, + -4972667551599548162, + ); + + assert_eq!( + r, + transmute(lasx_xvsrlrni_h_w::<5>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrni_w_d() { + let a = i32x8::new( + 289756682, + 172661920, + 1612205654, + 1151400165, + -170063304, + -1551308632, + 700728065, + -2116148576, + ); + let b = i32x8::new( + -2050725054, + 1576856049, + 1261747784, + 550730851, + 956136959, + -2117291501, + 333722873, + 1623097423, + ); + let r = i64x4::new( + 1154968246271901, + 2414660678666580, + 3403881842227606, + 4569312628338973, + ); + + assert_eq!( + r, + transmute(lasx_xvsrlrni_w_d::<43>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrlrni_d_q() { + let a = i64x4::new( + 3267303445176803893, + -4716941928717011909, + -2526932137083513614, + 895449181781228437, + ); + let b = i64x4::new( + 2365189083440669290, + -2671456009299896653, + -5051789062015102943, + 8552962343526201846, + ); + let r = i64x4::new(3, 3, 2, 0); + + assert_eq!( + r, + transmute(lasx_xvsrlrni_d_q::<126>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvssrlni_b_h() { + let a = i8x32::new( + -107, 64, -59, -36, -40, 105, -55, -99, -41, 36, -103, -12, -28, -101, 45, 100, 73, + -21, -30, -52, 105, 47, 41, -81, -123, -14, -118, 97, -35, 59, 106, 86, + ); + let b = i8x32::new( + -73, 37, -91, -37, 7, -55, -86, -122, 88, 17, 59, 126, -32, -53, 61, -110, 23, 50, + -108, -47, 85, 64, -55, -30, 95, 76, -30, -4, -20, 62, 101, 45, + ); + let r = i64x4::new( + 9187201950435737471, + 9187201950435737471, + 9187201950435737471, + 9187201950435737471, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlni_b_h::<4>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_h_w() { + let a = i16x16::new( + 5930, 18178, 9007, -17010, -26714, -2479, 7566, 5590, 16536, 7100, -23266, -11745, + -13529, 4421, -4886, -13565, + ); + let b = i16x16::new( + -24390, 15351, 27329, 19807, 29414, -20147, 32425, 16919, -13702, 24649, 12504, 19625, + -21621, -18266, -9493, 32188, + ); + let r = i64x4::new(4294967296, 4295032832, 4294967296, 281474976776192); + + assert_eq!( + r, + transmute(lasx_xvssrlni_h_w::<31>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_w_d() { + let a = i32x8::new( + 1916052008, + -1597654810, + -1773664899, + -1047601895, + 1186726373, + -322280569, + -1340612407, + -1064828410, + ); + let b = i32x8::new( + 962275142, + 367045968, + -1148735443, + -1235460518, + 1290051946, + -1409071527, + -1206112029, + -438247212, + ); + let r = i64x4::new( + 9223372034707292159, + 9223372034707292159, + 9223372034707292159, + 9223372034707292159, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlni_w_d::<14>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_d_q() { + let a = i64x4::new( + 8566260488262197161, + 7230026431777616732, + 5171247138929999763, + 7672209083386018537, + ); + let b = i64x4::new( + 7413144581871225401, + 1963917804351928008, + 4461413294595322647, + 
-319568179542390733, + ); + let r = i64x4::new( + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlni_d_q::<35>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_bu_h() { + let a = u8x32::new( + 63, 110, 160, 217, 255, 151, 31, 161, 90, 119, 205, 201, 53, 121, 107, 243, 140, 191, + 5, 109, 173, 46, 21, 136, 126, 162, 107, 116, 221, 46, 104, 127, + ); + let b = i8x32::new( + -19, -66, 94, -50, 114, -125, 71, -72, 91, -112, -36, -97, 0, -113, 63, 124, 21, 67, + -17, 63, -30, -100, -64, 42, 84, 106, 81, 119, 26, 105, -15, 93, + ); + let r = i64x4::new( + 1085669953590270231, + 2165977494045465357, + 796308158196942600, + 1082286764800150807, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlni_bu_h::<11>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_hu_w() { + let a = u16x16::new( + 6179, 35983, 31969, 1127, 39823, 7636, 13877, 49933, 49881, 18256, 23272, 43743, 14779, + 42488, 11284, 24455, + ); + let b = i16x16::new( + -2976, 3715, -23929, -18386, 13544, -26884, -14757, 9675, -17650, 8814, 4366, 2063, + 1167, -30247, -25786, 9281, + ); + let r = i64x4::new(4295032832, 281474976710657, 4294967296, 4295032832); + + assert_eq!( + r, + transmute(lasx_xvssrlni_hu_w::<31>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_wu_d() { + let a = u32x8::new( + 1435242449, 2536238660, 3898848008, 4040623161, 743412748, 1784708443, 2900988959, + 1523459155, + ); + let b = i32x8::new( + -1925581805, + -241685045, + 745827979, + -811389509, + 834544392, + 1909578565, + 2098160602, + -1160686393, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!( + r, + transmute(lasx_xvssrlni_wu_d::<24>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlni_du_q() { + let a = u64x4::new( + 
4906311251686769180, + 5252105969529596252, + 3036147848110573085, + 6245591556930524613, + ); + let b = i64x4::new( + -1139822228972687264, + -3655945315912724740, + 6046255801009758548, + -8615916243772089902, + ); + let r = i64x4::new(420379, 149273, 279408, 177510); + + assert_eq!( + r, + transmute(lasx_xvssrlni_du_q::<109>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_b_h() { + let a = i8x32::new( + -47, 51, -118, -97, 65, -7, -102, 38, -97, -64, 87, -87, -10, 84, -105, -80, -8, 81, + -112, -40, -15, 20, -72, -108, -23, -18, 93, -125, -55, 33, 12, -21, + ); + let b = i8x32::new( + 92, 106, -122, -65, -16, 86, 50, -59, 59, -29, -92, -41, 101, 10, 35, 106, -53, 112, + 79, 78, -52, 18, 62, 29, -78, -65, -73, 122, -105, -105, -27, -72, + ); + let r = i64x4::new( + 9157365602904407935, + 9187201949596876648, + 9187201949272276863, + 9170594926804238207, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_b_h::<7>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_h_w() { + let a = i16x16::new( + 27571, 10886, 30311, -21575, -21376, -15868, 15443, -27608, -9760, 16249, 24860, -3987, + 25742, 25311, 2125, -1676, + ); + let b = i16x16::new( + 20889, 11322, -17186, 17589, 10767, 165, 25424, -3527, -16029, -18830, -3174, -27403, + 20745, 19828, 7102, 10767, + ); + let r = i64x4::new( + 9223113262927675391, + 9223231297218904063, + 9223231297218904063, + 9223231297218904063, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_h_w::<11>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_w_d() { + let a = i32x8::new( + 13685129, + -1250749430, + -1392632470, + -496387445, + -859105657, + -188800497, + 1260867999, + -2071975844, + ); + let b = i32x8::new( + -2147085485, + -1138150986, + 1740486083, + 129550606, + 761255804, + 107768592, + -897233831, + 2135540054, + ); + let r = i64x4::new( + 9223372034707292159, + 
9223372034707292159, + 9223372034707292159, + 9223372034707292159, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_w_d::<27>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_d_q() { + let a = i64x4::new( + 2126435331828238132, + 2988359539712387032, + 2606687986635590409, + -5337426820831497192, + ); + let b = i64x4::new( + 5599657380360976171, + 1936278255544613151, + 4350470739273890826, + 4020807834764701096, + ); + let r = i64x4::new(1803299650, 2783126700, 3744669105, 12209003095); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_d_q::<94>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_bu_h() { + let a = u8x32::new( + 10, 53, 247, 169, 200, 197, 15, 35, 40, 63, 25, 238, 115, 150, 127, 27, 72, 180, 151, + 194, 68, 16, 94, 145, 159, 31, 157, 147, 248, 155, 228, 94, + ); + let b = i8x32::new( + 28, 76, -8, 123, -45, -21, 72, -114, -80, -30, 52, -17, -86, 92, 41, -102, 69, 7, 126, + 112, 93, 90, 52, 3, 85, -11, 40, 64, -22, -54, 38, 100, + ); + let r = i64x4::new(-1, -1, -3422552204, -1); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_bu_h::<4>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_hu_w() { + let a = u16x16::new( + 46345, 65470, 38947, 23932, 57842, 4833, 48042, 40409, 15235, 53592, 48941, 4323, 7891, + 47087, 8916, 53135, + ); + let b = i16x16::new( + -10645, 13954, 25607, -15109, -23253, 24216, -29088, 13185, 29191, -11398, -4777, + -18744, -9822, -25345, 19767, -4882, + ); + let r = i64x4::new( + 3711633057434515075, + -7072319501391495233, + -1373988209909181574, + -3490368948780347048, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_hu_w::<16>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_wu_d() { + let a = u32x8::new( + 2556609468, 306738319, 398886007, 1398704761, 4256553589, 1589981150, 4133102348, + 1371421151, + ); + let b = 
i32x8::new( + 1653381194, + 1981734587, + -1912314738, + 1375487329, + 900885316, + -1157483971, + 1097724788, + -1431477856, + ); + let r = i64x4::new( + 22535693409672, + 22917945492626, + 46913927786177, + 22471268898737, + ); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_wu_d::<50>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrlrni_du_q() { + let a = u64x4::new( + 10427296977042877275, + 11482184389991123309, + 17526981944466620659, + 4352829566336418219, + ); + let b = i64x4::new( + -2649024960844804464, + -4562273421517696438, + -4420539680558072379, + 3588904051642804143, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!( + r, + transmute(lasx_xvssrlrni_du_q::<53>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrani_b_h() { + let a = i8x32::new( + -75, 121, -21, -15, 41, -7, 35, 38, -68, -73, -76, -71, 96, 43, -94, 56, -117, -109, + -28, -15, -125, 90, -42, -48, -12, -96, -55, 4, 32, 81, 64, -29, + ); + let b = i8x32::new( + -57, -25, -35, -108, 14, 83, 114, -49, -48, 1, -109, 103, 36, -56, 111, 36, 126, 67, + 32, 11, -52, 28, -69, -5, -2, 118, -85, -104, -45, 106, 32, -56, + ); + let r = i64x4::new( + 2650481638178526439, + 4047532886406590841, + -4005221281806152893, + -2066865665249447533, + ); + + assert_eq!( + r, + transmute(lasx_xvsrani_b_h::<8>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrani_h_w() { + let a = i16x16::new( + -18891, 1637, 13894, -632, 7479, -28444, -346, -630, -10322, -16816, 24786, -20705, + 25886, -25922, 2142, 28477, + ); + let b = i16x16::new( + 21255, -8544, -16076, 8180, -16685, -4813, 15309, -18986, 11259, -27708, -15696, 2064, + -27273, -24407, -22250, 31561, + ); + let r = i64x4::new( + 4309310235152241415, + -97358218970876363, + -6262653890212123653, + 603030581262079918, + ); + + assert_eq!( + r, + transmute(lasx_xvsrani_h_w::<0>(transmute(a), transmute(b))) + ); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrani_w_d() { + let a = i32x8::new( + 495374215, + -1163373413, + 976054174, + 1739213032, + -1526300426, + -1390196250, + 1721157436, + 191851664, + ); + let b = i32x8::new( + -250032972, + -1792143742, + 873982753, + 1073657849, + -422789767, + -119562076, + 282475947, + -868496874, + ); + let r = i64x4::new( + -5770703783532497, + 8837345064960477617, + -4342418500326127026, + -5262798083402195350, + ); + + assert_eq!( + r, + transmute(lasx_xvsrani_w_d::<28>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrani_d_q() { + let a = i64x4::new( + -5439693678259807595, + -4580626123248901724, + 2865305240006228264, + -8764287857747577448, + ); + let b = i64x4::new( + -5465329153910229449, + -6398397336342204188, + -6140402929126091639, + -227294431853722285, + ); + let r = i64x4::new( + -1599599334085551047, + -1145156530812225431, + -56823607963430572, + -2191071964436894362, + ); + + assert_eq!( + r, + transmute(lasx_xvsrani_d_q::<66>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarni_b_h() { + let a = i8x32::new( + -127, 9, 115, 3, -36, -14, 60, 5, -69, -24, 124, -51, 64, -85, 106, -22, -9, -70, 26, + -34, 108, 46, 86, 1, -82, -103, 79, 112, -121, 40, 36, 4, + ); + let b = i8x32::new( + 7, 107, -33, 86, -95, -124, 35, 9, -27, 101, -64, 109, 34, 39, 38, -48, 20, 118, -95, + 127, 77, -88, 31, 76, 81, 10, 73, -32, -72, 121, 97, 83, + ); + let r = i64x4::new( + 176445634160258736, + -6362222276348332136, + 3935026383906273889, + 4794087966981481135, + ); + + assert_eq!( + r, + transmute(lasx_xvsrarni_b_h::<4>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarni_h_w() { + let a = i16x16::new( + 72, 17107, 2659, -22852, 13209, -19338, 29569, 8828, -14716, 1062, 26914, 1211, 14641, + 462, -8884, 7159, + ); + let b = i16x16::new( + -17232, 10103, 3681, -11092, 29619, 422, -25692, 
26710, 4183, 27520, 31478, -21569, + -7123, 10033, 12272, -5070, + ); + let r = i64x4::new( + 3120663839319243742, + 4483961363433351552, + 1808363419292516360, + -292761337443576989, + ); + + assert_eq!( + r, + transmute(lasx_xvsrarni_h_w::<9>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarni_w_d() { + let a = i32x8::new( + 1755618482, 374523356, -792192312, 1238002187, -327197280, 1104823907, 1830966401, + 1692510686, + ); + let b = i32x8::new( + -918051703, + -2012887920, + 1331552048, + -1402691916, + 1043562559, + 2068236941, + -2026755109, + 267314745, + ); + let r = i64x4::new(-1, 4294967296, 1, 4294967297); + + assert_eq!( + r, + transmute(lasx_xvsrarni_w_d::<63>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvsrarni_d_q() { + let a = i64x4::new( + 567870545843316755, + -8340148388286757707, + 5574111920016803397, + 4080639718254229578, + ); + let b = i64x4::new( + 2101950651821444783, + -8893233216031885881, + -1626396509648873280, + -8228614332001484946, + ); + let r = i64x4::new(-32353394, -30341283, -29935525, 14845281); + + assert_eq!( + r, + transmute(lasx_xvsrarni_d_q::<102>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_b_h() { + let a = i8x32::new( + -9, 79, -25, 24, 113, 13, 74, -64, -92, 21, 94, -9, -20, -54, -92, 20, 108, 43, 104, + -53, 111, -89, -71, -19, 63, 98, -15, 42, 22, 34, -71, -122, + ); + let b = i8x32::new( + 89, 71, -128, -8, -83, 115, -7, -12, 45, -29, 1, 28, 32, 14, -93, 78, 52, -30, -13, 38, + -28, -8, -119, -64, 67, 107, -11, 52, 25, -112, 40, 98, + ); + let r = i64x4::new( + 9183261305727861887, + 9187548296613953407, + 9187483425433943936, + -9187484529219043201, + ); + + assert_eq!( + r, + transmute(lasx_xvssrani_b_h::<5>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_h_w() { + let a = i16x16::new( + 22159, -17585, 27907, 
-32059, 19510, -6875, -20701, -10302, -10451, 21878, 24873, + 29927, 15505, -6217, -18330, 22702, + ); + let b = i16x16::new( + -29049, -3742, -27686, -18440, 8738, 8686, 29608, -1629, -4626, 5557, -6248, 23821, + 29245, 14976, -6969, 13087, + ); + let r = i64x4::new( + -9223231301513871360, + -9223231297218904064, + 9223231297218904063, + 9223231301513871359, + ); + + assert_eq!( + r, + transmute(lasx_xvssrani_h_w::<0>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_w_d() { + let a = i32x8::new( + 1147593201, + 1099386066, + 1235877455, + 692249820, + -2135577276, + -886668236, + -2044672723, + 1727555657, + ); + let b = i32x8::new( + -1064236617, + 1620556139, + 1782308008, + -1034014776, + 1536995212, + 533284065, + -1618986886, + 1843302450, + ); + let r = i64x4::new( + -542123656805187, + 362937621548090, + 966419181272650, + 905739883141428, + ); + + assert_eq!( + r, + transmute(lasx_xvssrani_w_d::<45>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_d_q() { + let a = i64x4::new( + 1412208151721093534, + -7916875471977804537, + -1917411313405122179, + -2235840015390028939, + ); + let b = i64x4::new( + 1186137621302436836, + 759241008247506430, + -5558106622572300047, + -7286741001002884564, + ); + let r = i64x4::new( + 1482892594233410, + -15462647406206650, + -14231916017583759, + -4366875030058651, + ); + + assert_eq!( + r, + transmute(lasx_xvssrani_d_q::<73>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_bu_h() { + let a = u8x32::new( + 1, 205, 104, 217, 117, 189, 143, 23, 134, 233, 247, 251, 129, 173, 74, 226, 108, 34, + 242, 113, 228, 183, 247, 66, 58, 69, 232, 3, 194, 209, 216, 161, + ); + let b = i8x32::new( + 83, 63, -94, -103, -40, -9, 26, 104, 112, -91, 71, 32, 61, 29, 79, -128, 112, -4, -66, + -40, -90, -40, 49, 54, -61, 120, 6, -48, 10, -33, -13, 10, + ); + let r = i64x4::new(283674100629507, 
16777216, 30115102720, 17246979842); + + assert_eq!( + r, + transmute(lasx_xvssrani_bu_h::<12>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_hu_w() { + let a = u16x16::new( + 19110, 42732, 10660, 61644, 61010, 42962, 42748, 16931, 50634, 1738, 45781, 12001, + 56715, 59669, 23910, 35943, + ); + let b = i16x16::new( + 12924, 5630, 2751, 7961, 14757, 29792, -8632, 13429, -3048, -12501, -25328, 421, 20048, + -3741, -20350, 17918, + ); + let r = i64x4::new(-1, -281474976710656, -281471439994880, 4294967295); + + assert_eq!( + r, + transmute(lasx_xvssrani_hu_w::<9>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_wu_d() { + let a = u32x8::new( + 790410016, 1846446414, 17060282, 4137690011, 4225559886, 456167206, 2038191803, + 3549679132, + ); + let b = i32x8::new( + 1124175113, + 194327297, + 1714613, + 1768781089, + 1565600638, + -239088013, + -1330211045, + -142923536, + ); + let r = i64x4::new(7418804384752972, 1803170, 0, 445475); + + assert_eq!( + r, + transmute(lasx_xvssrani_wu_d::<42>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrani_du_q() { + let a = u64x4::new( + 4439635547532985516, + 2617773814700322397, + 11329239143202498931, + 5215941649340885573, + ); + let b = i64x4::new( + 4456285459677935774, + -4219123236529314050, + 2135308934637733797, + -2097442597384769114, + ); + let r = i64x4::new(0, 1162, 0, 2316); + + assert_eq!( + r, + transmute(lasx_xvssrani_du_q::<115>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_b_h() { + let a = i8x32::new( + 37, 100, -16, 92, 65, -5, 44, -80, 109, -99, -15, -22, -16, -48, -109, 81, -4, -31, + -48, -58, 103, -92, -51, -109, 32, -112, 36, 90, 79, 79, 66, -50, + ); + let b = i8x32::new( + -17, 23, 25, -46, 124, -97, -58, -51, -68, -108, -48, 69, -126, 115, 46, 13, 66, 54, + 114, 115, -18, -123, 98, 118, -7, -56, 
-122, -103, -94, -100, 112, 77, + ); + let r = i64x4::new( + 3854939995940880480, + 9187532907754651519, + 9187484529219108735, + -9187484524924075896, + ); + + assert_eq!( + r, + transmute(lasx_xvssrarni_b_h::<6>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_h_w() { + let a = i16x16::new( + -2373, -24512, -6581, 18622, -28242, 12319, -8850, -19323, 12925, -6513, -5054, 31054, + 6907, -16683, -29917, 16639, + ); + let b = i16x16::new( + 30767, -4399, -21574, -27342, 15257, -2000, -28741, 6286, -10858, -32114, -2565, 11901, + -815, -13930, 31355, 23314, + ); + let r = i64x4::new( + 3659161808928759, + -10695946033365040, + 13229207942856641, + 9288532501594099, + ); + + assert_eq!( + r, + transmute(lasx_xvssrarni_h_w::<25>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_w_d() { + let a = i32x8::new( + -409627486, + 1892097986, + -1750910325, + -1547433679, + -1884419017, + 1579223214, + 1151303281, + -1571586603, + ); + let b = i32x8::new( + 364285131, + 2006347587, + 155571363, + -1533032556, + -1176543806, + 163000547, + 557435884, + -1610070779, + ); + let r = i64x4::new(-12884901884, -12884901884, -12884901888, -12884901885); + + assert_eq!( + r, + transmute(lasx_xvssrarni_w_d::<61>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_d_q() { + let a = i64x4::new( + -3977765823996238174, + 7327308686384468121, + 6534356875603597306, + -6213176538981319905, + ); + let b = i64x4::new( + 3336126315622887836, + -1421822040970831870, + -3632342560101816908, + 6607031745644833811, + ); + let r = i64x4::new(-2, 13, 11, -11); + + assert_eq!( + r, + transmute(lasx_xvssrarni_d_q::<123>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_bu_h() { + let a = u8x32::new( + 193, 242, 89, 76, 29, 42, 190, 17, 62, 209, 26, 45, 231, 78, 123, 125, 177, 121, 30, + 205, 85, 184, 
30, 54, 64, 91, 228, 123, 242, 32, 245, 116, + ); + let b = i8x32::new( + 5, 94, 72, 83, 78, 23, 5, 51, 110, -86, 74, 70, -99, 111, -112, -94, -89, 1, -14, -17, + 116, -105, 29, 34, -52, 15, 45, 47, -121, -106, 28, 37, + ); + let r = i64x4::new( + 7901090775700760, + 2239427009405719296, + 648531557811748864, + 2091956210793185310, + ); + + assert_eq!( + r, + transmute(lasx_xvssrarni_bu_h::<10>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_hu_w() { + let a = u16x16::new( + 51760, 63593, 22275, 32531, 40741, 58073, 26835, 39742, 8352, 44544, 27074, 30619, + 37450, 62701, 34849, 52300, + ); + let b = i16x16::new( + 4460, -2173, 9587, -13951, -27036, 22540, -29433, 21420, 8161, -13247, -22431, -17918, + 14542, -22571, 29221, -25316, + ); + let r = i64x4::new(281479271677952, 131072, 0, 131072); + + assert_eq!( + r, + transmute(lasx_xvssrarni_hu_w::<30>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_wu_d() { + let a = u32x8::new( + 3607104991, 1691528601, 1646387994, 3297780207, 1308777898, 2787161654, 1384884119, + 2469722276, + ); + let b = i32x8::new( + 1057151305, + 1547571989, + -1438179575, + -674675006, + -1782337903, + -1886071573, + 1398821536, + -842047108, + ); + let r = i64x4::new(3, 3, 0, 0); + + assert_eq!( + r, + transmute(lasx_xvssrarni_wu_d::<61>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvssrarni_du_q() { + let a = u64x4::new( + 17745120891134780613, + 17495926160423737090, + 17172121380293899495, + 9650615204759187347, + ); + let b = i64x4::new( + -1697356653875036425, + 8295898722167744374, + 3345487212441260159, + -6164422872274135032, + ); + let r = i64x4::new(-1, 0, 0, 0); + + assert_eq!( + r, + transmute(lasx_xvssrarni_du_q::<15>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbnz_b() { + let a = u8x32::new( + 52, 144, 253, 233, 192, 255, 120, 
244, 63, 161, 189, 203, 12, 208, 233, 255, 43, 119, + 120, 82, 121, 194, 249, 47, 211, 41, 120, 204, 13, 67, 208, 223, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lasx_xbnz_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbnz_d() { + let a = u64x4::new( + 1072041358626911785, + 13770317343519767693, + 7609734988530058463, + 15151929908370022007, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lasx_xbnz_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbnz_h() { + let a = u16x16::new( + 19391, 20489, 16878, 56279, 52740, 3527, 27948, 60443, 25278, 61969, 6762, 35448, + 28924, 34327, 22427, 5444, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lasx_xbnz_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbnz_v() { + let a = u8x32::new( + 137, 127, 48, 118, 43, 194, 48, 37, 231, 38, 31, 50, 240, 208, 254, 90, 200, 158, 40, + 38, 192, 180, 105, 245, 102, 149, 53, 213, 112, 215, 100, 152, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lasx_xbnz_v(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbnz_w() { + let a = u32x8::new( + 1332660055, 2747714226, 143160005, 119041189, 2584280725, 894305940, 2774463674, + 2502507106, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lasx_xbnz_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbz_b() { + let a = u8x32::new( + 156, 147, 147, 177, 127, 216, 32, 152, 55, 208, 206, 60, 244, 31, 57, 39, 72, 181, 147, + 141, 238, 33, 32, 5, 231, 1, 227, 42, 133, 202, 103, 67, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lasx_xbz_b(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbz_d() { + let a = u64x4::new( + 6400818938894159638, + 10728379594538160633, + 1581126190179348917, + 18400090329472768228, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lasx_xbz_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbz_h() { + let a = 
u16x16::new( + 34066, 39412, 64746, 3863, 50032, 22525, 9079, 56473, 53585, 42778, 58380, 52817, + 62358, 53187, 65430, 56633, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lasx_xbz_h(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbz_v() { + let a = u8x32::new( + 163, 229, 46, 44, 39, 89, 56, 38, 233, 178, 116, 135, 122, 191, 3, 141, 240, 213, 178, + 12, 81, 195, 113, 34, 100, 51, 70, 4, 238, 90, 144, 128, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lasx_xbz_v(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xbz_w() { + let a = u32x8::new( + 1201964702, 3804322072, 2566580464, 1047038968, 3180983430, 3379242404, 4047354705, + 444599201, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lasx_xbz_w(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_caf_d() { + let a = u64x4::new( + 4606839356548580067, + 4597657891815152040, + 4603435215712027397, + 4604372277177725810, + ); + let b = u64x4::new( + 4603866787258734895, + 4605750987205548493, + 4594271025112584476, + 4604044410019184426, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_caf_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_caf_s() { + let a = u32x8::new( + 1027122768, 1048202064, 1061996851, 1056399152, 1053612728, 1059134546, 1058685361, + 1059303636, + ); + let b = u32x8::new( + 1052329028, 1041170924, 1053459178, 1051113546, 1055408428, 1052614588, 1059435003, + 1062279267, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_caf_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_ceq_d() { + let a = u64x4::new( + 4604351168364659876, + 4598833803415332886, + 4605119133668748091, + 4606763866461983079, + ); + let b = u64x4::new( + 4604789538755812401, + 4598766034813670762, + 4594451263359797256, + 4601380068795295764, + ); + let r = i64x4::new(0, 0, 0, 0); + 
+ assert_eq!(r, transmute(lasx_xvfcmp_ceq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_ceq_s() { + let a = u32x8::new( + 1064654513, 1047582960, 1060336644, 1065079996, 1052824856, 1061207347, 1063892428, + 1001614208, + ); + let b = u32x8::new( + 1044141476, 1021192768, 1060376772, 1050417278, 1061038362, 1056139396, 1057149355, + 1055333616, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_ceq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cle_d() { + let a = u64x4::new( + 4595367725174333184, + 4596790595174909884, + 4593132764781967144, + 4599038464418852978, + ); + let b = u64x4::new( + 4602705386887165787, + 4606260944252637140, + 4599015506541096164, + 4595819902199976812, + ); + let r = i64x4::new(-1, -1, -1, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cle_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cle_s() { + let a = u32x8::new( + 1062033024, 1059343465, 1055578206, 1041885056, 1044779744, 1062731853, 1043491496, + 1049977384, + ); + let b = u32x8::new( + 1056391070, 1056787090, 1064058770, 1062459426, 1064795941, 1064011655, 1031362688, + 1057735956, + ); + let r = i64x4::new(0, -1, -1, -4294967296); + + assert_eq!(r, transmute(lasx_xvfcmp_cle_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_clt_d() { + let a = u64x4::new( + 4604242319890507255, + 4600980435115810514, + 4605419716078684891, + 4599564622270556718, + ); + let b = u64x4::new( + 4589220872256482592, + 4602715102780925632, + 4604097858141367250, + 4605812683073652447, + ); + let r = i64x4::new(0, -1, 0, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_clt_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_clt_s() { + let a = u32x8::new( + 1051323696, 1049201802, 1005628672, 1056692360, 1044683352, 1052201626, 1058314596, + 
1020000992, + ); + let b = u32x8::new( + 1055411522, 1059584260, 1046257332, 1041146612, 1064440240, 1064500639, 1062809438, + 1064342005, + ); + let r = i64x4::new(-1, 4294967295, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_clt_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cne_d() { + let a = u64x4::new( + 4598267260722064680, + 4605603034614740670, + 4604843132364965720, + 4595126942010545664, + ); + let b = u64x4::new( + 4606134769367779594, + 4605453748913122312, + 4599415837069158138, + 4601771367817563314, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cne_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cne_s() { + let a = u32x8::new( + 1042659128, 1065350244, 1032310576, 1061728337, 1062313491, 1063903497, 1063781692, + 1057998506, + ); + let b = u32x8::new( + 1041065828, 1061625246, 1045204740, 1054328432, 1036315496, 1061417737, 1047548872, + 1049890404, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cne_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cor_d() { + let a = u64x4::new( + 4603862490470319449, + 4601565668275439290, + 4606067119428218406, + 4606327024345603527, + ); + let b = u64x4::new( + 4605708081396913008, + 4604379998889664770, + 4584756849579116944, + 4604755606278723296, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cor_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cor_s() { + let a = u32x8::new( + 1058610981, 1045033144, 1052398652, 1063724666, 1043910192, 1059183076, 1058489697, + 1040176728, + ); + let b = u32x8::new( + 1032397784, 1054938542, 1057767324, 1054806424, 1055680194, 1057342938, 1060622406, + 1055092632, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cor_s(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cueq_d() { + let a = u64x4::new( + 4603200339689238557, + 4602812037576416711, + 4606851174908484583, + 4606385521842539189, + ); + let b = u64x4::new( + 4603085364717668671, + 4606853743461984788, + 4585080339878261296, + 4606053791400332699, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cueq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cueq_s() { + let a = u32x8::new( + 1057425562, 1063555579, 1046256744, 1022920160, 1065220069, 1052327026, 1014579968, + 1048239780, + ); + let b = u32x8::new( + 1049557526, 1053332678, 1051191726, 1064421754, 1057629639, 1060344219, 1035702088, + 1050028150, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cueq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cule_d() { + let a = u64x4::new( + 4604971750650888499, + 4593662226049896016, + 4595869612440915848, + 4601748250340185114, + ); + let b = u64x4::new( + 4591931242171514960, + 4603997046544929558, + 4604974910786711097, + 4594297721205202168, + ); + let r = i64x4::new(0, -1, -1, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cule_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cule_s() { + let a = u32x8::new( + 1052434396, 1026804576, 1041964148, 1063157036, 1048709802, 1060293833, 1047340196, + 1024531168, + ); + let b = u32x8::new( + 1047645820, 1057293405, 1052020188, 1057942586, 1063407758, 1049107470, 1057298442, + 1048069496, + ); + let r = i64x4::new(-4294967296, 4294967295, 4294967295, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cule_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cult_d() { + let a = u64x4::new( + 4606775288794066380, + 4598808693757211694, + 4606790379429412870, + 4605939949509363873, + ); + let b = u64x4::new( + 
4603717006707963555, + 4603504390160152243, + 4603259926905419449, + 4601857582598168522, + ); + let r = i64x4::new(0, -1, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cult_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cult_s() { + let a = u32x8::new( + 1040152456, 1054570724, 1057645741, 1059637215, 1036822376, 1036413584, 1003370880, + 1061729841, + ); + let b = u32x8::new( + 1060169565, 1056061318, 1052047112, 1053313212, 1044605328, 1064898859, 1050643938, + 1064626494, + ); + let r = i64x4::new(-1, 0, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cult_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cun_d() { + let a = u64x4::new( + 4601926997709293092, + 4595132289995141556, + 4600980852994617218, + 4594388740429843072, + ); + let b = u64x4::new( + 4600518403789793172, + 4603476024215184625, + 4605134822967030979, + 4602608048300777812, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cun_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cune_d() { + let a = u64x4::new( + 4592724877670260624, + 4603613641675881288, + 4597286359527586476, + 4601708681880094032, + ); + let b = u64x4::new( + 4598966797677485150, + 4587297906823272784, + 4604035321505064646, + 4604260243109134356, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cune_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_cune_s() { + let a = u32x8::new( + 1064210263, 1059501406, 1055862424, 1054523594, 1059174050, 1050594182, 1052822848, + 1051372950, + ); + let b = u32x8::new( + 1032533328, 1051044268, 1051967492, 1051754540, 1059816024, 1063426731, 1052204618, + 1064439988, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_cune_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn 
test_lasx_xvfcmp_cun_s() { + let a = u32x8::new( + 1062241044, 1056379734, 1063223413, 1034390344, 1044998176, 1057590594, 1059237612, + 1057447940, + ); + let b = u32x8::new( + 1058046704, 1055331758, 1057614999, 1063039091, 1058229285, 1058774306, 1059987402, + 1033042696, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_cun_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_saf_d() { + let a = u64x4::new( + 4594182209901295828, + 4581464082173647264, + 4590099234403759840, + 4604004273369365130, + ); + let b = u64x4::new( + 4606819328552123016, + 4604091229052796023, + 4604586834115148931, + 4605037320947641934, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_saf_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_saf_s() { + let a = u32x8::new( + 1060812428, 1061245324, 1063578557, 1030594672, 1059247505, 1044611124, 1052152258, + 1054967010, + ); + let b = u32x8::new( + 1036948656, 1051225988, 1058720867, 1032456856, 1051436132, 1041087636, 1047267492, + 1051250362, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_saf_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_seq_d() { + let a = u64x4::new( + 4604294705496916441, + 4593686918662327792, + 4605517303678922516, + 4604494135015023007, + ); + let b = u64x4::new( + 4606394023713761400, + 4604455367892895376, + 4599018364404818718, + 4605980286735586821, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_seq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_seq_s() { + let a = u32x8::new( + 1044265248, 1058937550, 1056790200, 1052048406, 1059868687, 1051483336, 1046520332, + 1043191144, + ); + let b = u32x8::new( + 1063109529, 1055603330, 1062415892, 1040213636, 1058253673, 1058703239, 1061796632, + 1061413795, 
+ ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_seq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sle_d() { + let a = u64x4::new( + 4602408684767598022, + 4594250615798987092, + 4604963756353006013, + 4606163211162118467, + ); + let b = u64x4::new( + 4601947900990812282, + 4593344345788988968, + 4593039683552237328, + 4589469470804985856, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sle_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sle_s() { + let a = u32x8::new( + 1058066838, 1064865582, 1052694366, 1057408270, 1045092236, 1055900780, 1062509444, + 1031929176, + ); + let b = u32x8::new( + 1034008560, 1055354624, 1065161513, 1050271030, 1063181654, 1057764124, 1061600359, + 1025107040, + ); + let r = i64x4::new(0, 4294967295, -1, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sle_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_slt_d() { + let a = u64x4::new( + 4604369883332347358, + 4591650558117088944, + 4596580563429877336, + 4602996385956780830, + ); + let b = u64x4::new( + 4600043432599191356, + 4605816405801305324, + 4604195043424640949, + 4599985899346669220, + ); + let r = i64x4::new(0, -1, -1, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_slt_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_slt_s() { + let a = u32x8::new( + 1059565142, 1057830491, 1052849564, 1049794018, 1063910487, 1059818709, 1027439600, + 1057381646, + ); + let b = u32x8::new( + 1040414724, 1040288116, 1043374880, 1056311634, 1065024654, 1056424062, 1057720509, + 1063111390, + ); + let r = i64x4::new(0, -4294967296, 4294967295, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_slt_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sne_d() { + let a = u64x4::new( + 4593560649779963032, + 
4604654429289647502, + 4603296524089071766, + 4600835325257043198, + ); + let b = u64x4::new( + 4605487761864572918, + 4605408876521930103, + 4598422649694782656, + 4592189012823412008, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sne_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sne_s() { + let a = u32x8::new( + 1042871300, 1062745184, 1064937837, 1040277356, 1057066266, 1018600128, 1059841200, + 1051941856, + ); + let b = u32x8::new( + 1061164420, 1056972365, 1057052091, 1057171641, 1057154275, 1064004148, 1053173190, + 1062872949, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sne_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sor_d() { + let a = u64x4::new( + 4600032844669681944, + 4594463383805270076, + 4592958727948323240, + 4598474090378898318, + ); + let b = u64x4::new( + 4602979608704078034, + 4606565228276935378, + 4604003678580242406, + 4604391192007326981, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sor_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sor_s() { + let a = u32x8::new( + 1061014415, 1062349523, 1051726058, 1055193302, 1042014376, 1060862292, 1049178518, + 1057703558, + ); + let b = u32x8::new( + 1049131624, 1041520484, 1065237143, 1062513527, 1050805196, 1050889556, 1064403532, + 1054988022, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sor_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sueq_d() { + let a = u64x4::new( + 4603806425689581476, + 4602719352745602774, + 4594235151654053920, + 4598585482869376160, + ); + let b = u64x4::new( + 4597192397006933792, + 4602801475688800384, + 4599539096838817414, + 4603943496423544517, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, 
transmute(lasx_xvfcmp_sueq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sueq_s() { + let a = u32x8::new( + 1063023580, 1064528754, 1050308238, 1037288408, 1040252868, 1052571256, 1054474094, + 1060927468, + ); + let b = u32x8::new( + 1046997360, 1061154107, 1053281976, 1040631584, 1047759184, 1060702185, 1058969574, + 1055588604, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sueq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sule_d() { + let a = u64x4::new( + 4603957166332235709, + 4606383957649489661, + 4606330328898957118, + 4604578658311008992, + ); + let b = u64x4::new( + 4603539942547513158, + 4603598897708702396, + 4606250921023174648, + 4592187933910963896, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sule_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sule_s() { + let a = u32x8::new( + 1048433556, 1057438072, 1054557166, 1065240380, 1060486424, 1064222633, 1065198422, + 1034306768, + ); + let b = u32x8::new( + 1041928380, 1018285056, 1055996038, 1059481010, 1024438512, 1052197062, 1055194940, + 1033264360, + ); + let r = i64x4::new(0, 4294967295, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sule_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sult_d() { + let a = u64x4::new( + 4605366058991696696, + 4601232121105121062, + 4601996581218373232, + 4602266745451684294, + ); + let b = u64x4::new( + 4599548937774345734, + 4604614363604787867, + 4593970533267593656, + 4605031421622352277, + ); + let r = i64x4::new(0, -1, 0, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sult_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sult_s() { + let a = u32x8::new( + 1044761596, 1015684704, 1049105674, 1061214845, 1031561696, 1055360952, 1060420352, + 1063461022, 
+ ); + let b = u32x8::new( + 1063585876, 1063262278, 1062673201, 1059017275, 1032877328, 1063558131, 1057454077, + 1062968413, + ); + let r = i64x4::new(-1, 4294967295, -1, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sult_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sun_d() { + let a = u64x4::new( + 4581684619237043552, + 4604681260167973492, + 4602321601943005466, + 4605768364153053538, + ); + let b = u64x4::new( + 4604919109359715487, + 4606713834219051412, + 4601813019181652070, + 4598024963761131488, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sun_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sune_d() { + let a = u64x4::new( + 4602842520488526831, + 4586377859926895520, + 4595797380069114560, + 4597668933134490352, + ); + let b = u64x4::new( + 4603719292253049421, + 4601306102929155814, + 4606447272167981658, + 4595752422326832136, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sune_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sune_s() { + let a = u32x8::new( + 1060627725, 1063145029, 1064291001, 1058025149, 1037522088, 1059097656, 1041307400, + 1059437048, + ); + let b = u32x8::new( + 1048507540, 1059109210, 1029412928, 1063377178, 1059646047, 1061716080, 1057060099, + 1040743680, + ); + let r = i64x4::new(-1, -1, -1, -1); + + assert_eq!(r, transmute(lasx_xvfcmp_sune_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvfcmp_sun_s() { + let a = u32x8::new( + 1062269194, 1017878048, 1020862944, 1063553320, 1052587356, 1041348304, 1063597708, + 1046660292, + ); + let b = u32x8::new( + 1053486118, 1028652080, 1057647183, 1051605726, 987074560, 1053988970, 1063915975, + 1039720984, + ); + let r = i64x4::new(0, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvfcmp_sun_s(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve_d_f() { + let a = u64x4::new( + 4601462012634722388, + 4605596490350167974, + 4589580703778483496, + 4590176684263748456, + ); + let r = i64x4::new(4605596490350167974, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvpickve_d_f::<1>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvpickve_w_f() { + let a = u32x8::new( + 1050978982, 1040565756, 1052944866, 1048054444, 1050714578, 1048632290, 1064399621, + 1049634380, + ); + let r = i64x4::new(1040565756, 0, 0, 0); + + assert_eq!(r, transmute(lasx_xvpickve_w_f::<1>(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepli_b() { + let r = i64x4::new( + -940422246894996750, + -940422246894996750, + -940422246894996750, + -940422246894996750, + ); + + assert_eq!(r, transmute(lasx_xvrepli_b::<498>())); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepli_d() { + let r = i64x4::new(169, 169, 169, 169); + + assert_eq!(r, transmute(lasx_xvrepli_d::<169>())); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepli_h() { + let r = i64x4::new( + -108650998892986755, + -108650998892986755, + -108650998892986755, + -108650998892986755, + ); + + assert_eq!(r, transmute(lasx_xvrepli_h::<-387>())); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_xvrepli_w() { + let r = i64x4::new( + -1662152343940, + -1662152343940, + -1662152343940, + -1662152343940, + ); + + assert_eq!(r, transmute(lasx_xvrepli_w::<-388>())); +} diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/types.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/types.rs new file mode 100644 index 000000000000..9611517e6370 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/types.rs @@ -0,0 +1,33 @@ +types! { + #![unstable(feature = "stdarch_loongarch", issue = "117427")] + + /// LOONGARCH-specific 256-bit wide vector of 32 packed `i8`. 
+ pub struct v32i8(32 x pub(crate) i8); + + /// LOONGARCH-specific 256-bit wide vector of 16 packed `i16`. + pub struct v16i16(16 x pub(crate) i16); + + /// LOONGARCH-specific 256-bit wide vector of 8 packed `i32`. + pub struct v8i32(8 x pub(crate) i32); + + /// LOONGARCH-specific 256-bit wide vector of 4 packed `i64`. + pub struct v4i64(4 x pub(crate) i64); + + /// LOONGARCH-specific 256-bit wide vector of 32 packed `u8`. + pub struct v32u8(32 x pub(crate) u8); + + /// LOONGARCH-specific 256-bit wide vector of 16 packed `u16`. + pub struct v16u16(16 x pub(crate) u16); + + /// LOONGARCH-specific 256-bit wide vector of 8 packed `u32`. + pub struct v8u32(8 x pub(crate) u32); + + /// LOONGARCH-specific 256-bit wide vector of 4 packed `u64`. + pub struct v4u64(4 x pub(crate) u64); + + /// LOONGARCH-specific 128-bit wide vector of 8 packed `f32`. + pub struct v8f32(8 x pub(crate) f32); + + /// LOONGARCH-specific 256-bit wide vector of 4 packed `f64`. + pub struct v4f64(4 x pub(crate) f64); +} diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs new file mode 100644 index 000000000000..2bc364f3e069 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -0,0 +1,6879 @@ +// This code is automatically generated. DO NOT MODIFY. 
+// +// Instead, modify `crates/stdarch-gen-loongarch/lsx.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec +// ``` + +use super::types::*; + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.loongarch.lsx.vsll.b"] + fn __lsx_vsll_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsll.h"] + fn __lsx_vsll_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsll.w"] + fn __lsx_vsll_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsll.d"] + fn __lsx_vsll_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslli.b"] + fn __lsx_vslli_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslli.h"] + fn __lsx_vslli_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslli.w"] + fn __lsx_vslli_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslli.d"] + fn __lsx_vslli_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsra.b"] + fn __lsx_vsra_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsra.h"] + fn __lsx_vsra_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsra.w"] + fn __lsx_vsra_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsra.d"] + fn __lsx_vsra_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrai.b"] + fn __lsx_vsrai_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrai.h"] + fn __lsx_vsrai_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrai.w"] + fn __lsx_vsrai_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrai.d"] + fn __lsx_vsrai_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrar.b"] + fn __lsx_vsrar_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrar.h"] + fn __lsx_vsrar_h(a: v8i16, b: 
v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrar.w"] + fn __lsx_vsrar_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrar.d"] + fn __lsx_vsrar_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrari.b"] + fn __lsx_vsrari_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrari.h"] + fn __lsx_vsrari_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrari.w"] + fn __lsx_vsrari_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrari.d"] + fn __lsx_vsrari_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrl.b"] + fn __lsx_vsrl_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrl.h"] + fn __lsx_vsrl_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrl.w"] + fn __lsx_vsrl_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrl.d"] + fn __lsx_vsrl_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrli.b"] + fn __lsx_vsrli_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrli.h"] + fn __lsx_vsrli_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrli.w"] + fn __lsx_vsrli_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrli.d"] + fn __lsx_vsrli_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrlr.b"] + fn __lsx_vsrlr_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrlr.h"] + fn __lsx_vsrlr_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrlr.w"] + fn __lsx_vsrlr_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrlr.d"] + fn __lsx_vsrlr_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrlri.b"] + fn __lsx_vsrlri_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrlri.h"] + fn __lsx_vsrlri_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrlri.w"] + fn __lsx_vsrlri_w(a: v4i32, b: u32) -> v4i32; + 
#[link_name = "llvm.loongarch.lsx.vsrlri.d"] + fn __lsx_vsrlri_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vbitclr.b"] + fn __lsx_vbitclr_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitclr.h"] + fn __lsx_vbitclr_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vbitclr.w"] + fn __lsx_vbitclr_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vbitclr.d"] + fn __lsx_vbitclr_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vbitclri.b"] + fn __lsx_vbitclri_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitclri.h"] + fn __lsx_vbitclri_h(a: v8u16, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vbitclri.w"] + fn __lsx_vbitclri_w(a: v4u32, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vbitclri.d"] + fn __lsx_vbitclri_d(a: v2u64, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vbitset.b"] + fn __lsx_vbitset_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitset.h"] + fn __lsx_vbitset_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vbitset.w"] + fn __lsx_vbitset_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vbitset.d"] + fn __lsx_vbitset_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vbitseti.b"] + fn __lsx_vbitseti_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitseti.h"] + fn __lsx_vbitseti_h(a: v8u16, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vbitseti.w"] + fn __lsx_vbitseti_w(a: v4u32, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vbitseti.d"] + fn __lsx_vbitseti_d(a: v2u64, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vbitrev.b"] + fn __lsx_vbitrev_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitrev.h"] + fn __lsx_vbitrev_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vbitrev.w"] + fn __lsx_vbitrev_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = 
"llvm.loongarch.lsx.vbitrev.d"] + fn __lsx_vbitrev_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vbitrevi.b"] + fn __lsx_vbitrevi_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitrevi.h"] + fn __lsx_vbitrevi_h(a: v8u16, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vbitrevi.w"] + fn __lsx_vbitrevi_w(a: v4u32, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vbitrevi.d"] + fn __lsx_vbitrevi_d(a: v2u64, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vadd.b"] + fn __lsx_vadd_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vadd.h"] + fn __lsx_vadd_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vadd.w"] + fn __lsx_vadd_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vadd.d"] + fn __lsx_vadd_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddi.bu"] + fn __lsx_vaddi_bu(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vaddi.hu"] + fn __lsx_vaddi_hu(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddi.wu"] + fn __lsx_vaddi_wu(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddi.du"] + fn __lsx_vaddi_du(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsub.b"] + fn __lsx_vsub_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsub.h"] + fn __lsx_vsub_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsub.w"] + fn __lsx_vsub_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsub.d"] + fn __lsx_vsub_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubi.bu"] + fn __lsx_vsubi_bu(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsubi.hu"] + fn __lsx_vsubi_hu(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsubi.wu"] + fn __lsx_vsubi_wu(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsubi.du"] + fn __lsx_vsubi_du(a: v2i64, b: u32) -> v2i64; + #[link_name = 
"llvm.loongarch.lsx.vmax.b"] + fn __lsx_vmax_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmax.h"] + fn __lsx_vmax_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmax.w"] + fn __lsx_vmax_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmax.d"] + fn __lsx_vmax_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaxi.b"] + fn __lsx_vmaxi_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmaxi.h"] + fn __lsx_vmaxi_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmaxi.w"] + fn __lsx_vmaxi_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmaxi.d"] + fn __lsx_vmaxi_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmax.bu"] + fn __lsx_vmax_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vmax.hu"] + fn __lsx_vmax_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmax.wu"] + fn __lsx_vmax_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmax.du"] + fn __lsx_vmax_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmaxi.bu"] + fn __lsx_vmaxi_bu(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vmaxi.hu"] + fn __lsx_vmaxi_hu(a: v8u16, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmaxi.wu"] + fn __lsx_vmaxi_wu(a: v4u32, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmaxi.du"] + fn __lsx_vmaxi_du(a: v2u64, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmin.b"] + fn __lsx_vmin_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmin.h"] + fn __lsx_vmin_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmin.w"] + fn __lsx_vmin_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmin.d"] + fn __lsx_vmin_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmini.b"] + fn __lsx_vmini_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmini.h"] 
+ fn __lsx_vmini_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmini.w"] + fn __lsx_vmini_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmini.d"] + fn __lsx_vmini_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmin.bu"] + fn __lsx_vmin_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vmin.hu"] + fn __lsx_vmin_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmin.wu"] + fn __lsx_vmin_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmin.du"] + fn __lsx_vmin_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmini.bu"] + fn __lsx_vmini_bu(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vmini.hu"] + fn __lsx_vmini_hu(a: v8u16, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmini.wu"] + fn __lsx_vmini_wu(a: v4u32, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmini.du"] + fn __lsx_vmini_du(a: v2u64, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vseq.b"] + fn __lsx_vseq_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vseq.h"] + fn __lsx_vseq_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vseq.w"] + fn __lsx_vseq_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vseq.d"] + fn __lsx_vseq_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vseqi.b"] + fn __lsx_vseqi_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vseqi.h"] + fn __lsx_vseqi_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vseqi.w"] + fn __lsx_vseqi_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vseqi.d"] + fn __lsx_vseqi_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslti.b"] + fn __lsx_vslti_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslt.b"] + fn __lsx_vslt_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslt.h"] + fn __lsx_vslt_h(a: v8i16, b: 
v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslt.w"] + fn __lsx_vslt_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslt.d"] + fn __lsx_vslt_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslti.h"] + fn __lsx_vslti_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslti.w"] + fn __lsx_vslti_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslti.d"] + fn __lsx_vslti_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslt.bu"] + fn __lsx_vslt_bu(a: v16u8, b: v16u8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslt.hu"] + fn __lsx_vslt_hu(a: v8u16, b: v8u16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslt.wu"] + fn __lsx_vslt_wu(a: v4u32, b: v4u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslt.du"] + fn __lsx_vslt_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslti.bu"] + fn __lsx_vslti_bu(a: v16u8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslti.hu"] + fn __lsx_vslti_hu(a: v8u16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslti.wu"] + fn __lsx_vslti_wu(a: v4u32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslti.du"] + fn __lsx_vslti_du(a: v2u64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsle.b"] + fn __lsx_vsle_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsle.h"] + fn __lsx_vsle_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsle.w"] + fn __lsx_vsle_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsle.d"] + fn __lsx_vsle_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslei.b"] + fn __lsx_vslei_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslei.h"] + fn __lsx_vslei_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslei.w"] + fn __lsx_vslei_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslei.d"] + fn __lsx_vslei_d(a: v2i64, b: i32) -> v2i64; + #[link_name 
= "llvm.loongarch.lsx.vsle.bu"] + fn __lsx_vsle_bu(a: v16u8, b: v16u8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsle.hu"] + fn __lsx_vsle_hu(a: v8u16, b: v8u16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsle.wu"] + fn __lsx_vsle_wu(a: v4u32, b: v4u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsle.du"] + fn __lsx_vsle_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vslei.bu"] + fn __lsx_vslei_bu(a: v16u8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vslei.hu"] + fn __lsx_vslei_hu(a: v8u16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vslei.wu"] + fn __lsx_vslei_wu(a: v4u32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vslei.du"] + fn __lsx_vslei_du(a: v2u64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsat.b"] + fn __lsx_vsat_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsat.h"] + fn __lsx_vsat_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsat.w"] + fn __lsx_vsat_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsat.d"] + fn __lsx_vsat_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsat.bu"] + fn __lsx_vsat_bu(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vsat.hu"] + fn __lsx_vsat_hu(a: v8u16, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vsat.wu"] + fn __lsx_vsat_wu(a: v4u32, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vsat.du"] + fn __lsx_vsat_du(a: v2u64, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vadda.b"] + fn __lsx_vadda_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vadda.h"] + fn __lsx_vadda_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vadda.w"] + fn __lsx_vadda_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vadda.d"] + fn __lsx_vadda_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsadd.b"] + fn __lsx_vsadd_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = 
"llvm.loongarch.lsx.vsadd.h"] + fn __lsx_vsadd_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsadd.w"] + fn __lsx_vsadd_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsadd.d"] + fn __lsx_vsadd_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsadd.bu"] + fn __lsx_vsadd_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vsadd.hu"] + fn __lsx_vsadd_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vsadd.wu"] + fn __lsx_vsadd_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vsadd.du"] + fn __lsx_vsadd_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vavg.b"] + fn __lsx_vavg_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vavg.h"] + fn __lsx_vavg_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vavg.w"] + fn __lsx_vavg_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vavg.d"] + fn __lsx_vavg_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vavg.bu"] + fn __lsx_vavg_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vavg.hu"] + fn __lsx_vavg_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vavg.wu"] + fn __lsx_vavg_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vavg.du"] + fn __lsx_vavg_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vavgr.b"] + fn __lsx_vavgr_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vavgr.h"] + fn __lsx_vavgr_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vavgr.w"] + fn __lsx_vavgr_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vavgr.d"] + fn __lsx_vavgr_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vavgr.bu"] + fn __lsx_vavgr_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vavgr.hu"] + fn __lsx_vavgr_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = 
"llvm.loongarch.lsx.vavgr.wu"] + fn __lsx_vavgr_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vavgr.du"] + fn __lsx_vavgr_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vssub.b"] + fn __lsx_vssub_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssub.h"] + fn __lsx_vssub_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssub.w"] + fn __lsx_vssub_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssub.d"] + fn __lsx_vssub_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssub.bu"] + fn __lsx_vssub_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssub.hu"] + fn __lsx_vssub_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssub.wu"] + fn __lsx_vssub_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vssub.du"] + fn __lsx_vssub_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vabsd.b"] + fn __lsx_vabsd_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vabsd.h"] + fn __lsx_vabsd_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vabsd.w"] + fn __lsx_vabsd_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vabsd.d"] + fn __lsx_vabsd_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vabsd.bu"] + fn __lsx_vabsd_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vabsd.hu"] + fn __lsx_vabsd_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vabsd.wu"] + fn __lsx_vabsd_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vabsd.du"] + fn __lsx_vabsd_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmul.b"] + fn __lsx_vmul_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmul.h"] + fn __lsx_vmul_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmul.w"] + fn __lsx_vmul_w(a: v4i32, b: v4i32) -> v4i32; + 
#[link_name = "llvm.loongarch.lsx.vmul.d"] + fn __lsx_vmul_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmadd.b"] + fn __lsx_vmadd_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmadd.h"] + fn __lsx_vmadd_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmadd.w"] + fn __lsx_vmadd_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmadd.d"] + fn __lsx_vmadd_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmsub.b"] + fn __lsx_vmsub_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmsub.h"] + fn __lsx_vmsub_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmsub.w"] + fn __lsx_vmsub_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmsub.d"] + fn __lsx_vmsub_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vdiv.b"] + fn __lsx_vdiv_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vdiv.h"] + fn __lsx_vdiv_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vdiv.w"] + fn __lsx_vdiv_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vdiv.d"] + fn __lsx_vdiv_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vdiv.bu"] + fn __lsx_vdiv_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vdiv.hu"] + fn __lsx_vdiv_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vdiv.wu"] + fn __lsx_vdiv_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vdiv.du"] + fn __lsx_vdiv_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vhaddw.h.b"] + fn __lsx_vhaddw_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vhaddw.w.h"] + fn __lsx_vhaddw_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vhaddw.d.w"] + fn __lsx_vhaddw_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name 
= "llvm.loongarch.lsx.vhaddw.hu.bu"] + fn __lsx_vhaddw_hu_bu(a: v16u8, b: v16u8) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vhaddw.wu.hu"] + fn __lsx_vhaddw_wu_hu(a: v8u16, b: v8u16) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vhaddw.du.wu"] + fn __lsx_vhaddw_du_wu(a: v4u32, b: v4u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vhsubw.h.b"] + fn __lsx_vhsubw_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vhsubw.w.h"] + fn __lsx_vhsubw_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vhsubw.d.w"] + fn __lsx_vhsubw_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vhsubw.hu.bu"] + fn __lsx_vhsubw_hu_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vhsubw.wu.hu"] + fn __lsx_vhsubw_wu_hu(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vhsubw.du.wu"] + fn __lsx_vhsubw_du_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmod.b"] + fn __lsx_vmod_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmod.h"] + fn __lsx_vmod_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmod.w"] + fn __lsx_vmod_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmod.d"] + fn __lsx_vmod_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmod.bu"] + fn __lsx_vmod_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vmod.hu"] + fn __lsx_vmod_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmod.wu"] + fn __lsx_vmod_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmod.du"] + fn __lsx_vmod_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vreplve.b"] + fn __lsx_vreplve_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vreplve.h"] + fn __lsx_vreplve_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vreplve.w"] + fn __lsx_vreplve_w(a: v4i32, b: i32) -> v4i32; + #[link_name = 
"llvm.loongarch.lsx.vreplve.d"] + fn __lsx_vreplve_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vreplvei.b"] + fn __lsx_vreplvei_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vreplvei.h"] + fn __lsx_vreplvei_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vreplvei.w"] + fn __lsx_vreplvei_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vreplvei.d"] + fn __lsx_vreplvei_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vpickev.b"] + fn __lsx_vpickev_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vpickev.h"] + fn __lsx_vpickev_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vpickev.w"] + fn __lsx_vpickev_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vpickev.d"] + fn __lsx_vpickev_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vpickod.b"] + fn __lsx_vpickod_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vpickod.h"] + fn __lsx_vpickod_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vpickod.w"] + fn __lsx_vpickod_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vpickod.d"] + fn __lsx_vpickod_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vilvh.b"] + fn __lsx_vilvh_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vilvh.h"] + fn __lsx_vilvh_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vilvh.w"] + fn __lsx_vilvh_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vilvh.d"] + fn __lsx_vilvh_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vilvl.b"] + fn __lsx_vilvl_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vilvl.h"] + fn __lsx_vilvl_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vilvl.w"] + fn __lsx_vilvl_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vilvl.d"] + fn 
__lsx_vilvl_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vpackev.b"] + fn __lsx_vpackev_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vpackev.h"] + fn __lsx_vpackev_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vpackev.w"] + fn __lsx_vpackev_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vpackev.d"] + fn __lsx_vpackev_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vpackod.b"] + fn __lsx_vpackod_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vpackod.h"] + fn __lsx_vpackod_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vpackod.w"] + fn __lsx_vpackod_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vpackod.d"] + fn __lsx_vpackod_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vshuf.h"] + fn __lsx_vshuf_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vshuf.w"] + fn __lsx_vshuf_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vshuf.d"] + fn __lsx_vshuf_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vand.v"] + fn __lsx_vand_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vandi.b"] + fn __lsx_vandi_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vor.v"] + fn __lsx_vor_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vori.b"] + fn __lsx_vori_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vnor.v"] + fn __lsx_vnor_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vnori.b"] + fn __lsx_vnori_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vxor.v"] + fn __lsx_vxor_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vxori.b"] + fn __lsx_vxori_b(a: v16u8, b: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vbitsel.v"] + fn __lsx_vbitsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; 
+ #[link_name = "llvm.loongarch.lsx.vbitseli.b"] + fn __lsx_vbitseli_b(a: v16u8, b: v16u8, c: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vshuf4i.b"] + fn __lsx_vshuf4i_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vshuf4i.h"] + fn __lsx_vshuf4i_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vshuf4i.w"] + fn __lsx_vshuf4i_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vreplgr2vr.b"] + fn __lsx_vreplgr2vr_b(a: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vreplgr2vr.h"] + fn __lsx_vreplgr2vr_h(a: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vreplgr2vr.w"] + fn __lsx_vreplgr2vr_w(a: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vreplgr2vr.d"] + fn __lsx_vreplgr2vr_d(a: i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vpcnt.b"] + fn __lsx_vpcnt_b(a: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vpcnt.h"] + fn __lsx_vpcnt_h(a: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vpcnt.w"] + fn __lsx_vpcnt_w(a: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vpcnt.d"] + fn __lsx_vpcnt_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vclo.b"] + fn __lsx_vclo_b(a: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vclo.h"] + fn __lsx_vclo_h(a: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vclo.w"] + fn __lsx_vclo_w(a: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vclo.d"] + fn __lsx_vclo_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vclz.b"] + fn __lsx_vclz_b(a: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vclz.h"] + fn __lsx_vclz_h(a: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vclz.w"] + fn __lsx_vclz_w(a: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vclz.d"] + fn __lsx_vclz_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.b"] + fn __lsx_vpickve2gr_b(a: v16i8, b: u32) -> i32; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.h"] + fn __lsx_vpickve2gr_h(a: v8i16, b: u32) -> i32; + #[link_name = 
"llvm.loongarch.lsx.vpickve2gr.w"] + fn __lsx_vpickve2gr_w(a: v4i32, b: u32) -> i32; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.d"] + fn __lsx_vpickve2gr_d(a: v2i64, b: u32) -> i64; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.bu"] + fn __lsx_vpickve2gr_bu(a: v16i8, b: u32) -> u32; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.hu"] + fn __lsx_vpickve2gr_hu(a: v8i16, b: u32) -> u32; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.wu"] + fn __lsx_vpickve2gr_wu(a: v4i32, b: u32) -> u32; + #[link_name = "llvm.loongarch.lsx.vpickve2gr.du"] + fn __lsx_vpickve2gr_du(a: v2i64, b: u32) -> u64; + #[link_name = "llvm.loongarch.lsx.vinsgr2vr.b"] + fn __lsx_vinsgr2vr_b(a: v16i8, b: i32, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vinsgr2vr.h"] + fn __lsx_vinsgr2vr_h(a: v8i16, b: i32, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vinsgr2vr.w"] + fn __lsx_vinsgr2vr_w(a: v4i32, b: i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vinsgr2vr.d"] + fn __lsx_vinsgr2vr_d(a: v2i64, b: i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfadd.s"] + fn __lsx_vfadd_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfadd.d"] + fn __lsx_vfadd_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfsub.s"] + fn __lsx_vfsub_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfsub.d"] + fn __lsx_vfsub_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfmul.s"] + fn __lsx_vfmul_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmul.d"] + fn __lsx_vfmul_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfdiv.s"] + fn __lsx_vfdiv_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfdiv.d"] + fn __lsx_vfdiv_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfcvt.h.s"] + fn __lsx_vfcvt_h_s(a: v4f32, b: v4f32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vfcvt.s.d"] + fn __lsx_vfcvt_s_d(a: v2f64, b: v2f64) -> v4f32; + 
#[link_name = "llvm.loongarch.lsx.vfmin.s"] + fn __lsx_vfmin_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmin.d"] + fn __lsx_vfmin_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfmina.s"] + fn __lsx_vfmina_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmina.d"] + fn __lsx_vfmina_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfmax.s"] + fn __lsx_vfmax_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmax.d"] + fn __lsx_vfmax_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfmaxa.s"] + fn __lsx_vfmaxa_s(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmaxa.d"] + fn __lsx_vfmaxa_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfclass.s"] + fn __lsx_vfclass_s(a: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfclass.d"] + fn __lsx_vfclass_d(a: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfsqrt.s"] + fn __lsx_vfsqrt_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfsqrt.d"] + fn __lsx_vfsqrt_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrecip.s"] + fn __lsx_vfrecip_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrecip.d"] + fn __lsx_vfrecip_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrecipe.s"] + fn __lsx_vfrecipe_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrecipe.d"] + fn __lsx_vfrecipe_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrsqrte.s"] + fn __lsx_vfrsqrte_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrsqrte.d"] + fn __lsx_vfrsqrte_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrint.s"] + fn __lsx_vfrint_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrint.d"] + fn __lsx_vfrint_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrsqrt.s"] + fn __lsx_vfrsqrt_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrsqrt.d"] + fn __lsx_vfrsqrt_d(a: 
v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vflogb.s"] + fn __lsx_vflogb_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vflogb.d"] + fn __lsx_vflogb_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfcvth.s.h"] + fn __lsx_vfcvth_s_h(a: v8i16) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfcvth.d.s"] + fn __lsx_vfcvth_d_s(a: v4f32) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfcvtl.s.h"] + fn __lsx_vfcvtl_s_h(a: v8i16) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfcvtl.d.s"] + fn __lsx_vfcvtl_d_s(a: v4f32) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vftint.w.s"] + fn __lsx_vftint_w_s(a: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftint.l.d"] + fn __lsx_vftint_l_d(a: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftint.wu.s"] + fn __lsx_vftint_wu_s(a: v4f32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vftint.lu.d"] + fn __lsx_vftint_lu_d(a: v2f64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vftintrz.w.s"] + fn __lsx_vftintrz_w_s(a: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrz.l.d"] + fn __lsx_vftintrz_l_d(a: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrz.wu.s"] + fn __lsx_vftintrz_wu_s(a: v4f32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vftintrz.lu.d"] + fn __lsx_vftintrz_lu_d(a: v2f64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vffint.s.w"] + fn __lsx_vffint_s_w(a: v4i32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vffint.d.l"] + fn __lsx_vffint_d_l(a: v2i64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vffint.s.wu"] + fn __lsx_vffint_s_wu(a: v4u32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vffint.d.lu"] + fn __lsx_vffint_d_lu(a: v2u64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vandn.v"] + fn __lsx_vandn_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vneg.b"] + fn __lsx_vneg_b(a: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vneg.h"] + fn __lsx_vneg_h(a: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vneg.w"] + fn 
__lsx_vneg_w(a: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vneg.d"] + fn __lsx_vneg_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmuh.b"] + fn __lsx_vmuh_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmuh.h"] + fn __lsx_vmuh_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmuh.w"] + fn __lsx_vmuh_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmuh.d"] + fn __lsx_vmuh_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmuh.bu"] + fn __lsx_vmuh_bu(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vmuh.hu"] + fn __lsx_vmuh_hu(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmuh.wu"] + fn __lsx_vmuh_wu(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmuh.du"] + fn __lsx_vmuh_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vsllwil.h.b"] + fn __lsx_vsllwil_h_b(a: v16i8, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsllwil.w.h"] + fn __lsx_vsllwil_w_h(a: v8i16, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsllwil.d.w"] + fn __lsx_vsllwil_d_w(a: v4i32, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsllwil.hu.bu"] + fn __lsx_vsllwil_hu_bu(a: v16u8, b: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vsllwil.wu.hu"] + fn __lsx_vsllwil_wu_hu(a: v8u16, b: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vsllwil.du.wu"] + fn __lsx_vsllwil_du_wu(a: v4u32, b: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vsran.b.h"] + fn __lsx_vsran_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsran.h.w"] + fn __lsx_vsran_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsran.w.d"] + fn __lsx_vsran_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssran.b.h"] + fn __lsx_vssran_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssran.h.w"] + fn __lsx_vssran_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name 
= "llvm.loongarch.lsx.vssran.w.d"] + fn __lsx_vssran_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssran.bu.h"] + fn __lsx_vssran_bu_h(a: v8u16, b: v8u16) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssran.hu.w"] + fn __lsx_vssran_hu_w(a: v4u32, b: v4u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssran.wu.d"] + fn __lsx_vssran_wu_d(a: v2u64, b: v2u64) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vsrarn.b.h"] + fn __lsx_vsrarn_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrarn.h.w"] + fn __lsx_vsrarn_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrarn.w.d"] + fn __lsx_vsrarn_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrarn.b.h"] + fn __lsx_vssrarn_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrarn.h.w"] + fn __lsx_vssrarn_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssrarn.w.d"] + fn __lsx_vssrarn_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrarn.bu.h"] + fn __lsx_vssrarn_bu_h(a: v8u16, b: v8u16) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrarn.hu.w"] + fn __lsx_vssrarn_hu_w(a: v4u32, b: v4u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrarn.wu.d"] + fn __lsx_vssrarn_wu_d(a: v2u64, b: v2u64) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vsrln.b.h"] + fn __lsx_vsrln_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrln.h.w"] + fn __lsx_vsrln_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrln.w.d"] + fn __lsx_vsrln_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrln.bu.h"] + fn __lsx_vssrln_bu_h(a: v8u16, b: v8u16) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrln.hu.w"] + fn __lsx_vssrln_hu_w(a: v4u32, b: v4u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrln.wu.d"] + fn __lsx_vssrln_wu_d(a: v2u64, b: v2u64) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vsrlrn.b.h"] + fn 
__lsx_vsrlrn_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrlrn.h.w"] + fn __lsx_vsrlrn_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrlrn.w.d"] + fn __lsx_vsrlrn_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrlrn.bu.h"] + fn __lsx_vssrlrn_bu_h(a: v8u16, b: v8u16) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrlrn.hu.w"] + fn __lsx_vssrlrn_hu_w(a: v4u32, b: v4u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrlrn.wu.d"] + fn __lsx_vssrlrn_wu_d(a: v2u64, b: v2u64) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vfrstpi.b"] + fn __lsx_vfrstpi_b(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vfrstpi.h"] + fn __lsx_vfrstpi_h(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vfrstp.b"] + fn __lsx_vfrstp_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vfrstp.h"] + fn __lsx_vfrstp_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vshuf4i.d"] + fn __lsx_vshuf4i_d(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vbsrl.v"] + fn __lsx_vbsrl_v(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vbsll.v"] + fn __lsx_vbsll_v(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vextrins.b"] + fn __lsx_vextrins_b(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vextrins.h"] + fn __lsx_vextrins_h(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vextrins.w"] + fn __lsx_vextrins_w(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vextrins.d"] + fn __lsx_vextrins_d(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmskltz.b"] + fn __lsx_vmskltz_b(a: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vmskltz.h"] + fn __lsx_vmskltz_h(a: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmskltz.w"] + fn __lsx_vmskltz_w(a: v4i32) -> v4i32; + 
#[link_name = "llvm.loongarch.lsx.vmskltz.d"] + fn __lsx_vmskltz_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsigncov.b"] + fn __lsx_vsigncov_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsigncov.h"] + fn __lsx_vsigncov_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsigncov.w"] + fn __lsx_vsigncov_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsigncov.d"] + fn __lsx_vsigncov_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfmadd.s"] + fn __lsx_vfmadd_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmadd.d"] + fn __lsx_vfmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfmsub.s"] + fn __lsx_vfmsub_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfmsub.d"] + fn __lsx_vfmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfnmadd.s"] + fn __lsx_vfnmadd_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfnmadd.d"] + fn __lsx_vfnmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfnmsub.s"] + fn __lsx_vfnmsub_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfnmsub.d"] + fn __lsx_vfnmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vftintrne.w.s"] + fn __lsx_vftintrne_w_s(a: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrne.l.d"] + fn __lsx_vftintrne_l_d(a: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrp.w.s"] + fn __lsx_vftintrp_w_s(a: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrp.l.d"] + fn __lsx_vftintrp_l_d(a: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrm.w.s"] + fn __lsx_vftintrm_w_s(a: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrm.l.d"] + fn __lsx_vftintrm_l_d(a: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftint.w.d"] + fn 
__lsx_vftint_w_d(a: v2f64, b: v2f64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vffint.s.l"] + fn __lsx_vffint_s_l(a: v2i64, b: v2i64) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vftintrz.w.d"] + fn __lsx_vftintrz_w_d(a: v2f64, b: v2f64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrp.w.d"] + fn __lsx_vftintrp_w_d(a: v2f64, b: v2f64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrm.w.d"] + fn __lsx_vftintrm_w_d(a: v2f64, b: v2f64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintrne.w.d"] + fn __lsx_vftintrne_w_d(a: v2f64, b: v2f64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vftintl.l.s"] + fn __lsx_vftintl_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftinth.l.s"] + fn __lsx_vftinth_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vffinth.d.w"] + fn __lsx_vffinth_d_w(a: v4i32) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vffintl.d.w"] + fn __lsx_vffintl_d_w(a: v4i32) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vftintrzl.l.s"] + fn __lsx_vftintrzl_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrzh.l.s"] + fn __lsx_vftintrzh_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrpl.l.s"] + fn __lsx_vftintrpl_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrph.l.s"] + fn __lsx_vftintrph_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrml.l.s"] + fn __lsx_vftintrml_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrmh.l.s"] + fn __lsx_vftintrmh_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrnel.l.s"] + fn __lsx_vftintrnel_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vftintrneh.l.s"] + fn __lsx_vftintrneh_l_s(a: v4f32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfrintrne.s"] + fn __lsx_vfrintrne_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrintrne.d"] + fn __lsx_vfrintrne_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrintrz.s"] + fn __lsx_vfrintrz_s(a: v4f32) -> v4f32; 
+ #[link_name = "llvm.loongarch.lsx.vfrintrz.d"] + fn __lsx_vfrintrz_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrintrp.s"] + fn __lsx_vfrintrp_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrintrp.d"] + fn __lsx_vfrintrp_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vfrintrm.s"] + fn __lsx_vfrintrm_s(a: v4f32) -> v4f32; + #[link_name = "llvm.loongarch.lsx.vfrintrm.d"] + fn __lsx_vfrintrm_d(a: v2f64) -> v2f64; + #[link_name = "llvm.loongarch.lsx.vstelm.b"] + fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lsx.vstelm.h"] + fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lsx.vstelm.w"] + fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lsx.vstelm.d"] + fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32); + #[link_name = "llvm.loongarch.lsx.vaddwev.d.w"] + fn __lsx_vaddwev_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwev.w.h"] + fn __lsx_vaddwev_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddwev.h.b"] + fn __lsx_vaddwev_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddwod.d.w"] + fn __lsx_vaddwod_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwod.w.h"] + fn __lsx_vaddwod_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddwod.h.b"] + fn __lsx_vaddwod_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddwev.d.wu"] + fn __lsx_vaddwev_d_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwev.w.hu"] + fn __lsx_vaddwev_w_hu(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddwev.h.bu"] + fn __lsx_vaddwev_h_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddwod.d.wu"] + fn __lsx_vaddwod_d_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwod.w.hu"] + fn __lsx_vaddwod_w_hu(a: 
v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddwod.h.bu"] + fn __lsx_vaddwod_h_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddwev.d.wu.w"] + fn __lsx_vaddwev_d_wu_w(a: v4u32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwev.w.hu.h"] + fn __lsx_vaddwev_w_hu_h(a: v8u16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddwev.h.bu.b"] + fn __lsx_vaddwev_h_bu_b(a: v16u8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddwod.d.wu.w"] + fn __lsx_vaddwod_d_wu_w(a: v4u32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwod.w.hu.h"] + fn __lsx_vaddwod_w_hu_h(a: v8u16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vaddwod.h.bu.b"] + fn __lsx_vaddwod_h_bu_b(a: v16u8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsubwev.d.w"] + fn __lsx_vsubwev_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwev.w.h"] + fn __lsx_vsubwev_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsubwev.h.b"] + fn __lsx_vsubwev_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsubwod.d.w"] + fn __lsx_vsubwod_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwod.w.h"] + fn __lsx_vsubwod_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsubwod.h.b"] + fn __lsx_vsubwod_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsubwev.d.wu"] + fn __lsx_vsubwev_d_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwev.w.hu"] + fn __lsx_vsubwev_w_hu(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsubwev.h.bu"] + fn __lsx_vsubwev_h_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsubwod.d.wu"] + fn __lsx_vsubwod_d_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwod.w.hu"] + fn __lsx_vsubwod_w_hu(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsubwod.h.bu"] + fn 
__lsx_vsubwod_h_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vaddwev.q.d"] + fn __lsx_vaddwev_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwod.q.d"] + fn __lsx_vaddwod_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwev.q.du"] + fn __lsx_vaddwev_q_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwod.q.du"] + fn __lsx_vaddwod_q_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwev.q.d"] + fn __lsx_vsubwev_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwod.q.d"] + fn __lsx_vsubwod_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwev.q.du"] + fn __lsx_vsubwev_q_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsubwod.q.du"] + fn __lsx_vsubwod_q_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwev.q.du.d"] + fn __lsx_vaddwev_q_du_d(a: v2u64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vaddwod.q.du.d"] + fn __lsx_vaddwod_q_du_d(a: v2u64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwev.d.w"] + fn __lsx_vmulwev_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwev.w.h"] + fn __lsx_vmulwev_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmulwev.h.b"] + fn __lsx_vmulwev_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmulwod.d.w"] + fn __lsx_vmulwod_d_w(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwod.w.h"] + fn __lsx_vmulwod_w_h(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmulwod.h.b"] + fn __lsx_vmulwod_h_b(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmulwev.d.wu"] + fn __lsx_vmulwev_d_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwev.w.hu"] + fn __lsx_vmulwev_w_hu(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmulwev.h.bu"] + fn 
__lsx_vmulwev_h_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmulwod.d.wu"] + fn __lsx_vmulwod_d_wu(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwod.w.hu"] + fn __lsx_vmulwod_w_hu(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmulwod.h.bu"] + fn __lsx_vmulwod_h_bu(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmulwev.d.wu.w"] + fn __lsx_vmulwev_d_wu_w(a: v4u32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwev.w.hu.h"] + fn __lsx_vmulwev_w_hu_h(a: v8u16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmulwev.h.bu.b"] + fn __lsx_vmulwev_h_bu_b(a: v16u8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmulwod.d.wu.w"] + fn __lsx_vmulwod_d_wu_w(a: v4u32, b: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwod.w.hu.h"] + fn __lsx_vmulwod_w_hu_h(a: v8u16, b: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmulwod.h.bu.b"] + fn __lsx_vmulwod_h_bu_b(a: v16u8, b: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmulwev.q.d"] + fn __lsx_vmulwev_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwod.q.d"] + fn __lsx_vmulwod_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwev.q.du"] + fn __lsx_vmulwev_q_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwod.q.du"] + fn __lsx_vmulwod_q_du(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwev.q.du.d"] + fn __lsx_vmulwev_q_du_d(a: v2u64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmulwod.q.du.d"] + fn __lsx_vmulwod_q_du_d(a: v2u64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vhaddw.q.d"] + fn __lsx_vhaddw_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vhaddw.qu.du"] + fn __lsx_vhaddw_qu_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vhsubw.q.d"] + fn __lsx_vhsubw_q_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = 
"llvm.loongarch.lsx.vhsubw.qu.du"] + fn __lsx_vhsubw_qu_du(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmaddwev.d.w"] + fn __lsx_vmaddwev_d_w(a: v2i64, b: v4i32, c: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaddwev.w.h"] + fn __lsx_vmaddwev_w_h(a: v4i32, b: v8i16, c: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmaddwev.h.b"] + fn __lsx_vmaddwev_h_b(a: v8i16, b: v16i8, c: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmaddwev.d.wu"] + fn __lsx_vmaddwev_d_wu(a: v2u64, b: v4u32, c: v4u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmaddwev.w.hu"] + fn __lsx_vmaddwev_w_hu(a: v4u32, b: v8u16, c: v8u16) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmaddwev.h.bu"] + fn __lsx_vmaddwev_h_bu(a: v8u16, b: v16u8, c: v16u8) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmaddwod.d.w"] + fn __lsx_vmaddwod_d_w(a: v2i64, b: v4i32, c: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaddwod.w.h"] + fn __lsx_vmaddwod_w_h(a: v4i32, b: v8i16, c: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmaddwod.h.b"] + fn __lsx_vmaddwod_h_b(a: v8i16, b: v16i8, c: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmaddwod.d.wu"] + fn __lsx_vmaddwod_d_wu(a: v2u64, b: v4u32, c: v4u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmaddwod.w.hu"] + fn __lsx_vmaddwod_w_hu(a: v4u32, b: v8u16, c: v8u16) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vmaddwod.h.bu"] + fn __lsx_vmaddwod_h_bu(a: v8u16, b: v16u8, c: v16u8) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vmaddwev.d.wu.w"] + fn __lsx_vmaddwev_d_wu_w(a: v2i64, b: v4u32, c: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaddwev.w.hu.h"] + fn __lsx_vmaddwev_w_hu_h(a: v4i32, b: v8u16, c: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmaddwev.h.bu.b"] + fn __lsx_vmaddwev_h_bu_b(a: v8i16, b: v16u8, c: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmaddwod.d.wu.w"] + fn __lsx_vmaddwod_d_wu_w(a: v2i64, b: v4u32, c: v4i32) -> v2i64; + #[link_name = 
"llvm.loongarch.lsx.vmaddwod.w.hu.h"] + fn __lsx_vmaddwod_w_hu_h(a: v4i32, b: v8u16, c: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vmaddwod.h.bu.b"] + fn __lsx_vmaddwod_h_bu_b(a: v8i16, b: v16u8, c: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vmaddwev.q.d"] + fn __lsx_vmaddwev_q_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaddwod.q.d"] + fn __lsx_vmaddwod_q_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaddwev.q.du"] + fn __lsx_vmaddwev_q_du(a: v2u64, b: v2u64, c: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmaddwod.q.du"] + fn __lsx_vmaddwod_q_du(a: v2u64, b: v2u64, c: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vmaddwev.q.du.d"] + fn __lsx_vmaddwev_q_du_d(a: v2i64, b: v2u64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmaddwod.q.du.d"] + fn __lsx_vmaddwod_q_du_d(a: v2i64, b: v2u64, c: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vrotr.b"] + fn __lsx_vrotr_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vrotr.h"] + fn __lsx_vrotr_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vrotr.w"] + fn __lsx_vrotr_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vrotr.d"] + fn __lsx_vrotr_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vadd.q"] + fn __lsx_vadd_q(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsub.q"] + fn __lsx_vsub_q(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vldrepl.b"] + fn __lsx_vldrepl_b(a: *const i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vldrepl.h"] + fn __lsx_vldrepl_h(a: *const i8, b: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vldrepl.w"] + fn __lsx_vldrepl_w(a: *const i8, b: i32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vldrepl.d"] + fn __lsx_vldrepl_d(a: *const i8, b: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vmskgez.b"] + fn __lsx_vmskgez_b(a: v16i8) -> v16i8; + 
#[link_name = "llvm.loongarch.lsx.vmsknz.b"] + fn __lsx_vmsknz_b(a: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vexth.h.b"] + fn __lsx_vexth_h_b(a: v16i8) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vexth.w.h"] + fn __lsx_vexth_w_h(a: v8i16) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vexth.d.w"] + fn __lsx_vexth_d_w(a: v4i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vexth.q.d"] + fn __lsx_vexth_q_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vexth.hu.bu"] + fn __lsx_vexth_hu_bu(a: v16u8) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vexth.wu.hu"] + fn __lsx_vexth_wu_hu(a: v8u16) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vexth.du.wu"] + fn __lsx_vexth_du_wu(a: v4u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vexth.qu.du"] + fn __lsx_vexth_qu_du(a: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vrotri.b"] + fn __lsx_vrotri_b(a: v16i8, b: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vrotri.h"] + fn __lsx_vrotri_h(a: v8i16, b: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vrotri.w"] + fn __lsx_vrotri_w(a: v4i32, b: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vrotri.d"] + fn __lsx_vrotri_d(a: v2i64, b: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vextl.q.d"] + fn __lsx_vextl_q_d(a: v2i64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrlni.b.h"] + fn __lsx_vsrlni_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrlni.h.w"] + fn __lsx_vsrlni_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrlni.w.d"] + fn __lsx_vsrlni_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrlni.d.q"] + fn __lsx_vsrlni_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrlrni.b.h"] + fn __lsx_vsrlrni_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrlrni.h.w"] + fn __lsx_vsrlrni_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrlrni.w.d"] + fn 
__lsx_vsrlrni_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrlrni.d.q"] + fn __lsx_vsrlrni_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssrlni.b.h"] + fn __lsx_vssrlni_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrlni.h.w"] + fn __lsx_vssrlni_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssrlni.w.d"] + fn __lsx_vssrlni_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrlni.d.q"] + fn __lsx_vssrlni_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssrlni.bu.h"] + fn __lsx_vssrlni_bu_h(a: v16u8, b: v16i8, c: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrlni.hu.w"] + fn __lsx_vssrlni_hu_w(a: v8u16, b: v8i16, c: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrlni.wu.d"] + fn __lsx_vssrlni_wu_d(a: v4u32, b: v4i32, c: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vssrlni.du.q"] + fn __lsx_vssrlni_du_q(a: v2u64, b: v2i64, c: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vssrlrni.b.h"] + fn __lsx_vssrlrni_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrlrni.h.w"] + fn __lsx_vssrlrni_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssrlrni.w.d"] + fn __lsx_vssrlrni_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrlrni.d.q"] + fn __lsx_vssrlrni_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssrlrni.bu.h"] + fn __lsx_vssrlrni_bu_h(a: v16u8, b: v16i8, c: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrlrni.hu.w"] + fn __lsx_vssrlrni_hu_w(a: v8u16, b: v8i16, c: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrlrni.wu.d"] + fn __lsx_vssrlrni_wu_d(a: v4u32, b: v4i32, c: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vssrlrni.du.q"] + fn __lsx_vssrlrni_du_q(a: v2u64, b: v2i64, c: u32) -> v2u64; + #[link_name = 
"llvm.loongarch.lsx.vsrani.b.h"] + fn __lsx_vsrani_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrani.h.w"] + fn __lsx_vsrani_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrani.w.d"] + fn __lsx_vsrani_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrani.d.q"] + fn __lsx_vsrani_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vsrarni.b.h"] + fn __lsx_vsrarni_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vsrarni.h.w"] + fn __lsx_vsrarni_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vsrarni.w.d"] + fn __lsx_vsrarni_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vsrarni.d.q"] + fn __lsx_vsrarni_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssrani.b.h"] + fn __lsx_vssrani_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrani.h.w"] + fn __lsx_vssrani_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssrani.w.d"] + fn __lsx_vssrani_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrani.d.q"] + fn __lsx_vssrani_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssrani.bu.h"] + fn __lsx_vssrani_bu_h(a: v16u8, b: v16i8, c: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrani.hu.w"] + fn __lsx_vssrani_hu_w(a: v8u16, b: v8i16, c: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrani.wu.d"] + fn __lsx_vssrani_wu_d(a: v4u32, b: v4i32, c: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vssrani.du.q"] + fn __lsx_vssrani_du_q(a: v2u64, b: v2i64, c: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vssrarni.b.h"] + fn __lsx_vssrarni_b_h(a: v16i8, b: v16i8, c: u32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrarni.h.w"] + fn __lsx_vssrarni_h_w(a: v8i16, b: v8i16, c: u32) -> v8i16; + #[link_name 
= "llvm.loongarch.lsx.vssrarni.w.d"] + fn __lsx_vssrarni_w_d(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrarni.d.q"] + fn __lsx_vssrarni_d_q(a: v2i64, b: v2i64, c: u32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vssrarni.bu.h"] + fn __lsx_vssrarni_bu_h(a: v16u8, b: v16i8, c: u32) -> v16u8; + #[link_name = "llvm.loongarch.lsx.vssrarni.hu.w"] + fn __lsx_vssrarni_hu_w(a: v8u16, b: v8i16, c: u32) -> v8u16; + #[link_name = "llvm.loongarch.lsx.vssrarni.wu.d"] + fn __lsx_vssrarni_wu_d(a: v4u32, b: v4i32, c: u32) -> v4u32; + #[link_name = "llvm.loongarch.lsx.vssrarni.du.q"] + fn __lsx_vssrarni_du_q(a: v2u64, b: v2i64, c: u32) -> v2u64; + #[link_name = "llvm.loongarch.lsx.vpermi.w"] + fn __lsx_vpermi_w(a: v4i32, b: v4i32, c: u32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vld"] + fn __lsx_vld(a: *const i8, b: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vst"] + fn __lsx_vst(a: v16i8, b: *mut i8, c: i32); + #[link_name = "llvm.loongarch.lsx.vssrlrn.b.h"] + fn __lsx_vssrlrn_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrlrn.h.w"] + fn __lsx_vssrlrn_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssrlrn.w.d"] + fn __lsx_vssrlrn_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vssrln.b.h"] + fn __lsx_vssrln_b_h(a: v8i16, b: v8i16) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vssrln.h.w"] + fn __lsx_vssrln_h_w(a: v4i32, b: v4i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vssrln.w.d"] + fn __lsx_vssrln_w_d(a: v2i64, b: v2i64) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vorn.v"] + fn __lsx_vorn_v(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vldi"] + fn __lsx_vldi(a: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vshuf.b"] + fn __lsx_vshuf_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vldx"] + fn __lsx_vldx(a: *const i8, b: i64) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vstx"] + fn __lsx_vstx(a: 
v16i8, b: *mut i8, c: i64); + #[link_name = "llvm.loongarch.lsx.vextl.qu.du"] + fn __lsx_vextl_qu_du(a: v2u64) -> v2u64; + #[link_name = "llvm.loongarch.lsx.bnz.b"] + fn __lsx_bnz_b(a: v16u8) -> i32; + #[link_name = "llvm.loongarch.lsx.bnz.d"] + fn __lsx_bnz_d(a: v2u64) -> i32; + #[link_name = "llvm.loongarch.lsx.bnz.h"] + fn __lsx_bnz_h(a: v8u16) -> i32; + #[link_name = "llvm.loongarch.lsx.bnz.v"] + fn __lsx_bnz_v(a: v16u8) -> i32; + #[link_name = "llvm.loongarch.lsx.bnz.w"] + fn __lsx_bnz_w(a: v4u32) -> i32; + #[link_name = "llvm.loongarch.lsx.bz.b"] + fn __lsx_bz_b(a: v16u8) -> i32; + #[link_name = "llvm.loongarch.lsx.bz.d"] + fn __lsx_bz_d(a: v2u64) -> i32; + #[link_name = "llvm.loongarch.lsx.bz.h"] + fn __lsx_bz_h(a: v8u16) -> i32; + #[link_name = "llvm.loongarch.lsx.bz.v"] + fn __lsx_bz_v(a: v16u8) -> i32; + #[link_name = "llvm.loongarch.lsx.bz.w"] + fn __lsx_bz_w(a: v4u32) -> i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.caf.d"] + fn __lsx_vfcmp_caf_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.caf.s"] + fn __lsx_vfcmp_caf_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.ceq.d"] + fn __lsx_vfcmp_ceq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.ceq.s"] + fn __lsx_vfcmp_ceq_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cle.d"] + fn __lsx_vfcmp_cle_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cle.s"] + fn __lsx_vfcmp_cle_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.clt.d"] + fn __lsx_vfcmp_clt_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.clt.s"] + fn __lsx_vfcmp_clt_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cne.d"] + fn __lsx_vfcmp_cne_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cne.s"] + fn __lsx_vfcmp_cne_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cor.d"] + fn 
__lsx_vfcmp_cor_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cor.s"] + fn __lsx_vfcmp_cor_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cueq.d"] + fn __lsx_vfcmp_cueq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cueq.s"] + fn __lsx_vfcmp_cueq_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cule.d"] + fn __lsx_vfcmp_cule_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cule.s"] + fn __lsx_vfcmp_cule_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cult.d"] + fn __lsx_vfcmp_cult_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cult.s"] + fn __lsx_vfcmp_cult_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cun.d"] + fn __lsx_vfcmp_cun_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cune.d"] + fn __lsx_vfcmp_cune_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.cune.s"] + fn __lsx_vfcmp_cune_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.cun.s"] + fn __lsx_vfcmp_cun_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.saf.d"] + fn __lsx_vfcmp_saf_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.saf.s"] + fn __lsx_vfcmp_saf_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.seq.d"] + fn __lsx_vfcmp_seq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.seq.s"] + fn __lsx_vfcmp_seq_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sle.d"] + fn __lsx_vfcmp_sle_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sle.s"] + fn __lsx_vfcmp_sle_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.slt.d"] + fn __lsx_vfcmp_slt_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.slt.s"] + fn __lsx_vfcmp_slt_s(a: 
v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sne.d"] + fn __lsx_vfcmp_sne_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sne.s"] + fn __lsx_vfcmp_sne_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sor.d"] + fn __lsx_vfcmp_sor_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sor.s"] + fn __lsx_vfcmp_sor_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sueq.d"] + fn __lsx_vfcmp_sueq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sueq.s"] + fn __lsx_vfcmp_sueq_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sule.d"] + fn __lsx_vfcmp_sule_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sule.s"] + fn __lsx_vfcmp_sule_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sult.d"] + fn __lsx_vfcmp_sult_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sult.s"] + fn __lsx_vfcmp_sult_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sun.d"] + fn __lsx_vfcmp_sun_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sune.d"] + fn __lsx_vfcmp_sune_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vfcmp.sune.s"] + fn __lsx_vfcmp_sune_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vfcmp.sun.s"] + fn __lsx_vfcmp_sun_s(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.loongarch.lsx.vrepli.b"] + fn __lsx_vrepli_b(a: i32) -> v16i8; + #[link_name = "llvm.loongarch.lsx.vrepli.d"] + fn __lsx_vrepli_d(a: i32) -> v2i64; + #[link_name = "llvm.loongarch.lsx.vrepli.h"] + fn __lsx_vrepli_h(a: i32) -> v8i16; + #[link_name = "llvm.loongarch.lsx.vrepli.w"] + fn __lsx_vrepli_w(a: i32) -> v4i32; +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsll_b(a: v16i8, b: v16i8) -> v16i8 { + 
__lsx_vsll_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsll_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsll_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsll_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsll_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsll_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsll_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslli_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vslli_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslli_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vslli_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslli_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslli_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslli_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vslli_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsra_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsra_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsra_h(a: v8i16, b: v8i16) -> v8i16 { + 
__lsx_vsra_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsra_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsra_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsra_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsra_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrai_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsrai_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrai_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrai_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrai_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrai_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrai_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrai_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrar_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsrar_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrar_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsrar_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrar_w(a: v4i32, b: v4i32) -> v4i32 { + 
__lsx_vsrar_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrar_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsrar_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrari_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsrari_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrari_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrari_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrari_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrari_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrari_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrari_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrl_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsrl_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrl_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsrl_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrl_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsrl_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrl_d(a: v2i64, b: v2i64) -> v2i64 { + 
__lsx_vsrl_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrli_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsrli_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrli_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrli_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrli_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrli_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrli_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrli_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlr_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsrlr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlr_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsrlr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlr_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsrlr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlr_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsrlr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lsx_vsrlri_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsrlri_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlri_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrlri_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlri_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrlri_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlri_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrlri_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclr_b(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vbitclr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclr_h(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vbitclr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclr_w(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vbitclr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclr_d(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vbitclr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclri_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vbitclri_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] 
+#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclri_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vbitclri_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclri_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vbitclri_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitclri_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vbitclri_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitset_b(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vbitset_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitset_h(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vbitset_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitset_w(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vbitset_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitset_d(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vbitset_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitseti_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vbitseti_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitseti_h(a: v8u16) -> v8u16 { + 
static_assert_uimm_bits!(IMM4, 4); + __lsx_vbitseti_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitseti_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vbitseti_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitseti_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vbitseti_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrev_b(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vbitrev_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrev_h(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vbitrev_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrev_w(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vbitrev_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrev_d(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vbitrev_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrevi_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vbitrevi_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrevi_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vbitrevi_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrevi_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vbitrevi_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitrevi_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vbitrevi_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadd_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vadd_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadd_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vadd_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadd_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vadd_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadd_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vadd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddi_bu(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vaddi_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddi_hu(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vaddi_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddi_wu(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vaddi_wu(a, IMM5) +} + 
+#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddi_du(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vaddi_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsub_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsub_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsub_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsub_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsub_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsub_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsub_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsub_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubi_bu(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsubi_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubi_hu(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsubi_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubi_wu(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsubi_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubi_du(a: v2i64) -> v2i64 { + 
static_assert_uimm_bits!(IMM5, 5); + __lsx_vsubi_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vmax_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vmax_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vmax_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmax_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmaxi_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmaxi_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmaxi_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmaxi_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe 
fn lsx_vmax_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vmax_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vmax_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vmax_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmax_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vmax_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_bu(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmaxi_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_hu(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmaxi_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_wu(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmaxi_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaxi_du(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmaxi_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmin_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vmin_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub 
unsafe fn lsx_vmin_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vmin_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmin_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vmin_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmin_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmin_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmini_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmini_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmini_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vmini_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmin_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vmin_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmin_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vmin_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lsx_vmin_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vmin_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmin_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vmin_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_bu(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmini_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_hu(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmini_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_wu(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmini_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmini_du(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vmini_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseq_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vseq_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseq_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vseq_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseq_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vseq_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lsx_vseq_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vseq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseqi_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vseqi_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseqi_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vseqi_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseqi_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vseqi_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vseqi_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vseqi_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslti_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vslt_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vslt_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vslt_w(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vslt_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslti_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslti_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslti_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_bu(a: v16u8, b: v16u8) -> v16i8 { + __lsx_vslt_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_hu(a: v8u16, b: v8u16) -> v8i16 { + __lsx_vslt_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_wu(a: v4u32, b: v4u32) -> v4i32 { + __lsx_vslt_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslt_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vslt_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_bu(a: v16u8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslti_bu(a, IMM5) +} + 
+#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_hu(a: v8u16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslti_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_wu(a: v4u32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslti_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslti_du(a: v2u64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslti_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsle_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsle_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsle_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsle_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslei_b(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_h(a: v8i16) -> v8i16 { + 
static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslei_h(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslei_w(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + __lsx_vslei_d(a, IMM_S5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_bu(a: v16u8, b: v16u8) -> v16i8 { + __lsx_vsle_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_hu(a: v8u16, b: v8u16) -> v8i16 { + __lsx_vsle_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_wu(a: v4u32, b: v4u32) -> v4i32 { + __lsx_vsle_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsle_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vsle_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_bu(a: v16u8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslei_bu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_hu(a: v8u16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslei_hu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_wu(a: v4u32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslei_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vslei_du(a: v2u64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vslei_du(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsat_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsat_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsat_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsat_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_bu(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsat_bu(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_hu(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsat_hu(a, IMM4) +} + +#[inline] +#[target_feature(enable = 
"lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_wu(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsat_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsat_du(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsat_du(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadda_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vadda_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadda_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vadda_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadda_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vadda_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadda_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vadda_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsadd_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsadd_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsadd_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_d(a: v2i64, b: v2i64) -> v2i64 { + 
__lsx_vsadd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vsadd_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vsadd_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vsadd_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsadd_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vsadd_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vavg_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vavg_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vavg_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vavg_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vavg_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vavg_hu(a, b) +} + +#[inline] +#[target_feature(enable = 
"lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vavg_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavg_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vavg_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vavgr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vavgr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vavgr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vavgr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vavgr_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vavgr_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vavgr_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vavgr_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vavgr_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lsx_vssub_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vssub_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vssub_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vssub_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vssub_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vssub_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vssub_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vssub_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssub_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vssub_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vabsd_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vabsd_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_w(a: v4i32, b: v4i32) -> 
v4i32 { + __lsx_vabsd_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vabsd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vabsd_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vabsd_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vabsd_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vabsd_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vabsd_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmul_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vmul_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmul_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vmul_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmul_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vmul_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmul_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmul_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmadd_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + __lsx_vmadd_b(a, b, c) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmadd_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + __lsx_vmadd_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmadd_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + __lsx_vmadd_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmadd_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + __lsx_vmadd_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmsub_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + __lsx_vmsub_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmsub_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + __lsx_vmsub_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmsub_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + __lsx_vmsub_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmsub_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + __lsx_vmsub_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vdiv_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vdiv_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vdiv_w(a, b) 
+} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vdiv_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vdiv_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vdiv_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vdiv_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vdiv_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vdiv_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vhaddw_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vhaddw_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vhaddw_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_hu_bu(a: v16u8, b: v16u8) -> v8u16 { + __lsx_vhaddw_hu_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_wu_hu(a: v8u16, b: v8u16) -> v4u32 { + __lsx_vhaddw_wu_hu(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_du_wu(a: v4u32, b: v4u32) -> v2u64 { + __lsx_vhaddw_du_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vhsubw_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vhsubw_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vhsubw_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_hu_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vhsubw_hu_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_wu_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vhsubw_wu_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_du_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vhsubw_du_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vmod_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vmod_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vmod_w(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmod_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vmod_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vmod_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vmod_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmod_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vmod_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplve_b(a: v16i8, b: i32) -> v16i8 { + __lsx_vreplve_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplve_h(a: v8i16, b: i32) -> v8i16 { + __lsx_vreplve_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplve_w(a: v4i32, b: i32) -> v4i32 { + __lsx_vreplve_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplve_d(a: v2i64, b: i32) -> v2i64 { + __lsx_vreplve_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplvei_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vreplvei_b(a, IMM4) +} + 
+#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplvei_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vreplvei_h(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplvei_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + __lsx_vreplvei_w(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplvei_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + __lsx_vreplvei_d(a, IMM1) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickev_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vpickev_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickev_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vpickev_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickev_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vpickev_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickev_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vpickev_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickod_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vpickod_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickod_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vpickod_h(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickod_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vpickod_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickod_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vpickod_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvh_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vilvh_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvh_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vilvh_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvh_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vilvh_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvh_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vilvh_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvl_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vilvl_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvl_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vilvl_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvl_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vilvl_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vilvl_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vilvl_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackev_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vpackev_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackev_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vpackev_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackev_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vpackev_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackev_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vpackev_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackod_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vpackod_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackod_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vpackod_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackod_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vpackod_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpackod_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vpackod_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + __lsx_vshuf_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + __lsx_vshuf_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + __lsx_vshuf_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vand_v(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vand_v(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vandi_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vandi_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vor_v(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vor_v(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vori_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vori_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vnor_v(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vnor_v(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vnori_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vnori_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vxor_v(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vxor_v(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vxori_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vxori_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { + __lsx_vbitsel_v(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbitseli_b(a: v16u8, b: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vbitseli_b(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf4i_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vshuf4i_b(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf4i_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vshuf4i_h(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf4i_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vshuf4i_w(a, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplgr2vr_b(a: i32) -> v16i8 { + __lsx_vreplgr2vr_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplgr2vr_h(a: i32) -> v8i16 { + __lsx_vreplgr2vr_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplgr2vr_w(a: i32) -> v4i32 { + __lsx_vreplgr2vr_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vreplgr2vr_d(a: i64) -> v2i64 { + __lsx_vreplgr2vr_d(a) +} + +#[inline] +#[target_feature(enable = 
"lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpcnt_b(a: v16i8) -> v16i8 { + __lsx_vpcnt_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpcnt_h(a: v8i16) -> v8i16 { + __lsx_vpcnt_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpcnt_w(a: v4i32) -> v4i32 { + __lsx_vpcnt_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpcnt_d(a: v2i64) -> v2i64 { + __lsx_vpcnt_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclo_b(a: v16i8) -> v16i8 { + __lsx_vclo_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclo_h(a: v8i16) -> v8i16 { + __lsx_vclo_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclo_w(a: v4i32) -> v4i32 { + __lsx_vclo_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclo_d(a: v2i64) -> v2i64 { + __lsx_vclo_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclz_b(a: v16i8) -> v16i8 { + __lsx_vclz_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclz_h(a: v8i16) -> v8i16 { + __lsx_vclz_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclz_w(a: v4i32) -> v4i32 { + __lsx_vclz_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vclz_d(a: v2i64) -> v2i64 { + __lsx_vclz_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_b(a: v16i8) -> i32 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vpickve2gr_b(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_h(a: v8i16) -> i32 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vpickve2gr_h(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_w(a: v4i32) -> i32 { + static_assert_uimm_bits!(IMM2, 2); + __lsx_vpickve2gr_w(a, IMM2) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_d(a: v2i64) -> i64 { + static_assert_uimm_bits!(IMM1, 1); + __lsx_vpickve2gr_d(a, IMM1) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_bu(a: v16i8) -> u32 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vpickve2gr_bu(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_hu(a: v8i16) -> u32 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vpickve2gr_hu(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_wu(a: v4i32) -> u32 { + static_assert_uimm_bits!(IMM2, 2); + __lsx_vpickve2gr_wu(a, IMM2) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpickve2gr_du(a: v2i64) -> u64 { + static_assert_uimm_bits!(IMM1, 1); + __lsx_vpickve2gr_du(a, IMM1) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vinsgr2vr_b(a: v16i8, b: i32) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vinsgr2vr_b(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vinsgr2vr_h(a: v8i16, b: i32) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vinsgr2vr_h(a, b, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vinsgr2vr_w(a: v4i32, b: i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + __lsx_vinsgr2vr_w(a, b, IMM2) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vinsgr2vr_d(a: v2i64, b: i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + __lsx_vinsgr2vr_d(a, b, IMM1) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfadd_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfadd_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfadd_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfadd_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfsub_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfsub_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfsub_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfsub_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmul_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfmul_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmul_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfmul_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfdiv_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfdiv_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfdiv_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfdiv_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcvt_h_s(a: v4f32, b: v4f32) -> v8i16 { + __lsx_vfcvt_h_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcvt_s_d(a: v2f64, b: v2f64) -> v4f32 { + __lsx_vfcvt_s_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmin_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfmin_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmin_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfmin_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmina_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfmina_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lsx_vfmina_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfmina_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmax_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfmax_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmax_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfmax_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmaxa_s(a: v4f32, b: v4f32) -> v4f32 { + __lsx_vfmaxa_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmaxa_d(a: v2f64, b: v2f64) -> v2f64 { + __lsx_vfmaxa_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfclass_s(a: v4f32) -> v4i32 { + __lsx_vfclass_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfclass_d(a: v2f64) -> v2i64 { + __lsx_vfclass_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfsqrt_s(a: v4f32) -> v4f32 { + __lsx_vfsqrt_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfsqrt_d(a: v2f64) -> v2f64 { + __lsx_vfsqrt_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrecip_s(a: v4f32) -> v4f32 { + __lsx_vfrecip_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrecip_d(a: v2f64) -> v2f64 { + __lsx_vfrecip_d(a) +} + +#[inline] 
+#[target_feature(enable = "lsx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrecipe_s(a: v4f32) -> v4f32 { + __lsx_vfrecipe_s(a) +} + +#[inline] +#[target_feature(enable = "lsx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrecipe_d(a: v2f64) -> v2f64 { + __lsx_vfrecipe_d(a) +} + +#[inline] +#[target_feature(enable = "lsx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrsqrte_s(a: v4f32) -> v4f32 { + __lsx_vfrsqrte_s(a) +} + +#[inline] +#[target_feature(enable = "lsx,frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrsqrte_d(a: v2f64) -> v2f64 { + __lsx_vfrsqrte_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrint_s(a: v4f32) -> v4f32 { + __lsx_vfrint_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrint_d(a: v2f64) -> v2f64 { + __lsx_vfrint_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrsqrt_s(a: v4f32) -> v4f32 { + __lsx_vfrsqrt_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrsqrt_d(a: v2f64) -> v2f64 { + __lsx_vfrsqrt_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vflogb_s(a: v4f32) -> v4f32 { + __lsx_vflogb_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vflogb_d(a: v2f64) -> v2f64 { + __lsx_vflogb_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lsx_vfcvth_s_h(a: v8i16) -> v4f32 { + __lsx_vfcvth_s_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcvth_d_s(a: v4f32) -> v2f64 { + __lsx_vfcvth_d_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcvtl_s_h(a: v8i16) -> v4f32 { + __lsx_vfcvtl_s_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcvtl_d_s(a: v4f32) -> v2f64 { + __lsx_vfcvtl_d_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftint_w_s(a: v4f32) -> v4i32 { + __lsx_vftint_w_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftint_l_d(a: v2f64) -> v2i64 { + __lsx_vftint_l_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftint_wu_s(a: v4f32) -> v4u32 { + __lsx_vftint_wu_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftint_lu_d(a: v2f64) -> v2u64 { + __lsx_vftint_lu_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrz_w_s(a: v4f32) -> v4i32 { + __lsx_vftintrz_w_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrz_l_d(a: v2f64) -> v2i64 { + __lsx_vftintrz_l_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrz_wu_s(a: v4f32) -> v4u32 { + __lsx_vftintrz_wu_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrz_lu_d(a: v2f64) -> v2u64 { + __lsx_vftintrz_lu_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffint_s_w(a: v4i32) -> v4f32 { + __lsx_vffint_s_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffint_d_l(a: v2i64) -> v2f64 { + __lsx_vffint_d_l(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffint_s_wu(a: v4u32) -> v4f32 { + __lsx_vffint_s_wu(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffint_d_lu(a: v2u64) -> v2f64 { + __lsx_vffint_d_lu(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vandn_v(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vandn_v(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vneg_b(a: v16i8) -> v16i8 { + __lsx_vneg_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vneg_h(a: v8i16) -> v8i16 { + __lsx_vneg_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vneg_w(a: v4i32) -> v4i32 { + __lsx_vneg_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vneg_d(a: v2i64) -> v2i64 { + __lsx_vneg_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vmuh_b(a, b) +} + 
+#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vmuh_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vmuh_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmuh_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_bu(a: v16u8, b: v16u8) -> v16u8 { + __lsx_vmuh_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_hu(a: v8u16, b: v8u16) -> v8u16 { + __lsx_vmuh_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_wu(a: v4u32, b: v4u32) -> v4u32 { + __lsx_vmuh_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmuh_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vmuh_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsllwil_h_b(a: v16i8) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsllwil_h_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsllwil_w_h(a: v8i16) -> v4i32 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsllwil_w_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsllwil_d_w(a: v4i32) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsllwil_d_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsllwil_hu_bu(a: v16u8) -> v8u16 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vsllwil_hu_bu(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsllwil_wu_hu(a: v8u16) -> v4u32 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsllwil_wu_hu(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsllwil_du_wu(a: v4u32) -> v2u64 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsllwil_du_wu(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsran_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vsran_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsran_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vsran_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsran_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vsran_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssran_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vssran_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssran_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vssran_h_w(a, b) +} + +#[inline] +#[target_feature(enable = 
"lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssran_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vssran_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssran_bu_h(a: v8u16, b: v8u16) -> v16u8 { + __lsx_vssran_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssran_hu_w(a: v4u32, b: v4u32) -> v8u16 { + __lsx_vssran_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssran_wu_d(a: v2u64, b: v2u64) -> v4u32 { + __lsx_vssran_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarn_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vsrarn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarn_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vsrarn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarn_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vsrarn_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarn_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vssrarn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarn_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vssrarn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarn_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vssrarn_w_d(a, b) +} + +#[inline] +#[target_feature(enable 
= "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarn_bu_h(a: v8u16, b: v8u16) -> v16u8 { + __lsx_vssrarn_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarn_hu_w(a: v4u32, b: v4u32) -> v8u16 { + __lsx_vssrarn_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarn_wu_d(a: v2u64, b: v2u64) -> v4u32 { + __lsx_vssrarn_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrln_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vsrln_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrln_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vsrln_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrln_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vsrln_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrln_bu_h(a: v8u16, b: v8u16) -> v16u8 { + __lsx_vssrln_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrln_hu_w(a: v4u32, b: v4u32) -> v8u16 { + __lsx_vssrln_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrln_wu_d(a: v2u64, b: v2u64) -> v4u32 { + __lsx_vssrln_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrn_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vsrlrn_b_h(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrn_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vsrlrn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrn_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vsrlrn_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrn_bu_h(a: v8u16, b: v8u16) -> v16u8 { + __lsx_vssrlrn_bu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrn_hu_w(a: v4u32, b: v4u32) -> v8u16 { + __lsx_vssrlrn_hu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrn_wu_d(a: v2u64, b: v2u64) -> v4u32 { + __lsx_vssrlrn_wu_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrstpi_b(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vfrstpi_b(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrstpi_h(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vfrstpi_h(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrstp_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + __lsx_vfrstp_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrstp_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + __lsx_vfrstp_h(a, b, c) +} + +#[inline] +#[target_feature(enable = 
"lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf4i_d(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vshuf4i_d(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbsrl_v(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vbsrl_v(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vbsll_v(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vbsll_v(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vextrins_b(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vextrins_b(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vextrins_h(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vextrins_h(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vextrins_w(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vextrins_w(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vextrins_d(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vextrins_d(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lsx_vmskltz_b(a: v16i8) -> v16i8 { + __lsx_vmskltz_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmskltz_h(a: v8i16) -> v8i16 { + __lsx_vmskltz_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmskltz_w(a: v4i32) -> v4i32 { + __lsx_vmskltz_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmskltz_d(a: v2i64) -> v2i64 { + __lsx_vmskltz_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsigncov_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vsigncov_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsigncov_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vsigncov_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsigncov_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vsigncov_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsigncov_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsigncov_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmadd_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { + __lsx_vfmadd_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { + __lsx_vfmadd_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmsub_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32 
{ + __lsx_vfmsub_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { + __lsx_vfmsub_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfnmadd_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { + __lsx_vfnmadd_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfnmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { + __lsx_vfnmadd_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfnmsub_s(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { + __lsx_vfnmsub_s(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfnmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { + __lsx_vfnmsub_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrne_w_s(a: v4f32) -> v4i32 { + __lsx_vftintrne_w_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrne_l_d(a: v2f64) -> v2i64 { + __lsx_vftintrne_l_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrp_w_s(a: v4f32) -> v4i32 { + __lsx_vftintrp_w_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrp_l_d(a: v2f64) -> v2i64 { + __lsx_vftintrp_l_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrm_w_s(a: v4f32) -> 
v4i32 { + __lsx_vftintrm_w_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrm_l_d(a: v2f64) -> v2i64 { + __lsx_vftintrm_l_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftint_w_d(a: v2f64, b: v2f64) -> v4i32 { + __lsx_vftint_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffint_s_l(a: v2i64, b: v2i64) -> v4f32 { + __lsx_vffint_s_l(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrz_w_d(a: v2f64, b: v2f64) -> v4i32 { + __lsx_vftintrz_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrp_w_d(a: v2f64, b: v2f64) -> v4i32 { + __lsx_vftintrp_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrm_w_d(a: v2f64, b: v2f64) -> v4i32 { + __lsx_vftintrm_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrne_w_d(a: v2f64, b: v2f64) -> v4i32 { + __lsx_vftintrne_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintl_l_s(a: v4f32) -> v2i64 { + __lsx_vftintl_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftinth_l_s(a: v4f32) -> v2i64 { + __lsx_vftinth_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffinth_d_w(a: v4i32) -> v2f64 { + __lsx_vffinth_d_w(a) 
+} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vffintl_d_w(a: v4i32) -> v2f64 { + __lsx_vffintl_d_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrzl_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrzl_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrzh_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrzh_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrpl_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrpl_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrph_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrph_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrml_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrml_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrmh_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrmh_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrnel_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrnel_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vftintrneh_l_s(a: v4f32) -> v2i64 { + __lsx_vftintrneh_l_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrne_s(a: v4f32) -> v4f32 { + __lsx_vfrintrne_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrne_d(a: v2f64) -> v2f64 { + __lsx_vfrintrne_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrz_s(a: v4f32) -> v4f32 { + __lsx_vfrintrz_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrz_d(a: v2f64) -> v2f64 { + __lsx_vfrintrz_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrp_s(a: v4f32) -> v4f32 { + __lsx_vfrintrp_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrp_d(a: v2f64) -> v2f64 { + __lsx_vfrintrp_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrm_s(a: v4f32) -> v4f32 { + __lsx_vfrintrm_s(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfrintrm_d(a: v2f64) -> v2f64 { + __lsx_vfrintrm_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vstelm_b(a: v16i8, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM4, 4); + __lsx_vstelm_b(a, mem_addr, IMM_S8, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vstelm_h(a: v8i16, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM3, 3); + __lsx_vstelm_h(a, mem_addr, IMM_S8, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature 
= "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vstelm_w(a: v4i32, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM2, 2); + __lsx_vstelm_w(a, mem_addr, IMM_S8, IMM2) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vstelm_d(a: v2i64, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S8, 8); + static_assert_uimm_bits!(IMM1, 1); + __lsx_vstelm_d(a, mem_addr, IMM_S8, IMM1) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vaddwev_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vaddwev_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vaddwev_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vaddwod_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vaddwod_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vaddwod_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_d_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vaddwev_d_wu(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_w_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vaddwev_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_h_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vaddwev_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_d_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vaddwod_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_w_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vaddwod_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_h_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vaddwod_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_d_wu_w(a: v4u32, b: v4i32) -> v2i64 { + __lsx_vaddwev_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_w_hu_h(a: v8u16, b: v8i16) -> v4i32 { + __lsx_vaddwev_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_h_bu_b(a: v16u8, b: v16i8) -> v8i16 { + __lsx_vaddwev_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_d_wu_w(a: v4u32, b: v4i32) -> v2i64 { + __lsx_vaddwod_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_w_hu_h(a: v8u16, b: v8i16) -> 
v4i32 { + __lsx_vaddwod_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_h_bu_b(a: v16u8, b: v16i8) -> v8i16 { + __lsx_vaddwod_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwev_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vsubwev_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwev_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vsubwev_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwev_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vsubwev_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vsubwod_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vsubwod_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vsubwod_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwev_d_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vsubwev_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwev_w_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vsubwev_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lsx_vsubwev_h_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vsubwev_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_d_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vsubwod_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_w_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vsubwod_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_h_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vsubwod_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vaddwev_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vaddwod_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_q_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vaddwev_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_q_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vaddwod_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwev_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsubwev_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsubwod_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = 
"117427")] +pub unsafe fn lsx_vsubwev_q_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vsubwev_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsubwod_q_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vsubwod_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwev_q_du_d(a: v2u64, b: v2i64) -> v2i64 { + __lsx_vaddwev_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vaddwod_q_du_d(a: v2u64, b: v2i64) -> v2i64 { + __lsx_vaddwod_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vmulwev_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vmulwev_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vmulwev_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_d_w(a: v4i32, b: v4i32) -> v2i64 { + __lsx_vmulwod_d_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_w_h(a: v8i16, b: v8i16) -> v4i32 { + __lsx_vmulwod_w_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_h_b(a: v16i8, b: v16i8) -> v8i16 { + __lsx_vmulwod_h_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_d_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vmulwev_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_w_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vmulwev_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_h_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vmulwev_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_d_wu(a: v4u32, b: v4u32) -> v2i64 { + __lsx_vmulwod_d_wu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_w_hu(a: v8u16, b: v8u16) -> v4i32 { + __lsx_vmulwod_w_hu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_h_bu(a: v16u8, b: v16u8) -> v8i16 { + __lsx_vmulwod_h_bu(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_d_wu_w(a: v4u32, b: v4i32) -> v2i64 { + __lsx_vmulwev_d_wu_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_w_hu_h(a: v8u16, b: v8i16) -> v4i32 { + __lsx_vmulwev_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_h_bu_b(a: v16u8, b: v16i8) -> v8i16 { + __lsx_vmulwev_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_d_wu_w(a: v4u32, b: v4i32) -> v2i64 { + __lsx_vmulwod_d_wu_w(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_w_hu_h(a: v8u16, b: v8i16) -> v4i32 { + __lsx_vmulwod_w_hu_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_h_bu_b(a: v16u8, b: v16i8) -> v8i16 { + __lsx_vmulwod_h_bu_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmulwev_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vmulwod_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_q_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vmulwev_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_q_du(a: v2u64, b: v2u64) -> v2i64 { + __lsx_vmulwod_q_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwev_q_du_d(a: v2u64, b: v2i64) -> v2i64 { + __lsx_vmulwev_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmulwod_q_du_d(a: v2u64, b: v2i64) -> v2i64 { + __lsx_vmulwod_q_du_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vhaddw_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhaddw_qu_du(a: v2u64, b: v2u64) -> v2u64 { + 
__lsx_vhaddw_qu_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_q_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vhsubw_q_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vhsubw_qu_du(a: v2u64, b: v2u64) -> v2u64 { + __lsx_vhsubw_qu_du(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_d_w(a: v2i64, b: v4i32, c: v4i32) -> v2i64 { + __lsx_vmaddwev_d_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_w_h(a: v4i32, b: v8i16, c: v8i16) -> v4i32 { + __lsx_vmaddwev_w_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_h_b(a: v8i16, b: v16i8, c: v16i8) -> v8i16 { + __lsx_vmaddwev_h_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_d_wu(a: v2u64, b: v4u32, c: v4u32) -> v2u64 { + __lsx_vmaddwev_d_wu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_w_hu(a: v4u32, b: v8u16, c: v8u16) -> v4u32 { + __lsx_vmaddwev_w_hu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_h_bu(a: v8u16, b: v16u8, c: v16u8) -> v8u16 { + __lsx_vmaddwev_h_bu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_d_w(a: v2i64, b: v4i32, c: v4i32) -> v2i64 { + __lsx_vmaddwod_d_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_w_h(a: v4i32, b: v8i16, c: v8i16) -> v4i32 { + __lsx_vmaddwod_w_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_h_b(a: v8i16, b: v16i8, c: v16i8) -> v8i16 { + __lsx_vmaddwod_h_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_d_wu(a: v2u64, b: v4u32, c: v4u32) -> v2u64 { + __lsx_vmaddwod_d_wu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_w_hu(a: v4u32, b: v8u16, c: v8u16) -> v4u32 { + __lsx_vmaddwod_w_hu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_h_bu(a: v8u16, b: v16u8, c: v16u8) -> v8u16 { + __lsx_vmaddwod_h_bu(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_d_wu_w(a: v2i64, b: v4u32, c: v4i32) -> v2i64 { + __lsx_vmaddwev_d_wu_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_w_hu_h(a: v4i32, b: v8u16, c: v8i16) -> v4i32 { + __lsx_vmaddwev_w_hu_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_h_bu_b(a: v8i16, b: v16u8, c: v16i8) -> v8i16 { + __lsx_vmaddwev_h_bu_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_d_wu_w(a: v2i64, b: v4u32, c: v4i32) -> v2i64 { + __lsx_vmaddwod_d_wu_w(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_w_hu_h(a: v4i32, b: v8u16, c: v8i16) -> v4i32 { + __lsx_vmaddwod_w_hu_h(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_h_bu_b(a: v8i16, b: v16u8, c: v16i8) -> v8i16 { + __lsx_vmaddwod_h_bu_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_q_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + __lsx_vmaddwev_q_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_q_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + __lsx_vmaddwod_q_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_q_du(a: v2u64, b: v2u64, c: v2u64) -> v2u64 { + __lsx_vmaddwev_q_du(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_q_du(a: v2u64, b: v2u64, c: v2u64) -> v2u64 { + __lsx_vmaddwod_q_du(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwev_q_du_d(a: v2i64, b: v2u64, c: v2i64) -> v2i64 { + __lsx_vmaddwev_q_du_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmaddwod_q_du_d(a: v2i64, b: v2u64, c: v2i64) -> v2i64 { + __lsx_vmaddwod_q_du_d(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrotr_b(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vrotr_b(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub 
unsafe fn lsx_vrotr_h(a: v8i16, b: v8i16) -> v8i16 { + __lsx_vrotr_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrotr_w(a: v4i32, b: v4i32) -> v4i32 { + __lsx_vrotr_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrotr_d(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vrotr_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vadd_q(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vadd_q(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsub_q(a: v2i64, b: v2i64) -> v2i64 { + __lsx_vsub_q(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vldrepl_b(mem_addr: *const i8) -> v16i8 { + static_assert_simm_bits!(IMM_S12, 12); + __lsx_vldrepl_b(mem_addr, IMM_S12) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vldrepl_h(mem_addr: *const i8) -> v8i16 { + static_assert_simm_bits!(IMM_S11, 11); + __lsx_vldrepl_h(mem_addr, IMM_S11) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vldrepl_w(mem_addr: *const i8) -> v4i32 { + static_assert_simm_bits!(IMM_S10, 10); + __lsx_vldrepl_w(mem_addr, IMM_S10) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vldrepl_d(mem_addr: *const i8) -> v2i64 { + static_assert_simm_bits!(IMM_S9, 9); + __lsx_vldrepl_d(mem_addr, IMM_S9) +} + 
+#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmskgez_b(a: v16i8) -> v16i8 { + __lsx_vmskgez_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vmsknz_b(a: v16i8) -> v16i8 { + __lsx_vmsknz_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_h_b(a: v16i8) -> v8i16 { + __lsx_vexth_h_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_w_h(a: v8i16) -> v4i32 { + __lsx_vexth_w_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_d_w(a: v4i32) -> v2i64 { + __lsx_vexth_d_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_q_d(a: v2i64) -> v2i64 { + __lsx_vexth_q_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_hu_bu(a: v16u8) -> v8u16 { + __lsx_vexth_hu_bu(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_wu_hu(a: v8u16) -> v4u32 { + __lsx_vexth_wu_hu(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_du_wu(a: v4u32) -> v2u64 { + __lsx_vexth_du_wu(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vexth_qu_du(a: v2u64) -> v2u64 { + __lsx_vexth_qu_du(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] 
+pub unsafe fn lsx_vrotri_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + __lsx_vrotri_b(a, IMM3) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrotri_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vrotri_h(a, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrotri_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vrotri_w(a, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrotri_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vrotri_d(a, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vextl_q_d(a: v2i64) -> v2i64 { + __lsx_vextl_q_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlni_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrlni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlni_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrlni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlni_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrlni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] 
+#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlni_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vsrlni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrni_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrlrni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrni_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrlrni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrni_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrlrni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrlrni_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vsrlrni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrlni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrlni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue 
= "117427")] +pub unsafe fn lsx_vssrlni_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrlni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrlni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_bu_h(a: v16u8, b: v16i8) -> v16u8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrlni_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_hu_w(a: v8u16, b: v8i16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrlni_hu_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_wu_d(a: v4u32, b: v4i32) -> v4u32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrlni_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlni_du_q(a: v2u64, b: v2i64) -> v2u64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrlni_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrlrni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lsx_vssrlrni_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrlrni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrlrni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrlrni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_bu_h(a: v16u8, b: v16i8) -> v16u8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrlrni_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_hu_w(a: v8u16, b: v8i16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrlrni_hu_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_wu_d(a: v4u32, b: v4i32) -> v4u32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrlrni_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrni_du_q(a: v2u64, b: v2i64) -> v2u64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrlrni_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrani_b_h(a: v16i8, 
b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrani_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrani_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrani_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrani_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrani_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrani_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vsrani_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarni_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vsrarni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarni_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vsrarni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarni_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vsrarni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vsrarni_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 
7); + __lsx_vsrarni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrani_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrani_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrani_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrani_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_bu_h(a: v16u8, b: v16i8) -> v16u8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrani_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_hu_w(a: v8u16, b: v8i16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrani_hu_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_wu_d(a: v4u32, b: v4i32) -> v4u32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrani_wu_d(a, b, IMM6) +} + 
+#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrani_du_q(a: v2u64, b: v2i64) -> v2u64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrani_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_b_h(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrarni_b_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_h_w(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrarni_h_w(a, b, IMM5) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_w_d(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrarni_w_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_d_q(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrarni_d_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_bu_h(a: v16u8, b: v16i8) -> v16u8 { + static_assert_uimm_bits!(IMM4, 4); + __lsx_vssrarni_bu_h(a, b, IMM4) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_hu_w(a: v8u16, b: v8i16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + __lsx_vssrarni_hu_w(a, b, IMM5) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_wu_d(a: v4u32, b: v4i32) -> v4u32 { + static_assert_uimm_bits!(IMM6, 6); + __lsx_vssrarni_wu_d(a, b, IMM6) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrarni_du_q(a: v2u64, b: v2i64) -> v2u64 { + static_assert_uimm_bits!(IMM7, 7); + __lsx_vssrarni_du_q(a, b, IMM7) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vpermi_w(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM8, 8); + __lsx_vpermi_w(a, b, IMM8) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vld(mem_addr: *const i8) -> v16i8 { + static_assert_simm_bits!(IMM_S12, 12); + __lsx_vld(mem_addr, IMM_S12) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vst(a: v16i8, mem_addr: *mut i8) { + static_assert_simm_bits!(IMM_S12, 12); + __lsx_vst(a, mem_addr, IMM_S12) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrn_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vssrlrn_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrn_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vssrlrn_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrlrn_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vssrlrn_w_d(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrln_b_h(a: v8i16, b: v8i16) -> v16i8 { + __lsx_vssrln_b_h(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrln_h_w(a: v4i32, b: v4i32) -> v8i16 { + __lsx_vssrln_h_w(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vssrln_w_d(a: v2i64, b: v2i64) -> v4i32 { + __lsx_vssrln_w_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vorn_v(a: v16i8, b: v16i8) -> v16i8 { + __lsx_vorn_v(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vldi() -> v2i64 { + static_assert_simm_bits!(IMM_S13, 13); + __lsx_vldi(IMM_S13) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vshuf_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + __lsx_vshuf_b(a, b, c) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vldx(mem_addr: *const i8, b: i64) -> v16i8 { + __lsx_vldx(mem_addr, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vstx(a: v16i8, mem_addr: *mut i8, b: i64) { + __lsx_vstx(a, mem_addr, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vextl_qu_du(a: v2u64) -> v2u64 { + __lsx_vextl_qu_du(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bnz_b(a: v16u8) -> i32 { + __lsx_bnz_b(a) +} 
+ +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bnz_d(a: v2u64) -> i32 { + __lsx_bnz_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bnz_h(a: v8u16) -> i32 { + __lsx_bnz_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bnz_v(a: v16u8) -> i32 { + __lsx_bnz_v(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bnz_w(a: v4u32) -> i32 { + __lsx_bnz_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bz_b(a: v16u8) -> i32 { + __lsx_bz_b(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bz_d(a: v2u64) -> i32 { + __lsx_bz_d(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bz_h(a: v8u16) -> i32 { + __lsx_bz_h(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bz_v(a: v16u8) -> i32 { + __lsx_bz_v(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_bz_w(a: v4u32) -> i32 { + __lsx_bz_w(a) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_caf_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_caf_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_caf_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_caf_s(a, b) +} + +#[inline] +#[target_feature(enable = 
"lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_ceq_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_ceq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_ceq_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_ceq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cle_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cle_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cle_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cle_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_clt_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_clt_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_clt_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_clt_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cne_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cne_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cne_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cne_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cor_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cor_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cor_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cor_s(a, b) +} + +#[inline] 
+#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cueq_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cueq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cueq_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cueq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cule_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cule_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cule_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cule_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cult_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cult_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cult_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cult_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cun_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cun_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cune_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_cune_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cune_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_cune_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_cun_s(a: v4f32, b: v4f32) -> v4i32 { + 
__lsx_vfcmp_cun_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_saf_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_saf_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_saf_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_saf_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_seq_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_seq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_seq_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_seq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sle_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sle_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sle_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sle_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_slt_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_slt_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_slt_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_slt_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sne_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sne_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sne_s(a: v4f32, b: v4f32) -> 
v4i32 { + __lsx_vfcmp_sne_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sor_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sor_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sor_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sor_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sueq_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sueq_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sueq_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sueq_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sule_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sule_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sule_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sule_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sult_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sult_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sult_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sult_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sun_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sun_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn 
lsx_vfcmp_sune_d(a: v2f64, b: v2f64) -> v2i64 { + __lsx_vfcmp_sune_d(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sune_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sune_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vfcmp_sun_s(a: v4f32, b: v4f32) -> v4i32 { + __lsx_vfcmp_sun_s(a, b) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrepli_b() -> v16i8 { + static_assert_simm_bits!(IMM_S10, 10); + __lsx_vrepli_b(IMM_S10) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrepli_d() -> v2i64 { + static_assert_simm_bits!(IMM_S10, 10); + __lsx_vrepli_d(IMM_S10) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrepli_h() -> v8i16 { + static_assert_simm_bits!(IMM_S10, 10); + __lsx_vrepli_h(IMM_S10) +} + +#[inline] +#[target_feature(enable = "lsx")] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lsx_vrepli_w() -> v4i32 { + static_assert_simm_bits!(IMM_S10, 10); + __lsx_vrepli_w(IMM_S10) +} diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs new file mode 100644 index 000000000000..67a08985a963 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs @@ -0,0 +1,21 @@ +//! 
LoongArch64 LSX intrinsics + +#![allow(non_camel_case_types)] + +#[rustfmt::skip] +mod types; + +#[rustfmt::skip] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub use self::types::*; + +#[rustfmt::skip] +mod generated; + +#[rustfmt::skip] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub use self::generated::*; + +#[rustfmt::skip] +#[cfg(test)] +mod tests; diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs new file mode 100644 index 000000000000..5670bd4378a8 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs @@ -0,0 +1,7164 @@ +// This code is automatically generated. DO NOT MODIFY. +// See crates/stdarch-gen-loongarch/README.md + +use crate::{ + core_arch::{loongarch64::*, simd::*}, + mem::transmute, +}; +use stdarch_test::simd_test; + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsll_b() { + let a = i8x16::new( + -96, 33, -12, -39, 82, 20, 52, 0, -99, -60, -50, -85, -6, -83, -52, -23, + ); + let b = i8x16::new( + 50, 37, 88, 105, -45, -52, 119, 2, 19, 109, 95, 116, -101, -126, -104, -119, + ); + let r = i64x2::new(70990221811840, -3257029622096690968); + + assert_eq!(r, transmute(lsx_vsll_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsll_h() { + let a = i16x8::new(2551, -25501, -5868, -8995, 27363, 18426, -10212, -26148); + let b = i16x8::new(-10317, -20778, -9962, -8975, 25298, 12929, -13803, -18669); + let r = i64x2::new(-5063658964307128392, -3539825456407336052); + + assert_eq!(r, transmute(lsx_vsll_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsll_w() { + let a = i32x4::new(1371197240, -1100536513, 781269067, -294302078); + let b = i32x4::new(82237029, -819106294, -96895338, -456101700); + let r = i64x2::new(-7163824029380778240, 2305843009528266752); + + assert_eq!(r, transmute(lsx_vsll_w(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsll_d() { + let a = i64x2::new(5700293115058898640, 9057986892130087440); + let b = i64x2::new(8592669249977019309, -1379694176202045825); + let r = i64x2::new(1790743801833193472, 0); + + assert_eq!(r, transmute(lsx_vsll_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslli_b() { + let a = i8x16::new( + 90, 123, 29, -67, 120, -106, 104, -39, -62, -56, -92, -75, 113, 123, -120, -52, + ); + let r = i64x2::new(-2780807324588213414, -3708578564830607166); + + assert_eq!(r, transmute(lsx_vslli_b::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslli_h() { + let a = i16x8::new(18469, -14840, 23655, -3474, 7467, 2798, -15418, 26847); + let r = i64x2::new(-7241759886206301888, 4017476402818337472); + + assert_eq!(r, transmute(lsx_vslli_h::<6>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslli_w() { + let a = i32x4::new(20701902, -1777432355, 6349179, 1747667894); + let r = i64x2::new(4189319625752393728, -5967594959501136896); + + assert_eq!(r, transmute(lsx_vslli_w::<10>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslli_d() { + let a = i64x2::new(-5896889635782282086, -8807609320972692839); + let r = i64x2::new(-4233027607937510592, -5142337165482896608); + + assert_eq!(r, transmute(lsx_vslli_d::<5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsra_b() { + let a = i8x16::new( + 0, 72, -102, -88, 101, -100, 66, -113, 68, -13, 2, 4, -61, 66, -24, 72, + ); + let b = i8x16::new( + 34, 5, 102, 83, -87, 43, 94, 107, -84, 88, -103, 5, 127, 43, -28, -69, + ); + let r = i64x2::new(-1080315035391229440, 720022881735668484); + + assert_eq!(r, transmute(lsx_vsra_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsra_h() { + let a = i16x8::new(29313, 15702, 30839, 9343, -19597, 5316, -32305, -13755); + let b = i16x8::new(14017, 
3796, 23987, -27244, -13363, 21333, -10262, 23633); + let r = i64x2::new(164116464290576704, -1935703552267190275); + + assert_eq!(r, transmute(lsx_vsra_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsra_w() { + let a = i32x4::new(-309802992, -833530117, -1757716660, 1577882592); + let b = i32x4::new(-670772992, 2044335288, -1224858031, 520588790); + let r = i64x2::new(-210763200496, 1619202657181); + + assert_eq!(r, transmute(lsx_vsra_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsra_d() { + let a = i64x2::new(-1372092312892164486, 6937900992858870877); + let b = i64x2::new(4251079558060308329, 4657697142994416829); + let r = i64x2::new(-623956, 3); + + assert_eq!(r, transmute(lsx_vsra_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrai_b() { + let a = i8x16::new( + -4, 92, -7, -110, 81, -20, -18, -113, 43, 110, -105, 53, -101, -100, -56, -120, + ); + let r = i64x2::new(-2018743940785760257, -2093355901512246518); + + assert_eq!(r, transmute(lsx_vsrai_b::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrai_h() { + let a = i16x8::new(-22502, -7299, 19084, -21578, -28082, 20851, 23456, 15524); + let r = i64x2::new(-1688828385492998, 844446405361657); + + assert_eq!(r, transmute(lsx_vsrai_h::<12>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrai_w() { + let a = i32x4::new(743537539, 1831641900, -1639033567, -984629971); + let r = i64x2::new(30008936499988, -16131897170029); + + assert_eq!(r, transmute(lsx_vsrai_w::<18>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrai_d() { + let a = i64x2::new(-8375997486414293750, 1714581574012370587); + let r = i64x2::new(-476121, 97462); + + assert_eq!(r, transmute(lsx_vsrai_d::<44>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrar_b() { + let a = i8x16::new( + 123, 17, -3, 27, 49, 89, -61, 
105, -77, 87, 87, 15, -113, 75, -69, 40, + ); + let b = i8x16::new( + 14, 5, 123, -33, 72, -126, -70, -33, -124, -55, -82, -78, -33, -12, -25, -114, + ); + let r = i64x2::new(139917463134404866, 143840305941130491); + + assert_eq!(r, transmute(lsx_vsrar_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrar_h() { + let a = i16x8::new(-25154, -18230, -10510, -29541, 25913, 29143, 21372, 14979); + let b = i16x8::new(-26450, 2176, 31587, 2222, 13726, 30172, 1067, -14273); + let r = i64x2::new(-287115463426050, 42950131714); + + assert_eq!(r, transmute(lsx_vsrar_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrar_w() { + let a = i32x4::new(-139995520, 1671693163, -640570871, 2138298219); + let b = i32x4::new(-1532076758, 940127488, 1781366421, 1497262222); + let r = i64x2::new(7179867468326627830, 560544771735247); + + assert_eq!(r, transmute(lsx_vsrar_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrar_d() { + let a = i64x2::new(-489385672013329488, -1253364580216579403); + let b = i64x2::new(3571440266112779495, -725943254065719378); + let r = i64x2::new(-890187, -17811); + + assert_eq!(r, transmute(lsx_vsrar_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrari_b() { + let a = i8x16::new( + -20, 33, -49, -120, -30, -40, 67, 93, -77, -2, 16, -36, 108, -107, 23, -53, + ); + let r = i64x2::new(867219992078845182, -503291487652282122); + + assert_eq!(r, transmute(lsx_vsrari_b::<3>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrari_h() { + let a = i16x8::new(29939, -1699, 12357, 30805, -30883, 31936, 15701, -11818); + let r = i64x2::new(4222154715365391, -1688815499411471); + + assert_eq!(r, transmute(lsx_vsrari_h::<11>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrari_w() { + let a = i32x4::new(588196178, -1058764534, 1325397591, 1169671026); + let r = 
i64x2::new(-4294967295, 4294967297); + + assert_eq!(r, transmute(lsx_vsrari_w::<30>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrari_d() { + let a = i64x2::new(-2795326946470057100, 6746045132217841338); + let r = i64x2::new(-174707934154378569, 421627820763615084); + + assert_eq!(r, transmute(lsx_vsrari_d::<4>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrl_b() { + let a = i8x16::new( + 73, 74, 66, -104, -30, 25, 93, -107, 105, -89, -115, -22, -94, -36, -55, -28, + ); + let b = i8x16::new( + 81, 13, -9, -46, -24, 0, 91, 123, 90, -52, -24, 56, 64, -4, -66, -17, + ); + let r = i64x2::new(1300161376517358116, 72917012339034650); + + assert_eq!(r, transmute(lsx_vsrl_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrl_h() { + let a = i16x8::new(29049, 13489, 20776, -12268, 25704, -28758, -6146, -27463); + let b = i16x8::new(16605, -13577, -26644, -17739, 11000, -29283, -15971, 20169); + let r = i64x2::new(468374382728249347, 20829178341621860); + + assert_eq!(r, transmute(lsx_vsrl_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrl_w() { + let a = i32x4::new(-2108561731, -402290458, -1418385618, 1489749824); + let b = i32x4::new(1777885221, -1725401090, 1849724045, -1051851102); + let r = i64x2::new(12953227061, 1599606693325790121); + + assert_eq!(r, transmute(lsx_vsrl_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrl_d() { + let a = i64x2::new(2854528248771186187, 804951867404831945); + let b = i64x2::new(-7903128394835365398, 7601347629202818185); + let r = i64x2::new(649044, 1572171616025062); + + assert_eq!(r, transmute(lsx_vsrl_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrli_b() { + let a = i8x16::new( + 84, -108, 98, 45, 126, -124, 105, 108, 0, 61, -29, -31, -75, -41, 114, -33, + ); + let r = i64x2::new(1952909805632365845, 
3971107439766933248); + + assert_eq!(r, transmute(lsx_vsrli_b::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrli_h() { + let a = i16x8::new(29545, 354, 27695, 20915, -32766, -24491, 10641, 20310); + let r = i64x2::new(11259230996660281, 10977609996304448); + + assert_eq!(r, transmute(lsx_vsrli_h::<9>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrli_w() { + let a = i32x4::new(627703601, 922874410, -234412645, -1216101872); + let r = i64x2::new(3870813506329215, 12913695352717769); + + assert_eq!(r, transmute(lsx_vsrli_w::<10>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrli_d() { + let a = i64x2::new(1407685950714554203, -6076144426076800688); + let r = i64x2::new(9, 85); + + assert_eq!(r, transmute(lsx_vsrli_d::<57>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlr_b() { + let a = i8x16::new( + -79, 91, -123, 112, -84, 70, -78, -74, -104, 27, -94, -46, -49, -78, 113, -2, + ); + let b = i8x16::new( + 23, 4, -120, -11, -13, 103, 84, 58, -108, 121, -66, -9, -81, 91, 71, -33, + ); + let r = i64x2::new(3317746744565237249, 144420860932066826); + + assert_eq!(r, transmute(lsx_vsrlr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlr_h() { + let a = i16x8::new(14153, -26873, 3115, 28304, 4881, -8446, 28628, 8837); + let b = i16x8::new(19500, -26403, -1282, 12290, -18989, 25105, -24347, 6707); + let r = i64x2::new(1991716935204929539, 311033695131730530); + + assert_eq!(r, transmute(lsx_vsrlr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlr_w() { + let a = i32x4::new(1997879294, 120007491, -1807289594, -1854395615); + let b = i32x4::new(1830015593, -1452673200, 962662328, -252736055); + let r = i64x2::new(7864089021084, 20473000998469780); + + assert_eq!(r, transmute(lsx_vsrlr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vsrlr_d() { + let a = i64x2::new(5993546441420611680, 4358546479290416194); + let b = i64x2::new(-1543621369665313706, 8544381131364512650); + let r = i64x2::new(1428972826343, 4256393046182047); + + assert_eq!(r, transmute(lsx_vsrlr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlri_b() { + let a = i8x16::new( + -41, 87, -43, -35, 79, -10, -103, 1, 52, -35, 8, -17, -116, 84, -91, 51, + ); + let r = i64x2::new(93866580842851436, 1896906350202744602); + + assert_eq!(r, transmute(lsx_vsrlri_b::<1>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlri_h() { + let a = i16x8::new(-18045, 1968, 22966, 3692, 2010, -17108, 3373, -30706); + let r = i64x2::new(1039304252363684227, -8642956144778934310); + + assert_eq!(r, transmute(lsx_vsrlri_h::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlri_w() { + let a = i32x4::new(1306456564, -1401620667, -839707416, -1634862919); + let r = i64x2::new(1553353645217275455, 1428132662790218397); + + assert_eq!(r, transmute(lsx_vsrlri_w::<3>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlri_d() { + let a = i64x2::new(-3683179565838693027, 6160461828074490983); + let r = i64x2::new(205, 85); + + assert_eq!(r, transmute(lsx_vsrlri_d::<56>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclr_b() { + let a = u8x16::new( + 238, 18, 41, 55, 84, 12, 87, 155, 124, 76, 175, 240, 181, 121, 58, 183, + ); + let b = u8x16::new( + 57, 132, 149, 173, 76, 177, 99, 144, 8, 167, 2, 144, 70, 60, 105, 232, + ); + let r = i64x2::new(-7325372782311046420, -5316383129963115396); + + assert_eq!(r, transmute(lsx_vbitclr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclr_h() { + let a = u16x8::new(14340, 59474, 49868, 46012, 53117, 6307, 22589, 53749); + let b = u16x8::new(26587, 57597, 34751, 38678, 23919, 45729, 62569, 5978); + let r = 
i64x2::new(-5495443997997256700, -3317648531059028099); + + assert_eq!(r, transmute(lsx_vbitclr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclr_w() { + let a = u32x4::new(1581022148, 2519245321, 296293885, 127383934); + let b = u32x4::new(1968231094, 2827365864, 4097273355, 4016923215); + let r = i64x2::new(-7626667807832507452, 546969093373761021); + + assert_eq!(r, transmute(lsx_vbitclr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclr_d() { + let a = u64x2::new(17203892527896963423, 12937109545250696056); + let b = u64x2::new(5723204188033770667, 2981956604140378920); + let r = i64x2::new(-1242851545812588193, -5509634528458855560); + + assert_eq!(r, transmute(lsx_vbitclr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclri_b() { + let a = u8x16::new( + 146, 23, 223, 183, 109, 56, 35, 105, 178, 156, 170, 57, 196, 164, 185, 161, + ); + let r = i64x2::new(7503621968728299154, -6865556469255070542); + + assert_eq!(r, transmute(lsx_vbitclri_b::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclri_h() { + let a = u16x8::new(17366, 58985, 22108, 45942, 27326, 19605, 9632, 32322); + let r = i64x2::new(-5515130134779575338, 8809640793386347198); + + assert_eq!(r, transmute(lsx_vbitclri_h::<10>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclri_w() { + let a = u32x4::new(718858183, 3771164920, 1842485081, 896350597); + let r = i64x2::new(-2249714073768237625, 3849796501707560281); + + assert_eq!(r, transmute(lsx_vbitclri_w::<9>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitclri_d() { + let a = u64x2::new(10838658690401820648, 3833745076866321369); + let r = i64x2::new(-7608085933063544856, 3833744527110507481); + + assert_eq!(r, transmute(lsx_vbitclri_d::<39>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitset_b() { + 
let a = u8x16::new( + 229, 230, 162, 180, 94, 215, 193, 145, 28, 90, 35, 171, 225, 7, 84, 128, + ); + let b = u8x16::new( + 209, 178, 73, 112, 118, 233, 139, 239, 2, 23, 209, 152, 236, 51, 195, 75, + ); + let r = i64x2::new(-7941579666116909337, -8620998056061183460); + + assert_eq!(r, transmute(lsx_vbitset_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitset_h() { + let a = u16x8::new(967, 49899, 53264, 29198, 56634, 42461, 51022, 31627); + let b = u16x8::new(64512, 23847, 57770, 47705, 8024, 31966, 14493, 50266); + let r = i64x2::new(8218739538452480967, 9190693790629616954); + + assert_eq!(r, transmute(lsx_vbitset_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitset_w() { + let a = u32x4::new(2899706360, 1274114722, 1170526770, 3308854969); + let b = u32x4::new(3259082048, 1303228302, 1429001720, 209615081); + let r = i64x2::new(5472281065241838073, -4235320193476931022); + + assert_eq!(r, transmute(lsx_vbitset_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitset_d() { + let a = u64x2::new(8117422063017946604, 5026948610774344635); + let b = u64x2::new(12687331714071910183, 1753585392879336372); + let r = i64x2::new(8117422612773760492, 5031452210401715131); + + assert_eq!(r, transmute(lsx_vbitset_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitseti_b() { + let a = u8x16::new( + 163, 123, 56, 129, 159, 111, 214, 85, 141, 240, 190, 190, 175, 215, 20, 81, + ); + let r = i64x2::new(6185254145054243811, 5860546440891134157); + + assert_eq!(r, transmute(lsx_vbitseti_b::<6>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitseti_h() { + let a = u16x8::new(15222, 59961, 52253, 2908, 61562, 41309, 63627, 4191); + let r = i64x2::new(819316619673811830, 1179934905985921146); + + assert_eq!(r, transmute(lsx_vbitseti_h::<1>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] 
+unsafe fn test_lsx_vbitseti_w() { + let a = u32x4::new(3788412756, 1863556832, 1913138259, 1199998627); + let r = i64x2::new(8012922850722617172, 5162962059379878995); + + assert_eq!(r, transmute(lsx_vbitseti_w::<21>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitseti_d() { + let a = u64x2::new(10744510173660993785, 16946223211744108759); + let r = i64x2::new(-7702233900048557831, -1500520861831225129); + + assert_eq!(r, transmute(lsx_vbitseti_d::<27>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrev_b() { + let a = u8x16::new( + 50, 114, 173, 149, 9, 38, 147, 232, 52, 235, 56, 98, 113, 120, 249, 238, + ); + let b = u8x16::new( + 252, 187, 218, 48, 148, 63, 222, 247, 56, 181, 124, 130, 243, 202, 86, 253, + ); + let r = i64x2::new(7553563628828981794, -3550669970358088907); + + assert_eq!(r, transmute(lsx_vbitrev_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrev_h() { + let a = u16x8::new(8304, 965, 30335, 58555, 41304, 8461, 30573, 59417); + let b = u16x8::new(21347, 23131, 57157, 13786, 34463, 33445, 23964, 48087); + let r = i64x2::new(-2253077037977362312, -1686202867067838120); + + assert_eq!(r, transmute(lsx_vbitrev_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrev_w() { + let a = u32x4::new(3821500454, 1067219398, 1766391845, 676798616); + let b = u32x4::new(3330530584, 4153020036, 822570638, 2652744506); + let r = i64x2::new(4583672484591007782, 3195058299616182309); + + assert_eq!(r, transmute(lsx_vbitrev_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrev_d() { + let a = u64x2::new(16016664040604304047, 18062107512190600767); + let b = u64x2::new(10942298949673565895, 12884740754463765660); + let r = i64x2::new(-2430080033105247697, -384636561250515393); + + assert_eq!(r, transmute(lsx_vbitrev_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] 
+unsafe fn test_lsx_vbitrevi_b() { + let a = u8x16::new( + 184, 147, 93, 34, 212, 175, 25, 125, 50, 34, 160, 241, 228, 231, 77, 110, + ); + let r = i64x2::new(8727320563398842300, 7658903196653594166); + + assert_eq!(r, transmute(lsx_vbitrevi_b::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrevi_h() { + let a = u16x8::new(15083, 24599, 61212, 12408, 48399, 59833, 45416, 58826); + let r = i64x2::new(8104420064785562347, -6500117680329458417); + + assert_eq!(r, transmute(lsx_vbitrevi_h::<14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrevi_w() { + let a = u32x4::new(1200613355, 1418062686, 3847355950, 3312937419); + let r = i64x2::new(6099540060505368555, -4226793400815190482); + + assert_eq!(r, transmute(lsx_vbitrevi_w::<21>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitrevi_d() { + let a = u64x2::new(295858379748270823, 1326723086853575042); + let r = i64x2::new(295858379748254439, 1326723086853591426); + + assert_eq!(r, transmute(lsx_vbitrevi_d::<14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadd_b() { + let a = i8x16::new( + 14, -124, 73, 125, 119, 60, 127, -10, 31, 89, 50, -88, 29, -28, -53, -8, + ); + let b = i8x16::new( + 94, -52, -56, 75, -104, 77, 16, 82, 82, 69, -81, -75, 25, -102, -109, 23, + ); + let r = i64x2::new(5228548393274527852, 1107461330348121713); + + assert_eq!(r, transmute(lsx_vadd_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadd_h() { + let a = i16x8::new(14051, -27363, -25412, -27329, 25098, 5182, -13698, -15422); + let b = i16x8::new(-25040, 15453, -28080, -31322, -24429, -12453, -18073, 27019); + let r = i64x2::new(1938006946753467667, 3264410328302682781); + + assert_eq!(r, transmute(lsx_vadd_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadd_w() { + let a = i32x4::new(-724548235, -1051318497, -203352059, 1502361914); + let b = 
i32x4::new(-1169804484, 389773725, -731843701, -1825112934); + let r = i64x2::new(-2841313158179161935, -1386205072290870384); + + assert_eq!(r, transmute(lsx_vadd_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadd_d() { + let a = i64x2::new(-7298628992874088690, 8943248591432696479); + let b = i64x2::new(7093939531558864473, 4047047970310912233); + let r = i64x2::new(-204689461315224217, -5456447511965942904); + + assert_eq!(r, transmute(lsx_vadd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddi_bu() { + let a = i8x16::new( + -126, 4, -123, -78, -37, -26, -41, -119, -16, -82, 33, 59, -110, -98, 26, -6, + ); + let r = i64x2::new(-7790681010872578420, 298548864442153210); + + assert_eq!(r, transmute(lsx_vaddi_bu::<10>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddi_hu() { + let a = i16x8::new(-16986, -28417, 11657, 16608, -30167, 18602, 8897, -854); + let r = i64x2::new(4681541984598867390, -233585914045887935); + + assert_eq!(r, transmute(lsx_vaddi_hu::<24>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddi_wu() { + let a = i32x4::new(1142343549, 56714754, -180143297, 408668191); + let r = i64x2::new(243588023362963327, 1755216527965240129); + + assert_eq!(r, transmute(lsx_vaddi_wu::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddi_du() { + let a = i64x2::new(4516502893749962130, 9158051921593642947); + let r = i64x2::new(4516502893749962139, 9158051921593642956); + + assert_eq!(r, transmute(lsx_vaddi_du::<9>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsub_b() { + let a = i8x16::new( + 125, 95, 56, 31, 69, -81, 65, -123, -72, 14, -43, 81, -12, -107, 106, 3, + ); + let b = i8x16::new( + -80, 10, -21, 84, -99, 8, 125, -66, 79, -71, 123, 61, 61, -31, 41, -118, + ); + let r = i64x2::new(-4051929421319416371, 8737463450488952169); + + assert_eq!(r, 
transmute(lsx_vsub_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsub_h() { + let a = i16x8::new(-17949, -2606, 1774, 18199, 28344, 28423, 16206, 25414); + let b = i16x8::new(15368, 16207, 9677, 21447, -29583, -22036, 1845, 15671); + let r = i64x2::new(-913983189443969573, 2742472381424198215); + + assert_eq!(r, transmute(lsx_vsub_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsub_w() { + let a = i32x4::new(678216285, 1230738403, -1278396773, -1257816042); + let b = i32x4::new(617176389, -1376778690, 1463940361, 620446698); + let r = i64x2::new(-7247543435452521192, -8067077040042720878); + + assert_eq!(r, transmute(lsx_vsub_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsub_d() { + let a = i64x2::new(7239192343295591267, -5127457864580422409); + let b = i64x2::new(1314101702815749241, 7673634401554993450); + let r = i64x2::new(5925090640479842026, 5645651807574135757); + + assert_eq!(r, transmute(lsx_vsub_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubi_bu() { + let a = i8x16::new( + -83, 36, 83, -2, 40, -92, 98, -95, -24, 113, 46, -20, 120, -93, 28, 85, + ); + let r = i64x2::new(-8192169673836457574, 4758493248402185941); + + assert_eq!(r, transmute(lsx_vsubi_bu::<19>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubi_hu() { + let a = i16x8::new(13272, -26858, -235, 16054, 29698, 1377, 4604, -3878); + let r = i64x2::new(4514576075959186376, -1096043853912116238); + + assert_eq!(r, transmute(lsx_vsubi_hu::<16>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubi_wu() { + let a = i32x4::new(1277091145, -2076591216, -1523555105, -945754023); + let r = i64x2::new(-8918891362898748088, -4061982600368986914); + + assert_eq!(r, transmute(lsx_vsubi_wu::<1>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubi_du() { + let a = 
i64x2::new(-8248876128472283209, -2119651236628000925); + let r = i64x2::new(-8248876128472283234, -2119651236628000950); + + assert_eq!(r, transmute(lsx_vsubi_du::<25>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_b() { + let a = i8x16::new( + -120, -51, 13, 82, 100, 7, 127, 17, -89, -95, -45, 121, 64, -60, 89, 105, + ); + let b = i8x16::new( + -47, -64, 96, 41, -30, -122, 3, -7, 123, -96, 68, 36, 14, 31, 74, -22, + ); + let r = i64x2::new(1260734548147228113, 7591133008682590587); + + assert_eq!(r, transmute(lsx_vmax_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_h() { + let a = i16x8::new(-14821, -29280, 26700, -12293, 2186, -23309, 13454, -1630); + let b = i16x8::new(25637, -11569, -23103, 6983, -17125, 5183, -709, 5986); + let r = i64x2::new(1965654441534120997, 1684966995419662474); + + assert_eq!(r, transmute(lsx_vmax_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_w() { + let a = i32x4::new(-2113940850, -647459228, -686153447, 852904547); + let b = i32x4::new(643859790, -389733899, -1309288060, 1934346522); + let r = i64x2::new(-1673894349703707314, 8307955054730158361); + + assert_eq!(r, transmute(lsx_vmax_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_d() { + let a = i64x2::new(-990960773872867733, 6406870358170165030); + let b = i64x2::new(-6137495199657896371, 2160025776787809810); + let r = i64x2::new(-990960773872867733, 6406870358170165030); + + assert_eq!(r, transmute(lsx_vmax_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_b() { + let a = i8x16::new( + -67, 109, 33, -22, -96, 84, -56, 81, 122, 23, -70, -71, -42, 108, -50, 23, + ); + let r = i64x2::new(5908253215318699518, 1728939149412407162); + + assert_eq!(r, transmute(lsx_vmaxi_b::<-2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_h() { + let a = 
i16x8::new(-14059, 19536, 15816, 28251, 23079, -10486, -11781, 25565); + let r = i64x2::new(7952017497535807498, 7195907822558272039); + + assert_eq!(r, transmute(lsx_vmaxi_h::<10>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_w() { + let a = i32x4::new(-1136628686, -168033999, -2082324641, -1789957469); + let r = i64x2::new(55834574861, 55834574861); + + assert_eq!(r, transmute(lsx_vmaxi_w::<13>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_d() { + let a = i64x2::new(-490958606840895025, -602287987736508723); + let r = i64x2::new(-5, -5); + + assert_eq!(r, transmute(lsx_vmaxi_d::<-5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_bu() { + let a = u8x16::new( + 22, 96, 70, 57, 83, 248, 184, 163, 4, 150, 223, 247, 226, 242, 18, 63, + ); + let b = u8x16::new( + 13, 251, 236, 121, 148, 91, 24, 176, 232, 197, 195, 34, 31, 120, 173, 27, + ); + let r = i64x2::new(-5712542810735052010, 4588590651995571688); + + assert_eq!(r, transmute(lsx_vmax_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_hu() { + let a = u16x8::new(1178, 52364, 32269, 22619, 17388, 4159, 51894, 12662); + let b = u16x8::new(61508, 27224, 11696, 15294, 30725, 4809, 55995, 24012); + let r = i64x2::new(6366821095949791300, 6759017637785204741); + + assert_eq!(r, transmute(lsx_vmax_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_wu() { + let a = u32x4::new(2081333956, 40837464, 1440470019, 1657093799); + let b = u32x4::new(2856502284, 546582019, 3814541188, 2370198139); + let r = i64x2::new(2347551899043152908, -8266820577849948284); + + assert_eq!(r, transmute(lsx_vmax_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmax_du() { + let a = u64x2::new(17105634039018730835, 11926654155810942548); + let b = u64x2::new(15559502733477870114, 3537017767853389449); + let r = 
i64x2::new(-1341110034690820781, -6520089917898609068); + + assert_eq!(r, transmute(lsx_vmax_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_bu() { + let a = u8x16::new( + 216, 225, 158, 238, 152, 8, 124, 241, 175, 62, 154, 175, 216, 127, 235, 143, + ); + let r = i64x2::new(-1045930669804428840, -8076220938123067729); + + assert_eq!(r, transmute(lsx_vmaxi_bu::<27>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_hu() { + let a = u16x8::new(56394, 18974, 59, 64239, 15178, 38205, 20044, 21066); + let r = i64x2::new(-365072790147113910, 5929637950214978378); + + assert_eq!(r, transmute(lsx_vmaxi_hu::<23>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_wu() { + let a = u32x4::new(2234002286, 3837532269, 3218694441, 2956128392); + let r = i64x2::new(-1964668478775874706, -5750269304073789143); + + assert_eq!(r, transmute(lsx_vmaxi_wu::<15>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaxi_du() { + let a = u64x2::new(3145066433415682744, 697260191203805367); + let r = i64x2::new(3145066433415682744, 697260191203805367); + + assert_eq!(r, transmute(lsx_vmaxi_du::<15>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_b() { + let a = i8x16::new( + -18, -126, -77, 105, 18, -106, -12, 89, 93, 22, -51, -103, -63, -106, -23, -125, + ); + let b = i8x16::new( + -10, 83, 19, -119, -1, 95, 11, 25, -11, 38, -28, -23, -36, -104, 110, 0, + ); + let r = i64x2::new(1870285769536668398, -8941449826914199819); + + assert_eq!(r, transmute(lsx_vmin_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_h() { + let a = i16x8::new(7767, 30288, -1525, 24469, 16179, 7042, 6326, 21055); + let b = i16x8::new(-5519, 15267, -28304, -5842, 32145, 6582, -9646, -24918); + let r = i64x2::new(-1644216902720689551, -7013553423522578637); + + assert_eq!(r, transmute(lsx_vmin_h(transmute(a), transmute(b)))); +} 
+ +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_w() { + let a = i32x4::new(280954204, 1916591882, 1901481995, 787566518); + let b = i32x4::new(-425011290, -2104111279, 175390640, 571448257); + let r = i64x2::new(-9037089126579775578, 2454351575346593712); + + assert_eq!(r, transmute(lsx_vmin_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_d() { + let a = i64x2::new(5262417572890363865, 5296071757031183187); + let b = i64x2::new(7269804448576860985, -2384075780126369706); + let r = i64x2::new(5262417572890363865, -2384075780126369706); + + assert_eq!(r, transmute(lsx_vmin_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_b() { + let a = i8x16::new( + -20, 19, 89, -115, 65, 94, -124, -17, 36, -127, -101, -123, -122, -62, 44, 121, + ); + let r = i64x2::new(-1187557278141451540, -940475489144045070); + + assert_eq!(r, transmute(lsx_vmini_b::<-14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_h() { + let a = i16x8::new(26119, -26421, -26720, 11534, 11181, -13024, -9525, -1565); + let r = i64x2::new(-677708916064259, -440267769697468419); + + assert_eq!(r, transmute(lsx_vmini_h::<-3>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_w() { + let a = i32x4::new(1937226480, -56354461, -210581139, 118641668); + let r = i64x2::new(-242040566978707451, 25559222637); + + assert_eq!(r, transmute(lsx_vmini_w::<5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_d() { + let a = i64x2::new(-6839357499730806877, 2982085289136510651); + let r = i64x2::new(-6839357499730806877, 11); + + assert_eq!(r, transmute(lsx_vmini_d::<11>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_bu() { + let a = u8x16::new( + 72, 253, 194, 62, 100, 41, 53, 50, 53, 249, 47, 215, 113, 227, 189, 66, + ); + let b = u8x16::new( + 20, 165, 214, 231, 201, 17, 81, 203, 41, 209, 98, 88, 135, 118, 100, 
83, + ); + let r = i64x2::new(3617816997909406996, 4784078933357220137); + + assert_eq!(r, transmute(lsx_vmin_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_hu() { + let a = u16x8::new(45665, 56395, 48109, 47478, 46813, 59058, 42125, 32550); + let b = u16x8::new(30424, 14541, 7654, 46014, 42452, 14971, 14903, 13871); + let r = i64x2::new(-5494921620712753448, 3904403410832303572); + + assert_eq!(r, transmute(lsx_vmin_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_wu() { + let a = u32x4::new(1809171870, 3212127932, 1131140001, 2157144340); + let b = u32x4::new(1456829356, 2264966310, 1587887390, 645429404); + let r = i64x2::new(-8718787844260924500, 2772098183187911585); + + assert_eq!(r, transmute(lsx_vmin_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmin_du() { + let a = u64x2::new(6641707046382446478, 5750385968612732680); + let b = u64x2::new(15079551366517035256, 13891052596545854864); + let r = i64x2::new(6641707046382446478, 5750385968612732680); + + assert_eq!(r, transmute(lsx_vmin_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_bu() { + let a = u8x16::new( + 14, 244, 217, 183, 206, 234, 5, 185, 152, 22, 4, 35, 30, 177, 252, 137, + ); + let r = i64x2::new(361700864190383365, 361700864190317829); + + assert_eq!(r, transmute(lsx_vmini_bu::<5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_hu() { + let a = u16x8::new(51791, 41830, 16737, 31634, 36341, 58491, 48701, 8690); + let r = i64x2::new(5066626891382802, 5066626891382802); + + assert_eq!(r, transmute(lsx_vmini_hu::<18>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_wu() { + let a = u32x4::new(1158888991, 2639721369, 556001789, 2902942998); + let r = i64x2::new(77309411346, 77309411346); + + assert_eq!(r, transmute(lsx_vmini_wu::<18>(transmute(a)))); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmini_du() { + let a = u64x2::new(17903595768445663391, 13119300660970895532); + let r = i64x2::new(13, 13); + + assert_eq!(r, transmute(lsx_vmini_du::<13>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseq_b() { + let a = i8x16::new( + 8, 73, 39, 20, 64, -98, -64, 83, 32, 84, -121, 9, -45, -118, -26, 100, + ); + let b = i8x16::new( + -90, -2, -77, -76, -19, 48, 91, 31, 65, -29, -112, -7, 77, 98, -126, 5, + ); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseq_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseq_h() { + let a = i16x8::new(7490, 32190, -24684, 16245, -18425, -12556, 19179, -23230); + let b = i16x8::new(-7387, -24074, 15709, -4629, 30465, -9504, -21403, -30287); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseq_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseq_w() { + let a = i32x4::new(-364333737, 833593451, -1047433707, 1224903962); + let b = i32x4::new(-493722413, -522973881, -1254416384, -884207273); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseq_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseq_d() { + let a = i64x2::new(8059130761383772313, -728251064129355704); + let b = i64x2::new(3023654898382436999, 1783520577741396523); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseqi_b() { + let a = i8x16::new( + 114, -39, -58, -47, -46, 68, 126, -41, 50, -24, 109, 120, -81, -22, 86, 2, + ); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseqi_b::<12>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseqi_h() { + let a = i16x8::new(-3205, 25452, 20774, 22065, -8424, 16590, -15971, -14154); + let r = i64x2::new(0, 0); + + assert_eq!(r, 
transmute(lsx_vseqi_h::<-1>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseqi_w() { + let a = i32x4::new(199798215, -798304779, -1812193878, -1830438161); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseqi_w::<11>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vseqi_d() { + let a = i64x2::new(-7376858177879278972, 1947027764115386661); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vseqi_d::<3>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_b() { + let a = i8x16::new( + 45, 70, 62, 83, 116, -29, -34, -91, 96, 48, 109, 92, -18, 93, 14, 22, + ); + let r = i64x2::new(-1099511627776, 1095216660480); + + assert_eq!(r, transmute(lsx_vslti_b::<-4>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_b() { + let a = i8x16::new( + -68, 126, 28, -97, -24, 118, 61, -9, 5, 115, -122, 5, -40, 107, -98, -93, + ); + let b = i8x16::new( + 22, 124, 33, 93, 0, -81, -62, 63, 1, 35, -64, 23, 61, 9, -56, 89, + ); + let r = i64x2::new(-72056494526365441, -280375465148416); + + assert_eq!(r, transmute(lsx_vslt_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_h() { + let a = i16x8::new(32283, 16403, -32598, 8049, -10290, 21116, 23894, 5619); + let b = i16x8::new(-10624, 12762, 31216, 13253, 2299, -12591, -8652, -22348); + let r = i64x2::new(-4294967296, 65535); + + assert_eq!(r, transmute(lsx_vslt_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_w() { + let a = i32x4::new(-158999818, -1928813163, -140040541, 494178107); + let b = i32x4::new(-1849021639, -756143028, 54274044, 646446450); + let r = i64x2::new(-4294967296, -1); + + assert_eq!(r, transmute(lsx_vslt_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_d() { + let a = i64x2::new(-179055155347449719, 6182805737835801255); + let b = i64x2::new(1481173131774551907, 
270656941607020532); + let r = i64x2::new(-1, 0); + + assert_eq!(r, transmute(lsx_vslt_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_h() { + let a = i16x8::new(-8902, 5527, 17224, -27356, 4424, 28839, 29975, 18805); + let r = i64x2::new(-281474976645121, 0); + + assert_eq!(r, transmute(lsx_vslti_h::<14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_w() { + let a = i32x4::new(995282502, -1964668207, -996118772, 1812234755); + let r = i64x2::new(-4294967296, 4294967295); + + assert_eq!(r, transmute(lsx_vslti_w::<14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_d() { + let a = i64x2::new(1441753618400573134, 3878439049744730841); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslti_d::<14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_bu() { + let a = u8x16::new( + 55, 192, 87, 242, 253, 133, 53, 76, 135, 6, 39, 64, 82, 182, 147, 19, + ); + let b = u8x16::new( + 108, 77, 229, 137, 242, 115, 152, 252, 99, 101, 44, 100, 58, 120, 101, 22, + ); + let r = i64x2::new(-281474959998721, -72057589742960896); + + assert_eq!(r, transmute(lsx_vslt_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_hu() { + let a = u16x8::new(16382, 2642, 8944, 48121, 7472, 49176, 63264, 1135); + let b = u16x8::new(513, 13075, 20319, 44422, 12609, 18638, 20227, 21354); + let r = i64x2::new(281474976645120, -281474976645121); + + assert_eq!(r, transmute(lsx_vslt_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_wu() { + let a = u32x4::new(137339688, 2061001419, 2322333619, 2113106148); + let b = u32x4::new(1402243125, 1129899238, 2591537060, 4152171743); + let r = i64x2::new(4294967295, -1); + + assert_eq!(r, transmute(lsx_vslt_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslt_du() { + let a = 
u64x2::new(15914553432791856307, 11132190561956652500); + let b = u64x2::new(835355141719377733, 10472626544222695938); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslt_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_bu() { + let a = u8x16::new( + 215, 70, 65, 148, 249, 56, 59, 18, 118, 56, 250, 53, 144, 189, 98, 56, + ); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslti_bu::<7>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_hu() { + let a = u16x8::new(60550, 12178, 30950, 44771, 25514, 35987, 55940, 21614); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslti_hu::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_wu() { + let a = u32x4::new(912580668, 18660032, 3405726641, 4033549497); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslti_wu::<8>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslti_du() { + let a = u64x2::new(17196150830761730262, 5893061291971214149); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslti_du::<14>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_b() { + let a = i8x16::new( + 16, 13, 47, 41, 9, -73, 92, 108, -77, -106, -115, -20, 107, -101, -54, 16, + ); + let b = i8x16::new( + 71, 43, 24, 28, 83, 69, -109, -33, 81, 71, -126, -61, -45, -11, -105, -70, + ); + let r = i64x2::new(281470681808895, 280375465148415); + + assert_eq!(r, transmute(lsx_vsle_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_h() { + let a = i16x8::new(15130, 12644, -27298, 13979, 28696, -28425, 23806, -20696); + let b = i16x8::new(-30602, -9535, 10944, 3343, -1093, 6600, -19453, -4561); + let r = i64x2::new(281470681743360, -281470681808896); + + assert_eq!(r, transmute(lsx_vsle_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_w() { + let a = 
i32x4::new(-549852719, 335768045, 1882235130, 603655976); + let b = i32x4::new(-1810853975, 2021418524, 215198844, 1124361386); + let r = i64x2::new(-4294967296, -4294967296); + + assert_eq!(r, transmute(lsx_vsle_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_d() { + let a = i64x2::new(-5807954019703375704, 7802006580674332206); + let b = i64x2::new(71694374951002423, -4307912969104303925); + let r = i64x2::new(-1, 0); + + assert_eq!(r, transmute(lsx_vsle_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_b() { + let a = i8x16::new( + 22, -8, 10, 55, 103, -103, -106, 30, 54, 82, 29, 44, 75, -9, 36, 111, + ); + let r = i64x2::new(72056494526365440, 280375465082880); + + assert_eq!(r, transmute(lsx_vslei_b::<3>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_h() { + let a = i16x8::new(31276, -16628, -30006, -20587, 2104, -30062, 18261, -6449); + let r = i64x2::new(-65536, -281470681808896); + + assert_eq!(r, transmute(lsx_vslei_h::<-3>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_w() { + let a = i32x4::new(-1890390435, 1289536678, 1490122113, 2120063492); + let r = i64x2::new(4294967295, 0); + + assert_eq!(r, transmute(lsx_vslei_w::<-16>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_d() { + let a = i64x2::new(-123539898448811963, 8007480165241051883); + let r = i64x2::new(-1, 0); + + assert_eq!(r, transmute(lsx_vslei_d::<8>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_bu() { + let a = u8x16::new( + 156, 210, 61, 51, 143, 107, 237, 69, 241, 117, 66, 79, 161, 68, 22, 152, + ); + let b = u8x16::new( + 83, 68, 27, 36, 209, 74, 204, 32, 123, 97, 44, 82, 238, 202, 133, 107, + ); + let r = i64x2::new(1095216660480, 72057594021150720); + + assert_eq!(r, transmute(lsx_vsle_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vsle_hu() { + let a = u16x8::new(57583, 52549, 12485, 59674, 7283, 26602, 6409, 58628); + let b = u16x8::new(50529, 35111, 24746, 62465, 21587, 30574, 11054, 11653); + let r = i64x2::new(-4294967296, 281474976710655); + + assert_eq!(r, transmute(lsx_vsle_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_wu() { + let a = u32x4::new(3325048208, 3863618944, 2967312103, 2626474550); + let b = u32x4::new(1321018603, 1091195011, 3525236625, 4061062671); + let r = i64x2::new(0, -1); + + assert_eq!(r, transmute(lsx_vsle_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsle_du() { + let a = u64x2::new(17131200460153340378, 17148253643287276161); + let b = u64x2::new(16044633718831874991, 3531311371811276914); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vsle_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_bu() { + let a = u8x16::new( + 33, 181, 170, 160, 192, 237, 16, 175, 82, 65, 186, 46, 143, 9, 37, 35, + ); + let r = i64x2::new(71776119061217280, 280375465082880); + + assert_eq!(r, transmute(lsx_vslei_bu::<18>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_hu() { + let a = u16x8::new(1430, 10053, 35528, 28458, 2394, 22098, 40236, 20853); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslei_hu::<10>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_wu() { + let a = u32x4::new(3289026584, 3653636092, 2919866047, 2895662832); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslei_wu::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vslei_du() { + let a = u64x2::new(17462377852989253439, 17741928456729041079); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vslei_du::<12>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_b() { + let a = i8x16::new( + -66, 2, -76, 126, 9, -44, 
-37, -42, 8, 68, -72, 10, 113, 70, 58, 44, + ); + let r = i64x2::new(-2964542792447819074, 3186937137643144200); + + assert_eq!(r, transmute(lsx_vsat_b::<7>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_h() { + let a = i16x8::new(-22234, -8008, -23350, 13768, 26313, -27447, -3569, 6025); + let r = i64x2::new(576451960371214336, 576451960371152895); + + assert_eq!(r, transmute(lsx_vsat_h::<11>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_w() { + let a = i32x4::new(-84179653, 874415975, 1823119516, 1667850968); + let r = i64x2::new(137438953440, 133143986207); + + assert_eq!(r, transmute(lsx_vsat_w::<5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_d() { + let a = i64x2::new(6859869867233872152, 2514172105675226457); + let r = i64x2::new(262143, 262143); + + assert_eq!(r, transmute(lsx_vsat_d::<18>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_bu() { + let a = u8x16::new( + 119, 190, 12, 39, 41, 110, 238, 29, 14, 135, 54, 90, 36, 89, 72, 91, + ); + let r = i64x2::new(2125538672170008439, 6577605268441825038); + + assert_eq!(r, transmute(lsx_vsat_bu::<6>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_hu() { + let a = u16x8::new(36681, 34219, 6160, 8687, 4544, 20195, 35034, 916); + let r = i64x2::new(287953294993589247, 257835472485549055); + + assert_eq!(r, transmute(lsx_vsat_hu::<9>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_wu() { + let a = u32x4::new(1758000759, 4138051566, 2705324001, 3927640324); + let r = i64x2::new(70364449226751, 70364449226751); + + assert_eq!(r, transmute(lsx_vsat_wu::<13>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsat_du() { + let a = u64x2::new(1953136817312581670, 2606878300382729363); + let r = i64x2::new(9007199254740991, 9007199254740991); + + assert_eq!(r, transmute(lsx_vsat_du::<52>(transmute(a)))); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadda_b() { + let a = i8x16::new( + -44, -56, -103, -51, 118, -127, -39, -96, -49, 75, -110, 35, 123, -61, 57, 104, + ); + let b = i8x16::new( + 79, 88, -93, 36, 117, -15, -81, -18, -117, -47, -13, 83, -31, -61, 60, 14, + ); + let r = i64x2::new(8248499858970022011, 8535863472581999270); + + assert_eq!(r, transmute(lsx_vadda_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadda_h() { + let a = i16x8::new(15992, -5603, -27115, -15673, 11461, -31471, -31137, -2291); + let b = i16x8::new(-21543, 21720, 14529, -19143, -28953, 13450, 8037, 29413); + let r = i64x2::new(-8646732423142600033, 8924050915627474398); + + assert_eq!(r, transmute(lsx_vadda_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadda_w() { + let a = i32x4::new(1188987464, -1693707744, -1561184997, -104072194); + let b = i32x4::new(287041349, 249467792, 312776520, 1314435078); + let r = i64x2::new(8345875378983299469, 6092442344252138029); + + assert_eq!(r, transmute(lsx_vadda_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadda_d() { + let a = i64x2::new(1747309060022550268, -6715694127559156035); + let b = i64x2::new(-4324432602362661920, 6402427893748093984); + let r = i64x2::new(6071741662385212188, -5328622052402301597); + + assert_eq!(r, transmute(lsx_vadda_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_b() { + let a = i8x16::new( + 6, -114, -40, 76, -8, 4, -110, -105, -104, 86, -27, 68, -102, 108, 113, 76, + ); + let b = i8x16::new( + -47, 102, 105, 84, -127, 70, -116, 57, 66, 47, 74, -35, 61, -85, 48, -50, + ); + let r = i64x2::new(-3422653801050278697, 1909270979770548186); + + assert_eq!(r, transmute(lsx_vsadd_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_h() { + let a = i16x8::new(-25724, -16509, -25895, 31488, -18727, 16765, 3340, 
21218); + let b = i16x8::new(26970, 17131, 15547, -7614, -8479, 22338, 3567, -22299); + let r = i64x2::new(6720170624686097630, -304244782337649222); + + assert_eq!(r, transmute(lsx_vsadd_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_w() { + let a = i32x4::new(-1981320133, -1751087788, 1176481176, 253883202); + let b = i32x4::new(-1026388582, 222487110, 501504960, -1863994162); + let r = i64x2::new(-6565289918505943040, -6915373914453178024); + + assert_eq!(r, transmute(lsx_vsadd_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_d() { + let a = i64x2::new(-1967787987610391555, -8103697759704177767); + let b = i64x2::new(-6599608819082608284, -5088169537193133686); + let r = i64x2::new(-8567396806692999839, -9223372036854775808); + + assert_eq!(r, transmute(lsx_vsadd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_bu() { + let a = u8x16::new( + 182, 156, 225, 235, 23, 111, 224, 152, 158, 254, 143, 58, 230, 188, 119, 239, + ); + let b = u8x16::new( + 40, 219, 72, 211, 12, 37, 59, 28, 206, 173, 87, 21, 125, 229, 110, 102, + ); + let r = i64x2::new(-5404438145481572386, -7318352348905473); + + assert_eq!(r, transmute(lsx_vsadd_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_hu() { + let a = u16x8::new(52962, 42889, 37893, 55695, 51804, 38647, 13774, 40745); + let b = u16x8::new(31219, 59227, 25607, 62798, 18845, 3238, 19902, 24978); + let r = i64x2::new(-8740258447361, -136834913009665); + + assert_eq!(r, transmute(lsx_vsadd_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_wu() { + let a = u32x4::new(1617769210, 1445524000, 4168062781, 912440538); + let b = u32x4::new(3676524021, 3894343575, 904432536, 1616820031); + let r = i64x2::new(-1, -7583652642497232897); + + assert_eq!(r, transmute(lsx_vsadd_wu(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsadd_du() { + let a = u64x2::new(3740778533337193809, 14274264382641271168); + let b = u64x2::new(11054638512585704882, 3549000132135395099); + let r = i64x2::new(-3651327027786652925, -623479558932885349); + + assert_eq!(r, transmute(lsx_vsadd_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_b() { + let a = i8x16::new( + 117, 127, 54, 98, -91, 42, 42, 76, 29, 63, -21, 26, -77, -7, -81, 78, + ); + let b = i8x16::new( + 30, 62, -76, -20, 127, 89, -99, -82, 69, -114, 84, 80, -78, -102, -107, 43, + ); + let r = i64x2::new(-152206416164856247, 4369276355735447089); + + assert_eq!(r, transmute(lsx_vavg_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_h() { + let a = i16x8::new(-12604, -917, -12088, 13367, -2577, -1073, 1365, -25654); + let b = i16x8::new(-3088, -25854, -32552, -8417, 7808, -12495, 22032, -5168); + let r = i64x2::new(696836182083297626, -4337760619710117321); + + assert_eq!(r, transmute(lsx_vavg_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_w() { + let a = i32x4::new(826230751, 1801449269, -284345024, 1777295732); + let b = i32x4::new(-324844828, -1580060766, -1909832882, 328273785); + let r = i64x2::new(475428188150908257, 4521676108535152711); + + assert_eq!(r, transmute(lsx_vavg_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_d() { + let a = i64x2::new(1486723108337487211, 6178549804180384276); + let b = i64x2::new(3169904420607189220, 5159962511251707672); + let r = i64x2::new(2328313764472338215, 5669256157716045974); + + assert_eq!(r, transmute(lsx_vavg_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_bu() { + let a = u8x16::new( + 84, 85, 64, 60, 241, 96, 145, 145, 51, 253, 205, 150, 135, 87, 248, 55, + ); + let b = u8x16::new( + 179, 216, 158, 135, 196, 75, 59, 209, 8, 58, 
142, 152, 16, 220, 199, 21, + ); + let r = i64x2::new(-5663745084945885565, 2801126043194071837); + + assert_eq!(r, transmute(lsx_vavg_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_hu() { + let a = u16x8::new(46978, 53346, 32276, 58377, 57638, 42860, 43999, 59924); + let b = u16x8::new(44835, 36733, 12115, 42874, 4819, 12201, 27397, 25394); + let r = i64x2::new(-4196978047981735086, -6439149718662907396); + + assert_eq!(r, transmute(lsx_vavg_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_wu() { + let a = u32x4::new(529045804, 31575520, 1599127613, 3465214369); + let b = u32x4::new(160886383, 26081142, 459122380, 2523086630); + let r = i64x2::new(123816739188229069, -5586965600173345916); + + assert_eq!(r, transmute(lsx_vavg_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavg_du() { + let a = u64x2::new(11603952465622489487, 9916150703735650033); + let b = u64x2::new(9749063966076740681, 5963120178993456389); + let r = i64x2::new(-7770235857859936532, 7939635441364553211); + + assert_eq!(r, transmute(lsx_vavg_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_b() { + let a = i8x16::new( + 42, -6, 89, -102, -107, 103, 13, -3, -19, -93, 0, 0, -17, 70, 54, 86, + ); + let b = i8x16::new( + 8, -32, -122, 22, -94, 44, 58, 54, -26, -34, -21, 27, -111, -96, -68, -122, + ); + let r = i64x2::new(1883712581662731545, -1226681417271426582); + + assert_eq!(r, transmute(lsx_vavgr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_h() { + let a = i16x8::new(-6008, 3940, -4691, -4052, 15265, -7180, 976, 11656); + let b = i16x8::new(-9758, -8332, 20577, 31066, 31120, 14788, -22323, 16722); + let r = i64x2::new(3801916629507170613, 3994084079587580569); + + assert_eq!(r, transmute(lsx_vavgr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] 
+unsafe fn test_lsx_vavgr_w() { + let a = i32x4::new(-518881442, 2037406651, -1244322310, -1948025633); + let b = i32x4::new(1278058715, -155858446, -195547847, -750518746); + let r = i64x2::new(4040594005688324125, -5795079921582298726); + + assert_eq!(r, transmute(lsx_vavgr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_d() { + let a = i64x2::new(-1958143381023430514, 3633380184275298119); + let b = i64x2::new(8758126674980055299, -7441643514470614533); + let r = i64x2::new(3399991646978312393, -1904131665097658207); + + assert_eq!(r, transmute(lsx_vavgr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_bu() { + let a = u8x16::new( + 205, 114, 125, 237, 6, 194, 197, 217, 10, 191, 130, 30, 247, 116, 199, 100, + ); + let b = u8x16::new( + 6, 139, 195, 209, 115, 27, 109, 34, 91, 48, 166, 147, 170, 83, 9, 65, + ); + let r = i64x2::new(9122444831751176042, 6010164553039771699); + + assert_eq!(r, transmute(lsx_vavgr_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_hu() { + let a = u16x8::new(49326, 55416, 46414, 26192, 61759, 37293, 22943, 26741); + let b = u16x8::new(26111, 34713, 61420, 23702, 29204, 9543, 62786, 7043); + let r = i64x2::new(7022187818705851223, 4754859411904311722); + + assert_eq!(r, transmute(lsx_vavgr_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_wu() { + let a = u32x4::new(3560278529, 2406185766, 3420917939, 1379681517); + let b = u32x4::new(1930150361, 3668628165, 2983921396, 2410913126); + let r = i64x2::new(-5401180487351753235, 8140240017388800980); + + assert_eq!(r, transmute(lsx_vavgr_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vavgr_du() { + let a = u64x2::new(3442342130569215862, 4810216499730807927); + let b = u64x2::new(8650759135311802962, 11380630663742852932); + let r = i64x2::new(6046550632940509412, 
8095423581736830430); + + assert_eq!(r, transmute(lsx_vavgr_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_b() { + let a = i8x16::new( + 49, 58, 94, 93, 7, 40, -34, 27, 75, -67, -71, 2, -117, -22, 78, -78, + ); + let b = i8x16::new( + -104, 71, -79, -113, 21, 34, 36, 19, 92, 32, -77, 91, 28, -43, -69, 62, + ); + let r = i64x2::new(628822736562549631, -9187601072510296593); + + assert_eq!(r, transmute(lsx_vssub_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_h() { + let a = i16x8::new(14676, -4176, 31759, -22564, 6643, 20831, 15260, 18518); + let b = i16x8::new(-26027, 6118, -13204, 25080, 12458, 8441, 24701, 11617); + let r = i64x2::new(-9223231300041015297, 1942699741282756937); + + assert_eq!(r, transmute(lsx_vssub_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_w() { + let a = i32x4::new(-359085176, -924784873, 1280567100, 1138686008); + let b = i32x4::new(-1808829767, 2144666490, 146236682, 1180114488); + let r = i64x2::new(-9223372035405031217, -177933965588659662); + + assert_eq!(r, transmute(lsx_vssub_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_d() { + let a = i64x2::new(628092957162650618, 1527439654680677883); + let b = i64x2::new(-2293337525465880409, 5736255249834646932); + let r = i64x2::new(2921430482628531027, -4208815595153969049); + + assert_eq!(r, transmute(lsx_vssub_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_bu() { + let a = u8x16::new( + 198, 146, 80, 65, 122, 45, 61, 106, 212, 129, 170, 111, 183, 102, 130, 148, + ); + let b = u8x16::new( + 16, 110, 145, 170, 113, 220, 82, 86, 9, 255, 200, 230, 204, 22, 213, 203, + ); + let r = i64x2::new(1441151919413273782, 87960930222283); + + assert_eq!(r, transmute(lsx_vssub_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vssub_hu() { + let a = u16x8::new(62355, 31259, 41090, 62278, 449, 36606, 38644, 57485); + let b = u16x8::new(50468, 33060, 15257, 59071, 59343, 21993, 42978, 20097); + let r = i64x2::new(902801202201243247, -7922957643493867520); + + assert_eq!(r, transmute(lsx_vssub_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_wu() { + let a = u32x4::new(360162968, 3504892941, 1150347916, 2195977376); + let b = u32x4::new(31483972, 3489479082, 152079374, 1875131600); + let r = i64x2::new(66202020638834260, 1378022115978010238); + + assert_eq!(r, transmute(lsx_vssub_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssub_du() { + let a = u64x2::new(14887776146288736271, 417684393846230822); + let b = u64x2::new(6460869225596371206, 16765308520486969885); + let r = i64x2::new(8426906920692365065, 0); + + assert_eq!(r, transmute(lsx_vssub_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_b() { + let a = i8x16::new( + -80, -35, -110, -126, -9, -18, -111, -50, -68, 115, -53, 79, -35, 102, -85, 68, + ); + let b = i8x16::new( + 85, -87, -91, 4, -102, 47, 70, 8, -16, 86, -14, -127, 2, -58, 10, 39, + ); + let r = i64x2::new(4230359294854509733, 2116586434120326452); + + assert_eq!(r, transmute(lsx_vabsd_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_h() { + let a = i16x8::new(-9487, 3116, 31071, -3514, -4374, 29502, 15788, 8887); + let b = i16x8::new(9346, 27961, 21592, 10762, -6831, 17219, 14968, -1750); + let r = i64x2::new(4018377481144584593, 2994052849949411737); + + assert_eq!(r, transmute(lsx_vabsd_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_w() { + let a = i32x4::new(1772435833, -142335623, -905419863, -1391379125); + let b = i32x4::new(-638463360, -1154268425, 818053243, -1766966029); + let r = i64x2::new(4346218292750542585, 
1613133471209364690); + + assert_eq!(r, transmute(lsx_vabsd_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_d() { + let a = i64x2::new(-1345697660428932390, -6981332546532147421); + let b = i64x2::new(-8533946706796471089, 1165272962517390961); + let r = i64x2::new(7188249046367538699, 8146605509049538382); + + assert_eq!(r, transmute(lsx_vabsd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_bu() { + let a = u8x16::new( + 3, 31, 230, 199, 201, 67, 112, 189, 15, 214, 56, 113, 214, 23, 217, 54, + ); + let b = u8x16::new( + 207, 196, 133, 201, 150, 94, 74, 221, 222, 61, 222, 248, 105, 208, 154, 128, + ); + let r = i64x2::new(2316568964225934796, 5350198762417854927); + + assert_eq!(r, transmute(lsx_vabsd_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_hu() { + let a = u16x8::new(30314, 20737, 52964, 57347, 14004, 37245, 9170, 22466); + let b = u16x8::new(42102, 40052, 6807, 16289, 29686, 38061, 42843, 26642); + let r = i64x2::new(-6889746235852116468, 1175584127230950722); + + assert_eq!(r, transmute(lsx_vabsd_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_wu() { + let a = u32x4::new(1481954749, 4094293310, 3199531334, 4211151920); + let b = u32x4::new(3008439409, 976530727, 1726048801, 4235308512); + let r = i64x2::new(-5056055741505581388, 103751774096297765); + + assert_eq!(r, transmute(lsx_vabsd_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vabsd_du() { + let a = u64x2::new(14212221485552223583, 1471016340493959617); + let b = u64x2::new(305704565845198935, 18327726360649467511); + let r = i64x2::new(-4540227154002526968, -1590034053554043722); + + assert_eq!(r, transmute(lsx_vabsd_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmul_b() { + let a = i8x16::new( + -108, -77, -99, -81, 97, 59, -58, 
100, 104, -89, -58, -96, -25, 125, 127, -61, + ); + let b = i8x16::new( + 64, 109, -119, -124, -55, -11, -90, -123, 72, -18, 83, 46, 102, -25, -11, 27, + ); + let r = i64x2::new(-836412611799730432, -7959044669412588992); + + assert_eq!(r, transmute(lsx_vmul_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmul_h() { + let a = i16x8::new(20255, 19041, 15158, 5077, -29421, -8508, 6583, -968); + let b = i16x8::new(-18582, -25667, 17674, 8424, -17121, -21798, 28934, -353); + let r = i64x2::new(-7419436171490628650, 3947512047518358605); + + assert_eq!(r, transmute(lsx_vmul_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmul_w() { + let a = i32x4::new(1875532791, -2038975148, 754073945, 1245315915); + let b = i32x4::new(1754730718, 782084571, 894216679, -1895747372); + let r = i64x2::new(6602438528086061106, 4680306660704041039); + + assert_eq!(r, transmute(lsx_vmul_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmul_d() { + let a = i64x2::new(-4093110041189429887, 5371368149814248867); + let b = i64x2::new(8096709215426138432, -5454415917204378153); + let r = i64x2::new(-1062747544199352000, -649255846668983579); + + assert_eq!(r, transmute(lsx_vmul_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmadd_b() { + let a = i8x16::new( + 60, 90, -59, 50, 52, 30, -124, 62, -71, -71, -38, 22, 6, -18, 93, 102, + ); + let b = i8x16::new( + 22, 41, -112, 44, -93, -82, 11, -47, 37, -120, -108, 33, -66, 27, -74, -2, + ); + let c = i8x16::new( + 103, 59, 65, -2, -55, 98, -11, 85, 84, 50, -17, 14, -19, 120, 7, -90, + ); + let r = i64x2::new(-6698055306094195434, 1898151712142019037); + + assert_eq!( + r, + transmute(lsx_vmadd_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmadd_h() { + let a = i16x8::new(24257, 11879, -5695, -12734, -31748, 30664, 11820, 3259); + let 
b = i16x8::new(23734, 11732, -14134, -26857, 30756, 2629, 25687, 15749); + let c = i16x8::new(-9000, -804, 10411, 17571, -4985, -22809, -5536, -1762); + let r = i64x2::new(2154858825190408273, -6966693911367840008); + + assert_eq!( + r, + transmute(lsx_vmadd_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmadd_w() { + let a = i32x4::new(1344709991, 1633778942, 1825268167, 917193207); + let b = i32x4::new(147354288, -1478483633, -941638228, -173023515); + let c = i32x4::new(-1301057792, -1104623642, -1440212635, -8186971); + let r = i64x2::new(4970798576846304615, -3981205637140381021); + + assert_eq!( + r, + transmute(lsx_vmadd_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmadd_d() { + let a = i64x2::new(-7021558423493045864, 7607197079929138141); + let b = i64x2::new(-7461017148544541027, -326746346508808472); + let c = i64x2::new(9019083511238971943, 8084580083589700502); + let r = i64x2::new(-7790478971542305405, -5909066061947936819); + + assert_eq!( + r, + transmute(lsx_vmadd_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmsub_b() { + let a = i8x16::new( + -114, -46, 82, -75, -22, 31, 79, 84, -108, -13, -40, -121, -2, -20, 75, -35, + ); + let b = i8x16::new( + -29, 61, -62, 87, -22, 53, 51, 24, -27, -74, 119, -20, 21, 5, 14, -92, + ); + let c = i8x16::new( + -57, 111, 112, -66, 100, -31, -70, -71, 92, 63, 108, 61, -115, 17, -75, 16, + ); + let r = i64x2::new(-269782211120439527, -7105106341430810296); + + assert_eq!( + r, + transmute(lsx_vmsub_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmsub_h() { + let a = i16x8::new(28727, 27408, -23829, -25297, 24892, 31830, -2674, -17919); + let b = i16x8::new(6329, 13060, 18913, 18407, 28125, -26009, -14135, 22627); + let c = i16x8::new(26144, 29029, 6084, 10072, 21090, -4197, 21706, 
-19485); + let r = i64x2::new(-5420122113954766057, 2393824782223771810); + + assert_eq!( + r, + transmute(lsx_vmsub_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmsub_w() { + let a = i32x4::new(385413537, 143148625, 1902013465, -1637986171); + let b = i32x4::new(-1124183308, 1253368192, 1310051041, -750553442); + let c = i32x4::new(921070544, 1408695249, -136396947, -1525372302); + let r = i64x2::new(-9168294401733980319, -6685995888074347700); + + assert_eq!( + r, + transmute(lsx_vmsub_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmsub_d() { + let a = i64x2::new(-5022267712807149796, 8788062746333130381); + let b = i64x2::new(594946727227821886, -4907188100068238790); + let c = i64x2::new(-5753096081940451712, 2150588928473907718); + let r = i64x2::new(-734195902542963684, -4942536302810424015); + + assert_eq!( + r, + transmute(lsx_vmsub_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_b() { + let a = i8x16::new( + 56, 78, 12, -67, -45, -79, 3, -81, 85, 97, 41, -86, 106, -102, 35, 59, + ); + let b = i8x16::new( + 48, -92, -93, -74, -32, 113, 86, -8, -99, -21, -14, -19, 124, -113, 29, -120, + ); + let r = i64x2::new(720575944674246657, 281475060530176); + + assert_eq!(r, transmute(lsx_vdiv_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_h() { + let a = i16x8::new(17409, -1878, -20289, -20815, 23275, 32438, 27688, 29943); + let b = i16x8::new(-11221, 24673, 19931, 3799, -3251, -21373, -13758, -31286); + let r = i64x2::new(-1125904201744385, 281470681743353); + + assert_eq!(r, transmute(lsx_vdiv_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_w() { + let a = i32x4::new(912619458, 297234237, 1790081728, 1556369143); + let b = i32x4::new(-775731190, 1887886939, 1001718213, 1135075421); + let r = 
i64x2::new(4294967295, 4294967297); + + assert_eq!(r, transmute(lsx_vdiv_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_d() { + let a = i64x2::new(8060378764891126625, 720122833079320324); + let b = i64x2::new(-9175012156877545557, -6390704898809702209); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vdiv_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_bu() { + let a = u8x16::new( + 153, 216, 32, 99, 9, 152, 44, 162, 131, 155, 164, 32, 248, 152, 88, 220, + ); + let b = u8x16::new( + 27, 125, 253, 245, 104, 196, 141, 201, 107, 65, 51, 126, 107, 90, 130, 185, + ); + let r = i64x2::new(261, 72058702139687425); + + assert_eq!(r, transmute(lsx_vdiv_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_hu() { + let a = u16x8::new(47825, 17349, 21777, 60576, 31104, 31380, 8974, 51905); + let b = u16x8::new(25282, 44917, 13706, 63351, 58837, 46710, 29092, 57823); + let r = i64x2::new(4294967297, 0); + + assert_eq!(r, transmute(lsx_vdiv_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_wu() { + let a = u32x4::new(1861719625, 952645030, 2402876315, 3695614684); + let b = u32x4::new(1130189258, 1211056894, 2357258312, 3855913706); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vdiv_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vdiv_du() { + let a = u64x2::new(7958239212167095743, 5349587769754015194); + let b = u64x2::new(14945948123666054968, 10864054932328247404); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vdiv_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_h_b() { + let a = i8x16::new( + 33, -91, 3, -119, 28, -34, -19, -51, 41, -83, 102, 116, 45, 50, -94, 121, + ); + let b = i8x16::new( + 49, 50, 108, -49, -44, -25, 99, 7, -101, 39, -125, 11, -21, -99, -123, 29, + ); + 
let r = i64x2::new(13791943145684950, -562821104926904); + + assert_eq!(r, transmute(lsx_vhaddw_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_w_h() { + let a = i16x8::new(-20323, -26647, 21748, 24233, 27893, -27604, 16391, 14873); + let b = i16x8::new( + -10851, -15249, -11124, -22012, -32205, -17044, 27739, -19038, + ); + let r = i64x2::new(56307021213062, 183021441324639); + + assert_eq!(r, transmute(lsx_vhaddw_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_d_w() { + let a = i32x4::new(1127296124, -1382562520, -1791538949, 534516309); + let b = i32x4::new(-1119468785, -1334232049, -1752131604, -2016112631); + let r = i64x2::new(-2502031305, -1217615295); + + assert_eq!(r, transmute(lsx_vhaddw_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_hu_bu() { + let a = u8x16::new( + 72, 148, 45, 246, 151, 252, 69, 31, 91, 247, 215, 57, 125, 49, 141, 27, + ); + let b = u8x16::new( + 76, 120, 158, 172, 253, 12, 131, 16, 18, 131, 114, 207, 1, 100, 48, 141, + ); + let r = i64x2::new(45601115212087520, 21110838012870921); + + assert_eq!(r, transmute(lsx_vhaddw_hu_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_wu_hu() { + let a = u16x8::new(46665, 29041, 34462, 31370, 18289, 12579, 33777, 52188); + let b = u16x8::new(40369, 53005, 64424, 35720, 9231, 19965, 20662, 8208); + let r = i64x2::new(411432097222434, 312888367535410); + + assert_eq!(r, transmute(lsx_vhaddw_wu_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_du_wu() { + let a = u32x4::new(3058953381, 3443284865, 3364703869, 2180288462); + let b = u32x4::new(728838120, 1267673009, 2659634151, 2264611356); + let r = i64x2::new(4172122985, 4839922613); + + assert_eq!(r, transmute(lsx_vhaddw_du_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vhsubw_h_b() { + let a = i8x16::new( + 20, -94, 56, 36, -78, -53, -65, 62, -23, 3, -26, 16, -36, 92, -87, -21, + ); + let b = i8x16::new( + -45, -92, 19, 45, -108, 44, 78, -127, -49, 23, -6, -3, 24, -8, 90, 51, + ); + let r = i64x2::new(-4503363402989617, -31243430355664844); + + assert_eq!(r, transmute(lsx_vhsubw_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_w_h() { + let a = i16x8::new(-32636, -15640, 17489, 24551, 28768, 8187, -7376, -16756); + let b = i16x8::new(-14204, -13312, 8240, -4455, -6362, -4711, -30790, -15773); + let r = i64x2::new(70059506530916, 60275571046613); + + assert_eq!(r, transmute(lsx_vhsubw_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_d_w() { + let a = i32x4::new(-1518455529, -1873161613, -1441786902, 713965134); + let b = i32x4::new(-1671723008, 870456702, 264823818, 13322401); + let r = i64x2::new(-201438605, 449141316); + + assert_eq!(r, transmute(lsx_vhsubw_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_hu_bu() { + let a = u8x16::new( + 67, 78, 163, 156, 17, 58, 245, 19, 180, 161, 166, 207, 240, 5, 221, 157, + ); + let b = u8x16::new( + 122, 131, 70, 56, 162, 5, 241, 241, 43, 5, 7, 236, 195, 26, 6, 17, + ); + let r = i64x2::new(-62206416523952172, 42783380429340790); + + assert_eq!(r, transmute(lsx_vhsubw_hu_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_wu_hu() { + let a = u16x8::new(48161, 61606, 48243, 42252, 5643, 40672, 13711, 1172); + let b = u16x8::new(5212, 32159, 36502, 59290, 7604, 229, 35511, 47443); + let r = i64x2::new(24696062008394, -147484881944276); + + assert_eq!(r, transmute(lsx_vhsubw_wu_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_du_wu() { + let a = u32x4::new(2721083043, 781151638, 4268150742, 392308867); + let b = u32x4::new(1383087137, 2403951939, 
360532131, 3513614550); + let r = i64x2::new(-601935499, 31776736); + + assert_eq!(r, transmute(lsx_vhsubw_du_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_b() { + let a = i8x16::new( + -89, -117, 89, -114, -65, 67, -20, 38, -38, -118, 30, 91, -16, -100, -109, -35, + ); + let b = i8x16::new( + 94, -92, -13, 26, -6, -121, 39, -114, 74, -108, 95, 108, -65, -21, 67, 92, + ); + let r = i64x2::new(2804691417388804007, -2461515231199824166); + + assert_eq!(r, transmute(lsx_vmod_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_h() { + let a = i16x8::new(-29453, 12108, 10947, 28516, 4854, 1994, -30042, -18472); + let b = i16x8::new(1550, 9221, -12080, 14553, -24847, 28286, 1074, 192); + let r = i64x2::new(3930282117007147005, -10982007906888970); + + assert_eq!(r, transmute(lsx_vmod_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_w() { + let a = i32x4::new(-2061299866, -1170666395, -1617297141, 594549537); + let b = i32x4::new(344507881, 1692387020, -1397506903, -1257953510); + let r = i64x2::new(-5027973877095011085, 2553570821342119010); + + assert_eq!(r, transmute(lsx_vmod_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_d() { + let a = i64x2::new(-6018318621764124581, -5715738494441059378); + let b = i64x2::new(4636642606889723746, -259899475747531088); + let r = i64x2::new(-1381676014874400835, -257849503742906530); + + assert_eq!(r, transmute(lsx_vmod_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_bu() { + let a = u8x16::new( + 122, 163, 72, 171, 64, 10, 201, 101, 196, 162, 190, 86, 253, 173, 221, 65, + ); + let b = u8x16::new( + 186, 243, 157, 205, 48, 190, 55, 245, 72, 203, 140, 64, 8, 25, 252, 227, + ); + let r = i64x2::new(7287961163701724026, 4745974892933063220); + + assert_eq!(r, transmute(lsx_vmod_bu(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_hu() { + let a = u16x8::new(26509, 32785, 35218, 8560, 18289, 13375, 35585, 60973); + let b = u16x8::new(15317, 24954, 61354, 3720, 21471, 6193, 8193, 35745); + let r = i64x2::new(315403234587388856, 7101062794264266609); + + assert_eq!(r, transmute(lsx_vmod_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_wu() { + let a = u32x4::new(3940871454, 2498938081, 2241198148, 777660345); + let b = u32x4::new(49228057, 2249712923, 358897384, 1782599598); + let r = i64x2::new(1070413902953059662, 3340025749258890964); + + assert_eq!(r, transmute(lsx_vmod_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmod_du() { + let a = u64x2::new(7747010922784437137, 16089799939101946183); + let b = u64x2::new(16850073055169051895, 16069565262862467484); + let r = i64x2::new(7747010922784437137, 20234676239478699); + + assert_eq!(r, transmute(lsx_vmod_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplve_b() { + let a = i8x16::new( + -62, -110, -89, -84, -11, -37, 90, -28, -41, -37, -53, 123, -55, 22, 20, -80, + ); + let r = i64x2::new(-2893606913523066921, -2893606913523066921); + + assert_eq!(r, transmute(lsx_vreplve_b(transmute(a), -8))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplve_h() { + let a = i16x8::new(-29429, -23495, 8705, -7614, -25353, 11887, -25989, -12818); + let r = i64x2::new(-3607719825936298514, -3607719825936298514); + + assert_eq!(r, transmute(lsx_vreplve_h(transmute(a), 7))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplve_w() { + let a = i32x4::new(1584940676, 95787593, -1655264847, 682404402); + let r = i64x2::new(411404579393346121, 411404579393346121); + + assert_eq!(r, transmute(lsx_vreplve_w(transmute(a), -3))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplve_d() { + let a = i64x2::new(7614424214598615675, -7096892795239148002); + 
let r = i64x2::new(7614424214598615675, 7614424214598615675); + + assert_eq!(r, transmute(lsx_vreplve_d(transmute(a), 0))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplvei_b() { + let a = i8x16::new( + 62, -120, 10, 58, 124, -30, 57, -78, -114, 6, -39, 46, 58, -72, -44, 21, + ); + let r = i64x2::new(-2097865012304223518, -2097865012304223518); + + assert_eq!(r, transmute(lsx_vreplvei_b::<5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplvei_h() { + let a = i16x8::new(-15455, -4410, 5029, 25863, -23170, 26570, 27423, -834); + let r = i64x2::new(7719006069021698847, 7719006069021698847); + + assert_eq!(r, transmute(lsx_vreplvei_h::<6>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplvei_w() { + let a = i32x4::new(1843143434, 491125746, -328585251, -1996512058); + let r = i64x2::new(7916240772710277898, 7916240772710277898); + + assert_eq!(r, transmute(lsx_vreplvei_w::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplvei_d() { + let a = i64x2::new(4333963848299154309, -8310246545782080694); + let r = i64x2::new(-8310246545782080694, -8310246545782080694); + + assert_eq!(r, transmute(lsx_vreplvei_d::<1>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickev_b() { + let a = i8x16::new( + 89, 84, -94, 3, 41, -86, -10, 120, 62, -102, 44, -88, 12, -75, -13, 65, + ); + let b = i8x16::new( + -31, 44, -76, -76, 52, -71, 44, -110, -4, 124, -38, 76, 108, 43, 54, 60, + ); + let r = i64x2::new(3921750152141124833, -933322373843017127); + + assert_eq!(r, transmute(lsx_vpickev_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickev_h() { + let a = i16x8::new(-5994, -14344, -28338, -25788, 5710, 1638, 494, -2554); + let b = i16x8::new(-5248, -1786, -21768, 23214, -4223, 23538, -24936, -32316); + let r = i64x2::new(-7018596679058658432, 139073165196191894); + + assert_eq!(r, transmute(lsx_vpickev_h(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickev_w() { + let a = i32x4::new(548489620, -968269400, -179106837, -1739507044); + let b = i32x4::new(-1187277846, -787064901, -980229113, 1746235326); + let r = i64x2::new(-4210051979814398998, -769258006856513132); + + assert_eq!(r, transmute(lsx_vpickev_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickev_d() { + let a = i64x2::new(1789073368466131160, 9168587701455881156); + let b = i64x2::new(6574352346370076190, -3979792156310826694); + let r = i64x2::new(6574352346370076190, 1789073368466131160); + + assert_eq!(r, transmute(lsx_vpickev_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickod_b() { + let a = i8x16::new( + -125, 4, -27, 25, 117, 98, -51, -93, -37, 110, -127, 115, 114, -108, 74, -85, + ); + let b = i8x16::new( + 93, -72, 89, 104, 84, 15, 77, 74, 91, -34, 118, -108, 13, 21, 105, 114, + ); + let r = i64x2::new(8220640377280882872, -6083110277645985532); + + assert_eq!(r, transmute(lsx_vpickod_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickod_h() { + let a = i16x8::new(1454, -18740, 13146, 10497, 4897, 31962, 19208, 21910); + let b = i16x8::new(12047, 25024, -10709, -28077, 24357, 19934, 10289, 28546); + let r = i64x2::new(8035070303515402688, 6167254016163165900); + + assert_eq!(r, transmute(lsx_vpickod_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickod_w() { + let a = i32x4::new(869069429, -1916930406, 1864611728, -1640302268); + let b = i32x4::new(-99240403, 314407358, 543396756, 1976776696); + let r = i64x2::new(8490191261129341374, -7045044594236590438); + + assert_eq!(r, transmute(lsx_vpickod_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickod_d() { + let a = i64x2::new(7031942541839550339, -7578696032343374601); + let b = i64x2::new(-4197243771252175958, 
-543692393753629390); + let r = i64x2::new(-543692393753629390, -7578696032343374601); + + assert_eq!(r, transmute(lsx_vpickod_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvh_b() { + let a = i8x16::new( + -58, -103, -5, 33, 124, -24, -18, 20, 22, -100, -6, 16, 40, 89, -41, -37, + ); + let b = i8x16::new( + -42, 76, 46, -4, 67, 45, 99, -7, 63, 20, 113, -50, 67, -23, -20, 112, + ); + let r = i64x2::new(1211180715666052671, -2634368371891034045); + + assert_eq!(r, transmute(lsx_vilvh_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvh_h() { + let a = i16x8::new(24338, 259, -22693, 16519, -28272, -16751, 1883, 16217); + let b = i16x8::new(23768, -31845, 28689, 14757, 9499, 7795, -13573, -10011); + let r = i64x2::new(-4714953853167983333, 4564918175499275003); + + assert_eq!(r, transmute(lsx_vilvh_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvh_w() { + let a = i32x4::new(-968342074, -1976160649, -1249304918, -279518364); + let b = i32x4::new(-737076987, 38515006, 602108871, -63099569); + let r = i64x2::new(-5365723764939852857, -1200522227779556017); + + assert_eq!(r, transmute(lsx_vilvh_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvh_d() { + let a = i64x2::new(2505149669372896333, 5375050218784453679); + let b = i64x2::new(-2160658667838026389, 1449429407527660400); + let r = i64x2::new(1449429407527660400, 5375050218784453679); + + assert_eq!(r, transmute(lsx_vilvh_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvl_b() { + let a = i8x16::new( + 57, 109, 61, 96, 101, 69, -42, 118, 112, -17, 63, 68, -54, 32, 17, -122, + ); + let b = i8x16::new( + -48, -30, -102, 100, -3, 85, 100, 46, 82, 67, -20, -56, 93, 96, -39, 108, + ); + let r = i64x2::new(6945744258789947856, 8515979671552484861); + + assert_eq!(r, transmute(lsx_vilvl_b(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvl_h() { + let a = i16x8::new(28844, -23308, 4163, -8033, 12472, -16423, 14534, 31242); + let b = i16x8::new(11601, 6788, 3174, -4208, -25999, -25660, -4591, 7133); + let r = i64x2::new(-6560589601043632815, -2260825085889541018); + + assert_eq!(r, transmute(lsx_vilvl_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvl_w() { + let a = i32x4::new(-997094955, 1731171907, 1528236839, -646874689); + let b = i32x4::new(486029703, 1245981961, 112180197, 1939621508); + let r = i64x2::new(-4282490222245561977, 7435326725564935433); + + assert_eq!(r, transmute(lsx_vilvl_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vilvl_d() { + let a = i64x2::new(7063413230460842607, -4234618008113981723); + let b = i64x2::new(3142531875873363679, 736682102982019415); + let r = i64x2::new(3142531875873363679, 7063413230460842607); + + assert_eq!(r, transmute(lsx_vilvl_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackev_b() { + let a = i8x16::new( + 63, 38, -47, 98, 19, 68, -27, 1, 108, 65, 108, 31, -102, 37, -27, 50, + ); + let b = i8x16::new( + 59, 11, -44, 73, -74, -15, 61, 17, -37, 117, -39, 28, 38, 49, -34, -86, + ); + let r = i64x2::new(-1928363389519380677, -1882898104368665381); + + assert_eq!(r, transmute(lsx_vpackev_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackev_h() { + let a = i16x8::new(26574, -30949, 26762, -28439, 5382, -25386, 5192, -9816); + let b = i16x8::new(-9444, 5210, -14402, 17972, 16606, 2450, 5123, 14727); + let r = i64x2::new(7533052947329899292, 1461440082551914718); + + assert_eq!(r, transmute(lsx_vpackev_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackev_w() { + let a = i32x4::new(1312465803, -1752635324, -1943199176, -362848304); + let b = i32x4::new(-872903277, 
1255047449, -2110158279, 682925573); + let r = i64x2::new(5636997704425442707, -8345976908349339079); + + assert_eq!(r, transmute(lsx_vpackev_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackev_d() { + let a = i64x2::new(7118943335298607169, 3038173153862744209); + let b = i64x2::new(-9119315954224042738, -4563700463464702181); + let r = i64x2::new(-9119315954224042738, 7118943335298607169); + + assert_eq!(r, transmute(lsx_vpackev_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackod_b() { + let a = i8x16::new( + 94, -48, 43, -58, -47, 27, -33, 60, 50, -38, 41, -41, 76, -46, 103, -60, + ); + let b = i8x16::new( + -117, -11, 72, -9, -99, -52, -102, -22, -7, -8, 8, -65, 101, 29, 86, 27, + ); + let r = i64x2::new(4389351353151377653, -4315624792288929032); + + assert_eq!(r, transmute(lsx_vpackod_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackod_h() { + let a = i16x8::new(-18827, 19151, 4246, -15752, -1028, 29166, 3421, -32610); + let b = i16x8::new(-23247, 17928, -13353, -20146, 5696, 22071, -10728, -30262); + let r = i64x2::new(-4433598883325590008, -9178747487946648009); + + assert_eq!(r, transmute(lsx_vpackod_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackod_w() { + let a = i32x4::new(-1183976810, 11929980, -1445863799, 1567314918); + let b = i32x4::new(445270781, 793617340, -1461557030, -22199234); + let r = i64x2::new(51238874735551420, 6731566319615689790); + + assert_eq!(r, transmute(lsx_vpackod_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpackod_d() { + let a = i64x2::new(-4549504442184266063, -4670773907187480618); + let b = i64x2::new(9039771682296134623, -6404442538060227683); + let r = i64x2::new(-6404442538060227683, -4670773907187480618); + + assert_eq!(r, transmute(lsx_vpackod_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = 
"lsx")] +unsafe fn test_lsx_vshuf_h() { + let a = i16x8::new(7, 12, 6, 8, 11, 2, 4, 7); + let b = i16x8::new(19221, 5841, 2738, -31394, -31337, -27662, 24655, 28090); + let c = i16x8::new(27835, 20061, 7214, -10489, -14005, -27870, -12303, 14443); + let r = i64x2::new(5410459163590867051, 4065564413064545630); + + assert_eq!( + r, + transmute(lsx_vshuf_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf_w() { + let a = i32x4::new(0, 3, 4, 6); + let b = i32x4::new(921730307, -1175025178, 241337062, 53139449); + let c = i32x4::new(-67250654, 55397321, 1170999941, 1704507894); + let r = i64x2::new(7320805664731551266, 1036534789524454659); + + assert_eq!( + r, + transmute(lsx_vshuf_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf_d() { + let a = i64x2::new(1, 2); + let b = i64x2::new(4033696695079994582, -3146912063343863773); + let c = i64x2::new(-4786751363389755273, 1769232540309840996); + let r = i64x2::new(1769232540309840996, 4033696695079994582); + + assert_eq!( + r, + transmute(lsx_vshuf_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vand_v() { + let a = u8x16::new( + 105, 106, 193, 101, 82, 63, 227, 23, 246, 17, 117, 134, 98, 233, 41, 128, + ); + let b = u8x16::new( + 254, 161, 164, 46, 166, 61, 123, 67, 90, 217, 49, 98, 166, 236, 128, 175, + ); + let r = i64x2::new(244105884219744360, -9223116804091473582); + + assert_eq!(r, transmute(lsx_vand_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vandi_b() { + let a = u8x16::new( + 167, 0, 108, 41, 255, 45, 24, 175, 229, 222, 89, 15, 63, 15, 187, 213, + ); + let r = i64x2::new(-8135737750142058361, -7666517314596397435); + + assert_eq!(r, transmute(lsx_vandi_b::<159>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vor_v() { + let a = u8x16::new( + 87, 193, 209, 232, 106, 36, 
72, 199, 202, 213, 174, 2, 78, 181, 135, 178, + ); + let b = u8x16::new( + 253, 19, 178, 143, 132, 123, 29, 28, 200, 36, 9, 212, 12, 35, 164, 169, + ); + let r = i64x2::new(-2351582766212852737, -4924766118269159990); + + assert_eq!(r, transmute(lsx_vor_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vori_b() { + let a = u8x16::new( + 134, 61, 120, 206, 181, 179, 192, 181, 115, 179, 137, 110, 147, 51, 93, 65, + ); + let r = i64x2::new(-589140355308650538, -3179554720060804109); + + assert_eq!(r, transmute(lsx_vori_b::<210>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vnor_v() { + let a = u8x16::new( + 116, 165, 106, 148, 116, 117, 91, 213, 195, 131, 160, 33, 223, 207, 12, 147, + ); + let b = u8x16::new( + 242, 233, 135, 143, 129, 199, 130, 192, 222, 143, 223, 103, 232, 53, 98, 129, + ); + let r = i64x2::new(3036560889408918025, 7823034030269427744); + + assert_eq!(r, transmute(lsx_vnor_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vnori_b() { + let a = u8x16::new( + 142, 138, 177, 202, 121, 170, 99, 149, 251, 153, 234, 191, 10, 185, 182, 212, + ); + let r = i64x2::new(5227628601268782144, 596802560304890884); + + assert_eq!(r, transmute(lsx_vnori_b::<51>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vxor_v() { + let a = u8x16::new( + 33, 58, 188, 69, 128, 23, 145, 174, 229, 254, 21, 227, 196, 131, 115, 100, + ); + let b = u8x16::new( + 10, 61, 91, 105, 232, 114, 191, 215, 83, 11, 124, 157, 132, 242, 94, 59, + ); + let r = i64x2::new(8732028225622312747, 6858262329367852470); + + assert_eq!(r, transmute(lsx_vxor_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vxori_b() { + let a = u8x16::new( + 27, 105, 197, 119, 145, 141, 167, 209, 51, 206, 89, 42, 45, 215, 239, 160, + ); + let r = i64x2::new(3478586993001400570, 4687744515358339026); + + assert_eq!(r, 
transmute(lsx_vxori_b::<225>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitsel_v() { + let a = u8x16::new( + 217, 159, 221, 209, 154, 9, 59, 230, 33, 109, 205, 229, 188, 222, 1, 94, + ); + let b = u8x16::new( + 49, 116, 245, 6, 184, 146, 9, 1, 133, 27, 12, 4, 47, 11, 8, 133, + ); + let c = u8x16::new( + 140, 105, 10, 4, 218, 82, 128, 160, 67, 218, 139, 14, 248, 53, 35, 81, + ); + let r = i64x2::new(5060668949517432401, 1081087304254897953); + + assert_eq!( + r, + transmute(lsx_vbitsel_v(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbitseli_b() { + let a = u8x16::new( + 224, 93, 78, 91, 41, 115, 130, 96, 34, 22, 227, 254, 0, 44, 237, 193, + ); + let b = u8x16::new( + 138, 4, 83, 190, 229, 199, 235, 99, 62, 236, 201, 78, 160, 181, 45, 187, + ); + let r = i64x2::new(4857631126842327370, 8881540057610709020); + + assert_eq!( + r, + transmute(lsx_vbitseli_b::<65>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf4i_b() { + let a = i8x16::new( + -83, 65, -54, 44, -52, -97, -93, 54, 118, -10, -20, -43, -60, -86, -116, -47, + ); + let r = i64x2::new(3937170420478429898, -3347145886530736916); + + assert_eq!(r, transmute(lsx_vshuf4i_b::<234>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf4i_h() { + let a = i16x8::new(27707, -1094, -15784, -28387, 31634, -12323, -30387, -11480); + let r = i64x2::new(-7989953385787032646, -3231104182470389795); + + assert_eq!(r, transmute(lsx_vshuf4i_h::<209>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf4i_w() { + let a = i32x4::new(768986805, -1036149600, -1196682940, -214444511); + let r = i64x2::new(3302773179299516085, -5139714087882845884); + + assert_eq!(r, transmute(lsx_vshuf4i_w::<160>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplgr2vr_b() { + let r = i64x2::new(795741901218843403, 795741901218843403); + + 
assert_eq!(r, transmute(lsx_vreplgr2vr_b(970839819))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplgr2vr_h() { + let r = i64x2::new(-6504141532176800324, -6504141532176800324); + + assert_eq!(r, transmute(lsx_vreplgr2vr_h(93693372))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplgr2vr_w() { + let r = i64x2::new(-6737078705572473188, -6737078705572473188); + + assert_eq!(r, transmute(lsx_vreplgr2vr_w(-1568598372))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vreplgr2vr_d() { + let r = i64x2::new(5000134708087557572, 5000134708087557572); + + assert_eq!(r, transmute(lsx_vreplgr2vr_d(5000134708087557572))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpcnt_b() { + let a = i8x16::new( + 29, -96, 22, 17, 38, -51, -97, 82, 17, -82, -30, -42, -44, 107, -51, 80, + ); + let r = i64x2::new(217867142450840068, 145528077781566722); + + assert_eq!(r, transmute(lsx_vpcnt_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpcnt_h() { + let a = i16x8::new(-512, 10388, -21267, -27094, 1085, -26444, -29360, -11576); + let r = i64x2::new(1970367786975239, 1970350607237126); + + assert_eq!(r, transmute(lsx_vpcnt_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpcnt_w() { + let a = i32x4::new(1399276601, -2094725994, -100739325, -1239551533); + let r = i64x2::new(47244640271, 81604378645); + + assert_eq!(r, transmute(lsx_vpcnt_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpcnt_d() { + let a = i64x2::new(-4470823169399930539, 3184270543884128372); + let r = i64x2::new(29, 25); + + assert_eq!(r, transmute(lsx_vpcnt_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclo_b() { + let a = i8x16::new( + 94, 66, -88, -43, 113, 10, 5, -96, 96, 78, 3, -30, -24, -29, 20, 115, + ); + let r = i64x2::new(72057594071547904, 3311470116864); + + assert_eq!(r, transmute(lsx_vclo_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vclo_h() { + let a = i16x8::new(-5432, 27872, -9150, 27393, 25236, 1028, -21312, -25189); + let r = i64x2::new(8589934595, 281479271677952); + + assert_eq!(r, transmute(lsx_vclo_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclo_w() { + let a = i32x4::new(1214322611, -1755838761, -1222326743, -1511364419); + let r = i64x2::new(4294967296, 4294967297); + + assert_eq!(r, transmute(lsx_vclo_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclo_d() { + let a = i64x2::new(-249299854527467825, -459308653408461862); + let r = i64x2::new(6, 5); + + assert_eq!(r, transmute(lsx_vclo_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclz_b() { + let a = i8x16::new( + -103, -39, -51, -74, -68, 126, -124, 33, 30, 54, -46, -53, -9, 96, 17, 74, + ); + let r = i64x2::new(144116287587483648, 72903118479688195); + + assert_eq!(r, transmute(lsx_vclz_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclz_h() { + let a = i16x8::new(1222, 32426, 3164, -10763, 10189, -4197, -21841, -28676); + let r = i64x2::new(17179934725, 2); + + assert_eq!(r, transmute(lsx_vclz_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclz_w() { + let a = i32x4::new(-490443689, -1039971379, -217310592, -1921086575); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vclz_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vclz_d() { + let a = i64x2::new(4630351532137644314, -6587611980764816064); + let r = i64x2::new(1, 0); + + assert_eq!(r, transmute(lsx_vclz_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_b() { + let a = i8x16::new( + 119, 126, -107, -59, 22, -27, -67, 39, -66, -101, 34, -26, -16, 61, 20, 51, + ); + let r: i32 = 51; + + assert_eq!(r, transmute(lsx_vpickve2gr_b::<15>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_h() { + let a = i16x8::new(-12924, 31013, 
18171, 20404, 21226, 14128, -6255, 26521); + let r: i32 = 21226; + + assert_eq!(r, transmute(lsx_vpickve2gr_h::<4>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_w() { + let a = i32x4::new(-1559379275, 2065542381, -1882161334, 1502157419); + let r: i32 = -1882161334; + + assert_eq!(r, transmute(lsx_vpickve2gr_w::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_d() { + let a = i64x2::new(-6941380853339482104, 8405634758774935528); + let r: i64 = -6941380853339482104; + + assert_eq!(r, transmute(lsx_vpickve2gr_d::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_bu() { + let a = i8x16::new( + 18, -111, 100, 2, -105, 20, 92, -40, -57, 117, 6, -119, -94, 86, -52, 35, + ); + let r: u32 = 199; + + assert_eq!(r, transmute(lsx_vpickve2gr_bu::<8>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_hu() { + let a = i16x8::new(25003, 5139, -12977, 7550, -12177, 19294, -2216, 12693); + let r: u32 = 25003; + + assert_eq!(r, transmute(lsx_vpickve2gr_hu::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_wu() { + let a = i32x4::new(-295894883, 551663550, -710853968, 82692774); + let r: u32 = 3999072413; + + assert_eq!(r, transmute(lsx_vpickve2gr_wu::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpickve2gr_du() { + let a = i64x2::new(748282319555413922, -1352335765832355666); + let r: u64 = 748282319555413922; + + assert_eq!(r, transmute(lsx_vpickve2gr_du::<0>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vinsgr2vr_b() { + let a = i8x16::new( + 58, 12, -107, 35, 111, -15, -99, 117, 119, 92, -18, 32, -44, -34, 53, -34, + ); + let r = i64x2::new(8475195533421775930, -2423536021788533641); + + assert_eq!( + r, + transmute(lsx_vinsgr2vr_b::<14>(transmute(a), 1333652061)) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vinsgr2vr_h() { + let a = 
i16x8::new(-20591, 7819, 25287, -11296, 4604, 28833, -1306, 6418); + let r = i64x2::new(-3179432729573085295, 1806782266980897276); + + assert_eq!(r, transmute(lsx_vinsgr2vr_h::<5>(transmute(a), -987420193))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vinsgr2vr_w() { + let a = i32x4::new(1608179655, 886830932, -621638499, 2021214690); + let r = i64x2::new(3808909851629379527, 8681050995079237782); + + assert_eq!(r, transmute(lsx_vinsgr2vr_w::<2>(transmute(a), -960507754))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vinsgr2vr_d() { + let a = i64x2::new(-6562091001143116290, -2425423285843953307); + let r = i64x2::new(-6562091001143116290, -233659266); + + assert_eq!(r, transmute(lsx_vinsgr2vr_d::<1>(transmute(a), -233659266))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfadd_s() { + let a = u32x4::new(1063501234, 1064367472, 1065334422, 1012846272); + let b = u32x4::new(1050272808, 1054022924, 1064036136, 1063113730); + let r = i64x2::new(4588396142719948771, 4567018621615066847); + + assert_eq!(r, transmute(lsx_vfadd_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfadd_d() { + let a = u64x2::new(4602410992567934854, 4605792798803129629); + let b = u64x2::new(4605819027271079334, 4601207158507578498); + let r = i64x2::new(4608685566198055604, 4608371493448991663); + + assert_eq!(r, transmute(lsx_vfadd_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfsub_s() { + let a = u32x4::new(1064451273, 1059693825, 1036187576, 1050580506); + let b = u32x4::new(1063475462, 1045836432, 1065150677, 1042376676); + let r = i64x2::new(4532926601401089072, 4475386505810184670); + + assert_eq!(r, transmute(lsx_vfsub_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfsub_d() { + let a = u64x2::new(4601910797424251354, 4606993182294978423); + let b = u64x2::new(4605973926398825814, 4600156145303017004); + let r = 
i64x2::new(-4622342180736116526, 4603750919602422881); + + assert_eq!(r, transmute(lsx_vfsub_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmul_s() { + let a = u32x4::new(1060566900, 1061147127, 1010818944, 1053672244); + let b = u32x4::new(1065241951, 1044285812, 1050678216, 1009264512); + let r = i64x2::new(4471727895898079441, 4289440988347233543); + + assert_eq!(r, transmute(lsx_vfmul_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmul_d() { + let a = u64x2::new(4593483834506733144, 4602939512559809908); + let b = u64x2::new(4605208047666947899, 4599634375243914522); + let r = i64x2::new(4591550625791030606, 4595475933048682142); + + assert_eq!(r, transmute(lsx_vfmul_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfdiv_s() { + let a = u32x4::new(1057501460, 1051070718, 1065221347, 1051828876); + let b = u32x4::new(1055538538, 1042248668, 1061233585, 1063649172); + let r = i64x2::new(4613180427594946541, 4523223175100126088); + + assert_eq!(r, transmute(lsx_vfdiv_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfdiv_d() { + let a = u64x2::new(4591718910407182664, 4607068478646496456); + let b = u64x2::new(4606326032528596062, 4601783079746725386); + let r = i64x2::new(4592460108638699314, 4612120084672695832); + + assert_eq!(r, transmute(lsx_vfdiv_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcvt_h_s() { + let a = u32x4::new(1020611712, 1046448896, 1062035346, 1052255382); + let b = u32x4::new(1049501482, 1043939972, 1042291392, 1041250232); + let r = i64x2::new(3495410141992989809, 3873441386606634666); + + assert_eq!(r, transmute(lsx_vfcvt_h_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcvt_s_d() { + let a = u64x2::new(4586066291858051968, 4597324798333789044); + let b = u64x2::new(4600251021237488420, 
4593890179408150924); + let r = i64x2::new(4469319308295208818, 4496796258465732597); + + assert_eq!(r, transmute(lsx_vfcvt_s_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmin_s() { + let a = u32x4::new(1016310272, 1064492378, 1043217948, 1060534856); + let b = u32x4::new(1060093085, 1026130528, 1057322097, 1057646773); + let r = i64x2::new(4407197060203522560, 4542558301798153756); + + assert_eq!(r, transmute(lsx_vfmin_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmin_d() { + let a = u64x2::new(4603437440563473519, 4603158282529654079); + let b = u64x2::new(4584808359801648672, 4602712060570539582); + let r = i64x2::new(4584808359801648672, 4602712060570539582); + + assert_eq!(r, transmute(lsx_vfmin_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmina_s() { + let a = u32x4::new(1061417856, 1052257408, 1056830440, 1055199170); + let b = u32x4::new(1049119234, 1058336224, 1057046116, 1029386720); + let r = i64x2::new(4519411155382848002, 4421182298393539560); + + assert_eq!(r, transmute(lsx_vfmina_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmina_d() { + let a = u64x2::new(4599160304044702024, 4603774209349450318); + let b = u64x2::new(4599088744110071826, 4598732503789588496); + let r = i64x2::new(4599088744110071826, 4598732503789588496); + + assert_eq!(r, transmute(lsx_vfmina_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmax_s() { + let a = u32x4::new(1054002242, 1061130492, 1034716288, 1064963760); + let b = u32x4::new(1042175760, 1040826492, 1059132266, 1050815434); + let r = i64x2::new(4557520760982391874, 4573984521684325226); + + assert_eq!(r, transmute(lsx_vfmax_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmax_d() { + let a = u64x2::new(4606275407710467505, 4593284088749839728); + let b = 
u64x2::new(4593616624275112016, 4605244843740986156); + let r = i64x2::new(4606275407710467505, 4605244843740986156); + + assert_eq!(r, transmute(lsx_vfmax_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmaxa_s() { + let a = u32x4::new(1059031357, 1043496676, 1044317464, 1055811838); + let b = u32x4::new(1064739422, 1055122552, 1049654310, 1057411362); + let r = i64x2::new(4531716855176798814, 4541547219258471462); + + assert_eq!(r, transmute(lsx_vfmaxa_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmaxa_d() { + let a = u64x2::new(4559235973242941440, 4606304546706191737); + let b = u64x2::new(4603647289310579471, 4603999027307573908); + let r = i64x2::new(4603647289310579471, 4606304546706191737); + + assert_eq!(r, transmute(lsx_vfmaxa_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfclass_s() { + let a = u32x4::new(1059786314, 1058231666, 1061513647, 1038650488); + let r = i64x2::new(549755814016, 549755814016); + + assert_eq!(r, transmute(lsx_vfclass_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfclass_d() { + let a = u64x2::new(4601724705608768104, 4601126152607382566); + let r = i64x2::new(128, 128); + + assert_eq!(r, transmute(lsx_vfclass_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfsqrt_s() { + let a = u32x4::new(1055398716, 1050305974, 995168768, 1064901995); + let r = i64x2::new(4543169501430832482, 4574681629207255333); + + assert_eq!(r, transmute(lsx_vfsqrt_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfsqrt_d() { + let a = u64x2::new(4605784293613801157, 4602267946351406890); + let r = i64x2::new(4606453893731357485, 4604397310232711799); + + assert_eq!(r, transmute(lsx_vfsqrt_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrecip_s() { + let a = u32x4::new(1003452672, 1050811504, 1044295808, 1064402913); + let r = 
i64x2::new(4632552602764963931, 4577820515916044016); + + assert_eq!(r, transmute(lsx_vfrecip_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrecip_d() { + let a = u64x2::new(4598634931235673106, 4598630619264835010); + let r = i64x2::new(4615355353482170689, 4615362460048142095); + + assert_eq!(r, transmute(lsx_vfrecip_d(transmute(a)))); +} + +#[simd_test(enable = "lsx,frecipe")] +unsafe fn test_lsx_vfrecipe_s() { + let a = u32x4::new(1057583779, 1062308847, 1060089100, 1048454688); + let r = i64x2::new(4583644530211711115, 4647978179615164140); + + assert_eq!(r, transmute(lsx_vfrecipe_s(transmute(a)))); +} + +#[simd_test(enable = "lsx,frecipe")] +unsafe fn test_lsx_vfrecipe_d() { + let a = u64x2::new(4605515926442181274, 4605369703273365674); + let r = i64x2::new(4608204937770303488, 4608317161507651584); + + assert_eq!(r, transmute(lsx_vfrecipe_d(transmute(a)))); +} + +#[simd_test(enable = "lsx,frecipe")] +unsafe fn test_lsx_vfrsqrte_s() { + let a = u32x4::new(1064377488, 1055815904, 1056897740, 1064016656); + let r = i64x2::new(4592421282989204764, 4577184195020153336); + + assert_eq!(r, transmute(lsx_vfrsqrte_s(transmute(a)))); +} + +#[simd_test(enable = "lsx,frecipe")] +unsafe fn test_lsx_vfrsqrte_d() { + let a = u64x2::new(4602766865443628663, 4605323203937791867); + let r = i64x2::new(4608986772678901760, 4607734355383549952); + + assert_eq!(r, transmute(lsx_vfrsqrte_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrint_s() { + let a = u32x4::new(1062138521, 1056849108, 1034089720, 1038314384); + let r = i64x2::new(1065353216, 0); + + assert_eq!(r, transmute(lsx_vfrint_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrint_d() { + let a = u64x2::new(4598620052333442366, 4603262362368837514); + let r = i64x2::new(0, 4607182418800017408); + + assert_eq!(r, transmute(lsx_vfrint_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrsqrt_s() { + let a = 
u32x4::new(1058614029, 1050504950, 1013814976, 1062355001); + let r = i64x2::new(4604601921912011494, 4579384257679777264); + + assert_eq!(r, transmute(lsx_vfrsqrt_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrsqrt_d() { + let a = u64x2::new(4602924191185043139, 4606088351077917251); + let r = i64x2::new(4608881149202581394, 4607483676176768181); + + assert_eq!(r, transmute(lsx_vfrsqrt_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vflogb_s() { + let a = u32x4::new(1053488512, 1061429282, 1064965594, 1061326585); + let r = i64x2::new(-4647714812225126400, -4647714812233515008); + + assert_eq!(r, transmute(lsx_vflogb_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vflogb_d() { + let a = u64x2::new(4589481276789128632, 4599408395082246526); + let r = i64x2::new(-4607182418800017408, -4611686018427387904); + + assert_eq!(r, transmute(lsx_vflogb_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcvth_s_h() { + let a = i16x8::new(29550, -13884, 689, -1546, 24006, -19112, -12769, 1779); + let r = i64x2::new(-4707668984349540352, 4097818267320836096); + + assert_eq!(r, transmute(lsx_vfcvth_s_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcvth_d_s() { + let a = u32x4::new(1051543000, 1042275304, 1038283216, 1063876621); + let r = i64x2::new(4592649323212177408, 4606389677895712768); + + assert_eq!(r, transmute(lsx_vfcvth_d_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcvtl_s_h() { + let a = i16x8::new(-21951, -13772, -17190, 9566, -19227, 9682, 13427, -30861); + let r = i64x2::new(-4519784435355738112, 4371798972740354048); + + assert_eq!(r, transmute(lsx_vfcvtl_s_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcvtl_d_s() { + let a = u32x4::new(1059809930, 1051084496, 1062618346, 1058273673); + let r = i64x2::new(4604206389789720576, 4599521958080544768); + + assert_eq!(r, 
transmute(lsx_vfcvtl_d_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftint_w_s() { + let a = u32x4::new(1064738153, 1040181800, 1064331056, 1050732566); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftint_w_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftint_l_d() { + let a = u64x2::new(4602244632405616462, 4606437548563176328); + let r = i64x2::new(0, 1); + + assert_eq!(r, transmute(lsx_vftint_l_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftint_wu_s() { + let a = u32x4::new(1051598962, 1051261298, 1059326008, 1057784192); + let r = i64x2::new(0, 4294967297); + + assert_eq!(r, transmute(lsx_vftint_wu_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftint_lu_d() { + let a = u64x2::new(4605561240422589260, 4595241299507769712); + let r = i64x2::new(1, 0); + + assert_eq!(r, transmute(lsx_vftint_lu_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrz_w_s() { + let a = u32x4::new(1027659872, 1064207676, 1058472873, 1055740014); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrz_w_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrz_l_d() { + let a = u64x2::new(4605051539601556532, 4605129242354661923); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrz_l_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrz_wu_s() { + let a = u32x4::new(1060876751, 1053710034, 1057340881, 1055555596); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrz_wu_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrz_lu_d() { + let a = u64x2::new(4598711097624940956, 4598268778109474002); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrz_lu_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffint_s_w() { + let a = i32x4::new(81337967, 1396520141, 2124859806, 
1655115736); + let r = i64x2::new(5667351778062705614, 5676028806041521555); + + assert_eq!(r, transmute(lsx_vffint_s_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffint_d_l() { + let a = i64x2::new(-1543454772280682525, -7672333112582708041); + let r = i64x2::new(-4344448119835677720, -4333977527979901593); + + assert_eq!(r, transmute(lsx_vffint_d_l(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffint_s_wu() { + let a = u32x4::new(2224947834, 194720725, 2248289069, 1131100007); + let r = i64x2::new(5564675890493038082, 5658445755393114667); + + assert_eq!(r, transmute(lsx_vffint_s_wu(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffint_d_lu() { + let a = u64x2::new(11793247389644223387, 1356636411353166515); + let r = i64x2::new(4892164017273962878, 4878194157796724979); + + assert_eq!(r, transmute(lsx_vffint_d_lu(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vandn_v() { + let a = u8x16::new( + 69, 83, 176, 218, 73, 205, 105, 229, 131, 233, 158, 58, 63, 68, 94, 223, + ); + let b = u8x16::new( + 12, 197, 21, 164, 196, 200, 144, 3, 232, 91, 46, 182, 156, 14, 53, 106, + ); + let r = i64x2::new(184648152262214664, 2315143230533931624); + + assert_eq!(r, transmute(lsx_vandn_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vneg_b() { + let a = i8x16::new( + -118, -51, 32, 96, -18, 11, -3, 86, 77, 78, -120, 105, -47, 6, -127, -49, + ); + let r = i64x2::new(-6195839201974406282, 3566844512212398771); + + assert_eq!(r, transmute(lsx_vneg_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vneg_h() { + let a = i16x8::new(-6540, 25893, -2534, 29805, -28719, -16331, -20168, 14650); + let r = i64x2::new(-8389350794815923828, -4123521786840387537); + + assert_eq!(r, transmute(lsx_vneg_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vneg_w() { + let a = i32x4::new(-927815384, -898911982, 
716171852, -2025175544); + let r = i64x2::new(3860797565600356056, 8698062733717804468); + + assert_eq!(r, transmute(lsx_vneg_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vneg_d() { + let a = i64x2::new(4241851098775470984, 2487122929432859927); + let r = i64x2::new(-4241851098775470984, -2487122929432859927); + + assert_eq!(r, transmute(lsx_vneg_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_b() { + let a = i8x16::new( + -123, 8, -7, 107, 85, 70, 44, 54, -34, -38, 48, 6, -23, 54, 25, -117, + ); + let b = i8x16::new( + 41, -97, -9, -98, 27, 101, -95, 58, 102, -37, -72, -8, 94, -112, -22, -61, + ); + let r = i64x2::new(931993372669836524, 2017024359980467698); + + assert_eq!(r, transmute(lsx_vmuh_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_h() { + let a = i16x8::new(-7394, -18356, -22999, 24389, 5841, 15177, -27319, -19905); + let b = i16x8::new(-446, -16863, 19467, -13578, -9673, -26572, -7864, 9855); + let r = i64x2::new(-1422322400225984462, -842721997477184351); + + assert_eq!(r, transmute(lsx_vmuh_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_w() { + let a = i32x4::new(1709346012, -2115891417, -530450121, 975457270); + let b = i32x4::new(-1684820454, 449222301, 1106076122, 431017950); + let r = i64x2::new(-950505610786872114, 420439596918869732); + + assert_eq!(r, transmute(lsx_vmuh_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_d() { + let a = i64x2::new(1852303942214142839, -864913423017390364); + let b = i64x2::new(-1208434038665242614, -6078343251861677818); + let r = i64x2::new(-121343209662433286, 284995587689374477); + + assert_eq!(r, transmute(lsx_vmuh_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_bu() { + let a = u8x16::new( + 7, 62, 97, 52, 145, 32, 36, 208, 81, 215, 70, 254, 95, 229, 130, 220, + ); + let 
b = u8x16::new( + 220, 110, 97, 25, 127, 138, 167, 150, 128, 32, 130, 157, 177, 237, 123, 244, + ); + let r = i64x2::new(8725461799780227590, -3369022092985820632); + + assert_eq!(r, transmute(lsx_vmuh_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_hu() { + let a = u16x8::new(28423, 34360, 7900, 61040, 62075, 6281, 10041, 37733); + let b = u16x8::new(14769, 6489, 58866, 5997, 46648, 26325, 42186, 26942); + let r = i64x2::new(1572068217944938757, 4366267597274655896); + + assert_eq!(r, transmute(lsx_vmuh_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_wu() { + let a = u32x4::new(1924935822, 3107975337, 289660636, 1367017690); + let b = u32x4::new(1981234883, 1290836259, 1284878577, 702668871); + let r = i64x2::new(4011887256539048298, 960560772888018584); + + assert_eq!(r, transmute(lsx_vmuh_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmuh_du() { + let a = u64x2::new(11605461634325977288, 4587630571657223131); + let b = u64x2::new(14805542397189366587, 10025341254588295994); + let r = i64x2::new(-9132083796568587258, 2493261783600858707); + + assert_eq!(r, transmute(lsx_vmuh_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsllwil_h_b() { + let a = i8x16::new( + -45, 48, 102, -110, 126, -43, 65, 14, 75, 88, 62, 46, -109, 119, -77, 59, + ); + let r = i64x2::new(-990777899147527584, 126109727303143360); + + assert_eq!(r, transmute(lsx_vsllwil_h_b::<5>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsllwil_w_h() { + let a = i16x8::new(25135, -4241, 25399, -32451, 5597, -16847, 3192, -14694); + let r = i64x2::new(-9326057613926912, -71360503652913664); + + assert_eq!(r, transmute(lsx_vsllwil_w_h::<9>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsllwil_d_w() { + let a = i32x4::new(1472328927, -2106442262, 379100488, -607174188); + let r = 
i64x2::new(6030659284992, -8627987505152); + + assert_eq!(r, transmute(lsx_vsllwil_d_w::<12>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsllwil_hu_bu() { + let a = u8x16::new( + 102, 12, 222, 193, 16, 21, 161, 189, 127, 57, 231, 81, 97, 68, 171, 68, + ); + let r = i64x2::new(6953679870551405312, 6809531147446388736); + + assert_eq!(r, transmute(lsx_vsllwil_hu_bu::<7>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsllwil_wu_hu() { + let a = u16x8::new(370, 47410, 29611, 6206, 10390, 34658, 65264, 5264); + let r = i64x2::new(52127846272954880, 6823569169558272); + + assert_eq!(r, transmute(lsx_vsllwil_wu_hu::<8>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsllwil_du_wu() { + let a = u32x4::new(3249798491, 4098547305, 1101510259, 3478509641); + let r = i64x2::new(13630642809995264, 17190553355550720); + + assert_eq!(r, transmute(lsx_vsllwil_du_wu::<22>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsran_b_h() { + let a = i16x8::new(-12554, -869, 6838, -18394, -26140, 20902, -222, -12466); + let b = i16x8::new(-12507, -16997, -17826, 5682, -298, -28572, -8117, -13478); + let r = i64x2::new(-864943573596831881, 0); + + assert_eq!(r, transmute(lsx_vsran_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsran_h_w() { + let a = i32x4::new(-950913431, 1557805031, 693572398, 1180916410); + let b = i32x4::new(-52337348, -677553123, -58200260, -1473338606); + let r = i64x2::new(1267763303694925820, 0); + + assert_eq!(r, transmute(lsx_vsran_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsran_w_d() { + let a = i64x2::new(-1288554130833689959, -11977059487539737); + let b = i64x2::new(-8585295495893484131, -2657141976436452013); + let r = i64x2::new(-5882350952887806270, 0); + + assert_eq!(r, transmute(lsx_vsran_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe 
fn test_lsx_vssran_b_h() { + let a = i16x8::new(-4232, -6038, -25131, -31144, -8955, 30109, -20875, 31748); + let b = i16x8::new(9459, 15241, 22170, 28027, 5348, 14784, 22613, -9469); + let r = i64x2::new(9187483431610086528, 0); + + assert_eq!(r, transmute(lsx_vssran_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssran_h_w() { + let a = i32x4::new(-287861089, -1513011801, -2092611716, -303792243); + let b = i32x4::new(2070726003, -944816867, -160621862, -1222036466); + let r = i64x2::new(-5219109151313101350, 0); + + assert_eq!(r, transmute(lsx_vssran_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssran_w_d() { + let a = i64x2::new(-3241370354549914429, -6946993314161316482); + let b = i64x2::new(-7078666005882550400, -2564990402652718339); + let r = i64x2::new(-15032385536, 0); + + assert_eq!(r, transmute(lsx_vssran_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssran_bu_h() { + let a = u16x8::new(42413, 20386, 34692, 25088, 5477, 58748, 14986, 55598); + let b = u16x8::new(2372, 26267, 4722, 47876, 44857, 55242, 45998, 51450); + let r = i64x2::new(47227865344, 0); + + assert_eq!(r, transmute(lsx_vssran_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssran_hu_w() { + let a = u32x4::new(98545765, 1277336728, 1198651242, 2259455561); + let b = u32x4::new(2085279153, 2679576985, 2935643238, 3797496208); + let r = i64x2::new(281470684234479, 0); + + assert_eq!(r, transmute(lsx_vssran_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssran_wu_d() { + let a = u64x2::new(13769400838855917836, 9078517924805296472); + let b = u64x2::new(3904652404244024971, 4230656884168675704); + let r = i64x2::new(536870912000, 0); + + assert_eq!(r, transmute(lsx_vssran_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarn_b_h() { 
+ let a = i16x8::new(416, 1571, 19122, -32078, 26657, 3230, 12936, -5041); + let b = i16x8::new(-19071, -903, 11542, -25909, 24111, 14882, -27192, -8283); + let r = i64x2::new(7076043428318610384, 0); + + assert_eq!(r, transmute(lsx_vsrarn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarn_h_w() { + let a = i32x4::new(-1553871953, -1700232136, 1934164676, -322997351); + let b = i32x4::new(-1571698573, 1467958613, -1857488008, 424713310); + let r = i64x2::new(498163119212, 0); + + assert_eq!(r, transmute(lsx_vsrarn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarn_w_d() { + let a = i64x2::new(3489546309777968442, 4424654979674624573); + let b = i64x2::new(-8645668865455529235, -3129277582817496880); + let r = i64x2::new(-8628090759335017621, 0); + + assert_eq!(r, transmute(lsx_vsrarn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarn_b_h() { + let a = i16x8::new(18764, -32156, 11073, -19939, -921, -18342, -16600, -13755); + let b = i16x8::new(24298, 2343, 24641, 20910, 3142, -1171, 25850, 15932); + let r = i64x2::new(-148338468081139694, 0); + + assert_eq!(r, transmute(lsx_vssrarn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarn_h_w() { + let a = i32x4::new(-319370354, 225260835, 556195246, -699782233); + let b = i32x4::new(1911424854, -931292983, -1710824608, -1179580317); + let r = i64x2::new(-9223231301513904204, 0); + + assert_eq!(r, transmute(lsx_vssrarn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarn_w_d() { + let a = i64x2::new(2645407519038125699, -6014465513887172991); + let b = i64x2::new(2843689038926761304, -6830262024912907383); + let r = i64x2::new(-9223372034707292161, 0); + + assert_eq!(r, transmute(lsx_vssrarn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarn_bu_h() { 
+ let a = u16x8::new(291, 64545, 16038, 57382, 18088, 10736, 57416, 55855); + let b = u16x8::new(60210, 40155, 14296, 25577, 1550, 1674, 5330, 10645); + let r = i64x2::new(10999415373897, 0); + + assert_eq!(r, transmute(lsx_vssrarn_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarn_hu_w() { + let a = u32x4::new(2157227758, 1970326245, 1829195047, 4061259315); + let b = u32x4::new(3570029841, 3229468238, 1070101998, 3159433736); + let r = i64x2::new(281474976645120, 0); + + assert_eq!(r, transmute(lsx_vssrarn_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarn_wu_d() { + let a = u64x2::new(8474558908443232483, 12352412821911429821); + let b = u64x2::new(1112771813772164907, 646071836375127186); + let r = i64x2::new(963446, 0); + + assert_eq!(r, transmute(lsx_vssrarn_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrln_b_h() { + let a = i16x8::new(11215, 29524, -2225, -13955, 13622, 15178, -22920, 29185); + let b = i16x8::new(-11667, 13077, -23656, 5150, -23771, -31329, 20729, 15169); + let r = i64x2::new(23363148983015937, 0); + + assert_eq!(r, transmute(lsx_vsrln_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrln_h_w() { + let a = i32x4::new(273951092, 1016537129, 330941412, 1091816631); + let b = i32x4::new(1775989751, -1602688801, -801213995, -1801759515); + let r = i64x2::new(-7033214568759295968, 0); + + assert_eq!(r, transmute(lsx_vsrln_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrln_w_d() { + let a = i64x2::new(-4929290425724370873, -9113314549902232460); + let b = i64x2::new(-1428152872702150626, 3907864416256094744); + let r = i64x2::new(-8718771486483115547, 0); + + assert_eq!(r, transmute(lsx_vsrln_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrln_bu_h() { + let a = 
u16x8::new(53048, 1006, 61143, 41996, 57058, 25724, 43969, 62847); + let b = u16x8::new(41072, 41125, 44619, 49581, 20733, 905, 47558, 7801); + let r = i64x2::new(8862857593125412863, 0); + + assert_eq!(r, transmute(lsx_vssrln_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrln_hu_w() { + let a = u32x4::new(1889365848, 1818261427, 2701385771, 4063178210); + let b = u32x4::new(1325069171, 1380839173, 3495604120, 2839043866); + let r = i64x2::new(16889194387279379, 0); + + assert_eq!(r, transmute(lsx_vssrln_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrln_wu_d() { + let a = u64x2::new(7819967077464554342, 9878605573134710521); + let b = u64x2::new(3908262745817581251, 17131627096934512209); + let r = i64x2::new(-1, 0); + + assert_eq!(r, transmute(lsx_vssrln_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrn_b_h() { + let a = i16x8::new(-28299, -15565, -30638, -10884, -2538, 23256, 25217, 14524); + let b = i16x8::new(22830, -27866, -24616, -9547, 11336, 320, 19908, 7056); + let r = i64x2::new(-4888418841542521598, 0); + + assert_eq!(r, transmute(lsx_vsrlrn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrn_h_w() { + let a = i32x4::new(-146271143, 1373068571, 1580809863, -915867973); + let b = i32x4::new(1387862348, 119424523, 185407104, 1890720739); + let r = i64x2::new(2222313691660711041, 0); + + assert_eq!(r, transmute(lsx_vsrlrn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrn_w_d() { + let a = i64x2::new(-4585118244955419935, -6462467970618862820); + let b = i64x2::new(-8550351213501194562, 7071641301481388656); + let r = i64x2::new(182866822561795, 0); + + assert_eq!(r, transmute(lsx_vsrlrn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrn_bu_h() { + let a = u16x8::new(13954, 8090, 
46576, 53579, 4322, 20972, 17281, 18603); + let b = u16x8::new(51122, 39148, 45511, 57479, 62603, 43668, 5537, 61004); + let r = i64x2::new(432344477600776959, 0); + + assert_eq!(r, transmute(lsx_vssrlrn_bu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrn_hu_w() { + let a = u32x4::new(959062112, 2073250884, 2500149644, 3919033303); + let b = u32x4::new(1618795892, 3678356443, 862445734, 2115250342); + let r = i64x2::new(-4293983341, 0); + + assert_eq!(r, transmute(lsx_vssrlrn_hu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrn_wu_d() { + let a = u64x2::new(13828499145464267218, 4059850184169338184); + let b = u64x2::new(13406765083608623828, 7214649593148131096); + let r = i64x2::new(-1, 0); + + assert_eq!(r, transmute(lsx_vssrlrn_wu_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrstpi_b() { + let a = i8x16::new( + 116, 124, 21, 48, 24, 119, -108, 103, -77, -95, 68, -76, 67, -82, -96, 17, + ); + let b = i8x16::new( + -124, -52, -31, -108, 33, 71, -22, 0, -38, -20, -6, -90, 41, -58, -51, -51, + ); + let r = i64x2::new(7463721428229389428, 1270206412966109619); + + assert_eq!( + r, + transmute(lsx_vfrstpi_b::<28>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrstpi_h() { + let a = i16x8::new(8411, -11473, 30045, -14781, 12135, -6534, -3622, 21173); + let b = i16x8::new(9590, -8044, 15088, 4172, 1721, 27581, -19895, -25679); + let r = i64x2::new(-4160352588467724069, 5959935604366651239); + + assert_eq!(r, transmute(lsx_vfrstpi_h::<1>(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrstp_b() { + let a = i8x16::new( + 41, -46, -4, 113, -42, 96, 62, 9, 12, -71, -82, 3, 4, -42, 43, -57, + ); + let b = i8x16::new( + -123, 108, -25, -29, -60, 41, -50, -93, 33, 99, 43, 36, 41, 88, 125, 27, + ); + let c = i8x16::new( + 94, 2, 35, 33, 56, -117, -67, 85, 48, 
94, -20, 112, -92, 47, -13, -80, + ); + let r = i64x2::new(666076269049074217, -4107047547431896820); + + assert_eq!( + r, + transmute(lsx_vfrstp_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrstp_h() { + let a = i16x8::new(-23724, -17384, -24117, -29825, -19683, -3257, 18098, 7693); + let b = i16x8::new(-20325, 3010, -32157, -32381, 13895, 10305, -4480, -12994); + let c = i16x8::new(-2897, -31862, -29510, -16688, -12596, -6396, 20900, -22026); + let r = i64x2::new(-8394813283989150892, 77734399685405); + + assert_eq!( + r, + transmute(lsx_vfrstp_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf4i_d() { + let a = i64x2::new(358242861525536259, -3448068840836542886); + let b = i64x2::new(-5242415653399550268, -1504319281108156436); + let r = i64x2::new(-3448068840836542886, -5242415653399550268); + + assert_eq!( + r, + transmute(lsx_vshuf4i_d::<153>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbsrl_v() { + let a = i8x16::new( + 67, 57, -68, -24, 50, 58, 127, -80, -9, 17, 119, 81, 4, 110, 63, 56, + ); + let r = i64x2::new(4570595419764160432, 56); + + assert_eq!(r, transmute(lsx_vbsrl_v::<7>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vbsll_v() { + let a = i8x16::new( + -25, -57, 97, -71, 66, 71, -127, 74, -32, -1, 36, 111, 116, 79, 49, -92, + ); + let r = i64x2::new(0, -1801439850948198400); + + assert_eq!(r, transmute(lsx_vbsll_v::<15>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vextrins_b() { + let a = i8x16::new( + 72, 112, -116, 99, 55, 19, 50, -123, -98, -90, 79, -29, 18, -87, 79, 74, + ); + let b = i8x16::new( + -107, 59, -127, 85, -65, -45, 80, 65, 30, -46, -56, -117, 107, 122, 11, -55, + ); + let r = i64x2::new(-8848989189215300792, 5354684380554962590); + + assert_eq!( + r, + transmute(lsx_vextrins_b::<21>(transmute(a), transmute(b))) + 
); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vextrins_h() { + let a = i16x8::new(-8903, 13698, -1855, 30429, -28178, 21171, -17068, -10547); + let b = i16x8::new(-16309, 24895, 7753, 1535, 20205, 23989, 27706, -24274); + let r = i64x2::new(8565108990437154105, -2968508409504886290); + + assert_eq!( + r, + transmute(lsx_vextrins_h::<33>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vextrins_w() { + let a = i32x4::new(1225397826, 1289583478, 1287364839, 1276008188); + let b = i32x4::new(1511106319, -1591171516, -989081993, 1462597836); + let r = i64x2::new(5538718864697333314, -6834029622259375897); + + assert_eq!( + r, + transmute(lsx_vextrins_w::<57>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vextrins_d() { + let a = i64x2::new(7112618873032505596, -3605623410483258197); + let b = i64x2::new(-8508848216355653905, -4655572653097801607); + let r = i64x2::new(7112618873032505596, -8508848216355653905); + + assert_eq!( + r, + transmute(lsx_vextrins_d::<62>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmskltz_b() { + let a = i8x16::new( + 94, -6, -27, 108, 33, -86, -64, 68, 68, 9, -92, -83, -61, 99, 103, -77, + ); + let r = i64x2::new(40038, 0); + + assert_eq!(r, transmute(lsx_vmskltz_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmskltz_h() { + let a = i16x8::new(16730, 29121, -23447, -8647, -22303, 21817, 30964, -27069); + let r = i64x2::new(156, 0); + + assert_eq!(r, transmute(lsx_vmskltz_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmskltz_w() { + let a = i32x4::new(-657282776, -1247210048, 162595942, 949871015); + let r = i64x2::new(3, 0); + + assert_eq!(r, transmute(lsx_vmskltz_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmskltz_d() { + let a = i64x2::new(7728638770319849738, 4250984610820351699); + let r = i64x2::new(0, 0); + + 
assert_eq!(r, transmute(lsx_vmskltz_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsigncov_b() { + let a = i8x16::new( + 37, -39, 115, 66, -114, -76, -55, -39, -94, 114, 38, 13, 76, 124, 64, -67, + ); + let b = i8x16::new( + -56, -98, -95, 45, 65, -53, -16, 126, 78, -69, -10, 115, -110, 125, -110, -27, + ); + let r = i64x2::new(-9074694153930972472, 1986788453588057010); + + assert_eq!(r, transmute(lsx_vsigncov_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsigncov_h() { + let a = i16x8::new(-2481, 28461, 27326, -11105, -17659, 25439, 5753, -743); + let b = i16x8::new(27367, 4727, -2962, 14937, 26207, -19075, -26630, 10708); + let r = i64x2::new(-4204122973533661927, -3013866947575178847); + + assert_eq!(r, transmute(lsx_vsigncov_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsigncov_w() { + let a = i32x4::new(-1532048051, -2015529516, -586660708, 727735992); + let b = i32x4::new(-1719915889, 290419288, 202835952, -1715336967); + let r = i64x2::new(-1247341342367689359, -7367316170792699888); + + assert_eq!(r, transmute(lsx_vsigncov_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsigncov_d() { + let a = i64x2::new(150793719457004094, -135856607031921617); + let b = i64x2::new(-7146260093067324952, -4263419240070336957); + let r = i64x2::new(-7146260093067324952, 4263419240070336957); + + assert_eq!(r, transmute(lsx_vsigncov_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmadd_s() { + let a = u32x4::new(1053592010, 1057663388, 1062706459, 1052867704); + let b = u32x4::new(1058664483, 1064225083, 1063099591, 1054461138); + let c = u32x4::new(1054468004, 1058982987, 1020391296, 1060092638); + let r = i64x2::new(4580180050664125165, 4564646927777478184); + + assert_eq!( + r, + transmute(lsx_vfmadd_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = 
"lsx")] +unsafe fn test_lsx_vfmadd_d() { + let a = u64x2::new(4606327684689705003, 4598694159366762396); + let b = u64x2::new(4605185255799132053, 4599088917574843416); + let c = u64x2::new(4602818020827041428, 4603108774373140110); + let r = i64x2::new(4608172630826345532, 4603863964483257995); + + assert_eq!( + r, + transmute(lsx_vfmadd_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmsub_s() { + let a = u32x4::new(1044400636, 1063313520, 1060460798, 1056994960); + let b = u32x4::new(1016037632, 1057190051, 1042434224, 1054669464); + let c = u32x4::new(1063213924, 1047859900, 1063932683, 1059194076); + let r = i64x2::new(4492556612533126096, -4695805165913139817); + + assert_eq!( + r, + transmute(lsx_vfmsub_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfmsub_d() { + let a = u64x2::new(4594815360286672212, 4596595309069193244); + let b = u64x2::new(4603027383886900468, 4603059771165364192); + let c = u64x2::new(4602620994011391758, 4604927875076111771); + let r = i64x2::new(-4622272149514797982, -4619451105624653598); + + assert_eq!( + r, + transmute(lsx_vfmsub_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfnmadd_s() { + let a = u32x4::new(1061642899, 1052761434, 1063541119, 1058091924); + let b = u32x4::new(1044610040, 1047755448, 1062197759, 1051199080); + let c = u32x4::new(1061915520, 1064953425, 1057353824, 1063041453); + let r = i64x2::new(-4645363120071402583, -4645972958179775591); + + assert_eq!( + r, + transmute(lsx_vfnmadd_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfnmadd_d() { + let a = u64x2::new(4581972604415454304, 4606375442608807393); + let b = u64x2::new(4601574488118710932, 4600732882837014710); + let c = u64x2::new(4598552045727299030, 4597905936756546488); + let r = i64x2::new(-4624646832280694111, 
-4619798024319766060); + + assert_eq!( + r, + transmute(lsx_vfnmadd_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfnmsub_s() { + let a = u32x4::new(1063347858, 1055637882, 1012264384, 1037368648); + let b = u32x4::new(1054477234, 1065181074, 1060000965, 1061867853); + let c = u32x4::new(1064036393, 1038991248, 1057711476, 1049339888); + let r = i64x2::new(-4706852781727946153, 4486413029030305466); + + assert_eq!( + r, + transmute(lsx_vfnmsub_s(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfnmsub_d() { + let a = u64x2::new(4604322037070318179, 4603593616949749938); + let b = u64x2::new(4598988625246003058, 4600654731040688846); + let c = u64x2::new(4601892672002082676, 4603822465490492305); + let r = i64x2::new(4598264167668253799, 4600765330842720520); + + assert_eq!( + r, + transmute(lsx_vfnmsub_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrne_w_s() { + let a = u32x4::new(1031214064, 1059673230, 1042813024, 1053602874); + let r = i64x2::new(4294967296, 0); + + assert_eq!(r, transmute(lsx_vftintrne_w_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrne_l_d() { + let a = u64x2::new(4606989588359571497, 4604713245380178790); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftintrne_l_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrp_w_s() { + let a = u32x4::new(1061716225, 1050491008, 1064711040, 1065018777); + let r = i64x2::new(4294967297, 4294967297); + + assert_eq!(r, transmute(lsx_vftintrp_w_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrp_l_d() { + let a = u64x2::new(4587516915944025472, 4601504548481216392); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftintrp_l_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrm_w_s() { + 
let a = u32x4::new(1045772456, 1065200707, 1061587478, 1035467272); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrm_w_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrm_l_d() { + let a = u64x2::new(4597123259408216804, 4594399417822716772); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrm_l_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftint_w_d() { + let a = u64x2::new(4602226310642310974, 4598315153561102162); + let b = u64x2::new(4606905060326467647, 4606985586417166381); + let r = i64x2::new(4294967297, 0); + + assert_eq!(r, transmute(lsx_vftint_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffint_s_l() { + let a = i64x2::new(-958368210120518642, 317739970300630807); + let b = i64x2::new(5814449889729512723, -111756032377486319); + let r = i64x2::new(-2610252963668467161, 6669016150524087533); + + assert_eq!(r, transmute(lsx_vffint_s_l(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrz_w_d() { + let a = u64x2::new(4588311497244995104, 4604793095801710714); + let b = u64x2::new(4599106720144900270, 4600531579473237336); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrz_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrp_w_d() { + let a = u64x2::new(4595926440353149184, 4601703964116560606); + let b = u64x2::new(4606104970322966899, 4595679410565085836); + let r = i64x2::new(4294967297, 4294967297); + + assert_eq!(r, transmute(lsx_vftintrp_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrm_w_d() { + let a = u64x2::new(4603847521361653326, 4600607722530696016); + let b = u64x2::new(4606733822200032543, 4589510164179968984); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrm_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] 
+unsafe fn test_lsx_vftintrne_w_d() { + let a = u64x2::new(4601878512717779358, 4597694557130026508); + let b = u64x2::new(4599197176714081204, 4605745859931721980); + let r = i64x2::new(4294967296, 0); + + assert_eq!(r, transmute(lsx_vftintrne_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintl_l_s() { + let a = u32x4::new(1058856635, 1060563398, 1061422616, 1056124918); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftintl_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftinth_l_s() { + let a = u32x4::new(1045383680, 1040752748, 1061879518, 1054801708); + let r = i64x2::new(1, 0); + + assert_eq!(r, transmute(lsx_vftinth_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffinth_d_w() { + let a = i32x4::new(517100418, -188510766, 949226647, -87467194); + let r = i64x2::new(4741245898611228672, -4497729803343888384); + + assert_eq!(r, transmute(lsx_vffinth_d_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vffintl_d_w() { + let a = i32x4::new(1273684401, -2137528906, -2109294912, -1646387998); + let r = i64x2::new(4743129027571613696, -4476619782820462592); + + assert_eq!(r, transmute(lsx_vffintl_d_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrzl_l_s() { + let a = u32x4::new(1031186688, 987838976, 1034565688, 1061017371); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrzl_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrzh_l_s() { + let a = u32x4::new(1049433828, 1048953580, 1060964637, 1059899586); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrzh_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrpl_l_s() { + let a = u32x4::new(1061834803, 1064858941, 1060475110, 1063896216); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftintrpl_l_s(transmute(a)))); +} + +#[simd_test(enable 
= "lsx")] +unsafe fn test_lsx_vftintrph_l_s() { + let a = u32x4::new(1059691939, 1065187151, 1059017027, 1061117394); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftintrph_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrml_l_s() { + let a = u32x4::new(1062985651, 1065211455, 1056421466, 1057373572); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrml_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrmh_l_s() { + let a = u32x4::new(1050224290, 1063763666, 1057677270, 1063622234); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vftintrmh_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrnel_l_s() { + let a = u32x4::new(1060174609, 1050974638, 1047193308, 1062040876); + let r = i64x2::new(1, 0); + + assert_eq!(r, transmute(lsx_vftintrnel_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vftintrneh_l_s() { + let a = u32x4::new(1055675382, 1036879184, 1064176794, 1063791852); + let r = i64x2::new(1, 1); + + assert_eq!(r, transmute(lsx_vftintrneh_l_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrne_s() { + let a = u32x4::new(1054667842, 1061395025, 1062986478, 1062529334); + let r = i64x2::new(4575657221408423936, 4575657222473777152); + + assert_eq!(r, transmute(lsx_vfrintrne_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrne_d() { + let a = u64x2::new(4603260356641870565, 4601614335120512898); + let r = i64x2::new(4607182418800017408, 0); + + assert_eq!(r, transmute(lsx_vfrintrne_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrz_s() { + let a = u32x4::new(1063039577, 1033416832, 1052369306, 1057885024); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfrintrz_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrz_d() { + let a = u64x2::new(4601515428088814484, 
4604735152905786794); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfrintrz_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrp_s() { + let a = u32x4::new(1061968959, 1056597596, 1064869916, 1058742360); + let r = i64x2::new(4575657222473777152, 4575657222473777152); + + assert_eq!(r, transmute(lsx_vfrintrp_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrp_d() { + let a = u64x2::new(4603531792479663401, 4587997630530425392); + let r = i64x2::new(4607182418800017408, 4607182418800017408); + + assert_eq!(r, transmute(lsx_vfrintrp_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrm_s() { + let a = u32x4::new(1058024441, 1044087184, 1059777964, 1050835426); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfrintrm_s(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfrintrm_d() { + let a = u64x2::new(4589388034824743512, 4606800774570289382); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfrintrm_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vstelm_b() { + let a = i8x16::new( + -70, -74, -13, -53, -37, -28, -84, -8, 110, -98, -26, 71, 55, 104, -8, -50, + ); + let mut o: [i8; 16] = [ + 97, 16, 51, -123, 4, 14, 108, 36, -40, -53, 29, 67, 102, 63, -15, -39, + ]; + let r = i64x2::new(2624488095427530938, -2742340989646681128); + + lsx_vstelm_b::<0, 0>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vstelm_h() { + let a = i16x8::new(-7427, -5749, 19902, -9799, 28691, -16170, 11920, 24129); + let mut o: [i8; 16] = [ + 123, 19, -3, 118, -43, -40, -48, -81, 23, -114, -72, 26, 117, 98, -43, -112, + ]; + let r = i64x2::new(-5777879910580360821, -8010388107109560809); + + lsx_vstelm_h::<0, 1>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vstelm_w() { + let a = 
i32x4::new(424092909, 1956922334, -640221305, -164680666); + let mut o: [i8; 16] = [ + -12, -50, 8, 91, 60, -48, 94, -99, -64, -51, 3, -44, 7, -49, 62, -69, + ]; + let r = i64x2::new(-7107014201697162202, -4954294907532227136); + + lsx_vstelm_w::<0, 3>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vstelm_d() { + let a = i64x2::new(2628828971609511929, 9138529437562240974); + let mut o: [i8; 16] = [ + 48, -98, 127, -32, 90, 120, 50, 2, 90, 120, -113, 19, -120, 105, 27, -22, + ]; + let r = i64x2::new(2628828971609511929, -1577551211298588582); + + lsx_vstelm_d::<0, 0>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_d_w() { + let a = i32x4::new(-1889902301, 326462140, 1088579813, 626337726); + let b = i32x4::new(-2105551735, -1478351177, 1027048582, -607110700); + let r = i64x2::new(-3995454036, 2115628395); + + assert_eq!(r, transmute(lsx_vaddwev_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_w_h() { + let a = i16x8::new(7813, 337, -10949, -8624, 14298, -27002, -12747, 17169); + let b = i16x8::new(-17479, -32614, 24343, 25426, -14077, -12419, 10115, 23013); + let r = i64x2::new(57531086920254, -11304353922851); + + assert_eq!(r, transmute(lsx_vaddwev_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_h_b() { + let a = i8x16::new( + -122, -50, 126, -108, 72, 89, -50, -96, -37, -68, 63, -41, -1, -49, 90, 117, + ); + let b = i8x16::new( + -89, 6, -27, 58, 80, -29, 28, 104, 30, 69, -39, 76, 42, 34, 25, -24, + ); + let r = i64x2::new(-6191796646052051, 32369798417022969); + + assert_eq!(r, transmute(lsx_vaddwev_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_d_w() { + let a = i32x4::new(-1721333318, -347227654, -936088440, 1975890670); + let b = i32x4::new(420515981, 473447119, 
1471756335, 1044924117); + let r = i64x2::new(126219465, 3020814787); + + assert_eq!(r, transmute(lsx_vaddwod_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_w_h() { + let a = i16x8::new(13058, 5020, 31112, -31710, 19542, -9009, -21764, -1881); + let b = i16x8::new(-26581, -22301, 18214, -3616, -24489, 12150, -10765, -24232); + let r = i64x2::new(-151719719748481, -112154480997307); + + assert_eq!(r, transmute(lsx_vaddwod_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_h_b() { + let a = i8x16::new( + -53, 61, 10, -18, -31, 26, 113, -14, -62, 6, 127, -43, 86, 33, 94, 57, + ); + let b = i8x16::new( + 37, 85, -14, -93, 61, -116, -53, -51, -46, 119, 36, -94, 0, -86, 46, -6, + ); + let r = i64x2::new(-18014780768845678, 14636475441676413); + + assert_eq!(r, transmute(lsx_vaddwod_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_d_wu() { + let a = u32x4::new(2539947230, 3548211150, 1193982195, 3547334418); + let b = u32x4::new(1482213353, 1001198416, 3345983326, 2244256337); + let r = i64x2::new(4022160583, 4539965521); + + assert_eq!(r, transmute(lsx_vaddwev_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_w_hu() { + let a = u16x8::new(50844, 55931, 31330, 63416, 32884, 2778, 22874, 13540); + let b = u16x8::new(28483, 24704, 9817, 62062, 47674, 8032, 29897, 62737); + let r = i64x2::new(176725019407839, 226649719257774); + + assert_eq!(r, transmute(lsx_vaddwev_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_h_bu() { + let a = u8x16::new( + 233, 165, 29, 130, 62, 173, 207, 120, 32, 254, 152, 27, 30, 159, 92, 76, + ); + let b = u8x16::new( + 118, 157, 181, 79, 81, 38, 95, 73, 245, 179, 126, 210, 16, 93, 78, 63, + ); + let r = i64x2::new(85006057160704351, 47850943627526421); + + assert_eq!(r, 
transmute(lsx_vaddwev_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_d_wu() { + let a = u32x4::new(342250989, 1651153980, 174227274, 2092816321); + let b = u32x4::new(2782520439, 2496077290, 2678772394, 196273109); + let r = i64x2::new(4147231270, 2289089430); + + assert_eq!(r, transmute(lsx_vaddwod_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_w_hu() { + let a = u16x8::new(36372, 35690, 49187, 14265, 54130, 40094, 57017, 10670); + let b = u16x8::new(20353, 34039, 21222, 4948, 58293, 4766, 51360, 37497); + let r = i64x2::new(82519206727777, 206875689791292); + + assert_eq!(r, transmute(lsx_vaddwod_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_h_bu() { + let a = u8x16::new( + 248, 1, 83, 240, 60, 173, 151, 39, 55, 39, 131, 86, 86, 18, 5, 110, + ); + let b = u8x16::new( + 63, 52, 164, 249, 242, 167, 236, 222, 171, 180, 249, 57, 79, 53, 87, 7, + ); + let r = i64x2::new(73466429242409013, 32932877227196635); + + assert_eq!(r, transmute(lsx_vaddwod_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_d_wu_w() { + let a = u32x4::new(3787058271, 4254502892, 1291509641, 2971162106); + let b = i32x4::new(-1308530150, 1427930358, 1723198474, 1987356336); + let r = i64x2::new(2478528121, 3014708115); + + assert_eq!(r, transmute(lsx_vaddwev_d_wu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_w_hu_h() { + let a = u16x8::new(7742, 2564, 7506, 3394, 6835, 41043, 29153, 7959); + let b = i16x8::new(-11621, -6593, 7431, -1189, -12361, -15174, 16182, -32434); + let r = i64x2::new(64158221463769, 194716637325930); + + assert_eq!(r, transmute(lsx_vaddwev_w_hu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_h_bu_b() { + let a = u8x16::new( + 103, 224, 71, 251, 48, 94, 188, 16, 181, 57, 192, 
250, 248, 36, 51, 176, + ); + let b = i8x16::new( + 36, -32, 108, -95, -21, 20, 67, -107, -65, -124, -19, -50, -120, -36, -79, -12, + ); + let r = i64x2::new(71776235037065355, -7880749580746636); + + assert_eq!(r, transmute(lsx_vaddwev_h_bu_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_d_wu_w() { + let a = u32x4::new(3763905902, 2910980290, 1912906409, 2257280339); + let b = i32x4::new(-1646368557, 586112311, 376247963, 1048800083); + let r = i64x2::new(3497092601, 3306080422); + + assert_eq!(r, transmute(lsx_vaddwod_d_wu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_w_hu_h() { + let a = u16x8::new(53495, 36399, 39536, 12468, 17601, 52919, 14730, 58963); + let b = i16x8::new(31700, 22725, 14068, -14860, -28839, -14513, -1195, 27082); + let r = i64x2::new(-10273561712908, 369560461022726); + + assert_eq!(r, transmute(lsx_vaddwod_w_hu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_h_bu_b() { + let a = u8x16::new( + 191, 183, 244, 200, 83, 191, 111, 82, 210, 150, 228, 182, 45, 23, 145, 159, + ); + let b = i8x16::new( + -34, -59, -104, -58, -78, 90, -117, 93, 76, -23, 37, 44, -62, 60, 119, -91, + ); + let r = i64x2::new(49259327819481212, 19140654913421439); + + assert_eq!(r, transmute(lsx_vaddwod_h_bu_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_d_w() { + let a = i32x4::new(1979919903, -1490022083, -1106776488, 2132235386); + let b = i32x4::new(-2090701374, 629564229, -1170676885, 1069800209); + let r = i64x2::new(4070621277, 63900397); + + assert_eq!(r, transmute(lsx_vsubwev_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_w_h() { + let a = i16x8::new(1153, -17319, 23560, 30758, -11540, -15757, -5844, -31417); + let b = i16x8::new(-23957, 9416, -29569, -13210, 5333, 8420, 18648, -24201); + let r = 
i64x2::new(228187317494294, -105188044063209); + + assert_eq!(r, transmute(lsx_vsubwev_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_h_b() { + let a = i8x16::new( + 123, 120, -48, 33, 4, -108, -68, -59, 54, 30, 17, -104, -30, -76, -127, -108, + ); + let b = i8x16::new( + -16, 108, -113, 37, -118, 72, 81, 103, 63, -86, -109, -71, -29, 83, -75, 97, + ); + let r = i64x2::new(-41939247539617653, -14355228098887689); + + assert_eq!(r, transmute(lsx_vsubwev_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_d_w() { + let a = i32x4::new(-1024625027, -1083407596, 1367079411, 1458097720); + let b = i32x4::new(1436617964, -45524609, 502994793, -2039550077); + let r = i64x2::new(-1037882987, 3497647797); + + assert_eq!(r, transmute(lsx_vsubwod_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_w_h() { + let a = i16x8::new(-15137, 29913, 8889, -17237, 31133, 28017, 9070, -18477); + let b = i16x8::new(-1276, 12669, 24115, 19617, -26739, 1910, -757, 23994); + let r = i64x2::new(-158286724709540, -182411556002309); + + assert_eq!(r, transmute(lsx_vsubwod_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_h_b() { + let a = i8x16::new( + -25, -19, -117, -1, 9, 24, -16, 93, 9, -77, -36, 75, 0, 126, 74, -106, + ); + let b = i8x16::new( + -91, -3, -112, 5, -88, -14, -1, 8, -100, 65, -26, -24, 41, 124, 17, -108, + ); + let r = i64x2::new(23925540523802608, 562958549909362); + + assert_eq!(r, transmute(lsx_vsubwod_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_d_wu() { + let a = u32x4::new(2665672710, 2360377198, 3032815602, 1049776563); + let b = u32x4::new(1691253880, 1939268473, 1629937431, 2921768539); + let r = i64x2::new(974418830, 1402878171); + + assert_eq!(r, transmute(lsx_vsubwev_d_wu(transmute(a), transmute(b)))); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_w_hu() { + let a = u16x8::new(8298, 25954, 33403, 10264, 36066, 64035, 18750, 26396); + let b = u16x8::new(15957, 42770, 43138, 30319, 50823, 18089, 64120, 18054); + let r = i64x2::new(-41807211666923, -194858371266981); + + assert_eq!(r, transmute(lsx_vsubwev_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_h_bu() { + let a = u8x16::new( + 128, 1, 20, 37, 75, 38, 156, 224, 7, 26, 190, 76, 144, 59, 175, 99, + ); + let b = u8x16::new( + 141, 113, 141, 61, 31, 32, 161, 158, 220, 37, 240, 180, 56, 229, 5, 26, + ); + let r = i64x2::new(-1407181617889293, 47851128289689387); + + assert_eq!(r, transmute(lsx_vsubwev_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_d_wu() { + let a = u32x4::new(623751944, 3506098576, 826539449, 2248804942); + let b = u32x4::new(103354715, 19070238, 1662532733, 3761231766); + let r = i64x2::new(3487028338, -1512426824); + + assert_eq!(r, transmute(lsx_vsubwod_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_w_hu() { + let a = u16x8::new(2891, 21215, 21876, 42023, 37208, 16456, 2023, 54703); + let b = u16x8::new(21739, 45406, 21733, 63910, 6659, 16020, 1211, 637); + let r = i64x2::new(-93999654264447, 232211701825972); + + assert_eq!(r, transmute(lsx_vsubwod_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_h_bu() { + let a = u8x16::new( + 6, 39, 26, 92, 204, 140, 65, 76, 214, 200, 24, 203, 215, 17, 22, 226, + ); + let b = u8x16::new( + 89, 14, 101, 173, 231, 124, 106, 127, 125, 115, 109, 27, 121, 175, 229, 175, + ); + let r = i64x2::new(-14355150803107815, 14636020195655765); + + assert_eq!(r, transmute(lsx_vsubwod_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_q_d() { + let a = i64x2::new(-1132117278547342347, -8844779319945501636); + 
let b = i64x2::new(6738886902337351868, -5985538541381931477); + let r = i64x2::new(5606769623790009521, 0); + + assert_eq!(r, transmute(lsx_vaddwev_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_q_d() { + let a = i64x2::new(-8159683400941020659, -1142783567808544783); + let b = i64x2::new(-1244049724346527963, -3275029038845457041); + let r = i64x2::new(-4417812606654001824, -1); + + assert_eq!(r, transmute(lsx_vaddwod_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_q_du() { + let a = u64x2::new(16775220860485391359, 8922486068170257729); + let b = u64x2::new(6745766838534849346, 15041258018068294402); + let r = i64x2::new(5074243625310689089, 1); + + assert_eq!(r, transmute(lsx_vaddwev_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_q_du() { + let a = u64x2::new(17311013772674153390, 11698682577513574290); + let b = u64x2::new(13496765248439164553, 4640846570780442359); + let r = i64x2::new(-2107214925415534967, 0); + + assert_eq!(r, transmute(lsx_vaddwod_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_q_d() { + let a = i64x2::new(8509296067394123199, 4972040966127046151); + let b = i64x2::new(8029026411722387723, -2105201823388787841); + let r = i64x2::new(480269655671735476, 0); + + assert_eq!(r, transmute(lsx_vsubwev_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_q_d() { + let a = i64x2::new(-5518792681032609552, -5818770921355494107); + let b = i64x2::new(5758437127240728961, 2933507971643343184); + let r = i64x2::new(-8752278892998837291, -1); + + assert_eq!(r, transmute(lsx_vsubwod_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwev_q_du() { + let a = u64x2::new(15348090063574162992, 4054607174208637377); + let b = u64x2::new(1574118313456291324, 
7787456577305510529); + let r = i64x2::new(-4672772323591679948, 0); + + assert_eq!(r, transmute(lsx_vsubwev_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsubwod_q_du() { + let a = u64x2::new(7199085452795040192, 586057639195920839); + let b = u64x2::new(5627376085113520030, 12775637764770549815); + let r = i64x2::new(6257163948134922640, -1); + + assert_eq!(r, transmute(lsx_vsubwod_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwev_q_du_d() { + let a = u64x2::new(11103722789624608070, 8912888508651245205); + let b = i64x2::new(-1159499132550683978, -4257322329662100669); + let r = i64x2::new(-8502520416635627524, 0); + + assert_eq!(r, transmute(lsx_vaddwev_q_du_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vaddwod_q_du_d() { + let a = u64x2::new(8904095231861536434, 126069624822744729); + let b = i64x2::new(-3902573037873546881, 160140233311333524); + let r = i64x2::new(286209858134078253, 0); + + assert_eq!(r, transmute(lsx_vaddwod_q_du_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_d_w() { + let a = i32x4::new(1287102156, 1220933948, 1816088643, -266313269); + let b = i32x4::new(8741677, -276509855, -1214560052, -1338519080); + let r = i64x2::new(11251431313755612, -2205748716678689436); + + assert_eq!(r, transmute(lsx_vmulwev_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_w_h() { + let a = i16x8::new(6427, -15587, -29266, -12748, 29941, -16072, -3936, -4131); + let b = i16x8::new(30661, -20472, 1422, -16868, 4256, 9713, -27765, -7287); + let r = i64x2::new(-178740441125036345, 469367082934888736); + + assert_eq!(r, transmute(lsx_vmulwev_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_h_b() { + let a = i8x16::new( + -53, -116, -37, -91, -27, -23, 3, -103, -83, 88, 61, -1, 37, 89, 
-77, -78, + ); + let b = i8x16::new( + 102, -8, -8, -115, -104, 126, 46, 69, -53, 81, -41, 100, -83, -42, -38, -17, + ); + let r = i64x2::new(38855607073696482, 823864071118590255); + + assert_eq!(r, transmute(lsx_vmulwev_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_d_w() { + let a = i32x4::new(730217708, -1124949962, -360746398, -1749502167); + let b = i32x4::new(63312847, -1377579771, -2054819244, -1416520586); + let r = i64x2::new(1549708311038418702, 2478205834807109862); + + assert_eq!(r, transmute(lsx_vmulwod_d_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_w_h() { + let a = i16x8::new(-16507, -11588, -4739, -32549, -22878, 5561, -6134, -3022); + let b = i16x8::new(23748, 11912, 4946, -23048, 22372, 24702, -24875, -27771); + let r = i64x2::new(3222038736804363232, 360450672278114574); + + assert_eq!(r, transmute(lsx_vmulwod_w_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_h_b() { + let a = i8x16::new( + -110, 22, -19, -91, 6, 25, -7, 13, 86, -110, -98, -100, -18, -111, 100, 31, + ); + let b = i8x16::new( + 102, 16, -43, -24, -28, 2, 5, -96, 26, 74, -56, 109, -30, 40, -96, 109, + ); + let r = i64x2::new(-351280556043402912, 951366355207905332); + + assert_eq!(r, transmute(lsx_vmulwod_h_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_d_wu() { + let a = u32x4::new(2063305123, 761682812, 3318081558, 2848424479); + let b = u32x4::new(1769900227, 2256955703, 2342391995, 2407560006); + let r = i64x2::new(3651844205567962921, 7772247680216328210); + + assert_eq!(r, transmute(lsx_vmulwev_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_w_hu() { + let a = u16x8::new(9553, 49381, 46053, 13610, 17063, 24513, 41196, 11695); + let b = u16x8::new(20499, 45056, 20580, 12771, 53914, 60742, 45402, 40547); + let r = 
i64x2::new(4070644332601545987, 8033224333626513014); + + assert_eq!(r, transmute(lsx_vmulwev_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_h_bu() { + let a = u8x16::new( + 227, 157, 43, 90, 6, 141, 46, 1, 92, 129, 254, 35, 161, 83, 40, 101, + ); + let b = u8x16::new( + 111, 233, 206, 13, 205, 128, 21, 105, 114, 77, 138, 243, 4, 51, 173, 180, + ); + let r = i64x2::new(271910110892810861, 1947809607093856504); + + assert_eq!(r, transmute(lsx_vmulwev_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_d_wu() { + let a = u32x4::new(2178610550, 1983075871, 1118106927, 2182535205); + let b = u32x4::new(3750239707, 1422851626, 1277923597, 1377279439); + let r = i64x2::new(2821622727533716246, 3005960862740149995); + + assert_eq!(r, transmute(lsx_vmulwod_d_wu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_w_hu() { + let a = u16x8::new(63169, 54563, 40593, 32351, 22785, 46152, 51840, 54366); + let b = u16x8::new(38950, 5357, 36233, 17707, 61077, 61518, 5789, 13317); + let r = i64x2::new(2460325445475503463, 3109522059894091248); + + assert_eq!(r, transmute(lsx_vmulwod_w_hu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_h_bu() { + let a = u8x16::new( + 143, 18, 19, 120, 134, 160, 86, 206, 25, 26, 241, 198, 207, 50, 233, 169, + ); + let b = u8x16::new( + 244, 115, 210, 167, 103, 242, 182, 127, 214, 208, 47, 86, 54, 81, 161, 139, + ); + let r = i64x2::new(7364114643151226902, 6612146073643521312); + + assert_eq!(r, transmute(lsx_vmulwod_h_bu(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_d_wu_w() { + let a = u32x4::new(1829687775, 3948847254, 3506011389, 2834786083); + let b = i32x4::new(1254729285, 1938836163, -1902169358, -257980375); + let r = i64x2::new(2295762833698990875, -6669027432954818262); + + assert_eq!(r, 
transmute(lsx_vmulwev_d_wu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_w_hu_h() { + let a = u16x8::new(50708, 48173, 47753, 19808, 25837, 56376, 50749, 8070); + let b = i16x8::new(-30477, -10049, 16428, -30668, 21000, 24834, -3219, -9555); + let r = i64x2::new(3369342936690107644, -701630285043265176); + + assert_eq!(r, transmute(lsx_vmulwev_w_hu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_h_bu_b() { + let a = u8x16::new( + 196, 15, 88, 70, 49, 17, 144, 62, 34, 164, 51, 69, 162, 88, 100, 31, + ); + let b = i8x16::new( + -92, 119, 90, -113, -83, 119, -28, -14, 57, 93, -21, -38, 42, -105, -67, -73, + ); + let r = i64x2::new(-1134643098233554544, -1885853116779133038); + + assert_eq!(r, transmute(lsx_vmulwev_h_bu_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_d_wu_w() { + let a = u32x4::new(3252247725, 3029105766, 3286505645, 1763684728); + let b = i32x4::new(1204047391, -1970001586, 608763444, -2082771896); + let r = i64x2::new(-5967343163181744876, -3673352984882804288); + + assert_eq!(r, transmute(lsx_vmulwod_d_wu_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_w_hu_h() { + let a = u16x8::new(38405, 41959, 20449, 33265, 58814, 59003, 64929, 20835); + let b = i16x8::new(-3735, -12972, -4920, 7170, 11577, 9785, 4896, -537); + let r = i64x2::new(1024392868267999948, -48053790042385565); + + assert_eq!(r, transmute(lsx_vmulwod_w_hu_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_h_bu_b() { + let a = u8x16::new( + 78, 246, 141, 207, 212, 16, 30, 141, 71, 187, 92, 123, 199, 224, 105, 250, + ); + let b = i8x16::new( + 46, 11, 86, 64, -118, -53, 125, 48, -122, 104, 53, -111, 39, 16, -94, -56, + ); + let r = i64x2::new(1905300476090387090, -3940634277386171400); + + assert_eq!(r, transmute(lsx_vmulwod_h_bu_b(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_q_d() { + let a = i64x2::new(-7300892474466935547, -2126323416087979991); + let b = i64x2::new(7023560313675997328, 4368639658790376608); + let r = i64x2::new(-1409563343912029488, -2779799970834089134); + + assert_eq!(r, transmute(lsx_vmulwev_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_q_d() { + let a = i64x2::new(-333821925237206080, -2872872657001472243); + let b = i64x2::new(1734538850547798281, 6505001633960390309); + let r = i64x2::new(655114704133495137, -1013080750363369114); + + assert_eq!(r, transmute(lsx_vmulwod_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_q_du() { + let a = u64x2::new(7574912843445409775, 6458810692359816933); + let b = u64x2::new(15048173707940873365, 13594773395779002998); + let r = i64x2::new(-4049323972691826149, 6179334620527225413); + + assert_eq!(r, transmute(lsx_vmulwev_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_q_du() { + let a = u64x2::new(4945250618288414185, 5836523005600515765); + let b = u64x2::new(16172423495582959833, 11676106279348566952); + let r = i64x2::new(-66293137947075128, 3694303051148166412); + + assert_eq!(r, transmute(lsx_vmulwod_q_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwev_q_du_d() { + let a = u64x2::new(15472635927451755137, 2872062649560660647); + let b = i64x2::new(-7071166739782294817, 8496829998090419991); + let r = i64x2::new(5234431817964974175, -5931105679667820544); + + assert_eq!(r, transmute(lsx_vmulwev_q_du_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmulwod_q_du_d() { + let a = u64x2::new(2980498025260165803, 6347157252532266677); + let b = i64x2::new(-9085162554263782091, -3351642387065053502); + let r = i64x2::new(-3119502026085414102, -1153233394465180223); 
+ + assert_eq!(r, transmute(lsx_vmulwod_q_du_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_q_d() { + let a = i64x2::new(-7668184096931639781, -2784020394780249366); + let b = i64x2::new(9222966760421493517, -8347454331188625422); + let r = i64x2::new(6438946365641244151, 0); + + assert_eq!(r, transmute(lsx_vhaddw_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhaddw_qu_du() { + let a = u64x2::new(16989728354409608690, 2941626047560944845); + let b = u64x2::new(2141387370256045519, 12417156199252644485); + let r = i64x2::new(5083013417816990364, 0); + + assert_eq!(r, transmute(lsx_vhaddw_qu_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_q_d() { + let a = i64x2::new(4415650624918824808, -2427685530964051137); + let b = i64x2::new(-3245503809142406078, 8660213762027125085); + let r = i64x2::new(817818278178354941, 0); + + assert_eq!(r, transmute(lsx_vhsubw_q_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vhsubw_qu_du() { + let a = u64x2::new(13300663635362906510, 12554343611316218179); + let b = u64x2::new(3098179646743711521, 11374525358855478565); + let r = i64x2::new(-8990580109137044958, 0); + + assert_eq!(r, transmute(lsx_vhsubw_qu_du(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_d_w() { + let a = i64x2::new(7507491558224723369, 7356288879446926343); + let b = i32x4::new(-1410295112, 176083487, 1092174685, 1464381516); + let c = i32x4::new(1610457028, -1105361927, -790658106, -1804307944); + let r = i64x2::new(5236271883550276233, 6492752111583679733); + + assert_eq!( + r, + transmute(lsx_vmaddwev_d_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_w_h() { + let a = i32x4::new(1210747897, 1541928975, -720014144, -2019635451); + let b = i16x8::new(12181, 16380, -24682, 
-13729, 12128, -21312, -23449, 17); + let c = i16x8::new(-27087, 21294, 30093, 5456, 28491, -25365, -18595, 14478); + let r = i64x2::new(3432424257664054654, -6801515772302723616); + + assert_eq!( + r, + transmute(lsx_vmaddwev_w_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_h_b() { + let a = i16x8::new(-26961, 27058, -26746, 7019, 27143, -20720, 20159, -22095); + let b = i8x16::new( + 126, 29, -29, 63, -17, 109, 56, 67, 91, -76, 83, -101, 51, 39, -109, 16, + ); + let c = i8x16::new( + -40, -36, -53, -47, -78, 33, -97, -54, 21, 103, 69, 101, 33, -83, 79, -6, + ); + let r = i64x2::new(446873086821892863, -8642876820889308802); + + assert_eq!( + r, + transmute(lsx_vmaddwev_h_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_d_wu() { + let a = u64x2::new(3288783601225499701, 17730813816531737481); + let b = u32x4::new(2583154680, 1751994654, 1115446691, 3761972534); + let c = u32x4::new(1143913546, 2487138808, 577997991, 917071165); + let r = i64x2::new(6243689231090794981, -71204310712216354); + + assert_eq!( + r, + transmute(lsx_vmaddwev_d_wu(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_w_hu() { + let a = u32x4::new(805734379, 3876931235, 2135371653, 3482539797); + let b = u16x8::new(7507, 65354, 30738, 63434, 34178, 38533, 8774, 9013); + let c = u16x8::new(32752, 10153, 5275, 7485, 55213, 62803, 43040, 42218); + let r = i64x2::new(-1099052541965094213, -1867428321461954977); + + assert_eq!( + r, + transmute(lsx_vmaddwev_w_hu(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_h_bu() { + let a = u16x8::new(55814, 6276, 42400, 55862, 19175, 17360, 30132, 17253); + let b = u8x16::new( + 148, 50, 79, 199, 193, 25, 144, 93, 18, 182, 102, 150, 226, 222, 254, 1, + ); + let c = u8x16::new( + 141, 28, 169, 93, 60, 134, 
117, 80, 43, 12, 75, 85, 174, 176, 62, 94, + ); + let r = i64x2::new(2019533326543170442, -9157771529370317331); + + assert_eq!( + r, + transmute(lsx_vmaddwev_h_bu(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_d_w() { + let a = i64x2::new(1296033816549937177, -2404834118264545479); + let b = i32x4::new(-2135765262, -1741194198, -1750008434, -242816495); + let c = i32x4::new(178412146, 887047455, -1630315539, 57253350); + let r = i64x2::new(-248488065446728913, -2418736176038553729); + + assert_eq!( + r, + transmute(lsx_vmaddwod_d_w(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_w_h() { + let a = i32x4::new(1810262555, -720984423, 744322940, -172229387); + let b = i16x8::new(27584, -15468, -21544, -11891, -16682, 18538, -7573, -1522); + let c = i16x8::new(-8815, 3268, -32219, -7020, 13853, 26700, -2030, -5667); + let r = i64x2::new(-2738082894011230357, -702674743083530508); + + assert_eq!( + r, + transmute(lsx_vmaddwod_w_h(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_h_b() { + let a = i16x8::new(32731, -16929, 397, 14417, 22494, 1416, 1669, -12175); + let b = i8x16::new( + 87, 77, -44, -128, -69, 120, 82, -99, -21, 66, -47, -59, -35, 90, -85, 94, + ); + let c = i8x16::new( + 87, -119, -48, 10, 26, -36, 89, -16, 91, -74, -116, 7, 78, 17, -9, -98, + ); + let r = i64x2::new(4504145731268860944, -6019891587244669750); + + assert_eq!( + r, + transmute(lsx_vmaddwod_h_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_d_wu() { + let a = u64x2::new(8272899369384595612, 11592257149528470828); + let b = u32x4::new(244745450, 2190106289, 660562971, 1842569843); + let c = u32x4::new(388973541, 2963125445, 520938623, 340863345); + let r = i64x2::new(-3684285032134532399, -6226422404099975953); + + assert_eq!( + r, + 
transmute(lsx_vmaddwod_d_wu(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_w_hu() { + let a = u32x4::new(2163417444, 940670316, 624242075, 3716350419); + let b = u16x8::new(10149, 33560, 21613, 61563, 14556, 33558, 30440, 63972); + let c = u16x8::new(9862, 40610, 42783, 2223, 62194, 15996, 61261, 33667); + let r = i64x2::new(4627934059328104084, 6765125168025305155); + + assert_eq!( + r, + transmute(lsx_vmaddwod_w_hu(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_h_bu() { + let a = u16x8::new(17882, 7508, 14715, 47175, 62895, 51393, 34943, 20707); + let b = u8x16::new( + 83, 27, 56, 178, 210, 166, 36, 48, 144, 156, 209, 6, 181, 65, 232, 42, + ); + let c = u8x16::new( + 127, 23, 147, 75, 137, 205, 146, 169, 72, 89, 154, 45, 185, 229, 28, 217, + ); + let r = i64x2::new(-2884627676759701433, 8394079293504695275); + + assert_eq!( + r, + transmute(lsx_vmaddwod_h_bu(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_d_wu_w() { + let a = i64x2::new(-6323015107493705206, -3277448760143472563); + let b = u32x4::new(2331684563, 1941329953, 2983229925, 1155461882); + let c = i32x4::new(-1110134113, -106291268, -391880820, 644991581); + let r = i64x2::new(-8911497681635502825, -4446519349401011063); + + assert_eq!( + r, + transmute(lsx_vmaddwev_d_wu_w( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_w_hu_h() { + let a = i32x4::new(1713941452, 1545069267, -1096163566, -573017556); + let b = u16x8::new(28055, 23297, 30225, 2761, 48193, 19269, 2518, 51038); + let c = i16x8::new(-7715, -18819, -4701, -3778, 7207, 5810, -4430, -8060); + let r = i64x2::new(6025759841279147559, -2509000903003100935); + + assert_eq!( + r, + transmute(lsx_vmaddwev_w_hu_h( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_h_bu_b() { + let a = i16x8::new(27922, 26192, 14273, -18511, -13090, 27036, 4607, 27830); + let b = u8x16::new( + 85, 234, 241, 30, 218, 135, 230, 175, 34, 217, 231, 43, 159, 81, 198, 89, + ); + let c = i8x16::new( + 82, -91, 49, -114, 60, -32, -30, 17, 3, 82, -73, -55, -31, -106, -23, -44, + ); + let r = i64x2::new(-7152443150463563700, 6551891650581220676); + + assert_eq!( + r, + transmute(lsx_vmaddwev_h_bu_b( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_d_wu_w() { + let a = i64x2::new(4995790344325484125, -3678161850757174337); + let b = u32x4::new(770268311, 2190608617, 3264567056, 3912406971); + let c = i32x4::new(1039193627, -382136981, 178615845, -2029105420); + let r = i64x2::new(4158677780872518848, 6829896032850494459); + + assert_eq!( + r, + transmute(lsx_vmaddwod_d_wu_w( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_w_hu_h() { + let a = i32x4::new(-1650648862, 112052630, 369411463, -1789144688); + let b = u16x8::new(33326, 2589, 54571, 14483, 51494, 10946, 54991, 11715); + let c = i16x8::new(-13502, 9856, -7830, -1915, 23659, -23776, -29716, 15794); + let r = i64x2::new(362141702219265378, -6889634254326488121); + + assert_eq!( + r, + transmute(lsx_vmaddwod_w_hu_h( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_h_bu_b() { + let a = i16x8::new(16717, -21485, 6612, -8821, -31304, -13638, -10878, -27550); + let b = u8x16::new( + 99, 203, 114, 187, 131, 179, 178, 24, 220, 126, 23, 139, 118, 148, 39, 18, + ); + let c = i8x16::new( + 99, -47, 53, -116, 110, -65, -107, 123, -42, -51, -120, -102, 51, -56, -103, -58, + ); + let r = i64x2::new(-1651716735493530616, -8048296323958936418); + + assert_eq!( + r, + transmute(lsx_vmaddwod_h_bu_b( + transmute(a), + transmute(b), + 
transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_q_d() { + let a = i64x2::new(-6837031335752177395, -6960992767212208666); + let b = i64x2::new(-4435069404701670756, -2126315287755608563); + let c = i64x2::new(-5551390506600609458, -6711686916497928751); + let r = i64x2::new(-8173734519403794283, -5626296406109360320); + + assert_eq!( + r, + transmute(lsx_vmaddwev_q_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_q_d() { + let a = i64x2::new(-1677869231369184389, 8708214911109206592); + let b = i64x2::new(-7813673205639863330, -9004405202552727709); + let c = i64x2::new(989988865428690976, 7138926957150547746); + let r = i64x2::new(-1125748635129453663, 5223492036614230927); + + assert_eq!( + r, + transmute(lsx_vmaddwod_q_d(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_q_du() { + let a = u64x2::new(17268971871627349752, 17228948998305822956); + let b = u64x2::new(10411505101371540933, 14258056959108407269); + let c = u64x2::new(10083084353835617951, 7442290876599468511); + let r = i64x2::new(4362805751568378451, 4473186691787239539); + + assert_eq!( + r, + transmute(lsx_vmaddwev_q_du(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_q_du() { + let a = u64x2::new(14967144687255063091, 6224733010665264496); + let b = u64x2::new(17625137945884588260, 1535023950244313744); + let c = u64x2::new(1841326774698258895, 9587959489663720036); + let r = i64x2::new(1938476888214276723, 7022583698667268618); + + assert_eq!( + r, + transmute(lsx_vmaddwod_q_du(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwev_q_du_d() { + let a = i64x2::new(7413074575332965326, -6131981171876880542); + let b = u64x2::new(7027881729907986450, 9385132453710384328); + let c = i64x2::new(6154882990643114022, 
8692307970783152636); + let r = i64x2::new(-8494196038584058246, -3787080112545186901); + + assert_eq!( + r, + transmute(lsx_vmaddwev_q_du_d( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmaddwod_q_du_d() { + let a = i64x2::new(-3567580028466810679, 82284695558926958); + let b = u64x2::new(12724355976909764846, 2153966982409398933); + let c = i64x2::new(-2209580291901273167, -3993952038101553236); + let r = i64x2::new(-613602630799693851, -384076239737958818); + + assert_eq!( + r, + transmute(lsx_vmaddwod_q_du_d( + transmute(a), + transmute(b), + transmute(c) + )) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotr_b() { + let a = i8x16::new( + -115, -5, 112, 87, -91, -10, -42, -109, -71, 30, 80, 109, -37, -36, -82, -61, + ); + let b = i8x16::new( + 98, 80, -27, -51, -44, -43, 28, -49, -47, 12, -100, -113, 35, -85, 9, 23, + ); + let r = i64x2::new(2841128540244802403, -8694309599374351908); + + assert_eq!(r, transmute(lsx_vrotr_b(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotr_h() { + let a = i16x8::new(29688, -22641, 11287, 9743, 29744, -9683, -24918, 28489); + let b = i16x8::new(-6485, 1418, 8263, -29872, -6491, 3930, -20621, 32531); + let r = i64x2::new(2742461657407651598, 3308267577913279393); + + assert_eq!(r, transmute(lsx_vrotr_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotr_w() { + let a = i32x4::new(-232185187, -1057829624, -1428233439, 314333357); + let b = i32x4::new(1956224189, -1858012941, -1889446514, -2130978943); + let r = i64x2::new(6458469860191573231, -8548346292466177157); + + assert_eq!(r, transmute(lsx_vrotr_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotr_d() { + let a = i64x2::new(-8694664621869506061, 3293016169868759706); + let b = i64x2::new(4553458262651691654, -5062393334123159235); + let r = 
i64x2::new(-3594618648537251961, 7897385285240526033); + + assert_eq!(r, transmute(lsx_vrotr_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vadd_q() { + let a = i64x2::new(2423569640801257553, 678073579687698205); + let b = i64x2::new(114135477458514099, 3481307531297359399); + let r = i64x2::new(2537705118259771652, 4159381110985057604); + + assert_eq!(r, transmute(lsx_vadd_q(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsub_q() { + let a = i64x2::new(7892977690518598837, -3112927447911510492); + let b = i64x2::new(-8526086848853095438, -1323481969747305966); + let r = i64x2::new(-2027679534337857341, -1789445478164204527); + + assert_eq!(r, transmute(lsx_vsub_q(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vldrepl_b() { + let a: [i8; 16] = [ + -88, 52, -104, -111, 84, -101, -36, 49, 31, 10, 34, -78, 22, 22, 118, 80, + ]; + let r = i64x2::new(-6293595036912670552, -6293595036912670552); + + assert_eq!(r, transmute(lsx_vldrepl_b::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vldrepl_h() { + let a: [i8; 16] = [ + 29, 81, 114, -8, 70, 29, 100, 46, 105, 38, -10, -58, 2, 66, -104, -43, + ]; + let r = i64x2::new(5844917077753549085, 5844917077753549085); + + assert_eq!(r, transmute(lsx_vldrepl_h::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vldrepl_w() { + let a: [i8; 16] = [ + -56, -83, -27, -88, 85, -105, 81, -74, 124, -76, -29, 34, 99, 36, 36, 37, + ]; + let r = i64x2::new(-6276419428332229176, -6276419428332229176); + + assert_eq!(r, transmute(lsx_vldrepl_w::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vldrepl_d() { + let a: [i8; 16] = [ + 90, -84, 7, 91, -2, 32, 74, 2, -4, 119, 62, 98, -112, -127, -109, 101, + ]; + let r = i64x2::new(164980613173455962, 164980613173455962); + + assert_eq!(r, transmute(lsx_vldrepl_d::<0>(a.as_ptr()))); +} + +#[simd_test(enable = 
"lsx")] +unsafe fn test_lsx_vmskgez_b() { + let a = i8x16::new( + -121, 102, -85, -2, -103, 100, 119, -46, 35, -16, -66, -43, -61, 79, 40, -43, + ); + let r = i64x2::new(24930, 0); + + assert_eq!(r, transmute(lsx_vmskgez_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vmsknz_b() { + let a = i8x16::new( + -25, 93, 124, 56, -119, -93, -123, 118, -27, 16, -22, 58, -59, 69, 63, -66, + ); + let r = i64x2::new(65535, 0); + + assert_eq!(r, transmute(lsx_vmsknz_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_h_b() { + let a = i8x16::new( + -86, 119, 29, -97, -55, -30, 39, -102, 85, 73, 20, -12, -94, 53, 30, 114, + ); + let r = i64x2::new(-3377613816397739, 32088276197572514); + + assert_eq!(r, transmute(lsx_vexth_h_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_w_h() { + let a = i16x8::new(14576, -26514, 14165, -15781, 10106, 1864, 23348, 30478); + let r = i64x2::new(8005819049850, 130902013270836); + + assert_eq!(r, transmute(lsx_vexth_w_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_d_w() { + let a = i32x4::new(863783254, 799653326, -1122161877, -652869192); + let r = i64x2::new(-1122161877, -652869192); + + assert_eq!(r, transmute(lsx_vexth_d_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_q_d() { + let a = i64x2::new(2924262436748867523, 1959694872821330818); + let r = i64x2::new(1959694872821330818, 0); + + assert_eq!(r, transmute(lsx_vexth_q_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_hu_bu() { + let a = u8x16::new( + 88, 245, 152, 181, 22, 122, 243, 162, 170, 115, 212, 217, 148, 176, 60, 214, + ); + let r = i64x2::new(61080980486815914, 60235902725652628); + + assert_eq!(r, transmute(lsx_vexth_hu_bu(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_wu_hu() { + let a = u16x8::new(58875, 18924, 17611, 30197, 33869, 53931, 4693, 53025); + let r = 
i64x2::new(231631881274445, 227740640875093); + + assert_eq!(r, transmute(lsx_vexth_wu_hu(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_du_wu() { + let a = u32x4::new(3499742961, 2840979237, 2082263829, 1096292547); + let r = i64x2::new(2082263829, 1096292547); + + assert_eq!(r, transmute(lsx_vexth_du_wu(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vexth_qu_du() { + let a = u64x2::new(14170556367894986991, 14238702840099699193); + let r = i64x2::new(-4208041233609852423, 0); + + assert_eq!(r, transmute(lsx_vexth_qu_du(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotri_b() { + let a = i8x16::new( + 7, 49, -22, -120, -94, 53, -19, 95, -84, -30, 31, -25, 30, -98, -86, -5, + ); + let r = i64x2::new(-2919654548887155519, -96080239582005205); + + assert_eq!(r, transmute(lsx_vrotri_b::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotri_h() { + let a = i16x8::new(-14120, -16812, -19570, -990, 24476, -7640, 20329, 8879); + let r = i64x2::new(-556925602567188047, 4998607264501841720); + + assert_eq!(r, transmute(lsx_vrotri_h::<15>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotri_w() { + let a = i32x4::new(-1760224525, -1644621284, 1835781046, -1487934110); + let r = i64x2::new(2845787365010917052, -6209343103231659283); + + assert_eq!(r, transmute(lsx_vrotri_w::<2>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrotri_d() { + let a = i64x2::new(8884634342417174882, 244175985366916345); + let r = i64x2::new(-3963790888197019724, 4020656082573561910); + + assert_eq!(r, transmute(lsx_vrotri_d::<52>(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vextl_q_d() { + let a = i64x2::new(-5110246490938885255, 377414780188285171); + let r = i64x2::new(-5110246490938885255, -1); + + assert_eq!(r, transmute(lsx_vextl_q_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vsrlni_b_h() { + let a = i8x16::new( + -62, -32, -115, -97, -74, 113, -113, -4, 10, 39, 102, -3, 38, 83, -88, 73, + ); + let b = i8x16::new( + 115, 89, -35, 113, -13, 93, -90, -127, -73, -66, -71, 19, 37, 76, -89, 116, + ); + let r = i64x2::new(72339077638193409, 72342367599919619); + + assert_eq!( + r, + transmute(lsx_vsrlni_b_h::<14>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlni_h_w() { + let a = i16x8::new(4205, -10016, 6553, 16160, 26411, 29470, -20643, 30057); + let b = i16x8::new(-20939, 15459, 13368, -29800, -25275, -15723, 30837, 7321); + let r = i64x2::new(1970530997633039, 8162894584676406); + + assert_eq!( + r, + transmute(lsx_vsrlni_h_w::<26>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlni_w_d() { + let a = i32x4::new(1705975377, 322077350, -1922153156, -661241171); + let b = i32x4::new(1098943214, -1567917396, 297055649, -1122208150); + let r = i64x2::new(2133162980935405664, -8022209066041763477); + + assert_eq!( + r, + transmute(lsx_vsrlni_w_d::<18>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlni_d_q() { + let a = i64x2::new(6325216582707926854, -5129479093920978170); + let b = i64x2::new(3985485829689892785, 7685789624553197779); + let r = i64x2::new(7505653930227732, 13005141581824778); + + assert_eq!( + r, + transmute(lsx_vsrlni_d_q::<74>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrni_b_h() { + let a = i8x16::new( + -103, -39, -112, -128, -96, 40, -89, 40, -55, 102, 37, -49, 96, -107, 26, 16, + ); + let b = i8x16::new( + -57, 51, 17, 1, 37, 120, -54, 78, -67, 36, 0, -121, -113, 27, -9, 74, + ); + let r = i64x2::new(3201527803797374159, 4635960605099098726); + + assert_eq!( + r, + transmute(lsx_vsrlrni_b_h::<6>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrni_h_w() { + let a = i16x8::new(16435, 
-5399, -4992, 1377, -27419, -9060, 28877, -12666); + let b = i16x8::new(30165, -32344, 15225, 17457, -5900, -17127, -30430, 21140); + let r = i64x2::new(5919251242624655831, 1856453178786227457); + + assert_eq!( + r, + transmute(lsx_vsrlrni_h_w::<6>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrni_w_d() { + let a = i32x4::new(-1783593075, -767627057, 522051412, 1497970809); + let b = i32x4::new(-613709101, 1782777798, -1376237383, -2108949489); + let r = i64x2::new(8955006813860, 6137508269348); + + assert_eq!( + r, + transmute(lsx_vsrlrni_w_d::<52>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrlrni_d_q() { + let a = i64x2::new(-8390257423140334242, -5915059672723228155); + let b = i64x2::new(4065462044175592876, 5861150325027293506); + let r = i64x2::new(42645481, 91180005); + + assert_eq!( + r, + transmute(lsx_vsrlrni_d_q::<101>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_b_h() { + let a = i8x16::new( + -126, 26, 50, 111, 24, 36, -59, -44, -12, 82, 16, -39, 10, 27, -76, -81, + ); + let b = i8x16::new( + -72, -74, 3, -16, -50, -40, 17, -39, -88, 33, -11, -74, 27, 104, -56, 35, + ); + let r = i64x2::new(72907520922224389, 360294575950070528); + + assert_eq!( + r, + transmute(lsx_vssrlni_b_h::<13>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_h_w() { + let a = i16x8::new(8928, 556, 327, 11357, -32577, 24481, -16101, -875); + let b = i16x8::new(12, -2621, -27458, -24262, 23377, 16952, 19498, -31793); + let r = i64x2::new(74028485831688683, 142145683583401988); + + assert_eq!( + r, + transmute(lsx_vssrlni_h_w::<23>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_w_d() { + let a = i32x4::new(1838928968, 1883060425, -990389689, 735664934); + let b = i32x4::new(-971263991, -98050158, 134746673, -49144118); + let r = 
i64x2::new(9223372034707292159, 9223372034707292159); + + assert_eq!( + r, + transmute(lsx_vssrlni_w_d::<12>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_d_q() { + let a = i64x2::new(-5470954942766391223, 2164868713336601834); + let b = i64x2::new(-3507919664178941311, 8800311307152269561); + let r = i64x2::new(524539429375, 129036230643); + + assert_eq!( + r, + transmute(lsx_vssrlni_d_q::<88>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_bu_h() { + let a = u8x16::new( + 42, 80, 7, 61, 49, 172, 110, 186, 30, 201, 214, 72, 201, 231, 144, 223, + ); + let b = i8x16::new( + 39, 98, -57, 124, 78, 127, 89, 26, 44, 57, 9, -36, -100, -41, 7, 30, + ); + let r = i64x2::new(1695451225195267, 434318113941815554); + + assert_eq!( + r, + transmute(lsx_vssrlni_bu_h::<13>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_hu_w() { + let a = u16x8::new(47562, 12077, 58166, 40959, 47625, 4449, 45497, 47932); + let b = i16x8::new(25513, -19601, -22702, -15840, 32377, 32023, -4115, 25327); + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + transmute(lsx_vssrlni_hu_w::<9>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_wu_d() { + let a = u32x4::new(3924399037, 1624231459, 1033186938, 4207801648); + let b = i32x4::new(-343671492, 63408059, -17420952, -742649266); + let r = i64x2::new(111669149696, 133143986188); + + assert_eq!( + r, + transmute(lsx_vssrlni_wu_d::<59>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlni_du_q() { + let a = u64x2::new(9385373857335523158, 8829548075644432850); + let b = i64x2::new(1935200102096005901, -4336418136884591685); + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + transmute(lsx_vssrlni_du_q::<6>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_b_h() { + let 
a = i8x16::new( + -118, -53, 124, -32, -8, -106, -30, 125, 80, -118, 111, -49, 2, -54, -109, -63, + ); + let b = i8x16::new( + -128, 104, -60, -21, -28, 47, -78, 125, -65, -31, 111, 127, -102, -50, 87, 102, + ); + let r = i64x2::new(9187201950435737471, 9187201950435737471); + + assert_eq!( + r, + transmute(lsx_vssrlrni_b_h::<0>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_h_w() { + let a = i16x8::new(-6932, -27303, 5931, 1697, 23680, -18344, 21222, 31527); + let b = i16x8::new(16541, 32147, -26353, -15678, -7913, -31777, 12521, -25215); + let r = i64x2::new(2814784127631368, 2251851353292809); + + assert_eq!( + r, + transmute(lsx_vssrlrni_h_w::<28>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_w_d() { + let a = i32x4::new(-528492260, 635780412, 2102955910, -106415932); + let b = i32x4::new(-1062242289, 359654281, 1831754020, 1455206052); + let r = i64x2::new(9223372034707292159, 9223372034707292159); + + assert_eq!( + r, + transmute(lsx_vssrlrni_w_d::<1>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_d_q() { + let a = i64x2::new(-2050671473765220606, -974956007142498603); + let b = i64x2::new(4675761647927162976, -5100418369989582579); + let r = i64x2::new(9223372036854775807, 9223372036854775807); + + assert_eq!( + r, + transmute(lsx_vssrlrni_d_q::<60>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_bu_h() { + let a = u8x16::new( + 100, 79, 212, 163, 219, 225, 100, 84, 1, 173, 146, 41, 33, 251, 175, 18, + ); + let b = i8x16::new( + 104, -36, 123, 103, -26, -37, -104, -46, 107, -89, 120, 33, 117, -54, 107, 105, + ); + let r = i64x2::new(217862753078412039, 74310514888869122); + + assert_eq!( + r, + transmute(lsx_vssrlrni_bu_h::<13>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_hu_w() { + let a = 
u16x8::new(35722, 45502, 51777, 63215, 9369, 33224, 15844, 23578); + let b = i16x8::new(-18038, 23224, 26314, -15841, 826, -15682, -4109, -24970); + let r = i64x2::new(22236939778326573, 12948128109625433); + + assert_eq!( + r, + transmute(lsx_vssrlrni_hu_w::<25>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_wu_d() { + let a = u32x4::new(1956924769, 1833875292, 1956412037, 426346371); + let b = i32x4::new(-1128409795, 198077570, -1649408138, 1665566624); + let r = i64x2::new(447097136224200392, 114446481822641014); + + assert_eq!( + r, + transmute(lsx_vssrlrni_wu_d::<36>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrni_du_q() { + let a = u64x2::new(9048079498548224395, 9603999840623079368); + let b = i64x2::new(-404424089294655868, 5140892317651856748); + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + transmute(lsx_vssrlrni_du_q::<38>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrani_b_h() { + let a = i8x16::new( + 127, 75, -70, 122, 36, 105, 73, 54, -17, 44, 92, -80, 11, -110, 81, 51, + ); + let b = i8x16::new( + -72, 6, 81, -61, -8, -96, 24, 77, 30, -20, 95, -20, 69, -37, -109, 35, + ); + let r = i64x2::new(2079082344186583605, -7309198813337889445); + + assert_eq!( + r, + transmute(lsx_vsrani_b_h::<5>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrani_h_w() { + let a = i16x8::new(17089, -15383, 6606, 11797, -17230, -236, 24622, 14114); + let b = i16x8::new(4129, 30226, -29368, -25031, 7609, -18203, 28351, -1400); + let r = i64x2::new(-8724789849496477438, 2738834860014343212); + + assert_eq!( + r, + transmute(lsx_vsrani_h_w::<4>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrani_w_d() { + let a = i32x4::new(-382819185, 386357255, 35446809, 1387491503); + let b = i32x4::new(934617213, -1024433792, -516094326, 1363620957); + 
let r = i64x2::new(5130829100463783991, -5516717120280852503); + + assert_eq!( + r, + transmute(lsx_vsrani_w_d::<24>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrani_d_q() { + let a = i64x2::new(-6766658862703543347, -8101175034272755526); + let b = i64x2::new(-6351802365852683233, -7612236351910354649); + let r = i64x2::new(-58076754393848, -61807060503180); + + assert_eq!( + r, + transmute(lsx_vsrani_d_q::<81>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarni_b_h() { + let a = i8x16::new( + -71, 50, -70, -110, 89, 96, -70, 126, 10, 119, -124, -91, -44, -66, -120, -110, + ); + let b = i8x16::new( + -118, 101, -58, -7, -118, 69, 75, 88, 75, -76, -41, -37, 13, -46, -84, 68, + ); + let r = i64x2::new(-7619391791054112335, 5898503720505399127); + + assert_eq!( + r, + transmute(lsx_vsrarni_b_h::<3>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarni_h_w() { + let a = i16x8::new(-13195, 28211, 7711, -1401, -1145, -27232, 15206, 23526); + let b = i16x8::new(-21087, 18713, -7401, -30000, 25577, -10794, -28633, -25187); + let r = i64x2::new(4268193831744344627, -5202735902940537752); + + assert_eq!( + r, + transmute(lsx_vsrarni_h_w::<15>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarni_w_d() { + let a = i32x4::new(-2004832894, -772030708, -2044339682, -161994376); + let b = i32x4::new(-314559979, 1401503238, -738119523, -2036313194); + let r = i64x2::new(-64424509430, -6); + + assert_eq!( + r, + transmute(lsx_vsrarni_w_d::<59>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vsrarni_d_q() { + let a = i64x2::new(2532701208156415278, 7815982649469220899); + let b = i64x2::new(-202407401251467620, 284380589150850504); + let r = i64x2::new(-202407401251467620, 2532701208156415278); + + assert_eq!( + r, + transmute(lsx_vsrarni_d_q::<0>(transmute(a), 
transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_b_h() { + let a = i8x16::new( + -50, 30, 4, -123, 102, 17, -127, 79, -3, 54, -91, 77, -81, -74, -32, 6, + ); + let b = i8x16::new( + -125, 114, -41, -31, 70, 17, -109, 98, -43, -79, -24, -39, -79, 49, -43, 61, + ); + let r = i64x2::new(9187203054242332799, 9187483425412448383); + + assert_eq!( + r, + transmute(lsx_vssrani_b_h::<0>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_h_w() { + let a = i16x8::new(-13653, 21802, 26851, -30910, -21293, -13050, -24174, 29805); + let b = i16x8::new(9604, -27726, -18692, 147, 23503, 3941, -18536, -25864); + let r = i64x2::new(-1970324836909063, 2251786928259077); + + assert_eq!( + r, + transmute(lsx_vssrani_h_w::<28>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_w_d() { + let a = i32x4::new(640738652, 568129780, 2099035547, 1750495014); + let b = i32x4::new(2090153020, 2002243310, 567374078, -1386845950); + let r = i64x2::new(-45445048943701, 57359288242414); + + assert_eq!( + r, + transmute(lsx_vssrani_w_d::<49>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_d_q() { + let a = i64x2::new(8313689526826187568, -7067970090029512662); + let b = i64x2::new(-7547166008384655380, 9056943104343751836); + let r = i64x2::new(138197984380245, -107848664703820); + + assert_eq!( + r, + transmute(lsx_vssrani_d_q::<80>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_bu_h() { + let a = u8x16::new( + 110, 23, 112, 128, 94, 127, 141, 246, 144, 229, 149, 191, 73, 211, 119, 89, + ); + let b = i8x16::new( + 9, -116, 68, -122, 13, -17, -90, 29, -22, -126, 50, 2, -50, -121, 124, -18, + ); + let r = i64x2::new(0, 72057594037993472); + + assert_eq!( + r, + transmute(lsx_vssrani_bu_h::<14>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn 
test_lsx_vssrani_hu_w() { + let a = u16x8::new(23583, 19333, 39698, 13735, 15385, 8819, 61012, 57430); + let b = i16x8::new(-18676, -5045, 14040, 25346, -27192, -27172, 13333, 12330); + let r = i64x2::new(27021597777199104, 292064788631); + + assert_eq!( + r, + transmute(lsx_vssrani_hu_w::<23>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_wu_d() { + let a = u32x4::new(3826341651, 1946901217, 3504547080, 2702234829); + let b = i32x4::new(1013240156, -1783678601, -91667235, 485058283); + let r = i64x2::new(-4294967296, 4294967295); + + assert_eq!( + r, + transmute(lsx_vssrani_wu_d::<13>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrani_du_q() { + let a = u64x2::new(16452622598975149813, 15788367695672970142); + let b = i64x2::new(3271075037846423078, -4777595873776840194); + let r = i64x2::new(0, 0); + + assert_eq!( + r, + transmute(lsx_vssrani_du_q::<33>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_b_h() { + let a = i8x16::new( + -76, 3, 89, 123, 98, -91, 87, 101, 75, 77, -114, 117, -78, 10, -64, 13, + ); + let b = i8x16::new( + 125, 49, 97, -128, -38, 61, 29, 1, -108, 54, 28, -65, -22, -3, 71, -12, + ); + let r = i64x2::new(-9187201955687071617, 9187201950435803007); + + assert_eq!( + r, + transmute(lsx_vssrarni_b_h::<2>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_h_w() { + let a = i16x8::new(-5012, 11989, 5954, -22500, 4485, 31359, 28715, -16160); + let b = i16x8::new(29828, -15046, 20055, -7703, 18306, -411, -15337, 30957); + let r = i64x2::new(1125904201809918, -562928478781439); + + assert_eq!( + r, + transmute(lsx_vssrarni_h_w::<29>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_w_d() { + let a = i32x4::new(830116125, -782674123, 1854407155, 1495209920); + let b = i32x4::new(2038928041, -944152498, 984207668, 
-1562095866); + let r = i64x2::new(-9223372034707292160, 9223372034707292160); + + assert_eq!( + r, + transmute(lsx_vssrarni_w_d::<18>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_d_q() { + let a = i64x2::new(6798655171089504447, 7326163030789656624); + let b = i64x2::new(-2977477884402038599, -1140443471327573805); + let r = i64x2::new(-17819429239493341, 114471297356088385); + + assert_eq!( + r, + transmute(lsx_vssrarni_d_q::<70>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_bu_h() { + let a = u8x16::new( + 75, 193, 237, 8, 33, 177, 31, 133, 119, 169, 163, 98, 159, 36, 131, 221, + ); + let b = i8x16::new( + 85, 84, -17, -84, 37, -124, -96, -30, -113, 114, -49, -7, 93, -3, -69, 124, + ); + let r = i64x2::new(144115196665790465, 283673999966208); + + assert_eq!( + r, + transmute(lsx_vssrarni_bu_h::<14>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_hu_w() { + let a = u16x8::new(24614, 57570, 38427, 46010, 4180, 57175, 13134, 32047); + let b = i16x8::new(20333, -10949, -20123, -1525, 14594, -30628, -30604, -29092); + let r = i64x2::new(0, -281474976710656); + + assert_eq!( + r, + transmute(lsx_vssrarni_hu_w::<13>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_wu_d() { + let a = u32x4::new(1854465345, 2301618375, 1724286997, 3204532825); + let b = i32x4::new(-1176670423, -1482282410, 777914585, 87761646); + let r = i64x2::new(-4294967296, 0); + + assert_eq!( + r, + transmute(lsx_vssrarni_wu_d::<15>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrarni_du_q() { + let a = u64x2::new(5657125151084901446, 434040259538460448); + let b = i64x2::new(4567159404230772553, -10612253426094316); + let r = i64x2::new(0, 0); + + assert_eq!( + r, + transmute(lsx_vssrarni_du_q::<126>(transmute(a), transmute(b))) + ); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vpermi_w() { + let a = i32x4::new(213291370, -674346961, -1480878002, -1600622413); + let b = i32x4::new(-1309240039, 1335257352, 852153543, 1125109318); + let r = i64x2::new(4832307726087017671, -6360322584335202257); + + assert_eq!( + r, + transmute(lsx_vpermi_w::<158>(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vld() { + let a: [i8; 16] = [ + 127, 127, 77, 66, 64, 25, -50, -34, 2, -7, 107, -87, 45, -88, -51, 41, + ]; + let r = i64x2::new(-2391946588306178177, 3012248639850150146); + + assert_eq!(r, transmute(lsx_vld::<0>(a.as_ptr()))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vst() { + let a = i8x16::new( + -27, -57, 84, 27, -46, -85, -92, 57, 15, -67, -44, -89, -88, 84, 22, -29, + ); + let mut o: [i8; 16] = [ + -9, 24, -11, -95, -10, 78, 41, -118, 91, -113, 107, 77, -50, 113, -22, 27, + ]; + let r = i64x2::new(4153633675232462821, -2083384694265299697); + + lsx_vst::<0>(transmute(a), o.as_mut_ptr()); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrn_b_h() { + let a = i16x8::new(-6731, 13740, 8488, -2854, -3028, 6907, -57, 5317); + let b = i16x8::new(17437, 9775, -20467, -31838, 5913, 4238, -7458, 2822); + let r = i64x2::new(5981906731171643399, 0); + + assert_eq!(r, transmute(lsx_vssrlrn_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrn_h_w() { + let a = i32x4::new(1684402804, 1385352714, 1360229118, 928996904); + let b = i32x4::new(-2116426818, 1641049288, 712377342, -1572394121); + let r = i64x2::new(31243728857268226, 0); + + assert_eq!(r, transmute(lsx_vssrlrn_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrlrn_w_d() { + let a = i64x2::new(-6889047968033387497, -1417681658907465534); + let b = i64x2::new(-3890929847852895653, -7819301294522132056); + let r = i64x2::new(66519777023098879, 0); + + assert_eq!(r, 
transmute(lsx_vssrlrn_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrln_b_h() { + let a = i16x8::new(6474, 27187, -10340, 1859, 23966, -18880, 3680, 9203); + let b = i16x8::new(-14062, -29610, -24609, -8884, -1818, 32133, 29934, -6498); + let r = i64x2::new(140183437672319, 0); + + assert_eq!(r, transmute(lsx_vssrln_b_h(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrln_h_w() { + let a = i32x4::new(-476821436, -709684595, 1401465952, -1429729676); + let b = i32x4::new(-1437891045, 1546371535, -1800954476, -1892390372); + let r = i64x2::new(2820489990832156, 0); + + assert_eq!(r, transmute(lsx_vssrln_h_w(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vssrln_w_d() { + let a = i64x2::new(2563829598589943649, 1915912925013067420); + let b = i64x2::new(2034490755997557661, -3470252066162700534); + let r = i64x2::new(9223372034707292159, 0); + + assert_eq!(r, transmute(lsx_vssrln_w_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vorn_v() { + let a = i8x16::new( + -104, -56, -109, -5, -124, 58, 19, -45, -64, 70, 0, 60, -67, -86, -77, -47, + ); + let b = i8x16::new( + 18, 99, -128, 74, -16, -127, 71, 94, -99, -119, 16, 43, 121, 77, -57, -24, + ); + let r = i64x2::new(-883973744907789059, -2901520201165080862); + + assert_eq!(r, transmute(lsx_vorn_v(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vldi() { + let r = i64x2::new(-404, -404); + + assert_eq!(r, transmute(lsx_vldi::<3692>())); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vshuf_b() { + let a = i8x16::new( + 115, -20, -59, -22, 43, -85, -79, 110, -79, -97, 14, -11, 5, -43, 17, -16, + ); + let b = i8x16::new( + -49, -101, -67, -10, -11, 76, -1, -74, 10, 110, 27, -53, 105, 34, 28, 98, + ); + let c = i8x16::new(3, 10, 3, 20, 23, 29, 7, 23, 3, 3, 4, 15, 3, 10, 21, 27); + let r = 
i64x2::new(7977798459094080502, -744470568363493642); + + assert_eq!( + r, + transmute(lsx_vshuf_b(transmute(a), transmute(b), transmute(c))) + ); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vldx() { + let a: [i8; 16] = [ + -102, -39, 3, 31, 58, -5, 78, 11, -96, -111, 11, 114, 103, -3, -86, 37, + ]; + let r = i64x2::new(814864809647659418, 2714260346180964768); + + assert_eq!(r, transmute(lsx_vldx(a.as_ptr(), 0))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vstx() { + let a = i8x16::new( + 113, -106, 22, -4, 54, 56, 70, -21, -30, 0, -25, -98, 56, -46, -51, 99, + ); + let mut o: [i8; 16] = [ + -60, -30, -98, 12, 90, 96, 120, -102, -124, 54, -91, -24, 126, -80, 121, -29, + ]; + let r = i64x2::new(-1493444417618012559, 7191635320606490850); + + lsx_vstx(transmute(a), o.as_mut_ptr(), 0); + assert_eq!(r, transmute(o)); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vextl_qu_du() { + let a = u64x2::new(14708598110732796778, 2132245682694336458); + let r = i64x2::new(-3738145962976754838, 0); + + assert_eq!(r, transmute(lsx_vextl_qu_du(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bnz_b() { + let a = u8x16::new( + 84, 211, 197, 223, 221, 228, 88, 147, 165, 38, 137, 91, 54, 252, 130, 198, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lsx_bnz_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bnz_d() { + let a = u64x2::new(2935166648440262530, 9853932033129373129); + let r: i32 = 1; + + assert_eq!(r, transmute(lsx_bnz_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bnz_h() { + let a = u16x8::new(55695, 60003, 59560, 35123, 25693, 41352, 61626, 42007); + let r: i32 = 1; + + assert_eq!(r, transmute(lsx_bnz_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bnz_v() { + let a = u8x16::new( + 97, 136, 236, 21, 16, 18, 39, 247, 250, 7, 67, 251, 83, 240, 242, 151, + ); + let r: i32 = 1; + + assert_eq!(r, transmute(lsx_bnz_v(transmute(a)))); +} + 
+#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bnz_w() { + let a = u32x4::new(1172712391, 4211490091, 1954893853, 1606462106); + let r: i32 = 1; + + assert_eq!(r, transmute(lsx_bnz_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bz_b() { + let a = u8x16::new( + 15, 239, 121, 77, 200, 213, 232, 133, 158, 104, 98, 165, 77, 238, 68, 228, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lsx_bz_b(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bz_d() { + let a = u64x2::new(6051854163594201075, 9957257179760945130); + let r: i32 = 0; + + assert_eq!(r, transmute(lsx_bz_d(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bz_h() { + let a = u16x8::new(19470, 29377, 53886, 60432, 20799, 41755, 54479, 52192); + let r: i32 = 0; + + assert_eq!(r, transmute(lsx_bz_h(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bz_v() { + let a = u8x16::new( + 205, 20, 220, 220, 212, 207, 232, 167, 86, 81, 26, 68, 30, 112, 186, 234, + ); + let r: i32 = 0; + + assert_eq!(r, transmute(lsx_bz_v(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_bz_w() { + let a = u32x4::new(840335855, 1404686204, 628335401, 1171808080); + let r: i32 = 0; + + assert_eq!(r, transmute(lsx_bz_w(transmute(a)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_caf_d() { + let a = u64x2::new(4603762778598497410, 4600578720825355240); + let b = u64x2::new(4594845432849836188, 4605165420863530034); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_caf_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_caf_s() { + let a = u32x4::new(1057450480, 1041717868, 1063383650, 1052061330); + let b = u32x4::new(1058412800, 1058762495, 1028487696, 1027290752); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_caf_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_ceq_d() { + let a = 
u64x2::new(4605168921160906654, 4594290648143726556); + let b = u64x2::new(4605937250150464526, 4596769502461699132); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_ceq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_ceq_s() { + let a = u32x4::new(1022481472, 1054281004, 1061611781, 1063964926); + let b = u32x4::new(1057471620, 1064008655, 1062698831, 1064822930); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_ceq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cle_d() { + let a = u64x2::new(4594614911097184960, 4595883006410794928); + let b = u64x2::new(4596931282408842596, 4592481315209481584); + let r = i64x2::new(-1, 0); + + assert_eq!(r, transmute(lsx_vfcmp_cle_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cle_s() { + let a = u32x4::new(1056795676, 1033595408, 1059655467, 1052539946); + let b = u32x4::new(1021993344, 1043028808, 1064182329, 1054794412); + let r = i64x2::new(-4294967296, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cle_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_clt_d() { + let a = u64x2::new(4600913855630793750, 4577092243808815872); + let b = u64x2::new(4603056125735978454, 4595932368389116476); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_clt_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_clt_s() { + let a = u32x4::new(1056969130, 1052243316, 1061133360, 1024378560); + let b = u32x4::new(1040327468, 1040072248, 1063314103, 1061361061); + let r = i64x2::new(0, -1); + + assert_eq!(r, transmute(lsx_vfcmp_clt_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cne_d() { + let a = u64x2::new(4600626466477018126, 4598733447126827764); + let b = u64x2::new(4602354759349431170, 4598595124838935466); + let r = 
i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cne_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cne_s() { + let a = u32x4::new(1063546111, 1053175192, 1063179686, 1052800226); + let b = u32x4::new(1063262940, 1058010357, 1052721962, 1061295988); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cne_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cor_d() { + let a = u64x2::new(4607018705522720912, 4606390725849766769); + let b = u64x2::new(4606863361114437050, 4600753700959452152); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cor_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cor_s() { + let a = u32x4::new(993114880, 1063738833, 1020144864, 1055277186); + let b = u32x4::new(1053615382, 1065255138, 1051565294, 1041776832); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cor_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cueq_d() { + let a = u64x2::new(4589986692503775384, 4604350239975880608); + let b = u64x2::new(4603317345052528721, 4586734343919602352); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_cueq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cueq_s() { + let a = u32x4::new(1049781896, 1063241920, 1063535787, 1062764831); + let b = u32x4::new(1057082822, 1059761998, 1052599998, 1054369118); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_cueq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cule_d() { + let a = u64x2::new(4600113342137410192, 4586591372067099760); + let b = u64x2::new(4604253448175093958, 4599648167588382448); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cule_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe 
fn test_lsx_vfcmp_cule_s() { + let a = u32x4::new(1059878844, 1040845348, 1060450143, 1061437832); + let b = u32x4::new(1051100696, 1062219104, 1064568294, 1032521352); + let r = i64x2::new(-4294967296, 4294967295); + + assert_eq!(r, transmute(lsx_vfcmp_cule_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cult_d() { + let a = u64x2::new(4604916546627232568, 4599229615347667200); + let b = u64x2::new(4602944708025910986, 4606429728449082215); + let r = i64x2::new(0, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cult_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cult_s() { + let a = u32x4::new(1061581945, 1058257026, 1059733857, 1064954284); + let b = u32x4::new(1030808384, 1044268840, 1050761328, 1037308928); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_cult_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cun_d() { + let a = u64x2::new(4603128178250554600, 4601297724275716756); + let b = u64x2::new(4599145506416791474, 4602762942707610466); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_cun_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cune_d() { + let a = u64x2::new(4603159382334199523, 4603135754641654385); + let b = u64x2::new(4602895209237804084, 4598685577984089858); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cune_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cune_s() { + let a = u32x4::new(1059907972, 1059391341, 1025259296, 1050646758); + let b = u32x4::new(1049955876, 1032474200, 1023410112, 1050347912); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_cune_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_cun_s() { + let a = u32x4::new(1054871898, 1059065315, 1037157736, 1056161416); + let b = 
u32x4::new(1053288920, 1059911123, 1058695573, 1062913175); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_cun_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_saf_d() { + let a = u64x2::new(4585010456558902064, 4598376734249785852); + let b = u64x2::new(4589118818065931376, 4603302333347826011); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_saf_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_saf_s() { + let a = u32x4::new(1039827304, 1062400770, 1052695470, 1056530338); + let b = u32x4::new(1044756936, 1054667546, 1059141760, 1062203553); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_saf_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_seq_d() { + let a = u64x2::new(4604896813051509737, 4596873540510119820); + let b = u64x2::new(4594167956310606988, 4596272126122589228); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_seq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_seq_s() { + let a = u32x4::new(1060477925, 1048954814, 1059933669, 1053469148); + let b = u32x4::new(1057231588, 1051495460, 1057998997, 1049117328); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_seq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sle_d() { + let a = u64x2::new(4605211142905317821, 4601961488287203912); + let b = u64x2::new(4603919005855163252, 4594682846653946884); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_sle_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sle_s() { + let a = u32x4::new(1053671520, 1055456634, 1063294891, 1059790187); + let b = u32x4::new(1045989468, 1052518900, 1046184640, 1032417352); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_sle_s(transmute(a), 
transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_slt_d() { + let a = u64x2::new(4601902750800060998, 4605236132294100877); + let b = u64x2::new(4600564867142526828, 4585131890265864544); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_slt_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_slt_s() { + let a = u32x4::new(1054326748, 1059604229, 1060884737, 1022762624); + let b = u32x4::new(1063435026, 1062439603, 1060665555, 1059252630); + let r = i64x2::new(-1, -4294967296); + + assert_eq!(r, transmute(lsx_vfcmp_slt_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sne_d() { + let a = u64x2::new(4606672121388401433, 4604186491240191582); + let b = u64x2::new(4606789952952688555, 4605380358192261377); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sne_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sne_s() { + let a = u32x4::new(1062253602, 1053568536, 1056615768, 1055754482); + let b = u32x4::new(1055803760, 1063372602, 1062608900, 1054634370); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sne_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sor_d() { + let a = u64x2::new(4595713406002022116, 4604653971232015460); + let b = u64x2::new(4606380175568635560, 4602092067387067462); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sor_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sor_s() { + let a = u32x4::new(1058728243, 1059025743, 1012810944, 1057593472); + let b = u32x4::new(1064534350, 1035771168, 1059142426, 1034677600); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sor_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sueq_d() { + let a = u64x2::new(4605322679929877488, 
4603091890812380784); + let b = u64x2::new(4602917609947054533, 4605983209212177197); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_sueq_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sueq_s() { + let a = u32x4::new(1058057744, 1049762394, 1044222368, 1050250466); + let b = u32x4::new(1064871165, 1059796257, 1055456352, 1058662692); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_sueq_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sule_d() { + let a = u64x2::new(4606210463692472427, 4576137083667840000); + let b = u64x2::new(4594044173266256632, 4601549551994738386); + let r = i64x2::new(0, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sule_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sule_s() { + let a = u32x4::new(1054399614, 1064056006, 1040844632, 1022950656); + let b = u32x4::new(1061061244, 1051874412, 1041025316, 1056018690); + let r = i64x2::new(4294967295, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sule_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sult_d() { + let a = u64x2::new(4593772214968107560, 4602360976974434088); + let b = u64x2::new(4603848042095479627, 4605032971316970060); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sult_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sult_s() { + let a = u32x4::new(1055857986, 1049674182, 1050153588, 1054289234); + let b = u32x4::new(1053631630, 1064026599, 1058029398, 1041182304); + let r = i64x2::new(-4294967296, 4294967295); + + assert_eq!(r, transmute(lsx_vfcmp_sult_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sun_d() { + let a = u64x2::new(4600661687369290390, 4583739657744995904); + let b = u64x2::new(4560681020073292800, 4604624347352815433); + let r = 
i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_sun_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sune_d() { + let a = u64x2::new(4600101879341653256, 4602392889952410448); + let b = u64x2::new(4593947987798339484, 4603656097008761637); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sune_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sune_s() { + let a = u32x4::new(1058419193, 1062297121, 1026375712, 1061355356); + let b = u32x4::new(1049327168, 1034635272, 1042258196, 1062844003); + let r = i64x2::new(-1, -1); + + assert_eq!(r, transmute(lsx_vfcmp_sune_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vfcmp_sun_s() { + let a = u32x4::new(1044637928, 1061035459, 1051032716, 1050118110); + let b = u32x4::new(1057442863, 1064573466, 1058086753, 1015993248); + let r = i64x2::new(0, 0); + + assert_eq!(r, transmute(lsx_vfcmp_sun_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrepli_b() { + let r = i64x2::new(4340410370284600380, 4340410370284600380); + + assert_eq!(r, transmute(lsx_vrepli_b::<-452>())); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrepli_d() { + let r = i64x2::new(-330, -330); + + assert_eq!(r, transmute(lsx_vrepli_d::<-330>())); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrepli_h() { + let r = i64x2::new(39125618772344971, 39125618772344971); + + assert_eq!(r, transmute(lsx_vrepli_h::<139>())); +} + +#[simd_test(enable = "lsx")] +unsafe fn test_lsx_vrepli_w() { + let r = i64x2::new(-468151435374, -468151435374); + + assert_eq!(r, transmute(lsx_vrepli_w::<-110>())); +} diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/types.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/types.rs new file mode 100644 index 000000000000..4097164c2fae --- /dev/null +++ 
b/library/stdarch/crates/core_arch/src/loongarch64/lsx/types.rs @@ -0,0 +1,33 @@ +types! { + #![unstable(feature = "stdarch_loongarch", issue = "117427")] + + /// LOONGARCH-specific 128-bit wide vector of 16 packed `i8`. + pub struct v16i8(16 x pub(crate) i8); + + /// LOONGARCH-specific 128-bit wide vector of 8 packed `i16`. + pub struct v8i16(8 x pub(crate) i16); + + /// LOONGARCH-specific 128-bit wide vector of 4 packed `i32`. + pub struct v4i32(4 x pub(crate) i32); + + /// LOONGARCH-specific 128-bit wide vector of 2 packed `i64`. + pub struct v2i64(2 x pub(crate) i64); + + /// LOONGARCH-specific 128-bit wide vector of 16 packed `u8`. + pub struct v16u8(16 x pub(crate) u8); + + /// LOONGARCH-specific 128-bit wide vector of 8 packed `u16`. + pub struct v8u16(8 x pub(crate) u16); + + /// LOONGARCH-specific 128-bit wide vector of 4 packed `u32`. + pub struct v4u32(4 x pub(crate) u32); + + /// LOONGARCH-specific 128-bit wide vector of 2 packed `u64`. + pub struct v2u64(2 x pub(crate) u64); + + /// LOONGARCH-specific 128-bit wide vector of 4 packed `f32`. + pub struct v4f32(4 x pub(crate) f32); + + /// LOONGARCH-specific 128-bit wide vector of 2 packed `f64`. + pub struct v2f64(2 x pub(crate) f64); +} diff --git a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs new file mode 100644 index 000000000000..b1704bbb48d4 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs @@ -0,0 +1,376 @@ +//! 
`LoongArch` intrinsics + +mod lasx; +mod lsx; + +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub use self::lasx::*; +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub use self::lsx::*; + +use crate::arch::asm; + +/// Reads the 64-bit stable counter value and the counter ID +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn rdtime_d() -> (i64, isize) { + let val: i64; + let tid: isize; + asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)); + (val, tid) +} + +/// Reads the lower 32-bit stable counter value and the counter ID +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn rdtimel_w() -> (i32, isize) { + let val: i32; + let tid: isize; + asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)); + (val, tid) +} + +/// Reads the upper 32-bit stable counter value and the counter ID +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn rdtimeh_w() -> (i32, isize) { + let val: i32; + let tid: isize; + asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)); + (val, tid) +} + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.loongarch.crc.w.b.w"] + fn __crc_w_b_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crc.w.h.w"] + fn __crc_w_h_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crc.w.w.w"] + fn __crc_w_w_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crc.w.d.w"] + fn __crc_w_d_w(a: i64, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.b.w"] + fn __crcc_w_b_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.h.w"] + fn __crcc_w_h_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.w.w"] + fn __crcc_w_w_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.d.w"] + fn __crcc_w_d_w(a: i64, b: i32) -> i32; + #[link_name = "llvm.loongarch.cacop.d"] + fn 
__cacop(a: i64, b: i64, c: i64); + #[link_name = "llvm.loongarch.dbar"] + fn __dbar(a: i32); + #[link_name = "llvm.loongarch.ibar"] + fn __ibar(a: i32); + #[link_name = "llvm.loongarch.movgr2fcsr"] + fn __movgr2fcsr(a: i32, b: i32); + #[link_name = "llvm.loongarch.movfcsr2gr"] + fn __movfcsr2gr(a: i32) -> i32; + #[link_name = "llvm.loongarch.csrrd.d"] + fn __csrrd(a: i32) -> i64; + #[link_name = "llvm.loongarch.csrwr.d"] + fn __csrwr(a: i64, b: i32) -> i64; + #[link_name = "llvm.loongarch.csrxchg.d"] + fn __csrxchg(a: i64, b: i64, c: i32) -> i64; + #[link_name = "llvm.loongarch.iocsrrd.b"] + fn __iocsrrd_b(a: i32) -> i32; + #[link_name = "llvm.loongarch.iocsrrd.h"] + fn __iocsrrd_h(a: i32) -> i32; + #[link_name = "llvm.loongarch.iocsrrd.w"] + fn __iocsrrd_w(a: i32) -> i32; + #[link_name = "llvm.loongarch.iocsrrd.d"] + fn __iocsrrd_d(a: i32) -> i64; + #[link_name = "llvm.loongarch.iocsrwr.b"] + fn __iocsrwr_b(a: i32, b: i32); + #[link_name = "llvm.loongarch.iocsrwr.h"] + fn __iocsrwr_h(a: i32, b: i32); + #[link_name = "llvm.loongarch.iocsrwr.w"] + fn __iocsrwr_w(a: i32, b: i32); + #[link_name = "llvm.loongarch.iocsrwr.d"] + fn __iocsrwr_d(a: i64, b: i32); + #[link_name = "llvm.loongarch.break"] + fn __break(a: i32); + #[link_name = "llvm.loongarch.cpucfg"] + fn __cpucfg(a: i32) -> i32; + #[link_name = "llvm.loongarch.syscall"] + fn __syscall(a: i32); + #[link_name = "llvm.loongarch.asrtle.d"] + fn __asrtle(a: i64, b: i64); + #[link_name = "llvm.loongarch.asrtgt.d"] + fn __asrtgt(a: i64, b: i64); + #[link_name = "llvm.loongarch.lddir.d"] + fn __lddir(a: i64, b: i64) -> i64; + #[link_name = "llvm.loongarch.ldpte.d"] + fn __ldpte(a: i64, b: i64); + #[link_name = "llvm.loongarch.frecipe.s"] + fn __frecipe_s(a: f32) -> f32; + #[link_name = "llvm.loongarch.frecipe.d"] + fn __frecipe_d(a: f64) -> f64; + #[link_name = "llvm.loongarch.frsqrte.s"] + fn __frsqrte_s(a: f32) -> f32; + #[link_name = "llvm.loongarch.frsqrte.d"] + fn __frsqrte_d(a: f64) -> f64; +} + +/// Calculate 
the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 { + __crc_w_b_w(a, b) +} + +/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 { + __crc_w_h_w(a, b) +} + +/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 { + __crc_w_w_w(a, b) +} + +/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 { + __crc_w_d_w(a, b) +} + +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 { + __crcc_w_b_w(a, b) +} + +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 { + __crcc_w_h_w(a, b) +} + +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 { + __crcc_w_w_w(a, b) +} + +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 { + __crcc_w_d_w(a, b) +} + +/// Generates the cache operation instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn cacop(a: i64, b: i64) { + static_assert_simm_bits!(IMM12, 12); + __cacop(a, b, IMM12); +} + 
+/// Generates the memory barrier instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn dbar() { + static_assert_uimm_bits!(IMM15, 15); + __dbar(IMM15); +} + +/// Generates the instruction-fetch barrier instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn ibar() { + static_assert_uimm_bits!(IMM15, 15); + __ibar(IMM15); +} + +/// Moves data from a GPR to the FCSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn movgr2fcsr(a: i32) { + static_assert_uimm_bits!(IMM5, 5); + __movgr2fcsr(IMM5, a); +} + +/// Moves data from a FCSR to the GPR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn movfcsr2gr() -> i32 { + static_assert_uimm_bits!(IMM5, 5); + __movfcsr2gr(IMM5) +} + +/// Reads the CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn csrrd() -> i64 { + static_assert_uimm_bits!(IMM14, 14); + __csrrd(IMM14) +} + +/// Writes the CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn csrwr(a: i64) -> i64 { + static_assert_uimm_bits!(IMM14, 14); + __csrwr(a, IMM14) +} + +/// Exchanges the CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn csrxchg(a: i64, b: i64) -> i64 { + static_assert_uimm_bits!(IMM14, 14); + __csrxchg(a, b, IMM14) +} + +/// Reads the 8-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrrd_b(a: i32) -> i32 { + __iocsrrd_b(a) +} + +/// Reads the 16-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrrd_h(a: i32) -> i32 { + __iocsrrd_h(a) +} + +/// Reads the 32-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrrd_w(a: i32) -> i32 { + __iocsrrd_w(a) +} + +/// Reads the 64-bit IO-CSR +#[inline] +#[unstable(feature = 
"stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrrd_d(a: i32) -> i64 { + __iocsrrd_d(a) +} + +/// Writes the 8-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrwr_b(a: i32, b: i32) { + __iocsrwr_b(a, b) +} + +/// Writes the 16-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrwr_h(a: i32, b: i32) { + __iocsrwr_h(a, b) +} + +/// Writes the 32-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrwr_w(a: i32, b: i32) { + __iocsrwr_w(a, b) +} + +/// Writes the 64-bit IO-CSR +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn iocsrwr_d(a: i64, b: i32) { + __iocsrwr_d(a, b) +} + +/// Generates the breakpoint instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn brk() { + static_assert_uimm_bits!(IMM15, 15); + __break(IMM15); +} + +/// Reads the CPU configuration register +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn cpucfg(a: i32) -> i32 { + __cpucfg(a) +} + +/// Generates the syscall instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn syscall() { + static_assert_uimm_bits!(IMM15, 15); + __syscall(IMM15); +} + +/// Generates the less-than-or-equal asseration instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn asrtle(a: i64, b: i64) { + __asrtle(a, b); +} + +/// Generates the greater-than asseration instruction +#[inline] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn asrtgt(a: i64, b: i64) { + __asrtgt(a, b); +} + +/// Loads the page table directory entry +#[inline] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn lddir(a: i64) -> i64 { + __lddir(a, B) +} + +/// Loads the page table entry 
+#[inline] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn ldpte(a: i64) { + __ldpte(a, B) +} + +/// Calculate the approximate single-precision result of 1.0 divided +#[inline] +#[target_feature(enable = "frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn frecipe_s(a: f32) -> f32 { + __frecipe_s(a) +} + +/// Calculate the approximate double-precision result of 1.0 divided +#[inline] +#[target_feature(enable = "frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn frecipe_d(a: f64) -> f64 { + __frecipe_d(a) +} + +/// Calculate the approximate single-precision result of dividing 1.0 by the square root +#[inline] +#[target_feature(enable = "frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn frsqrte_s(a: f32) -> f32 { + __frsqrte_s(a) +} + +/// Calculate the approximate double-precision result of dividing 1.0 by the square root +#[inline] +#[target_feature(enable = "frecipe")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub unsafe fn frsqrte_d(a: f64) -> f64 { + __frsqrte_d(a) +} diff --git a/library/stdarch/crates/core_arch/src/macros.rs b/library/stdarch/crates/core_arch/src/macros.rs new file mode 100644 index 000000000000..e00b43353679 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/macros.rs @@ -0,0 +1,165 @@ +//! Utility macros. + +#[allow(unused)] +macro_rules! static_assert { + ($e:expr) => { + const { + assert!($e); + } + }; + ($e:expr, $msg:expr) => { + const { + assert!($e, $msg); + } + }; +} + +#[allow(unused_macros)] +macro_rules! 
static_assert_uimm_bits { + ($imm:ident, $bits:expr) => { + // `0 <= $imm` produces a warning if the immediate has an unsigned type + #[allow(unused_comparisons)] + { + static_assert!( + 0 <= $imm && $imm < (1 << $bits), + concat!( + stringify!($imm), + " doesn't fit in ", + stringify!($bits), + " bits", + ) + ) + } + }; +} + +#[allow(unused_macros)] +macro_rules! static_assert_simm_bits { + ($imm:ident, $bits:expr) => { + static_assert!( + (-1 << ($bits - 1)) - 1 <= $imm && $imm < (1 << ($bits - 1)), + concat!( + stringify!($imm), + " doesn't fit in ", + stringify!($bits), + " bits", + ) + ) + }; +} + +#[allow(unused)] +macro_rules! types { + ( + #![$stability_first:meta] + $( + #![$stability_more:meta] + )* + + $( + $(#[$doc:meta])* + $(stability: [$stability_already: meta])* + pub struct $name:ident($len:literal x $v:vis $elem_type:ty); + )* + ) => (types! { + $( + #![$stability_more] + )* + + $( + $(#[$doc])* + $(stability: [$stability_already])* + stability: [$stability_first] + pub struct $name($len x $v $elem_type); + )* + }); + + ( + $( + $(#[$doc:meta])* + $(stability: [$stability: meta])+ + pub struct $name:ident($len:literal x $v:vis $elem_type:ty); + )* + ) => ($( + $(#[$doc])* + $(#[$stability])+ + #[derive(Copy, Clone)] + #[allow(non_camel_case_types)] + #[repr(simd)] + #[allow(clippy::missing_inline_in_public_items)] + pub struct $name($v [$elem_type; $len]); + + impl $name { + /// Using `my_simd([x; N])` seemingly fails tests, + /// so use this internal helper for it instead. + #[inline(always)] + $v fn splat(value: $elem_type) -> $name { + #[derive(Copy, Clone)] + #[repr(simd)] + struct JustOne([$elem_type; 1]); + let one = JustOne([value]); + // SAFETY: 0 is always in-bounds because we're shuffling + // a simd type with exactly one element. + unsafe { simd_shuffle!(one, one, [0; $len]) } + } + + /// Returns an array reference containing the entire SIMD vector. 
+ $v const fn as_array(&self) -> &[$elem_type; $len] { + // SAFETY: this type is just an overaligned `[T; N]` with + // potential padding at the end, so pointer casting to a + // `&[T; N]` is safe. + // + // NOTE: This deliberately doesn't just use `&self.0` because it may soon be banned + // see https://github.com/rust-lang/compiler-team/issues/838 + unsafe { &*(self as *const Self as *const [$elem_type; $len]) } + + } + + /// Returns a mutable array reference containing the entire SIMD vector. + #[inline] + $v fn as_mut_array(&mut self) -> &mut [$elem_type; $len] { + // SAFETY: this type is just an overaligned `[T; N]` with + // potential padding at the end, so pointer casting to a + // `&mut [T; N]` is safe. + // + // NOTE: This deliberately doesn't just use `&mut self.0` because it may soon be banned + // see https://github.com/rust-lang/compiler-team/issues/838 + unsafe { &mut *(self as *mut Self as *mut [$elem_type; $len]) } + } + } + + $(#[$stability])+ + impl crate::fmt::Debug for $name { + #[inline] + fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { + crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array()) + } + } + )*); +} + +#[allow(unused)] +#[repr(simd)] +pub(crate) struct SimdShuffleIdx(pub(crate) [u32; LEN]); + +#[allow(unused)] +macro_rules! simd_shuffle { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + $crate::intrinsics::simd::simd_shuffle( + $x, + $y, + const { $crate::core_arch::macros::SimdShuffleIdx($idx) }, + ) + }}; +} + +#[allow(unused)] +macro_rules! simd_insert { + ($x:expr, $idx:expr, $val:expr $(,)?) => {{ $crate::intrinsics::simd::simd_insert($x, const { $idx }, $val) }}; +} + +#[allow(unused)] +macro_rules! simd_extract { + ($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }}; + ($x:expr, $idx:expr, $ty:ty $(,)?) 
=> {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }}; +} diff --git a/library/stdarch/crates/core_arch/src/mips/mod.rs b/library/stdarch/crates/core_arch/src/mips/mod.rs new file mode 100644 index 000000000000..1de3ffd03d1f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/mips/mod.rs @@ -0,0 +1,20 @@ +//! MIPS + +// Building this module (even if unused) for non-fp64 targets fails with an LLVM +// error. +#[cfg(target_feature = "fp64")] +mod msa; +#[cfg(target_feature = "fp64")] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub use self::msa::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Generates the trap instruction `BREAK` +#[cfg_attr(test, assert_instr(break))] +#[inline] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn break_() -> ! { + crate::intrinsics::abort() +} diff --git a/library/stdarch/crates/core_arch/src/mips/msa.rs b/library/stdarch/crates/core_arch/src/mips/msa.rs new file mode 100644 index 000000000000..563e121a7bad --- /dev/null +++ b/library/stdarch/crates/core_arch/src/mips/msa.rs @@ -0,0 +1,18398 @@ +//! MIPS SIMD Architecture intrinsics +//! +//! The reference is [MIPS Architecture for Programmers Volume IV-j: The +//! MIPS32 SIMD Architecture Module Revision 1.12][msa_ref]. +//! +//! [msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf + +#[cfg(test)] +use stdarch_test::assert_instr; + +use crate::mem; + +types! { + #![unstable(feature = "stdarch_mips", issue = "111198")] + + /// MIPS-specific 128-bit wide vector of 16 packed `i8`. + pub struct v16i8(16 x i8); + + /// MIPS-specific 128-bit wide vector of 8 packed `i16`. + pub struct v8i16(8 x i16); + + /// MIPS-specific 128-bit wide vector of 4 packed `i32`. + pub struct v4i32(4 x i32); + + /// MIPS-specific 128-bit wide vector of 2 packed `i64`. + pub struct v2i64(2 x i64); + + /// MIPS-specific 128-bit wide vector of 16 packed `u8`. 
+ pub struct v16u8(16 x u8); + + /// MIPS-specific 128-bit wide vector of 8 packed `u16`. + pub struct v8u16(8 x u16); + + /// MIPS-specific 128-bit wide vector of 4 packed `u32`. + pub struct v4u32(4 x u32); + + /// MIPS-specific 128-bit wide vector of 2 packed `u64`. + pub struct v2u64(2 x u64); + + // / MIPS-specific 128-bit wide vector of 4 packed `f32`. + pub struct v4f32(4 x f32); + + /// MIPS-specific 128-bit wide vector of 2 packed `f64`. + pub struct v2f64(2 x f64); +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.mips.add.a.b"] + fn msa_add_a_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.add.a.h"] + fn msa_add_a_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.add.a.w"] + fn msa_add_a_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.add.a.d"] + fn msa_add_a_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.adds.a.b"] + fn msa_adds_a_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.adds.a.h"] + fn msa_adds_a_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.adds.a.w"] + fn msa_adds_a_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.adds.a.d"] + fn msa_adds_a_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.adds.s.b"] + fn msa_adds_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.adds.s.h"] + fn msa_adds_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.adds.s.w"] + fn msa_adds_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.adds.s.d"] + fn msa_adds_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.adds.u.b"] + fn msa_adds_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.adds.u.h"] + fn msa_adds_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.adds.u.w"] + fn msa_adds_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.adds.u.d"] + fn msa_adds_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.addv.b"] + fn msa_addv_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = 
"llvm.mips.addv.h"] + fn msa_addv_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.addv.w"] + fn msa_addv_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.addv.d"] + fn msa_addv_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.addvi.b"] + fn msa_addvi_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.addvi.h"] + fn msa_addvi_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.addvi.w"] + fn msa_addvi_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.addvi.d"] + fn msa_addvi_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.and.v"] + fn msa_and_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.andi.b"] + fn msa_andi_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.asub.s.b"] + fn msa_asub_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.asub.s.h"] + fn msa_asub_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.asub.s.w"] + fn msa_asub_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.asub.s.d"] + fn msa_asub_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.asub.u.b"] + fn msa_asub_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.asub.u.h"] + fn msa_asub_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.asub.u.w"] + fn msa_asub_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.asub.u.d"] + fn msa_asub_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.ave.s.b"] + fn msa_ave_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.ave.s.h"] + fn msa_ave_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.ave.s.w"] + fn msa_ave_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.ave.s.d"] + fn msa_ave_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.ave.u.b"] + fn msa_ave_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.ave.u.h"] + fn msa_ave_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.ave.u.w"] + fn msa_ave_u_w(a: v4u32, b: v4u32) -> v4u32; + 
#[link_name = "llvm.mips.ave.u.d"] + fn msa_ave_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.aver.s.b"] + fn msa_aver_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.aver.s.h"] + fn msa_aver_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.aver.s.w"] + fn msa_aver_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.aver.s.d"] + fn msa_aver_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.aver.u.b"] + fn msa_aver_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.aver.u.h"] + fn msa_aver_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.aver.u.w"] + fn msa_aver_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.aver.u.d"] + fn msa_aver_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.bclr.b"] + fn msa_bclr_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.bclr.h"] + fn msa_bclr_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.bclr.w"] + fn msa_bclr_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.bclr.d"] + fn msa_bclr_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.bclri.b"] + fn msa_bclri_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.bclri.h"] + fn msa_bclri_h(a: v8u16, b: i32) -> v8u16; + #[link_name = "llvm.mips.bclri.w"] + fn msa_bclri_w(a: v4u32, b: i32) -> v4u32; + #[link_name = "llvm.mips.bclri.d"] + fn msa_bclri_d(a: v2u64, b: i32) -> v2u64; + #[link_name = "llvm.mips.binsl.b"] + fn msa_binsl_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8; + #[link_name = "llvm.mips.binsl.h"] + fn msa_binsl_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16; + #[link_name = "llvm.mips.binsl.w"] + fn msa_binsl_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32; + #[link_name = "llvm.mips.binsl.d"] + fn msa_binsl_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64; + #[link_name = "llvm.mips.binsli.b"] + fn msa_binsli_b(a: v16u8, b: v16u8, c: i32) -> v16u8; + #[link_name = "llvm.mips.binsli.h"] + fn msa_binsli_h(a: v8u16, b: v8u16, c: i32) -> v8u16; + 
#[link_name = "llvm.mips.binsli.w"] + fn msa_binsli_w(a: v4u32, b: v4u32, c: i32) -> v4u32; + #[link_name = "llvm.mips.binsli.d"] + fn msa_binsli_d(a: v2u64, b: v2u64, c: i32) -> v2u64; + #[link_name = "llvm.mips.binsr.b"] + fn msa_binsr_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8; + #[link_name = "llvm.mips.binsr.h"] + fn msa_binsr_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16; + #[link_name = "llvm.mips.binsr.w"] + fn msa_binsr_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32; + #[link_name = "llvm.mips.binsr.d"] + fn msa_binsr_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64; + #[link_name = "llvm.mips.binsri.b"] + fn msa_binsri_b(a: v16u8, b: v16u8, c: i32) -> v16u8; + #[link_name = "llvm.mips.binsri.h"] + fn msa_binsri_h(a: v8u16, b: v8u16, c: i32) -> v8u16; + #[link_name = "llvm.mips.binsri.w"] + fn msa_binsri_w(a: v4u32, b: v4u32, c: i32) -> v4u32; + #[link_name = "llvm.mips.binsri.d"] + fn msa_binsri_d(a: v2u64, b: v2u64, c: i32) -> v2u64; + #[link_name = "llvm.mips.bmnz.v"] + fn msa_bmnz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; + #[link_name = "llvm.mips.bmnzi.b"] + fn msa_bmnzi_b(a: v16u8, b: v16u8, c: i32) -> v16u8; + #[link_name = "llvm.mips.bmz.v"] + fn msa_bmz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; + #[link_name = "llvm.mips.bmzi.b"] + fn msa_bmzi_b(a: v16u8, b: v16u8, c: i32) -> v16u8; + #[link_name = "llvm.mips.bneg.b"] + fn msa_bneg_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.bneg.h"] + fn msa_bneg_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.bneg.w"] + fn msa_bneg_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.bneg.d"] + fn msa_bneg_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.bnegi.b"] + fn msa_bnegi_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.bnegi.h"] + fn msa_bnegi_h(a: v8u16, b: i32) -> v8u16; + #[link_name = "llvm.mips.bnegi.w"] + fn msa_bnegi_w(a: v4u32, b: i32) -> v4u32; + #[link_name = "llvm.mips.bnegi.d"] + fn msa_bnegi_d(a: v2u64, b: i32) -> v2u64; + #[link_name = "llvm.mips.bnz.b"] + fn 
msa_bnz_b(a: v16u8) -> i32; + #[link_name = "llvm.mips.bnz.h"] + fn msa_bnz_h(a: v8u16) -> i32; + #[link_name = "llvm.mips.bnz.w"] + fn msa_bnz_w(a: v4u32) -> i32; + #[link_name = "llvm.mips.bnz.d"] + fn msa_bnz_d(a: v2u64) -> i32; + #[link_name = "llvm.mips.bnz.v"] + fn msa_bnz_v(a: v16u8) -> i32; + #[link_name = "llvm.mips.bsel.v"] + fn msa_bsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; + #[link_name = "llvm.mips.bseli.b"] + fn msa_bseli_b(a: v16u8, b: v16u8, c: i32) -> v16u8; + #[link_name = "llvm.mips.bset.b"] + fn msa_bset_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.bset.h"] + fn msa_bset_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.bset.w"] + fn msa_bset_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.bset.d"] + fn msa_bset_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.bseti.b"] + fn msa_bseti_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.bseti.h"] + fn msa_bseti_h(a: v8u16, b: i32) -> v8u16; + #[link_name = "llvm.mips.bseti.w"] + fn msa_bseti_w(a: v4u32, b: i32) -> v4u32; + #[link_name = "llvm.mips.bseti.d"] + fn msa_bseti_d(a: v2u64, b: i32) -> v2u64; + #[link_name = "llvm.mips.bz.b"] + fn msa_bz_b(a: v16u8) -> i32; + #[link_name = "llvm.mips.bz.h"] + fn msa_bz_h(a: v8u16) -> i32; + #[link_name = "llvm.mips.bz.w"] + fn msa_bz_w(a: v4u32) -> i32; + #[link_name = "llvm.mips.bz.d"] + fn msa_bz_d(a: v2u64) -> i32; + #[link_name = "llvm.mips.bz.v"] + fn msa_bz_v(a: v16u8) -> i32; + #[link_name = "llvm.mips.ceq.b"] + fn msa_ceq_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.ceq.h"] + fn msa_ceq_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.ceq.w"] + fn msa_ceq_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.ceq.d"] + fn msa_ceq_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.ceqi.b"] + fn msa_ceqi_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.ceqi.h"] + fn msa_ceqi_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.ceqi.w"] + fn 
msa_ceqi_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.ceqi.d"] + fn msa_ceqi_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.cfcmsa"] + fn msa_cfcmsa(a: i32) -> i32; + #[link_name = "llvm.mips.cle.s.b"] + fn msa_cle_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.cle.s.h"] + fn msa_cle_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.cle.s.w"] + fn msa_cle_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.cle.s.d"] + fn msa_cle_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.cle.u.b"] + fn msa_cle_u_b(a: v16u8, b: v16u8) -> v16i8; + #[link_name = "llvm.mips.cle.u.h"] + fn msa_cle_u_h(a: v8u16, b: v8u16) -> v8i16; + #[link_name = "llvm.mips.cle.u.w"] + fn msa_cle_u_w(a: v4u32, b: v4u32) -> v4i32; + #[link_name = "llvm.mips.cle.u.d"] + fn msa_cle_u_d(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.mips.clei.s.b"] + fn msa_clei_s_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.clei.s.h"] + fn msa_clei_s_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.clei.s.w"] + fn msa_clei_s_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.clei.s.d"] + fn msa_clei_s_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.clei.u.b"] + fn msa_clei_u_b(a: v16u8, b: i32) -> v16i8; + #[link_name = "llvm.mips.clei.u.h"] + fn msa_clei_u_h(a: v8u16, b: i32) -> v8i16; + #[link_name = "llvm.mips.clei.u.w"] + fn msa_clei_u_w(a: v4u32, b: i32) -> v4i32; + #[link_name = "llvm.mips.clei.u.d"] + fn msa_clei_u_d(a: v2u64, b: i32) -> v2i64; + #[link_name = "llvm.mips.clt.s.b"] + fn msa_clt_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.clt.s.h"] + fn msa_clt_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.clt.s.w"] + fn msa_clt_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.clt.s.d"] + fn msa_clt_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.clt.u.b"] + fn msa_clt_u_b(a: v16u8, b: v16u8) -> v16i8; + #[link_name = "llvm.mips.clt.u.h"] + fn 
msa_clt_u_h(a: v8u16, b: v8u16) -> v8i16; + #[link_name = "llvm.mips.clt.u.w"] + fn msa_clt_u_w(a: v4u32, b: v4u32) -> v4i32; + #[link_name = "llvm.mips.clt.u.d"] + fn msa_clt_u_d(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.mips.clti.s.b"] + fn msa_clti_s_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.clti.s.h"] + fn msa_clti_s_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.clti.s.w"] + fn msa_clti_s_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.clti.s.d"] + fn msa_clti_s_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.clti.u.b"] + fn msa_clti_u_b(a: v16u8, b: i32) -> v16i8; + #[link_name = "llvm.mips.clti.u.h"] + fn msa_clti_u_h(a: v8u16, b: i32) -> v8i16; + #[link_name = "llvm.mips.clti.u.w"] + fn msa_clti_u_w(a: v4u32, b: i32) -> v4i32; + #[link_name = "llvm.mips.clti.u.d"] + fn msa_clti_u_d(a: v2u64, b: i32) -> v2i64; + #[link_name = "llvm.mips.copy.s.b"] + fn msa_copy_s_b(a: v16i8, b: i32) -> i32; + #[link_name = "llvm.mips.copy.s.h"] + fn msa_copy_s_h(a: v8i16, b: i32) -> i32; + #[link_name = "llvm.mips.copy.s.w"] + fn msa_copy_s_w(a: v4i32, b: i32) -> i32; + #[link_name = "llvm.mips.copy.s.d"] + fn msa_copy_s_d(a: v2i64, b: i32) -> i64; + #[link_name = "llvm.mips.copy.u.b"] + fn msa_copy_u_b(a: v16i8, b: i32) -> u32; + #[link_name = "llvm.mips.copy.u.h"] + fn msa_copy_u_h(a: v8i16, b: i32) -> u32; + #[link_name = "llvm.mips.copy.u.w"] + fn msa_copy_u_w(a: v4i32, b: i32) -> u32; + #[link_name = "llvm.mips.copy.u.d"] + fn msa_copy_u_d(a: v2i64, b: i32) -> u64; + #[link_name = "llvm.mips.ctcmsa"] + fn msa_ctcmsa(imm5: i32, a: i32) -> (); + #[link_name = "llvm.mips.div.s.b"] + fn msa_div_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.div.s.h"] + fn msa_div_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.div.s.w"] + fn msa_div_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.div.s.d"] + fn msa_div_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.div.u.b"] + fn 
msa_div_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.div.u.h"] + fn msa_div_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.div.u.w"] + fn msa_div_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.div.u.d"] + fn msa_div_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.dotp.s.h"] + fn msa_dotp_s_h(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.mips.dotp.s.w"] + fn msa_dotp_s_w(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.mips.dotp.s.d"] + fn msa_dotp_s_d(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.mips.dotp.u.h"] + fn msa_dotp_u_h(a: v16u8, b: v16u8) -> v8u16; + #[link_name = "llvm.mips.dotp.u.w"] + fn msa_dotp_u_w(a: v8u16, b: v8u16) -> v4u32; + #[link_name = "llvm.mips.dotp.u.d"] + fn msa_dotp_u_d(a: v4u32, b: v4u32) -> v2u64; + #[link_name = "llvm.mips.dpadd.s.h"] + fn msa_dpadd_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16; + #[link_name = "llvm.mips.dpadd.s.w"] + fn msa_dpadd_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32; + #[link_name = "llvm.mips.dpadd.s.d"] + fn msa_dpadd_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64; + #[link_name = "llvm.mips.dpadd.u.h"] + fn msa_dpadd_u_h(a: v8u16, b: v16u8, c: v16u8) -> v8u16; + #[link_name = "llvm.mips.dpadd.u.w"] + fn msa_dpadd_u_w(a: v4u32, b: v8u16, c: v8u16) -> v4u32; + #[link_name = "llvm.mips.dpadd.u.d"] + fn msa_dpadd_u_d(a: v2u64, b: v4u32, c: v4u32) -> v2u64; + #[link_name = "llvm.mips.dpsub.s.h"] + fn msa_dpsub_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16; + #[link_name = "llvm.mips.dpsub.s.w"] + fn msa_dpsub_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32; + #[link_name = "llvm.mips.dpsub.s.d"] + fn msa_dpsub_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64; + #[link_name = "llvm.mips.dpsub.u.h"] + fn msa_dpsub_u_h(a: v8i16, b: v16u8, c: v16u8) -> v8i16; + #[link_name = "llvm.mips.dpsub.u.w"] + fn msa_dpsub_u_w(a: v4i32, b: v8u16, c: v8u16) -> v4i32; + #[link_name = "llvm.mips.dpsub.u.d"] + fn msa_dpsub_u_d(a: v2i64, b: v4u32, c: v4u32) -> v2i64; + #[link_name 
= "llvm.mips.fadd.w"] + fn msa_fadd_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fadd.d"] + fn msa_fadd_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.fcaf.w"] + fn msa_fcaf_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcaf.d"] + fn msa_fcaf_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fceq.w"] + fn msa_fceq_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fceq.d"] + fn msa_fceq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fclass.w"] + fn msa_fclass_w(a: v4f32) -> v4i32; + #[link_name = "llvm.mips.fclass.d"] + fn msa_fclass_d(a: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcle.w"] + fn msa_fcle_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcle.d"] + fn msa_fcle_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fclt.w"] + fn msa_fclt_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fclt.d"] + fn msa_fclt_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcne.w"] + fn msa_fcne_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcne.d"] + fn msa_fcne_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcor.w"] + fn msa_fcor_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcor.d"] + fn msa_fcor_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcueq.w"] + fn msa_fcueq_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcueq.d"] + fn msa_fcueq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcule.w"] + fn msa_fcule_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcule.d"] + fn msa_fcule_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcult.w"] + fn msa_fcult_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcult.d"] + fn msa_fcult_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcun.w"] + fn msa_fcun_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcun.d"] + fn msa_fcun_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fcune.w"] + fn 
msa_fcune_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fcune.d"] + fn msa_fcune_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fdiv.w"] + fn msa_fdiv_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fdiv.d"] + fn msa_fdiv_d(a: v2f64, b: v2f64) -> v2f64; + // FIXME: 16-bit floats + // #[link_name = "llvm.mips.fexdo.h"] + // fn msa_fexdo_h(a: v4f32, b: v4f32) -> f16x8; + #[link_name = "llvm.mips.fexdo.w"] + fn msa_fexdo_w(a: v2f64, b: v2f64) -> v4f32; + #[link_name = "llvm.mips.fexp2.w"] + fn msa_fexp2_w(a: v4f32, b: v4i32) -> v4f32; + #[link_name = "llvm.mips.fexp2.d"] + fn msa_fexp2_d(a: v2f64, b: v2i64) -> v2f64; + // FIXME: 16-bit floats + // #[link_name = "llvm.mips.fexupl.w"] + // fn msa_fexupl_w(a: f16x8) -> v4f32; + #[link_name = "llvm.mips.fexupl.d"] + fn msa_fexupl_d(a: v4f32) -> v2f64; + // FIXME: 16-bit floats + // #[link_name = "llvm.mips.fexupr.w"] + // fn msa_fexupr_w(a: f16x8) -> v4f32; + #[link_name = "llvm.mips.fexupr.d"] + fn msa_fexupr_d(a: v4f32) -> v2f64; + #[link_name = "llvm.mips.ffint.s.w"] + fn msa_ffint_s_w(a: v4i32) -> v4f32; + #[link_name = "llvm.mips.ffint.s.d"] + fn msa_ffint_s_d(a: v2i64) -> v2f64; + #[link_name = "llvm.mips.ffint.u.w"] + fn msa_ffint_u_w(a: v4u32) -> v4f32; + #[link_name = "llvm.mips.ffint.u.d"] + fn msa_ffint_u_d(a: v2u64) -> v2f64; + #[link_name = "llvm.mips.ffql.w"] + fn msa_ffql_w(a: v8i16) -> v4f32; + #[link_name = "llvm.mips.ffql.d"] + fn msa_ffql_d(a: v4i32) -> v2f64; + #[link_name = "llvm.mips.ffqr.w"] + fn msa_ffqr_w(a: v8i16) -> v4f32; + #[link_name = "llvm.mips.ffqr.d"] + fn msa_ffqr_d(a: v4i32) -> v2f64; + #[link_name = "llvm.mips.fill.b"] + fn msa_fill_b(a: i32) -> v16i8; + #[link_name = "llvm.mips.fill.h"] + fn msa_fill_h(a: i32) -> v8i16; + #[link_name = "llvm.mips.fill.w"] + fn msa_fill_w(a: i32) -> v4i32; + #[link_name = "llvm.mips.fill.d"] + fn msa_fill_d(a: i64) -> v2i64; + #[link_name = "llvm.mips.flog2.w"] + fn msa_flog2_w(a: v4f32) -> v4f32; + #[link_name = 
"llvm.mips.flog2.d"] + fn msa_flog2_d(a: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmadd.w"] + fn msa_fmadd_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmadd.d"] + fn msa_fmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmax.w"] + fn msa_fmax_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmax.d"] + fn msa_fmax_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmax.a.w"] + fn msa_fmax_a_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmax.a.d"] + fn msa_fmax_a_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmin.w"] + fn msa_fmin_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmin.d"] + fn msa_fmin_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmin.a.w"] + fn msa_fmin_a_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmin.a.d"] + fn msa_fmin_a_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmsub.w"] + fn msa_fmsub_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmsub.d"] + fn msa_fmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; + #[link_name = "llvm.mips.fmul.w"] + fn msa_fmul_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fmul.d"] + fn msa_fmul_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.frint.w"] + fn msa_frint_w(a: v4f32) -> v4f32; + #[link_name = "llvm.mips.frint.d"] + fn msa_frint_d(a: v2f64) -> v2f64; + #[link_name = "llvm.mips.frcp.w"] + fn msa_frcp_w(a: v4f32) -> v4f32; + #[link_name = "llvm.mips.frcp.d"] + fn msa_frcp_d(a: v2f64) -> v2f64; + #[link_name = "llvm.mips.frsqrt.w"] + fn msa_frsqrt_w(a: v4f32) -> v4f32; + #[link_name = "llvm.mips.frsqrt.d"] + fn msa_frsqrt_d(a: v2f64) -> v2f64; + #[link_name = "llvm.mips.fsaf.w"] + fn msa_fsaf_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsaf.d"] + fn msa_fsaf_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fseq.w"] + fn msa_fseq_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fseq.d"] + 
fn msa_fseq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsle.w"] + fn msa_fsle_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsle.d"] + fn msa_fsle_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fslt.w"] + fn msa_fslt_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fslt.d"] + fn msa_fslt_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsne.w"] + fn msa_fsne_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsne.d"] + fn msa_fsne_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsor.w"] + fn msa_fsor_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsor.d"] + fn msa_fsor_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsqrt.w"] + fn msa_fsqrt_w(a: v4f32) -> v4f32; + #[link_name = "llvm.mips.fsqrt.d"] + fn msa_fsqrt_d(a: v2f64) -> v2f64; + #[link_name = "llvm.mips.fsub.w"] + fn msa_fsub_w(a: v4f32, b: v4f32) -> v4f32; + #[link_name = "llvm.mips.fsub.d"] + fn msa_fsub_d(a: v2f64, b: v2f64) -> v2f64; + #[link_name = "llvm.mips.fsueq.w"] + fn msa_fsueq_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsueq.d"] + fn msa_fsueq_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsule.w"] + fn msa_fsule_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsule.d"] + fn msa_fsule_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsult.w"] + fn msa_fsult_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsult.d"] + fn msa_fsult_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsun.w"] + fn msa_fsun_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsun.d"] + fn msa_fsun_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.fsune.w"] + fn msa_fsune_w(a: v4f32, b: v4f32) -> v4i32; + #[link_name = "llvm.mips.fsune.d"] + fn msa_fsune_d(a: v2f64, b: v2f64) -> v2i64; + #[link_name = "llvm.mips.ftint.s.w"] + fn msa_ftint_s_w(a: v4f32) -> v4i32; + #[link_name = "llvm.mips.ftint.s.d"] + fn msa_ftint_s_d(a: v2f64) -> 
v2i64; + #[link_name = "llvm.mips.ftint.u.w"] + fn msa_ftint_u_w(a: v4f32) -> v4u32; + #[link_name = "llvm.mips.ftint.u.d"] + fn msa_ftint_u_d(a: v2f64) -> v2u64; + #[link_name = "llvm.mips.ftq.h"] + fn msa_ftq_h(a: v4f32, b: v4f32) -> v8i16; + #[link_name = "llvm.mips.ftq.w"] + fn msa_ftq_w(a: v2f64, b: v2f64) -> v4i32; + #[link_name = "llvm.mips.ftrunc.s.w"] + fn msa_ftrunc_s_w(a: v4f32) -> v4i32; + #[link_name = "llvm.mips.ftrunc.s.d"] + fn msa_ftrunc_s_d(a: v2f64) -> v2i64; + #[link_name = "llvm.mips.ftrunc.u.w"] + fn msa_ftrunc_u_w(a: v4f32) -> v4u32; + #[link_name = "llvm.mips.ftrunc.u.d"] + fn msa_ftrunc_u_d(a: v2f64) -> v2u64; + #[link_name = "llvm.mips.hadd.s.h"] + fn msa_hadd_s_h(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.mips.hadd.s.w"] + fn msa_hadd_s_w(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.mips.hadd.s.d"] + fn msa_hadd_s_d(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.mips.hadd.u.h"] + fn msa_hadd_u_h(a: v16u8, b: v16u8) -> v8u16; + #[link_name = "llvm.mips.hadd.u.w"] + fn msa_hadd_u_w(a: v8u16, b: v8u16) -> v4u32; + #[link_name = "llvm.mips.hadd.u.d"] + fn msa_hadd_u_d(a: v4u32, b: v4u32) -> v2u64; + #[link_name = "llvm.mips.hsub.s.h"] + fn msa_hsub_s_h(a: v16i8, b: v16i8) -> v8i16; + #[link_name = "llvm.mips.hsub.s.w"] + fn msa_hsub_s_w(a: v8i16, b: v8i16) -> v4i32; + #[link_name = "llvm.mips.hsub.s.d"] + fn msa_hsub_s_d(a: v4i32, b: v4i32) -> v2i64; + #[link_name = "llvm.mips.hsub.u.h"] + fn msa_hsub_u_h(a: v16u8, b: v16u8) -> v8i16; + #[link_name = "llvm.mips.hsub.u.w"] + fn msa_hsub_u_w(a: v8u16, b: v8u16) -> v4i32; + #[link_name = "llvm.mips.hsub.u.d"] + fn msa_hsub_u_d(a: v4u32, b: v4u32) -> v2i64; + #[link_name = "llvm.mips.ilvev.b"] + fn msa_ilvev_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.ilvev.h"] + fn msa_ilvev_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.ilvev.w"] + fn msa_ilvev_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.ilvev.d"] + fn msa_ilvev_d(a: v2i64, b: 
v2i64) -> v2i64; + #[link_name = "llvm.mips.ilvl.b"] + fn msa_ilvl_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.ilvl.h"] + fn msa_ilvl_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.ilvl.w"] + fn msa_ilvl_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.ilvl.d"] + fn msa_ilvl_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.ilvod.b"] + fn msa_ilvod_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.ilvod.h"] + fn msa_ilvod_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.ilvod.w"] + fn msa_ilvod_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.ilvod.d"] + fn msa_ilvod_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.ilvr.b"] + fn msa_ilvr_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.ilvr.h"] + fn msa_ilvr_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.ilvr.w"] + fn msa_ilvr_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.ilvr.d"] + fn msa_ilvr_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.insert.b"] + fn msa_insert_b(a: v16i8, b: i32, c: i32) -> v16i8; + #[link_name = "llvm.mips.insert.h"] + fn msa_insert_h(a: v8i16, b: i32, c: i32) -> v8i16; + #[link_name = "llvm.mips.insert.w"] + fn msa_insert_w(a: v4i32, b: i32, c: i32) -> v4i32; + #[link_name = "llvm.mips.insert.d"] + fn msa_insert_d(a: v2i64, b: i32, c: i64) -> v2i64; + #[link_name = "llvm.mips.insve.b"] + fn msa_insve_b(a: v16i8, b: i32, c: v16i8) -> v16i8; + #[link_name = "llvm.mips.insve.h"] + fn msa_insve_h(a: v8i16, b: i32, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.insve.w"] + fn msa_insve_w(a: v4i32, b: i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.insve.d"] + fn msa_insve_d(a: v2i64, b: i32, c: v2i64) -> v2i64; + #[link_name = "llvm.mips.ld.b"] + fn msa_ld_b(mem_addr: *mut u8, b: i32) -> v16i8; + #[link_name = "llvm.mips.ld.h"] + fn msa_ld_h(mem_addr: *mut u8, b: i32) -> v8i16; + #[link_name = "llvm.mips.ld.w"] + fn msa_ld_w(mem_addr: *mut u8, b: i32) -> v4i32; 
+ #[link_name = "llvm.mips.ld.d"] + fn msa_ld_d(mem_addr: *mut u8, b: i32) -> v2i64; + #[link_name = "llvm.mips.ldi.b"] + fn msa_ldi_b(a: i32) -> v16i8; + #[link_name = "llvm.mips.ldi.h"] + fn msa_ldi_h(a: i32) -> v8i16; + #[link_name = "llvm.mips.ldi.w"] + fn msa_ldi_w(a: i32) -> v4i32; + #[link_name = "llvm.mips.ldi.d"] + fn msa_ldi_d(a: i32) -> v2i64; + #[link_name = "llvm.mips.madd.q.h"] + fn msa_madd_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.madd.q.w"] + fn msa_madd_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.maddr.q.h"] + fn msa_maddr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.maddr.q.w"] + fn msa_maddr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.maddv.b"] + fn msa_maddv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.mips.maddv.h"] + fn msa_maddv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.maddv.w"] + fn msa_maddv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.maddv.d"] + fn msa_maddv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.mips.max.a.b"] + fn msa_max_a_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.max.a.h"] + fn msa_max_a_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.max.a.w"] + fn msa_max_a_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.max.a.d"] + fn msa_max_a_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.max.s.b"] + fn msa_max_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.max.s.h"] + fn msa_max_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.max.s.w"] + fn msa_max_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.max.s.d"] + fn msa_max_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.max.u.b"] + fn msa_max_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.max.u.h"] + fn msa_max_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.max.u.w"] + 
fn msa_max_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.max.u.d"] + fn msa_max_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.maxi.s.b"] + fn msa_maxi_s_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.maxi.s.h"] + fn msa_maxi_s_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.maxi.s.w"] + fn msa_maxi_s_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.maxi.s.d"] + fn msa_maxi_s_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.maxi.u.b"] + fn msa_maxi_u_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.maxi.u.h"] + fn msa_maxi_u_h(a: v8u16, b: i32) -> v8u16; + #[link_name = "llvm.mips.maxi.u.w"] + fn msa_maxi_u_w(a: v4u32, b: i32) -> v4u32; + #[link_name = "llvm.mips.maxi.u.d"] + fn msa_maxi_u_d(a: v2u64, b: i32) -> v2u64; + #[link_name = "llvm.mips.min.a.b"] + fn msa_min_a_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.min.a.h"] + fn msa_min_a_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.min.a.w"] + fn msa_min_a_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.min.a.d"] + fn msa_min_a_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.min.s.b"] + fn msa_min_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.min.s.h"] + fn msa_min_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.min.s.w"] + fn msa_min_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.min.s.d"] + fn msa_min_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.min.u.b"] + fn msa_min_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.min.u.h"] + fn msa_min_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.min.u.w"] + fn msa_min_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.min.u.d"] + fn msa_min_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.mini.s.b"] + fn msa_mini_s_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.mini.s.h"] + fn msa_mini_s_h(a: v8i16, b: i32) -> v8i16; + #[link_name = 
"llvm.mips.mini.s.w"] + fn msa_mini_s_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.mini.s.d"] + fn msa_mini_s_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.mini.u.b"] + fn msa_mini_u_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.mini.u.h"] + fn msa_mini_u_h(a: v8u16, b: i32) -> v8u16; + #[link_name = "llvm.mips.mini.u.w"] + fn msa_mini_u_w(a: v4u32, b: i32) -> v4u32; + #[link_name = "llvm.mips.mini.u.d"] + fn msa_mini_u_d(a: v2u64, b: i32) -> v2u64; + #[link_name = "llvm.mips.mod.s.b"] + fn msa_mod_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.mod.s.h"] + fn msa_mod_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.mod.s.w"] + fn msa_mod_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.mod.s.d"] + fn msa_mod_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.mod.u.b"] + fn msa_mod_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.mod.u.h"] + fn msa_mod_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.mod.u.w"] + fn msa_mod_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.mod.u.d"] + fn msa_mod_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.move.v"] + fn msa_move_v(a: v16i8) -> v16i8; + #[link_name = "llvm.mips.msub.q.h"] + fn msa_msub_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.msub.q.w"] + fn msa_msub_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.msubr.q.h"] + fn msa_msubr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.msubr.q.w"] + fn msa_msubr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.msubv.b"] + fn msa_msubv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.mips.msubv.h"] + fn msa_msubv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.msubv.w"] + fn msa_msubv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.msubv.d"] + fn msa_msubv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + 
#[link_name = "llvm.mips.mul.q.h"] + fn msa_mul_q_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.mul.q.w"] + fn msa_mul_q_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.mulr.q.h"] + fn msa_mulr_q_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.mulr.q.w"] + fn msa_mulr_q_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.mulv.b"] + fn msa_mulv_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.mulv.h"] + fn msa_mulv_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.mulv.w"] + fn msa_mulv_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.mulv.d"] + fn msa_mulv_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.nloc.b"] + fn msa_nloc_b(a: v16i8) -> v16i8; + #[link_name = "llvm.mips.nloc.h"] + fn msa_nloc_h(a: v8i16) -> v8i16; + #[link_name = "llvm.mips.nloc.w"] + fn msa_nloc_w(a: v4i32) -> v4i32; + #[link_name = "llvm.mips.nloc.d"] + fn msa_nloc_d(a: v2i64) -> v2i64; + #[link_name = "llvm.mips.nlzc.b"] + fn msa_nlzc_b(a: v16i8) -> v16i8; + #[link_name = "llvm.mips.nlzc.h"] + fn msa_nlzc_h(a: v8i16) -> v8i16; + #[link_name = "llvm.mips.nlzc.w"] + fn msa_nlzc_w(a: v4i32) -> v4i32; + #[link_name = "llvm.mips.nlzc.d"] + fn msa_nlzc_d(a: v2i64) -> v2i64; + #[link_name = "llvm.mips.nor.v"] + fn msa_nor_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.nori.b"] + fn msa_nori_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.or.v"] + fn msa_or_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.ori.b"] + fn msa_ori_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.pckev.b"] + fn msa_pckev_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.pckev.h"] + fn msa_pckev_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.pckev.w"] + fn msa_pckev_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.pckev.d"] + fn msa_pckev_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.pckod.b"] + fn msa_pckod_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = 
"llvm.mips.pckod.h"] + fn msa_pckod_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.pckod.w"] + fn msa_pckod_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.pckod.d"] + fn msa_pckod_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.pcnt.b"] + fn msa_pcnt_b(a: v16i8) -> v16i8; + #[link_name = "llvm.mips.pcnt.h"] + fn msa_pcnt_h(a: v8i16) -> v8i16; + #[link_name = "llvm.mips.pcnt.w"] + fn msa_pcnt_w(a: v4i32) -> v4i32; + #[link_name = "llvm.mips.pcnt.d"] + fn msa_pcnt_d(a: v2i64) -> v2i64; + #[link_name = "llvm.mips.sat.s.b"] + fn msa_sat_s_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.sat.s.h"] + fn msa_sat_s_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.sat.s.w"] + fn msa_sat_s_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.sat.s.d"] + fn msa_sat_s_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.sat.u.b"] + fn msa_sat_u_b(a: v16u8, b: i32) -> v16u8; + #[link_name = "llvm.mips.sat.u.h"] + fn msa_sat_u_h(a: v8u16, b: i32) -> v8u16; + #[link_name = "llvm.mips.sat.u.w"] + fn msa_sat_u_w(a: v4u32, b: i32) -> v4u32; + #[link_name = "llvm.mips.sat.u.d"] + fn msa_sat_u_d(a: v2u64, b: i32) -> v2u64; + #[link_name = "llvm.mips.shf.b"] + fn msa_shf_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.shf.h"] + fn msa_shf_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.shf.w"] + fn msa_shf_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.sld.b"] + fn msa_sld_b(a: v16i8, b: v16i8, c: i32) -> v16i8; + #[link_name = "llvm.mips.sld.h"] + fn msa_sld_h(a: v8i16, b: v8i16, c: i32) -> v8i16; + #[link_name = "llvm.mips.sld.w"] + fn msa_sld_w(a: v4i32, b: v4i32, c: i32) -> v4i32; + #[link_name = "llvm.mips.sld.d"] + fn msa_sld_d(a: v2i64, b: v2i64, c: i32) -> v2i64; + #[link_name = "llvm.mips.sldi.b"] + fn msa_sldi_b(a: v16i8, b: v16i8, c: i32) -> v16i8; + #[link_name = "llvm.mips.sldi.h"] + fn msa_sldi_h(a: v8i16, b: v8i16, c: i32) -> v8i16; + #[link_name = "llvm.mips.sldi.w"] + fn 
msa_sldi_w(a: v4i32, b: v4i32, c: i32) -> v4i32; + #[link_name = "llvm.mips.sldi.d"] + fn msa_sldi_d(a: v2i64, b: v2i64, c: i32) -> v2i64; + #[link_name = "llvm.mips.sll.b"] + fn msa_sll_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.sll.h"] + fn msa_sll_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.sll.w"] + fn msa_sll_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.sll.d"] + fn msa_sll_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.slli.b"] + fn msa_slli_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.slli.h"] + fn msa_slli_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.slli.w"] + fn msa_slli_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.slli.d"] + fn msa_slli_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.splat.b"] + fn msa_splat_b(a: v16i8, c: i32) -> v16i8; + #[link_name = "llvm.mips.splat.h"] + fn msa_splat_h(a: v8i16, c: i32) -> v8i16; + #[link_name = "llvm.mips.splat.w"] + fn msa_splat_w(a: v4i32, w: i32) -> v4i32; + #[link_name = "llvm.mips.splat.d"] + fn msa_splat_d(a: v2i64, c: i32) -> v2i64; + #[link_name = "llvm.mips.splati.b"] + fn msa_splati_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.splati.h"] + fn msa_splati_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.splati.w"] + fn msa_splati_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.splati.d"] + fn msa_splati_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.sra.b"] + fn msa_sra_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.sra.h"] + fn msa_sra_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.sra.w"] + fn msa_sra_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.sra.d"] + fn msa_sra_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.srai.b"] + fn msa_srai_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.srai.h"] + fn msa_srai_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.srai.w"] + fn msa_srai_w(a: v4i32, b: i32) -> 
v4i32; + #[link_name = "llvm.mips.srai.d"] + fn msa_srai_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.srar.b"] + fn msa_srar_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.srar.h"] + fn msa_srar_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.srar.w"] + fn msa_srar_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.srar.d"] + fn msa_srar_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.srari.b"] + fn msa_srari_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.srari.h"] + fn msa_srari_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.srari.w"] + fn msa_srari_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.srari.d"] + fn msa_srari_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.srl.b"] + fn msa_srl_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.srl.h"] + fn msa_srl_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.srl.w"] + fn msa_srl_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.srl.d"] + fn msa_srl_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.srli.b"] + fn msa_srli_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.srli.h"] + fn msa_srli_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.srli.w"] + fn msa_srli_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.srli.d"] + fn msa_srli_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.srlr.b"] + fn msa_srlr_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.srlr.h"] + fn msa_srlr_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.srlr.w"] + fn msa_srlr_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.srlr.d"] + fn msa_srlr_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.srlri.b"] + fn msa_srlri_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.srlri.h"] + fn msa_srlri_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.srlri.w"] + fn msa_srlri_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.srlri.d"] + fn 
msa_srlri_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.st.b"] + fn msa_st_b(a: v16i8, mem_addr: *mut u8, imm_s10: i32) -> (); + #[link_name = "llvm.mips.st.h"] + fn msa_st_h(a: v8i16, mem_addr: *mut u8, imm_s11: i32) -> (); + #[link_name = "llvm.mips.st.w"] + fn msa_st_w(a: v4i32, mem_addr: *mut u8, imm_s12: i32) -> (); + #[link_name = "llvm.mips.st.d"] + fn msa_st_d(a: v2i64, mem_addr: *mut u8, imm_s13: i32) -> (); + #[link_name = "llvm.mips.subs.s.b"] + fn msa_subs_s_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.subs.s.h"] + fn msa_subs_s_h(a: v8i16, b: v8i16) -> v8i16; + #[link_name = "llvm.mips.subs.s.w"] + fn msa_subs_s_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.subs.s.d"] + fn msa_subs_s_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.subs.u.b"] + fn msa_subs_u_b(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.subs.u.h"] + fn msa_subs_u_h(a: v8u16, b: v8u16) -> v8u16; + #[link_name = "llvm.mips.subs.u.w"] + fn msa_subs_u_w(a: v4u32, b: v4u32) -> v4u32; + #[link_name = "llvm.mips.subs.u.d"] + fn msa_subs_u_d(a: v2u64, b: v2u64) -> v2u64; + #[link_name = "llvm.mips.subsus.u.b"] + fn msa_subsus_u_b(a: v16u8, b: v16i8) -> v16u8; + #[link_name = "llvm.mips.subsus.u.h"] + fn msa_subsus_u_h(a: v8u16, b: v8i16) -> v8u16; + #[link_name = "llvm.mips.subsus.u.w"] + fn msa_subsus_u_w(a: v4u32, b: v4i32) -> v4u32; + #[link_name = "llvm.mips.subsus.u.d"] + fn msa_subsus_u_d(a: v2u64, b: v2i64) -> v2u64; + #[link_name = "llvm.mips.subsuu.s.b"] + fn msa_subsuu_s_b(a: v16u8, b: v16u8) -> v16i8; + #[link_name = "llvm.mips.subsuu.s.h"] + fn msa_subsuu_s_h(a: v8u16, b: v8u16) -> v8i16; + #[link_name = "llvm.mips.subsuu.s.w"] + fn msa_subsuu_s_w(a: v4u32, b: v4u32) -> v4i32; + #[link_name = "llvm.mips.subsuu.s.d"] + fn msa_subsuu_s_d(a: v2u64, b: v2u64) -> v2i64; + #[link_name = "llvm.mips.subv.b"] + fn msa_subv_b(a: v16i8, b: v16i8) -> v16i8; + #[link_name = "llvm.mips.subv.h"] + fn msa_subv_h(a: v8i16, b: v8i16) -> 
v8i16; + #[link_name = "llvm.mips.subv.w"] + fn msa_subv_w(a: v4i32, b: v4i32) -> v4i32; + #[link_name = "llvm.mips.subv.d"] + fn msa_subv_d(a: v2i64, b: v2i64) -> v2i64; + #[link_name = "llvm.mips.subvi.b"] + fn msa_subvi_b(a: v16i8, b: i32) -> v16i8; + #[link_name = "llvm.mips.subvi.h"] + fn msa_subvi_h(a: v8i16, b: i32) -> v8i16; + #[link_name = "llvm.mips.subvi.w"] + fn msa_subvi_w(a: v4i32, b: i32) -> v4i32; + #[link_name = "llvm.mips.subvi.d"] + fn msa_subvi_d(a: v2i64, b: i32) -> v2i64; + #[link_name = "llvm.mips.vshf.b"] + fn msa_vshf_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; + #[link_name = "llvm.mips.vshf.h"] + fn msa_vshf_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; + #[link_name = "llvm.mips.vshf.w"] + fn msa_vshf_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; + #[link_name = "llvm.mips.vshf.d"] + fn msa_vshf_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; + #[link_name = "llvm.mips.xor.v"] + fn msa_xor_v(a: v16u8, b: v16u8) -> v16u8; + #[link_name = "llvm.mips.xori.b"] + fn msa_xori_b(a: v16u8, b: i32) -> v16u8; +} + +/// Vector Add Absolute Values. +/// +/// The absolute values of the elements in vector in `a` (sixteen signed 8-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(add_a.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_add_a_b(a: v16i8, b: v16i8) -> v16i8 { + msa_add_a_b(a, mem::transmute(b)) +} + +/// Vector Add Absolute Values +/// +/// The absolute values of the elements in vector in `a` (eight signed 16-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(add_a.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_add_a_h(a: v8i16, b: v8i16) -> v8i16 { + msa_add_a_h(a, mem::transmute(b)) +} + +/// Vector Add Absolute Values +/// +/// The absolute values of the elements in vector in `a` (four signed 32-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(add_a.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_add_a_w(a: v4i32, b: v4i32) -> v4i32 { + msa_add_a_w(a, mem::transmute(b)) +} + +/// Vector Add Absolute Values +/// +/// The absolute values of the elements in vector in `a` (two signed 64-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (two signed 64-bit integer numbers). +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(add_a.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_add_a_d(a: v2i64, b: v2i64) -> v2i64 { + msa_add_a_d(a, mem::transmute(b)) +} + +/// Signed Saturated Vector Saturated Add of Absolute Values +/// +/// The absolute values of the elements in vector in `a` (sixteen signed 8-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The saturated signed result is written to vector (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_a.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_a_b(a: v16i8, b: v16i8) -> v16i8 { + msa_adds_a_b(a, mem::transmute(b)) +} + +/// Vector Saturated Add of Absolute Values +/// +/// The absolute values of the elements in vector in `a` (eight signed 16-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (eight signed 16-bit integer numbers). +/// The saturated signed result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_a.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_a_h(a: v8i16, b: v8i16) -> v8i16 { + msa_adds_a_h(a, mem::transmute(b)) +} + +/// Vector Saturated Add of Absolute Values +/// +/// The absolute values of the elements in vector in `a` (four signed 32-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (four signed 32-bit integer numbers). +/// The saturated signed result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_a.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_a_w(a: v4i32, b: v4i32) -> v4i32 { + msa_adds_a_w(a, mem::transmute(b)) +} + +/// Vector Saturated Add of Absolute Values +/// +/// The absolute values of the elements in vector in `a` (two signed 64-bit integer numbers) +/// are added to the absolute values of the elements in vector `b` (two signed 64-bit integer numbers). +/// The saturated signed result is written to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_a.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_a_d(a: v2i64, b: v2i64) -> v2i64 { + msa_adds_a_d(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Add of Signed Values +/// +/// The elements in vector in `a` (sixteen signed 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_adds_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Add of Signed Values +/// +/// The elements in vector in `a` (eight signed 16-bit integer numbers) +/// are added to the elements in vector `b` (eight signed 16-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_adds_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Add of Signed Values +/// +/// The elements in vector in `a` (four signed 32-bit integer numbers) +/// are added to the elements in vector `b` (four signed 32-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_adds_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Add of Signed Values +/// +/// The elements in vector in `a` (two signed 64-bit integer numbers) +/// are added to the elements in vector `b` (two signed 64-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_adds_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Add of Unsigned Values +/// +/// The elements in vector in `a` (sixteen unsigned 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen unsigned 8-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_adds_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Add of Unsigned Values +/// +/// The elements in vector in `a` (eight unsigned 16-bit integer numbers) +/// are added to the elements in vector `b` (eight unsigned 16-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (eight unsigned 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_adds_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Add of Unsigned Values +/// +/// The elements in vector in `a` (four unsigned 32-bit integer numbers) +/// are added to the elements in vector `b` (four unsigned 32-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_adds_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Add of Unsigned Values +/// +/// The elements in vector in `a` (two unsigned 64-bit integer numbers) +/// are added to the elements in vector `b` (two unsigned 64-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(adds_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_adds_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_adds_u_d(a, mem::transmute(b)) +} + +/// Vector Add +/// +/// The elements in vector in `a` (sixteen signed 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The result is written to vector (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addv.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addv_b(a: v16i8, b: v16i8) -> v16i8 { + msa_addv_b(a, mem::transmute(b)) +} + +/// Vector Add +/// +/// The elements in vector in `a` (eight signed 16-bit integer numbers) +/// are added to the elements in vector `b` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addv.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addv_h(a: v8i16, b: v8i16) -> v8i16 { + msa_addv_h(a, mem::transmute(b)) +} + +/// Vector Add +/// +/// The elements in vector in `a` (four signed 32-bit integer numbers) +/// are added to the elements in vector `b` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addv.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addv_w(a: v4i32, b: v4i32) -> v4i32 { + msa_addv_w(a, mem::transmute(b)) +} + +/// Vector Add +/// +/// The elements in vector in `a` (two signed 64-bit integer numbers) +/// are added to the elements in vector `b` (two signed 64-bit integer numbers). +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addv.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addv_d(a: v2i64, b: v2i64) -> v2i64 { + msa_addv_d(a, mem::transmute(b)) +} + +/// Immediate Add +/// +/// The 5-bit immediate unsigned value `imm5` is added to the elements +/// vector in `a` (sixteen signed 8-bit integer numbers). +/// The result is written to vector (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addvi.b, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addvi_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + msa_addvi_b(a, IMM5) +} + +/// Immediate Add +/// +/// The 5-bit immediate unsigned value `imm5` is added to the elements +/// vector in `a` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addvi.h, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addvi_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + msa_addvi_h(a, IMM5) +} + +/// Immediate Add +/// +/// The 5-bit immediate unsigned value `imm5` is added to the elements +/// vector in `a` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addvi.w, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addvi_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_addvi_w(a, IMM5) +} + +/// Immediate Add +/// +/// The 5-bit immediate unsigned value `imm5` is added to the elements +/// vector in `a` (two signed 64-bit integer numbers). +/// The result is written to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(addvi.d, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_addvi_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + msa_addvi_d(a, IMM5) +} + +/// Vector Logical And +/// +/// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the corresponding bit of vector `b` (sixteen unsigned 8-bit integer numbers) +/// in a bitwise logical AND operation. +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(and.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_and_v(a: v16u8, b: v16u8) -> v16u8 { + msa_and_v(a, mem::transmute(b)) +} + +/// Immediate Logical And +/// +/// Each byte element of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the 8-bit immediate i8 (signed 8-bit integer number) in a bitwise logical AND operation. +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(andi.b, imm8 = 0b10010111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_andi_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_andi_b(a, IMM8) +} + +/// Vector Absolute Values of Signed Subtract +/// +/// The signed elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are subtracted from the signed elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The absolute value of the signed result is written to vector (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_asub_s_b(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Signed Subtract +/// +/// The signed elements in vector `a` (eight signed 16-bit integer numbers) +/// are subtracted from the signed elements in vector `b` (eight signed 16-bit integer numbers). +/// The absolute value of the signed result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_asub_s_h(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Signed Subtract +/// +/// The signed elements in vector `a` (four signed 32-bit integer numbers) +/// are subtracted from the signed elements in vector `b` (four signed 32-bit integer numbers). +/// The absolute value of the signed result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_asub_s_w(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Signed Subtract +/// +/// The signed elements in vector `a` (two signed 64-bit integer numbers) +/// are subtracted from the signed elements in vector `b` (two signed 64-bit integer numbers). +/// The absolute value of the signed result is written to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_asub_s_d(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Unsigned Subtract +/// +/// The unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are subtracted from the unsigned elements in vector `b` (sixteen unsigned 8-bit integer numbers). +/// The absolute value of the unsigned result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_asub_u_b(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Unsigned Subtract +/// +/// The unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are subtracted from the unsigned elements in vector `b` (eight unsigned 16-bit integer numbers). +/// The absolute value of the unsigned result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_asub_u_h(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Unsigned Subtract +/// +/// The unsigned elements in vector `a` (four unsigned 32-bit integer numbers) +/// are subtracted from the unsigned elements in vector `b` (four unsigned 32-bit integer numbers). +/// The absolute value of the unsigned result is written to vector (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_asub_u_w(a, mem::transmute(b)) +} + +/// Vector Absolute Values of Unsigned Subtract +/// +/// The unsigned elements in vector `a` (two unsigned 64-bit integer numbers) +/// are subtracted from the unsigned elements in vector `b` (two unsigned 64-bit integer numbers). +/// The absolute value of the unsigned result is written to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(asub_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_asub_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_asub_u_d(a, mem::transmute(b)) +} + +/// Vector Signed Average +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The addition is done signed with full precision, i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_ave_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Average +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are added to the elements in vector `b` (eight signed 16-bit integer numbers). +/// The addition is done signed with full precision, i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_ave_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Average +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are added to the elements in vector `b` (four signed 32-bit integer numbers). +/// The addition is done signed with full precision, i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_ave_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Average +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are added to the elements in vector `b` (two signed 64-bit integer numbers). +/// The addition is done signed with full precision, i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_ave_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Average +/// +/// The elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen unsigned 8-bit integer numbers). +/// The addition is done unsigned with full precision, i.e. the result has one extra bit. 
+/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_ave_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Average +/// +/// The elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are added to the elements in vector `b` (eight unsigned 16-bit integer numbers). +/// The addition is done unsigned with full precision, i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_ave_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Average +/// +/// The elements in vector `a` (four unsigned 32-bit integer numbers) +/// are added to the elements in vector `b` (four unsigned 32-bit integer numbers). +/// The addition is done unsigned with full precision, i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_ave_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Average +/// +/// The elements in vector `a` (two unsigned 64-bit integer numbers) +/// are added to the elements in vector `b` (two unsigned 64-bit integer numbers). 
+/// The addition is done unsigned with full precision, i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ave_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ave_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_ave_u_d(a, mem::transmute(b)) +} + +/// Vector Signed Average Rounded +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done signed with full precision, +/// i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_aver_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Average Rounded +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are added to the elements in vector `b` (eight signed 16-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done signed with full precision, +/// i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_aver_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Average Rounded +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are added to the elements in vector `b` (four signed 32-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done signed with full precision, +/// i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_aver_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Average Rounded +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are added to the elements in vector `b` (two signed 64-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done signed with full precision, +/// i.e. the result has one extra bit. +/// Signed division by 2 (or arithmetic shift right by one bit) is performed before +/// writing the result to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_aver_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Average Rounded +/// +/// The elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are added to the elements in vector `b` (sixteen unsigned 8-bit integer numbers). 
+/// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, +/// i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_aver_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Average Rounded +/// +/// The elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are added to the elements in vector `b` (eight unsigned 16-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, +/// i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_aver_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Average Rounded +/// +/// The elements in vector `a` (four unsigned 32-bit integer numbers) +/// are added to the elements in vector `b` (four unsigned 32-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, +/// i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_aver_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Average Rounded +/// +/// The elements in vector `a` (two unsigned 64-bit integer numbers) +/// are added to the elements in vector `b` (two unsigned 64-bit integer numbers). +/// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, +/// i.e. the result has one extra bit. +/// Unsigned division by 2 (or logical shift right by one bit) is performed before +/// writing the result to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(aver_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_aver_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_aver_u_d(a, mem::transmute(b)) +} + +/// Vector Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). +/// The bit position is given by the elements in `b` (sixteen unsigned 8-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclr.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclr_b(a: v16u8, b: v16u8) -> v16u8 { + msa_bclr_b(a, mem::transmute(b)) +} + +/// Vector Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). +/// The bit position is given by the elements in `b` (eight unsigned 16-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (eight unsigned 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclr.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclr_h(a: v8u16, b: v8u16) -> v8u16 { + msa_bclr_h(a, mem::transmute(b)) +} + +/// Vector Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (four unsigned 32-bit integer numbers). +/// The bit position is given by the elements in `b` (four unsigned 32-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclr.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclr_w(a: v4u32, b: v4u32) -> v4u32 { + msa_bclr_w(a, mem::transmute(b)) +} + +/// Vector Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (two unsigned 64-bit integer numbers). +/// The bit position is given by the elements in `b` (two unsigned 64-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclr.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclr_d(a: v2u64, b: v2u64) -> v2u64 { + msa_bclr_d(a, mem::transmute(b)) +} + +/// Immediate Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). +/// The bit position is given by the immediate `m` modulo the size of the element in bits. +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclri.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclri_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + msa_bclri_b(a, IMM3) +} + +/// Immediate Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). +/// The bit position is given by the immediate `m` modulo the size of the element in bits. +/// The result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclri.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclri_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + msa_bclri_h(a, IMM4) +} + +/// Immediate Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (four unsigned 32-bit integer numbers). +/// The bit position is given by the immediate `m` modulo the size of the element in bits. +/// The result is written to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclri.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclri_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_bclri_w(a, IMM5) +} + +/// Immediate Bit Clear +/// +/// Clear (set to 0) one bit in each element of vector `a` (two unsigned 64-bit integer numbers). +/// The bit position is given by the immediate `m` modulo the size of the element in bits. +/// The result is written to vector (two unsigned 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bclri.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bclri_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + msa_bclri_d(a, IMM6) +} + +/// Vector Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) +/// to elements in vector `a` (sixteen unsigned 8-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the elements in vector `c` (sixteen unsigned 8-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsl.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsl_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { + msa_binsl_b(a, mem::transmute(b), c) +} + +/// Vector Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) +/// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the elements in vector `c` (eight unsigned 16-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsl.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsl_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16 { + msa_binsl_h(a, mem::transmute(b), c) +} + +/// Vector Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (four unsigned 32-bit integer numbers) +/// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the least significant (right) bits. 
+/// The number of bits to copy is given by the elements in vector `c` (four unsigned 32-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsl.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsl_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32 { + msa_binsl_w(a, mem::transmute(b), c) +} + +/// Vector Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (two unsigned 64-bit integer numbers) +/// to elements in vector `a` (two unsigned 64-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the elements in vector `c` (two unsigned 64-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsl.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsl_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64 { + msa_binsl_d(a, mem::transmute(b), c) +} + +/// Immediate Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) +/// to elements in vector `a` (sixteen unsigned 8-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the immediate `imm3` modulo the size of the element in bits plus 1. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsli.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsli_b(a: v16u8, b: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + msa_binsli_b(a, mem::transmute(b), IMM3) +} + +/// Immediate Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) +/// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the immediate `imm4` modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsli.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsli_h(a: v8u16, b: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + msa_binsli_h(a, mem::transmute(b), IMM4) +} + +/// Immediate Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (four unsigned 32-bit integer numbers) +/// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the immediate `imm5` modulo the size of the element in bits plus 1. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsli.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsli_w(a: v4u32, b: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_binsli_w(a, mem::transmute(b), IMM5) +} + +/// Immediate Bit Insert Left +/// +/// Copy most significant (left) bits in each element of vector `b` (two unsigned 64-bit integer numbers) +/// to elements in vector `a` (two unsigned 64-bit integer numbers) while preserving the least significant (right) bits. +/// The number of bits to copy is given by the immediate `imm6` modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsli.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsli_d(a: v2u64, b: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + msa_binsli_d(a, mem::transmute(b), IMM6) +} + +/// Vector Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) +/// to elements in vector `a` (sixteen unsigned 8-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the elements in vector `c` (sixteen unsigned 8-bit integer numbers) +/// modulo the size of the element in bits plus 1. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsr.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsr_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { + msa_binsr_b(a, mem::transmute(b), c) +} + +/// Vector Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) +/// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the elements in vector `c` (eight unsigned 16-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsr.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsr_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16 { + msa_binsr_h(a, mem::transmute(b), c) +} + +/// Vector Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (four unsigned 32-bit integer numbers) +/// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the elements in vector `c` (four unsigned 32-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsr.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsr_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32 { + msa_binsr_w(a, mem::transmute(b), c) +} + +/// Vector Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (two unsigned 64-bit integer numbers) +/// to elements in vector `a` (two unsigned 64-bit integer numbers) while preserving the least significant (left) bits. 
+/// The number of bits to copy is given by the elements in vector `c` (two unsigned 64-bit integer numbers) +/// modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsr.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsr_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64 { + msa_binsr_d(a, mem::transmute(b), c) +} + +/// Immediate Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) +/// to elements in vector `a` (sixteen unsigned 8-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the immediate `imm3` modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsri.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsri_b(a: v16u8, b: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + msa_binsri_b(a, mem::transmute(b), IMM3) +} + +/// Immediate Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) +/// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the immediate `imm4` modulo the size of the element in bits plus 1. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsri.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsri_h(a: v8u16, b: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + msa_binsri_h(a, mem::transmute(b), IMM4) +} + +/// Immediate Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (four unsigned 32-bit integer numbers) +/// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the immediate `imm5` modulo the size of the element in bits plus 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsri.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsri_w(a: v4u32, b: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_binsri_w(a, mem::transmute(b), IMM5) +} + +/// Immediate Bit Insert Right +/// +/// Copy most significant (right) bits in each element of vector `b` (two unsigned 64-bit integer numbers) +/// to elements in vector `a` (two unsigned 64-bit integer numbers) while preserving the least significant (left) bits. +/// The number of bits to copy is given by the immediate `imm6` modulo the size of the element in bits plus 1. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(binsri.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_binsri_d(a: v2u64, b: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + msa_binsri_d(a, mem::transmute(b), IMM6) +} + +/// Vector Bit Move If Not Zero +/// +/// Copy to destination vector `a` (sixteen unsigned 8-bit integer numbers) all bits from source vector +/// `b` (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from target vector `c` +/// (sixteen unsigned 8-bit integer numbers) are 1 and leaves unchanged all destination bits +/// for which the corresponding target bits are 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bmnz.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bmnz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { + msa_bmnz_v(a, mem::transmute(b), c) +} + +/// Immediate Bit Move If Not Zero +/// +/// Copy to destination vector `a` (sixteen unsigned 8-bit integer numbers) all bits from source vector +/// `b` (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from immediate `imm8` +/// are 1 and leaves unchanged all destination bits for which the corresponding target bits are 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bmnzi.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bmnzi_b(a: v16u8, b: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_bmnzi_b(a, mem::transmute(b), IMM8) +} + +/// Vector Bit Move If Zero +/// +/// Copy to destination vector `a` (sixteen unsigned 8-bit integer numbers) all bits from source vector +/// `b` (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from target vector `c` +/// (sixteen unsigned 8-bit integer numbers) are 0 and leaves unchanged all destination bits +/// for which the corresponding target bits are 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bmz.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bmz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { + msa_bmz_v(a, mem::transmute(b), c) +} + +/// Immediate Bit Move If Zero +/// +/// Copy to destination vector `a` (sixteen unsigned 8-bit integer numbers) all bits from source vector +/// `b` (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from immediate `imm8` +/// are 0 and leaves unchanged all destination bits for which the corresponding immediate bits are 1. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bmzi.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bmzi_b(a: v16u8, b: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_bmzi_b(a, mem::transmute(b), IMM8) +} + +/// Vector Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). +/// The bit position is given by the elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// modulo the size of the element in bits. 
+/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bneg.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bneg_b(a: v16u8, b: v16u8) -> v16u8 { + msa_bneg_b(a, mem::transmute(b)) +} + +/// Vector Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). +/// The bit position is given by the elements in vector `b` (eight unsigned 16-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bneg.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bneg_h(a: v8u16, b: v8u16) -> v8u16 { + msa_bneg_h(a, mem::transmute(b)) +} + +/// Vector Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (four unsigned 32-bit integer numbers). +/// The bit position is given by the elements in vector `b` (four unsigned 32-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bneg.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bneg_w(a: v4u32, b: v4u32) -> v4u32 { + msa_bneg_w(a, mem::transmute(b)) +} + +/// Vector Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (two unsigned 64-bit integer numbers). +/// The bit position is given by the elements in vector `b` (two unsigned 64-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (two unsigned 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bneg.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bneg_d(a: v2u64, b: v2u64) -> v2u64 { + msa_bneg_d(a, mem::transmute(b)) +} + +/// Immediate Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). +/// The bit position is given by immediate `imm3` modulo the size of the element in bits. +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bnegi.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bnegi_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + msa_bnegi_b(a, IMM3) +} + +/// Immediate Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). +/// The bit position is given by immediate `imm4` modulo the size of the element in bits. +/// The result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bnegi.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bnegi_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + msa_bnegi_h(a, IMM4) +} + +/// Immediate Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (four unsigned 32-bit integer numbers). +/// The bit position is given by immediate `imm5` modulo the size of the element in bits. +/// The result is written to vector (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bnegi.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bnegi_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_bnegi_w(a, IMM5) +} + +/// Immediate Bit Negate +/// +/// Negate (complement) one bit in each element of vector `a` (two unsigned 64-bit integer numbers). +/// The bit position is given by immediate `imm6` modulo the size of the element in bits. +/// The result is written to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bnegi.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bnegi_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + msa_bnegi_d(a, IMM6) +} + +/// Immediate Branch If All Elements Are Not Zero +/// +/// PC-relative branch if all elements in `a` (sixteen unsigned 8-bit integer numbers) are not zero. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bnz.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bnz_b(a: v16u8) -> i32 { + msa_bnz_b(a) +} + +/// Immediate Branch If All Elements Are Not Zero +/// +/// PC-relative branch if all elements in `a` (eight unsigned 16-bit integer numbers) are not zero. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bnz.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bnz_h(a: v8u16) -> i32 { + msa_bnz_h(a) +} + +/// Immediate Branch If All Elements Are Not Zero +/// +/// PC-relative branch if all elements in `a` (four unsigned 32-bit integer numbers) are not zero. 
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(bnz.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_bnz_w(a: v4u32) -> i32 {
+    msa_bnz_w(a)
+}
+
+/// Immediate Branch If All Elements Are Not Zero
+///
+/// PC-relative branch if all elements in `a` (two unsigned 64-bit integer numbers) are not zero.
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(bnz.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_bnz_d(a: v2u64) -> i32 {
+    msa_bnz_d(a)
+}
+
+/// Immediate Branch If Not Zero (At Least One Element of Any Format Is Not Zero)
+///
+/// PC-relative branch if at least one bit in `a` (sixteen unsigned 8-bit integer numbers) is not zero,
+/// i.e. at least one element is not zero regardless of the data format.
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(bnz.v))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_bnz_v(a: v16u8) -> i32 {
+    msa_bnz_v(a)
+}
+
+/// Vector Bit Select
+///
+/// Selectively copy bits from the source vectors `b` (sixteen unsigned 8-bit integer numbers)
+/// and `c` (sixteen unsigned 8-bit integer numbers)
+/// into destination vector `a` (sixteen unsigned 8-bit integer numbers) based on the corresponding bit in `a`:
+/// if 0 copies the bit from `b`, if 1 copies the bit from `c`.
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(bsel.v))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_bsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 {
+    msa_bsel_v(a, mem::transmute(b), c)
+}
+
+/// Immediate Bit Select
+///
+/// Selectively copy bits from the 8-bit immediate `imm8` and `b` (sixteen unsigned 8-bit integer numbers)
+/// into destination vector `a` (sixteen unsigned 8-bit integer numbers) based on the corresponding bit in `a`:
+/// if 0 copies the bit from `b`, if 1 copies the bit from `imm8`.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bseli.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bseli_b(a: v16u8, b: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_bseli_b(a, mem::transmute(b), IMM8) +} + +/// Vector Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). +/// The bit position is given by the elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bset.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bset_b(a: v16u8, b: v16u8) -> v16u8 { + msa_bset_b(a, mem::transmute(b)) +} + +/// Vector Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). +/// The bit position is given by the elements in vector `b` (eight unsigned 16-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bset.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bset_h(a: v8u16, b: v8u16) -> v8u16 { + msa_bset_h(a, mem::transmute(b)) +} + +/// Vector Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (four unsigned 32-bit integer numbers). +/// The bit position is given by the elements in vector `b` (four unsigned 32-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bset.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bset_w(a: v4u32, b: v4u32) -> v4u32 { + msa_bset_w(a, mem::transmute(b)) +} + +/// Vector Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (two unsigned 64-bit integer numbers). +/// The bit position is given by the elements in vector `b` (two unsigned 64-bit integer numbers) +/// modulo the size of the element in bits. +/// The result is written to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bset.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bset_d(a: v2u64, b: v2u64) -> v2u64 { + msa_bset_d(a, mem::transmute(b)) +} + +/// Immediate Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). +/// The bit position is given by immediate `imm3`. +/// The result is written to vector `a` (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bseti.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bseti_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + msa_bseti_b(a, IMM3) +} + +/// Immediate Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). +/// The bit position is given by immediate `imm4`. +/// The result is written to vector `a` (eight unsigned 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bseti.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bseti_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + msa_bseti_h(a, IMM4) +} + +/// Immediate Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (four unsigned 32-bit integer numbers). +/// The bit position is given by immediate `imm5`. +/// The result is written to vector `a` (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bseti.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bseti_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_bseti_w(a, IMM5) +} + +/// Immediate Bit Set +/// +/// Set to 1 one bit in each element of vector `a` (two unsigned 64-bit integer numbers). +/// The bit position is given by immediate `imm6`. +/// The result is written to vector `a` (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bseti.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bseti_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + msa_bseti_d(a, IMM6) +} + +/// Immediate Branch If At Least One Element Is Zero +/// +/// PC-relative branch if at least one element in `a` (sixteen unsigned 8-bit integer numbers) is zero. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bz.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bz_b(a: v16u8) -> i32 { + msa_bz_b(a) +} + +/// Immediate Branch If At Least One Element Is Zero +/// +/// PC-relative branch if at least one element in `a` (eight unsigned 16-bit integer numbers) is zero. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bz.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bz_h(a: v8u16) -> i32 { + msa_bz_h(a) +} + +/// Immediate Branch If At Least One Element Is Zero +/// +/// PC-relative branch if at least one element in `a` (four unsigned 32-bit integer numbers) is zero. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bz.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bz_w(a: v4u32) -> i32 { + msa_bz_w(a) +} + +/// Immediate Branch If At Least One Element Is Zero +/// +/// PC-relative branch if at least one element in `a` (two unsigned 64-bit integer numbers) is zero. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bz.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bz_d(a: v2u64) -> i32 { + msa_bz_d(a) +} + +/// Immediate Branch If Zero (All Elements of Any Format Are Zero) +/// +/// PC-relative branch if all elements in `a` (sixteen unsigned 8-bit integer numbers) bits are zero, +/// i.e. all elements are zero regardless of the data format. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(bz.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_bz_v(a: v16u8) -> i32 { + msa_bz_v(a) +} + +/// Vector Compare Equal +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen signed 8-bit integer numbers) and `b` (sixteen signed 8-bit integer numbers) +/// elements are equal, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceq.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceq_b(a: v16i8, b: v16i8) -> v16i8 { + msa_ceq_b(a, mem::transmute(b)) +} + +/// Vector Compare Equal +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight signed 16-bit integer numbers) and `b` (eight signed 16-bit integer numbers) +/// elements are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceq.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceq_h(a: v8i16, b: v8i16) -> v8i16 { + msa_ceq_h(a, mem::transmute(b)) +} + +/// Vector Compare Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four signed 32-bit integer numbers) and `b` (four signed 32-bit integer numbers) +/// elements are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceq.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceq_w(a: v4i32, b: v4i32) -> v4i32 { + msa_ceq_w(a, mem::transmute(b)) +} + +/// Vector Compare Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two signed 64-bit integer numbers) and `b` (two signed 64-bit integer numbers) +/// elements are equal, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceq.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceq_d(a: v2i64, b: v2i64) -> v2i64 { + msa_ceq_d(a, mem::transmute(b)) +} + +/// Immediate Compare Equal +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen signed 8-bit integer numbers) the 5-bit signed immediate imm_s5 +/// are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceqi.b, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceqi_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + msa_ceqi_b(a, IMM_S5) +} + +/// Immediate Compare Equal +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight signed 16-bit integer numbers) the 5-bit signed immediate imm_s5 +/// are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceqi.h, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceqi_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + msa_ceqi_h(a, IMM_S5) +} + +/// Immediate Compare Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four signed 32-bit integer numbers) the 5-bit signed immediate imm_s5 +/// are equal, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceqi.w, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceqi_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + msa_ceqi_w(a, IMM_S5) +} + +/// Immediate Compare Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two signed 64-bit integer numbers) the 5-bit signed immediate imm_s5 +/// are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ceqi.d, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ceqi_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + msa_ceqi_d(a, IMM_S5) +} + +/// GPR Copy from MSA Control Register +/// +/// The sign extended content of MSA control register cs is copied to GPR rd. +/// +/// Can not be tested in user mode +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cfcmsa, imm5 = 0b11111))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cfcmsa() -> i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_cfcmsa(IMM5) +} + +/// Vector Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen signed 8-bit integer numbers) element +/// are signed less than or equal to `b` (sixteen signed 8-bit integer numbers) element. +/// Otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_cle_s_b(a, mem::transmute(b)) +} + +/// Vector Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight signed 16-bit integer numbers) element +/// are signed less than or equal to `b` (eight signed 16-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_cle_s_h(a, mem::transmute(b)) +} + +/// Vector Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four signed 32-bit integer numbers) element +/// are signed less than or equal to `b` (four signed 32-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_cle_s_w(a, mem::transmute(b)) +} + +/// Vector Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two signed 64-bit integer numbers) element +/// are signed less than or equal to `b` (two signed 64-bit integer numbers) element. +/// Otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_cle_s_d(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen unsigned 8-bit integer numbers) element +/// are unsigned less than or equal to `b` (sixteen unsigned 8-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_u_b(a: v16u8, b: v16u8) -> v16i8 { + msa_cle_u_b(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight unsigned 16-bit integer numbers) element +/// are unsigned less than or equal to `b` (eight unsigned 16-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_u_h(a: v8u16, b: v8u16) -> v8i16 { + msa_cle_u_h(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four unsigned 32-bit integer numbers) element +/// are unsigned less than or equal to `b` (four unsigned 32-bit integer numbers) element. +/// Otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_u_w(a: v4u32, b: v4u32) -> v4i32 { + msa_cle_u_w(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two unsigned 64-bit integer numbers) element +/// are unsigned less than or equal to `b` (two unsigned 64-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(cle_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_cle_u_d(a: v2u64, b: v2u64) -> v2i64 { + msa_cle_u_d(a, mem::transmute(b)) +} + +/// Immediate Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen signed 8-bit integer numbers) element +/// is less than or equal to the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_s.b, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_s_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clei_s_b(a, IMM_S5) +} + +/// Immediate Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight signed 16-bit integer numbers) element +/// is less than or equal to the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_s.h, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_s_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clei_s_h(a, IMM_S5) +} + +/// Immediate Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four signed 32-bit integer numbers) element +/// is less than or equal to the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_s.w, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_s_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clei_s_w(a, IMM_S5) +} + +/// Immediate Compare Signed Less Than or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two signed 64-bit integer numbers) element +/// is less than or equal to the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_s.d, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_s_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clei_s_d(a, IMM_S5) +} + +/// Immediate Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen unsigned 8-bit integer numbers) element +/// is unsigned less than or equal to the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_u.b, imm5 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_u_b(a: v16u8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + msa_clei_u_b(a, IMM5) +} + +/// Immediate Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight unsigned 16-bit integer numbers) element +/// is unsigned less than or equal to the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_u.h, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_u_h(a: v8u16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + msa_clei_u_h(a, IMM5) +} + +/// Immediate Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four unsigned 32-bit integer numbers) element +/// is unsigned less than or equal to the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_u.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_u_w(a: v4u32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_clei_u_w(a, IMM5) +} + +/// Immediate Compare Unsigned Less Than or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two unsigned 64-bit integer numbers) element +/// is unsigned less than or equal to the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clei_u.d, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clei_u_d(a: v2u64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + msa_clei_u_d(a, IMM5) +} + +/// Vector Compare Signed Less Than +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen signed 8-bit integer numbers) element +/// are signed less than `b` (sixteen signed 8-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_clt_s_b(a, mem::transmute(b)) +} + +/// Vector Compare Signed Less Than +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight signed 16-bit integer numbers) element +/// are signed less than `b` (eight signed 16-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_clt_s_h(a, mem::transmute(b)) +} + +/// Vector Compare Signed Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four signed 32-bit integer numbers) element +/// are signed less than `b` (four signed 32-bit integer numbers) element. +/// Otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_clt_s_w(a, mem::transmute(b)) +} + +/// Vector Compare Signed Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two signed 64-bit integer numbers) element +/// are signed less than `b` (two signed 64-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_clt_s_d(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen unsigned 8-bit integer numbers) element +/// are unsigned less than `b` (sixteen unsigned 8-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_u_b(a: v16u8, b: v16u8) -> v16i8 { + msa_clt_u_b(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight unsigned 16-bit integer numbers) element +/// are unsigned less than `b` (eight unsigned 16-bit integer numbers) element. +/// Otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_u_h(a: v8u16, b: v8u16) -> v8i16 { + msa_clt_u_h(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four unsigned 32-bit integer numbers) element +/// are unsigned less than `b` (four unsigned 32-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_u_w(a: v4u32, b: v4u32) -> v4i32 { + msa_clt_u_w(a, mem::transmute(b)) +} + +/// Vector Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two unsigned 64-bit integer numbers) element +/// are unsigned less than `b` (two unsigned 64-bit integer numbers) element. +/// Otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clt_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clt_u_d(a: v2u64, b: v2u64) -> v2i64 { + msa_clt_u_d(a, mem::transmute(b)) +} + +/// Immediate Compare Signed Less Than +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen signed 8-bit integer numbers) element +/// is less than the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_s.b, imm_s5 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_s_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clti_s_b(a, IMM_S5) +} + +/// Immediate Compare Signed Less Than +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight signed 16-bit integer numbers) element +/// is less than the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_s.h, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_s_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clti_s_h(a, IMM_S5) +} + +/// Immediate Compare Signed Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four signed 32-bit integer numbers) element +/// is less than the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_s.w, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_s_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clti_s_w(a, IMM_S5) +} + +/// Immediate Compare Signed Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two signed 64-bit integer numbers) element +/// is less than the 5-bit signed immediate imm_s5, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_s.d, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_s_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + msa_clti_s_d(a, IMM_S5) +} + +/// Immediate Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements +/// if the corresponding `a` (sixteen unsigned 8-bit integer numbers) element +/// is unsigned less than the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_u.b, imm5 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_u_b(a: v16u8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + msa_clti_u_b(a, IMM5) +} + +/// Immediate Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements +/// if the corresponding `a` (eight unsigned 16-bit integer numbers) element +/// is unsigned less than the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_u.h, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_u_h(a: v8u16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + msa_clti_u_h(a, IMM5) +} + +/// Immediate Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four unsigned 32-bit integer numbers) element +/// is unsigned less than the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_u.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_u_w(a: v4u32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_clti_u_w(a, IMM5) +} + +/// Immediate Compare Unsigned Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two unsigned 64-bit integer numbers) element +/// is unsigned less than the 5-bit unsigned immediate `imm5`, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(clti_u.d, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_clti_u_d(a: v2u64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + msa_clti_u_d(a, IMM5) +} + +/// Element Copy to GPR Signed +/// +/// Sign-extend element `imm4` of vector `a` (sixteen signed 8-bit integer numbers) +/// and copy the result to GPR rd. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_s.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_s_b(a: v16i8) -> i32 { + static_assert_uimm_bits!(IMM4, 4); + msa_copy_s_b(a, IMM4) +} + +/// Element Copy to GPR Signed +/// +/// Sign-extend element `imm3` of vector `a` (eight signed 16-bit integer numbers) +/// and copy the result to GPR rd. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_s.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_s_h(a: v8i16) -> i32 { + static_assert_uimm_bits!(IMM3, 3); + msa_copy_s_h(a, IMM3) +} + +/// Element Copy to GPR Signed +/// +/// Sign-extend element `imm2` of vector `a` (four signed 32-bit integer numbers) +/// and copy the result to GPR rd. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_s.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_s_w(a: v4i32) -> i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_copy_s_w(a, IMM2) +} + +/// Element Copy to GPR Signed +/// +/// Sign-extend element `imm1` of vector `a` (two signed 64-bit integer numbers) +/// and copy the result to GPR rd. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_s.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_s_d(a: v2i64) -> i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_copy_s_d(a, IMM1) +} + +/// Element Copy to GPR Unsigned +/// +/// Zero-extend element `imm4` of vector `a` (sixteen signed 8-bit integer numbers) +/// and copy the result to GPR rd. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_u.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_u_b(a: v16i8) -> u32 { + static_assert_uimm_bits!(IMM4, 4); + msa_copy_u_b(a, IMM4) +} + +/// Element Copy to GPR Unsigned +/// +/// Zero-extend element `imm3` of vector `a` (eight signed 16-bit integer numbers) +/// and copy the result to GPR rd. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_u.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_u_h(a: v8i16) -> u32 { + static_assert_uimm_bits!(IMM3, 3); + msa_copy_u_h(a, IMM3) +} + +/// Element Copy to GPR Unsigned +/// +/// Zero-extend element `imm2` of vector `a` (four signed 32-bit integer numbers) +/// and copy the result to GPR rd. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_u.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_u_w(a: v4i32) -> u32 { + static_assert_uimm_bits!(IMM2, 2); + msa_copy_u_w(a, IMM2) +} + +/// Element Copy to GPR Unsigned +/// +/// Zero-extend element `imm1` of vector `a` (two signed 64-bit integer numbers) +/// and copy the result to GPR rd. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(copy_u.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_copy_u_d(a: v2i64) -> u64 { + static_assert_uimm_bits!(IMM1, 1); + msa_copy_u_d(a, IMM1) +} + +/// GPR Copy to MSA Control Register +/// +/// The content of the least significant 31 bits of GPR `a` is copied to +/// the MSA control register selected by the 5-bit unsigned immediate `imm5`. +/// +/// Cannot be tested in user mode +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ctcmsa, imm1 = 0b1))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ctcmsa(a: i32) -> () { + static_assert_uimm_bits!(IMM5, 5); + msa_ctcmsa(IMM5, a) +} + +/// Vector Signed Divide +/// +/// The signed integer elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are divided by signed integer elements in vector `b` (sixteen signed 8-bit integer numbers).
+/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_div_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Divide +/// +/// The signed integer elements in vector `a` (eight signed 16-bit integer numbers) +/// are divided by signed integer elements in vector `b` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_div_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Divide +/// +/// The signed integer elements in vector `a` (four signed 32-bit integer numbers) +/// are divided by signed integer elements in vector `b` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_div_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Divide +/// +/// The signed integer elements in vector `a` (two signed 64-bit integer numbers) +/// are divided by signed integer elements in vector `b` (two signed 64-bit integer numbers). +/// The result is written to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_div_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Divide +/// +/// The unsigned integer elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (sixteen unsigned 8-bit integer numbers). +/// The result is written to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_div_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Divide +/// +/// The unsigned integer elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (eight unsigned 16-bit integer numbers). +/// The result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_div_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Divide +/// +/// The unsigned integer elements in vector `a` (four unsigned 32-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (four unsigned 32-bit integer numbers). +/// The result is written to vector (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_div_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Divide +/// +/// The unsigned integer elements in vector `a` (two unsigned 64-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (two unsigned 64-bit integer numbers). +/// The result is written to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(div_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_div_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_div_u_d(a, mem::transmute(b)) +} + +/// Vector Signed Dot Product +/// +/// The signed integer elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are multiplied by signed integer elements in vector `b` (sixteen signed 8-bit integer numbers) +/// producing a result the size of the input operands. The multiplication results of +/// adjacent odd/even elements are added and stored to the destination +/// vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dotp_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dotp_s_h(a: v16i8, b: v16i8) -> v8i16 { + msa_dotp_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Dot Product +/// +/// The signed integer elements in vector `a` (eight signed 16-bit integer numbers) +/// are multiplied by signed integer elements in vector `b` (eight signed 16-bit integer numbers) +/// producing a result the size of the input operands. The multiplication results of +/// adjacent odd/even elements are added and stored to the destination +/// vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dotp_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dotp_s_w(a: v8i16, b: v8i16) -> v4i32 { + msa_dotp_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Dot Product +/// +/// The signed integer elements in vector `a` (four signed 32-bit integer numbers) +/// are multiplied by signed integer elements in vector `b` (four signed 32-bit integer numbers) +/// producing a result the size of the input operands. The multiplication results of +/// adjacent odd/even elements are added and stored to the destination +/// vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dotp_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dotp_s_d(a: v4i32, b: v4i32) -> v2i64 { + msa_dotp_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Dot Product +/// +/// The unsigned integer elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// producing a result the size of the input operands. The multiplication results of +/// adjacent odd/even elements are added and stored to the destination +/// vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dotp_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dotp_u_h(a: v16u8, b: v16u8) -> v8u16 { + msa_dotp_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Dot Product +/// +/// The unsigned integer elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `b` (eight unsigned 16-bit integer numbers) +/// producing a result the size of the input operands. 
The multiplication results of +/// adjacent odd/even elements are added and stored to the destination +/// vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dotp_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dotp_u_w(a: v8u16, b: v8u16) -> v4u32 { + msa_dotp_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Dot Product +/// +/// The unsigned integer elements in vector `a` (four unsigned 32-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `b` (four unsigned 32-bit integer numbers) +/// producing a result the size of the input operands. The multiplication results of +/// adjacent odd/even elements are added and stored to the destination +/// vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dotp_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dotp_u_d(a: v4u32, b: v4u32) -> v2u64 { + msa_dotp_u_d(a, mem::transmute(b)) +} + +/// Vector Signed Dot Product and Add +/// +/// The signed integer elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are multiplied by signed integer elements in vector `c` (sixteen signed 8-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are added to the vector `a` (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpadd_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpadd_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16 { + msa_dpadd_s_h(a, mem::transmute(b), c) +} + +/// Vector Signed Dot Product and Add +/// +/// The signed integer elements in vector `b` (eight signed 16-bit integer numbers) +/// are multiplied by signed integer elements in vector `c` (eight signed 16-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are added to the vector `a` (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpadd_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpadd_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32 { + msa_dpadd_s_w(a, mem::transmute(b), c) +} + +/// Vector Signed Dot Product and Add +/// +/// The signed integer elements in vector `b` (four signed 32-bit integer numbers) +/// are multiplied by signed integer elements in vector `c` (four signed 32-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are added to the vector `a` (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpadd_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpadd_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64 { + msa_dpadd_s_d(a, mem::transmute(b), c) +} + +/// Vector Unsigned Dot Product and Add +/// +/// The unsigned integer elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `c` (sixteen unsigned 8-bit integer numbers) +/// producing a result twice the size of the input operands. 
The multiplication results +/// of adjacent odd/even elements are added to the vector `a` (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpadd_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpadd_u_h(a: v8u16, b: v16u8, c: v16u8) -> v8u16 { + msa_dpadd_u_h(a, mem::transmute(b), c) +} + +/// Vector Unsigned Dot Product and Add +/// +/// The unsigned integer elements in vector `b` (eight unsigned 16-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `c` (eight unsigned 16-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are added to the vector `a` (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpadd_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpadd_u_w(a: v4u32, b: v8u16, c: v8u16) -> v4u32 { + msa_dpadd_u_w(a, mem::transmute(b), c) +} + +/// Vector Unsigned Dot Product and Add +/// +/// The unsigned integer elements in vector `b` (four unsigned 32-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `c` (four unsigned 32-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are added to the vector `a` (two unsigned 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpadd_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpadd_u_d(a: v2u64, b: v4u32, c: v4u32) -> v2u64 { + msa_dpadd_u_d(a, mem::transmute(b), c) +} + +/// Vector Signed Dot Product and Subtract +/// +/// The signed integer elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are multiplied by signed integer elements in vector `c` (sixteen signed 8-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are subtracted from the integer elements in vector `a` +/// (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpsub_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpsub_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16 { + msa_dpsub_s_h(a, mem::transmute(b), c) +} + +/// Vector Signed Dot Product and Subtract +/// +/// The signed integer elements in vector `b` (eight signed 16-bit integer numbers) +/// are multiplied by signed integer elements in vector `c` (eight signed 16-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are subtracted from the integer elements in vector `a` +/// (four signed 32-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpsub_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpsub_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32 { + msa_dpsub_s_w(a, mem::transmute(b), c) +} + +/// Vector Signed Dot Product and Subtract +/// +/// The signed integer elements in vector `b` (four signed 32-bit integer numbers) +/// are multiplied by signed integer elements in vector `c` (four signed 32-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are subtracted from the integer elements in vector `a` +/// (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpsub_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpsub_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64 { + msa_dpsub_s_d(a, mem::transmute(b), c) +} + +/// Vector Unsigned Dot Product and Subtract +/// +/// The unsigned integer elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `c` (sixteen unsigned 8-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are subtracted from the integer elements in vector `a` +/// (eight signed 16-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpsub_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpsub_u_h(a: v8i16, b: v16u8, c: v16u8) -> v8i16 { + msa_dpsub_u_h(a, mem::transmute(b), c) +} + +/// Vector Unsigned Dot Product and Subtract +/// +/// The unsigned integer elements in vector `b` (eight unsigned 16-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `c` (eight unsigned 16-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are subtracted from the integer elements in vector `a` +/// (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpsub_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpsub_u_w(a: v4i32, b: v8u16, c: v8u16) -> v4i32 { + msa_dpsub_u_w(a, mem::transmute(b), c) +} + +/// Vector Unsigned Dot Product and Subtract +/// +/// The unsigned integer elements in vector `b` (four unsigned 32-bit integer numbers) +/// are multiplied by unsigned integer elements in vector `c` (four unsigned 32-bit integer numbers) +/// producing a result twice the size of the input operands. The multiplication results +/// of adjacent odd/even elements are subtracted from the integer elements in vector `a` +/// (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(dpsub_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_dpsub_u_d(a: v2i64, b: v4u32, c: v4u32) -> v2i64 { + msa_dpsub_u_d(a, mem::transmute(b), c) +} + +/// Vector Floating-Point Addition +/// +/// The floating-point elements in vector `a` (four 32-bit floating point numbers) +/// are added to the floating-point elements in `b` (four 32-bit floating point numbers).
+/// The result is written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fadd.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fadd_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fadd_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Addition +/// +/// The floating-point elements in vector `a` (two 64-bit floating point numbers) +/// are added to the floating-point elements in `b` (two 64-bit floating point numbers). +/// The result is written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fadd.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fadd_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fadd_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Always False +/// +/// Set all bits to 0 in vector (four signed 32-bit integer numbers). +/// Signaling NaN elements in `a` (four 32-bit floating point numbers) +/// or `b` (four 32-bit floating point numbers) signal Invalid Operation exception. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcaf.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcaf_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcaf_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Always False +/// +/// Set all bits to 0 in vector (two signed 64-bit integer numbers). +/// Signaling NaN elements in `a` (two 64-bit floating point numbers) +/// or `b` (two 64-bit floating point numbers) signal Invalid Operation exception.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcaf.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcaf_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcaf_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding in `a` (four 32-bit floating point numbers) +/// and `b` (four 32-bit floating point numbers) elements are ordered and equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fceq.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fceq_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fceq_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding in `a` (two 64-bit floating point numbers) +/// and `b` (two 64-bit floating point numbers) elements are ordered and equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fceq.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fceq_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fceq_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Class Mask +/// +/// Store in each element of vector (four signed 32-bit integer numbers) +/// a bit mask reflecting the floating-point class of the corresponding element of vector +/// `a` (four 32-bit floating point numbers). +/// The mask has 10 bits as follows. Bits 0 and 1 indicate NaN values: signaling NaN (bit 0) and quiet NaN (bit 1). +/// Bits 2, 3, 4, 5 classify negative values: infinity (bit 2), normal (bit 3), subnormal (bit 4), and zero (bit 5). +/// Bits 6, 7, 8, 9 classify positive values: infinity (bit 6), normal (bit 7), subnormal (bit 8), and zero (bit 9). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fclass.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fclass_w(a: v4f32) -> v4i32 { + msa_fclass_w(a) +} + +/// Vector Floating-Point Class Mask +/// +/// Store in each element of vector (two signed 64-bit integer numbers) +/// a bit mask reflecting the floating-point class of the corresponding element of vector +/// `a` (two 64-bit floating point numbers). +/// The mask has 10 bits as follows. Bits 0 and 1 indicate NaN values: signaling NaN (bit 0) and quiet NaN (bit 1). +/// Bits 2, 3, 4, 5 classify negative values: infinity (bit 2), normal (bit 3), subnormal (bit 4), and zero (bit 5). +/// Bits 6, 7, 8, 9 classify positive values: infinity (bit 6), normal (bit 7), subnormal (bit 8), and zero (bit 9). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fclass.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fclass_d(a: v2f64) -> v2i64 { + msa_fclass_d(a) +} + +/// Vector Floating-Point Quiet Compare Less or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) elements are ordered +/// and either less than or equal to `b` (four 32-bit floating point numbers) elements, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcle.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcle_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcle_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Less or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) elements are ordered +/// and either less than or equal to `b` (two 64-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcle.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcle_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcle_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) elements are ordered +/// and less than `b` (four 32-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fclt.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fclt_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fclt_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) elements are ordered +/// and less than `b` (two 64-bit floating point numbers) elements, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fclt.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fclt_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fclt_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Not Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are ordered and not equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcne.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcne_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcne_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Not Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are ordered and not equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcne.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcne_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcne_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Ordered +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are ordered, i.e. both elements are not NaN values, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcor.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcor_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcor_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Ordered +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are ordered, i.e. both elements are not NaN values, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcor.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcor_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcor_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are unordered or equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcueq.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcueq_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcueq_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are unordered or equal, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcueq.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcueq_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcueq_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Less or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding elements in `a` (four 32-bit floating point numbers) +/// are unordered or less than or equal to `b` (four 32-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcule.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcule_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcule_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Less or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding elements in `a` (two 64-bit floating point numbers) +/// are unordered or less than or equal to `b` (two 64-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcule.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcule_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcule_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding elements in `a` (four 32-bit floating point numbers) +/// are unordered or less than `b` (four 32-bit floating point numbers) elements, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcult.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcult_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcult_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding elements in `a` (two 64-bit floating point numbers) +/// are unordered or less than `b` (two 64-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcult.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcult_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcult_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) +/// and `b` (four 32-bit floating point numbers) elements are unordered, +/// i.e. at least one element is a NaN value, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcun.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcun_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcun_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) +/// and `b` (two 64-bit floating point numbers) elements are unordered, +/// i.e. at least one element is a NaN value, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcun.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcun_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcun_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Not Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) +/// elements if the corresponding `a` (four 32-bit floating point numbers) +/// and `b` (four 32-bit floating point numbers) elements are unordered or not equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcune.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcune_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fcune_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Quiet Compare Unordered or Not Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) +/// elements if the corresponding `a` (two 64-bit floating point numbers) +/// and `b` (two 64-bit floating point numbers) elements are unordered or not equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fcune.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fcune_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fcune_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Division +/// +/// The floating-point elements in vector `a` (four 32-bit floating point numbers) +/// are divided by the floating-point elements in vector `b` (four 32-bit floating point numbers). +/// The result is written to vector (four 32-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fdiv.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fdiv_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fdiv_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Division +/// +/// The floating-point elements in vector `a` (two 64-bit floating point numbers) +/// are divided by the floating-point elements in vector `b` (two 64-bit floating point numbers). +/// The result is written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fdiv.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fdiv_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fdiv_d(a, mem::transmute(b)) +} + +/* FIXME: 16-bit float +/// Vector Floating-Point Down-Convert Interchange Format +/// +/// The floating-point elements in vector `a` (four 64-bit floating point numbers) +/// and vector `b` (four 64-bit floating point numbers) are down-converted +/// to a smaller interchange format, i.e. from 64-bit to 32-bit, or from 32-bit to 16-bit. +/// The result is written to vector (8 16-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fexdo.h))] + #[unstable(feature = "stdarch_mips", issue = "111198")] pub unsafe fn __msa_fexdo_h(a: v4f32, b: v4f32) -> f16x8 { + msa_fexdo_h(a, mem::transmute(b)) +}*/ + +/// Vector Floating-Point Down-Convert Interchange Format +/// +/// The floating-point elements in vector `a` (two 64-bit floating point numbers) +/// and vector `b` (two 64-bit floating point numbers) are down-converted +/// to a smaller interchange format, i.e. from 64-bit to 32-bit, or from 32-bit to 16-bit. +/// The result is written to vector (four 32-bit floating point numbers). 
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fexdo.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_fexdo_w(a: v2f64, b: v2f64) -> v4f32 {
+    msa_fexdo_w(a, mem::transmute(b))
+}
+
+/// Vector Floating-Point Base 2 Exponentiation
+///
+/// The floating-point elements in vector `a` (four 32-bit floating point numbers)
+/// are scaled, i.e. multiplied, by 2 to the power of integer elements in vector `b`
+/// (four signed 32-bit integer numbers).
+/// The result is written to vector (four 32-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fexp2.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_fexp2_w(a: v4f32, b: v4i32) -> v4f32 {
+    msa_fexp2_w(a, mem::transmute(b))
+}
+
+/// Vector Floating-Point Base 2 Exponentiation
+///
+/// The floating-point elements in vector `a` (two 64-bit floating point numbers)
+/// are scaled, i.e. multiplied, by 2 to the power of integer elements in vector `b`
+/// (two signed 64-bit integer numbers).
+/// The result is written to vector (two 64-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fexp2.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_fexp2_d(a: v2f64, b: v2i64) -> v2f64 {
+    msa_fexp2_d(a, mem::transmute(b))
+}
+
+/* FIXME: 16-bit float
+/// Vector Floating-Point Up-Convert Interchange Format Left
+///
+/// The left half floating-point elements in vector `a` (two 16-bit floating point numbers)
+/// are up-converted to a larger interchange format,
+/// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit.
+/// The result is written to vector (four 32-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fexupl.w))]
+ #[unstable(feature = "stdarch_mips", issue = "111198")] pub unsafe fn __msa_fexupl_w(a: f16x8) -> v4f32 {
+    msa_fexupl_w(a)
+}*/
+
+/// Vector Floating-Point Up-Convert Interchange Format Left
+///
+/// The left half floating-point elements in vector `a` (four 32-bit floating point numbers)
+/// are up-converted to a larger interchange format,
+/// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit.
+/// The result is written to vector (two 64-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fexupl.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_fexupl_d(a: v4f32) -> v2f64 {
+    msa_fexupl_d(a)
+}
+
+/* FIXME: 16-bit float
+/// Vector Floating-Point Up-Convert Interchange Format Right
+///
+/// The right half floating-point elements in vector `a` (two 16-bit floating point numbers)
+/// are up-converted to a larger interchange format,
+/// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit.
+/// The result is written to vector (four 32-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fexupr.w))]
+ #[unstable(feature = "stdarch_mips", issue = "111198")] pub unsafe fn __msa_fexupr_w(a: f16x8) -> v4f32 {
+    msa_fexupr_w(a)
+} */
+
+/// Vector Floating-Point Up-Convert Interchange Format Right
+///
+/// The right half floating-point elements in vector `a` (four 32-bit floating point numbers)
+/// are up-converted to a larger interchange format,
+/// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit.
+/// The result is written to vector (two 64-bit floating point numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fexupr.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fexupr_d(a: v4f32) -> v2f64 { + msa_fexupr_d(a) +} + +/// Vector Floating-Point Round and Convert from Signed Integer +/// +/// The signed integer elements in vector `a` (four signed 32-bit integer numbers) +/// are converted to floating-point values. +/// The result is written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ffint_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ffint_s_w(a: v4i32) -> v4f32 { + msa_ffint_s_w(a) +} + +/// Vector Floating-Point Round and Convert from Signed Integer +/// +/// The signed integer elements in vector `a` (two signed 64-bit integer numbers) +/// are converted to floating-point values. +/// The result is written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ffint_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ffint_s_d(a: v2i64) -> v2f64 { + msa_ffint_s_d(a) +} + +/// Vector Floating-Point Round and Convert from Unsigned Integer +/// +/// The unsigned integer elements in vector `a` (four unsigned 32-bit integer numbers) +/// are converted to floating-point values. +/// The result is written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ffint_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ffint_u_w(a: v4u32) -> v4f32 { + msa_ffint_u_w(a) +} + +/// Vector Floating-Point Round and Convert from Unsigned Integer +/// +/// The unsigned integer elements in vector `a` (two unsigned 64-bit integer numbers) +/// are converted to floating-point values. 
+/// The result is written to vector (two 64-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(ffint_u.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_ffint_u_d(a: v2u64) -> v2f64 {
+    msa_ffint_u_d(a)
+}
+
+/// Vector Floating-Point Convert from Fixed-Point Left
+///
+/// The left half fixed-point elements in vector `a` (eight signed 16-bit integer numbers)
+/// are up-converted to floating-point data format.
+/// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point.
+/// The result is written to vector (four 32-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(ffql.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_ffql_w(a: v8i16) -> v4f32 {
+    msa_ffql_w(a)
+}
+
+/// Vector Floating-Point Convert from Fixed-Point Left
+///
+/// The left half fixed-point elements in vector `a` (four signed 32-bit integer numbers)
+/// are up-converted to floating-point data format.
+/// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point.
+/// The result is written to vector (two 64-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(ffql.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_ffql_d(a: v4i32) -> v2f64 {
+    msa_ffql_d(a)
+}
+
+/// Vector Floating-Point Convert from Fixed-Point Right
+///
+/// The right half fixed-point elements in vector `a` (eight signed 16-bit integer numbers)
+/// are up-converted to floating-point data format.
+/// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point.
+/// The result is written to vector (four 32-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(ffqr.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_ffqr_w(a: v8i16) -> v4f32 {
+    msa_ffqr_w(a)
+}
+
+/// Vector Floating-Point Convert from Fixed-Point Right
+///
+/// The right half fixed-point elements in vector `a` (four signed 32-bit integer numbers)
+/// are up-converted to floating-point data format.
+/// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point.
+/// The result is written to vector (two 64-bit floating point numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(ffqr.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_ffqr_d(a: v4i32) -> v2f64 {
+    msa_ffqr_d(a)
+}
+
+/// Vector Fill from GPR
+///
+/// Replicate GPR rs value to all elements in vector (sixteen signed 8-bit integer numbers).
+/// If the source GPR is wider than the destination data format, the destination's elements
+/// will be set to the least significant bits of the GPR.
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fill.b))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_fill_b(a: i32) -> v16i8 {
+    msa_fill_b(a)
+}
+
+/// Vector Fill from GPR
+///
+/// Replicate GPR rs value to all elements in vector (eight signed 16-bit integer numbers).
+/// If the source GPR is wider than the destination data format, the destination's elements
+/// will be set to the least significant bits of the GPR.
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(fill.h))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_fill_h(a: i32) -> v8i16 {
+    msa_fill_h(a)
+}
+
+/// Vector Fill from GPR
+///
+/// Replicate GPR rs value to all elements in vector (four signed 32-bit integer numbers).
+/// If the source GPR is wider than the destination data format, the destination's elements +/// will be set to the least significant bits of the GPR. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fill.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fill_w(a: i32) -> v4i32 { + msa_fill_w(a) +} + +/// Vector Fill from GPR +/// +/// Replicate GPR rs value to all elements in vector (two signed 64-bit integer numbers). +/// If the source GPR is wider than the destination data format, the destination's elements +/// will be set to the least significant bits of the GPR. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fill.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fill_d(a: i64) -> v2i64 { + msa_fill_d(a) +} + +/// Vector Floating-Point Base 2 Logarithm +/// +/// The signed integral base 2 exponents of floating-point elements in vector `a` +/// (four 32-bit floating point numbers) are written as floating-point values to vector elements +/// (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(flog2.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_flog2_w(a: v4f32) -> v4f32 { + msa_flog2_w(a) +} + +/// Vector Floating-Point Base 2 Logarithm +/// +/// The signed integral base 2 exponents of floating-point elements in vector `a` +/// (two 64-bit floating point numbers) are written as floating-point values to vector elements +/// (two 64-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(flog2.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_flog2_d(a: v2f64) -> v2f64 { + msa_flog2_d(a) +} + +/// Vector Floating-Point Multiply-Add +/// +/// The floating-point elements in vector `b` (four 32-bit floating point numbers) +/// multiplied by floating-point elements in vector `c` (four 32-bit floating point numbers) +/// are added to the floating-point elements in vector `a` (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmadd.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmadd_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { + msa_fmadd_w(a, mem::transmute(b), c) +} + +/// Vector Floating-Point Multiply-Add +/// +/// The floating-point elements in vector `b` (two 64-bit floating point numbers) +/// multiplied by floating-point elements in vector `c` (two 64-bit floating point numbers) +/// are added to the floating-point elements in vector `a` (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmadd.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { + msa_fmadd_d(a, mem::transmute(b), c) +} + +/// Vector Floating-Point Maximum +/// +/// The largest values between corresponding floating-point elements in vector `a` +/// (four 32-bit floating point numbers) and vector `b` (four 32-bit floating point numbers) +/// are written to vector (four 32-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmax.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmax_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fmax_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Maximum +/// +/// The largest values between corresponding floating-point elements in vector `a` +/// (two 64-bit floating point numbers) and vector `b` (two 64-bit floating point numbers) +/// are written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmax.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmax_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fmax_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Maximum Based on Absolute Values +/// +/// The value with the largest magnitude, i.e. absolute value, between corresponding +/// floating-point elements in vector `a` (four 32-bit floating point numbers) +/// and vector `b` (four 32-bit floating point numbers) +/// are written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmax_a.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmax_a_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fmax_a_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Maximum Based on Absolute Values +/// +/// The value with the largest magnitude, i.e. absolute value, between corresponding +/// floating-point elements in vector `a` (two 64-bit floating point numbers) +/// and vector `b` (two 64-bit floating point numbers) +/// are written to vector (two 64-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmax_a.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmax_a_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fmax_a_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Minimum +/// +/// The smallest values between corresponding floating-point elements in vector `a` +/// (four 32-bit floating point numbers) and vector `b` (four 32-bit floating point numbers) +/// are written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmin.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmin_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fmin_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Minimum +/// +/// The smallest values between corresponding floating-point elements in vector `a` +/// (two 64-bit floating point numbers) and vector `b` (two 64-bit floating point numbers) +/// are written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmin.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmin_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fmin_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Minimum Based on Absolute Values +/// +/// The value with the smallest magnitude, i.e. absolute value, between corresponding +/// floating-point elements in vector `a` (four 32-bit floating point numbers) +/// and vector `b` (four 32-bit floating point numbers) +/// are written to vector (four 32-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmin_a.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmin_a_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fmin_a_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Minimum Based on Absolute Values +/// +/// The value with the smallest magnitude, i.e. absolute value, between corresponding +/// floating-point elements in vector `a` (two 64-bit floating point numbers) +/// and vector `b` (two 64-bit floating point numbers) +/// are written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmin_a.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmin_a_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fmin_a_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Multiply-Sub +/// +/// The floating-point elements in vector `b` (four 32-bit floating point numbers) +/// multiplied by floating-point elements in vector `c` (four 32-bit floating point numbers) +/// are subtracted from the floating-point elements in vector `a` (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmsub.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmsub_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { + msa_fmsub_w(a, mem::transmute(b), c) +} + +/// Vector Floating-Point Multiply-Sub +/// +/// The floating-point elements in vector `b` (two 64-bit floating point numbers) +/// multiplied by floating-point elements in vector `c` (two 64-bit floating point numbers) +/// are subtracted from the floating-point elements in vector `a` (two 64-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmsub.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { + msa_fmsub_d(a, mem::transmute(b), c) +} + +/// Vector Floating-Point Multiplication +/// +/// The floating-point elements in vector `a` (four 32-bit floating point numbers) are +/// multiplied by floating-point elements in vector `b` (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmul.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmul_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fmul_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Multiplication +/// +/// The floating-point elements in vector `a` (two 64-bit floating point numbers) are +/// multiplied by floating-point elements in vector `b` (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fmul.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fmul_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fmul_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Round to Integer +/// +/// The floating-point elements in vector `a` (four 32-bit floating point numbers) +/// are rounded to an integral valued floating-point number in the same format based +/// on the rounding mode bits RM in MSA Control and Status Register MSACSR. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(frint.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_frint_w(a: v4f32) -> v4f32 { + msa_frint_w(a) +} + +/// Vector Floating-Point Round to Integer +/// +/// The floating-point elements in vector `a` (two 64-bit floating point numbers) +/// are rounded to an integral valued floating-point number in the same format based +/// on the rounding mode bits RM in MSA Control and Status Register MSACSR. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(frint.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_frint_d(a: v2f64) -> v2f64 { + msa_frint_d(a) +} + +/// Vector Approximate Floating-Point Reciprocal +/// +/// The reciprocals of floating-point elements in vector `a` (four 32-bit floating point numbers) +/// are calculated and the result is written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(frcp.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_frcp_w(a: v4f32) -> v4f32 { + msa_frcp_w(a) +} + +/// Vector Approximate Floating-Point Reciprocal +/// +/// The reciprocals of floating-point elements in vector `a` (two 64-bit floating point numbers) +/// are calculated and the result is written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(frcp.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_frcp_d(a: v2f64) -> v2f64 { + msa_frcp_d(a) +} + +/// Vector Approximate Floating-Point Reciprocal of Square Root +/// +/// The reciprocals of the square roots of floating-point elements in vector `a` (four 32-bit floating point numbers) +/// are calculated and the result is written to vector (four 32-bit floating point numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(frsqrt.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_frsqrt_w(a: v4f32) -> v4f32 { + msa_frsqrt_w(a) +} + +/// Vector Approximate Floating-Point Reciprocal of Square Root +/// +/// The reciprocals of the square roots of floating-point elements in vector `a` (two 64-bit floating point numbers) +/// are calculated and the result is written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(frsqrt.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_frsqrt_d(a: v2f64) -> v2f64 { + msa_frsqrt_d(a) +} + +/// Vector Floating-Point Signaling Compare Always False +/// +/// Set all bits to 0 in vector (four signed 32-bit integer numbers) elements. +/// Signaling and quiet NaN elements in vector `a` (four 32-bit floating point numbers) +/// or `b` (four 32-bit floating point numbers) signal Invalid Operation exception. +/// In case of a floating-point exception, the default result has all bits set to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsaf.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsaf_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsaf_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Always False +/// +/// Set all bits to 0 in vector (two signed 64-bit integer numbers) elements. +/// Signaling and quiet NaN elements in vector `a` (two 64-bit floating point numbers) +/// or `b` (two 64-bit floating point numbers) signal Invalid Operation exception. +/// In case of a floating-point exception, the default result has all bits set to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsaf.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsaf_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsaf_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) +/// and `b` (four 32-bit floating point numbers) elements are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fseq.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fseq_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fseq_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) +/// and `b` (two 64-bit floating point numbers) elements are equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fseq.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fseq_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fseq_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Less or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) elements +/// are less than or equal to `b` (four 32-bit floating point numbers) elements, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsle.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsle_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsle_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Less or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) elements +/// are less than or equal to `b` (two 64-bit floating point numbers) elements, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsle.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsle_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsle_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) elements +/// are less than `b` (four 32-bit floating point numbers) elements, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fslt.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fslt_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fslt_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) elements +/// are less than `b` (two 64-bit floating point numbers) elements, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fslt.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fslt_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fslt_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Not Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are not equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsne.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsne_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsne_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Not Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are not equal, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsne.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsne_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsne_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Ordered +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are ordered, +/// i.e. both elements are not NaN values, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsor.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsor_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsor_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Ordered +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are ordered, +/// i.e. both elements are not NaN values, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsor.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsor_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsor_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Square Root +/// +/// The square roots of floating-point elements in vector `a` +/// (four 32-bit floating point numbers) are written to vector +/// (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsqrt.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsqrt_w(a: v4f32) -> v4f32 { + msa_fsqrt_w(a) +} + +/// Vector Floating-Point Square Root +/// +/// The square roots of floating-point elements in vector `a` +/// (two 64-bit floating point numbers) are written to vector +/// (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsqrt.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsqrt_d(a: v2f64) -> v2f64 { + msa_fsqrt_d(a) +} + +/// Vector Floating-Point Subtraction +/// +/// The floating-point elements in vector `b` (four 32-bit floating point numbers) +/// are subtracted from the floating-point elements in vector `a` +/// (four 32-bit floating point numbers).
+/// The result is written to vector (four 32-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsub.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsub_w(a: v4f32, b: v4f32) -> v4f32 { + msa_fsub_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Subtraction +/// +/// The floating-point elements in vector `b` (two 64-bit floating point numbers) +/// are subtracted from the floating-point elements in vector `a` +/// (two 64-bit floating point numbers). +/// The result is written to vector (two 64-bit floating point numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsub.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsub_d(a: v2f64, b: v2f64) -> v2f64 { + msa_fsub_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are unordered or equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsueq.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsueq_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsueq_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are unordered or equal, +/// otherwise set all bits to 0.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsueq.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsueq_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsueq_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Less or Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) elements are +/// unordered or less than or equal to `b` (four 32-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsule.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsule_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsule_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Less or Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) elements are +/// unordered or less than or equal to `b` (two 64-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsule.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsule_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsule_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Less Than +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) elements +/// are unordered or less than `b` (four 32-bit floating point numbers) elements, +/// otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsult.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsult_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsult_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Less Than +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) elements +/// are unordered or less than `b` (two 64-bit floating point numbers) elements, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsult.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsult_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsult_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are unordered, +/// i.e. at least one element is a NaN value, otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsun.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsun_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsun_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are unordered, +/// i.e. at least one element is a NaN value, otherwise set all bits to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsun.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsun_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsun_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Not Equal +/// +/// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements +/// if the corresponding `a` (four 32-bit floating point numbers) and +/// `b` (four 32-bit floating point numbers) elements are unordered or not equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsune.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsune_w(a: v4f32, b: v4f32) -> v4i32 { + msa_fsune_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Signaling Compare Unordered or Not Equal +/// +/// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements +/// if the corresponding `a` (two 64-bit floating point numbers) and +/// `b` (two 64-bit floating point numbers) elements are unordered or not equal, +/// otherwise set all bits to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(fsune.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_fsune_d(a: v2f64, b: v2f64) -> v2i64 { + msa_fsune_d(a, mem::transmute(b)) +} + +/// Vector Floating-Point Convert to Signed Integer +/// +/// The elements in vector `a` (four 32-bit floating point numbers) +/// are rounded and converted to signed integer values based on the +/// rounding mode bits RM in MSA Control and Status Register MSACSR. +/// The result is written to vector (four signed 32-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftint_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftint_s_w(a: v4f32) -> v4i32 { + msa_ftint_s_w(a) +} + +/// Vector Floating-Point Convert to Signed Integer +/// +/// The elements in vector `a` (two 64-bit floating point numbers) +/// are rounded and converted to signed integer values based on the +/// rounding mode bits RM in MSA Control and Status Register MSACSR. +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftint_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftint_s_d(a: v2f64) -> v2i64 { + msa_ftint_s_d(a) +} + +/// Vector Floating-Point Convert to Unsigned Integer +/// +/// The elements in vector `a` (four 32-bit floating point numbers) +/// are rounded and converted to unsigned integer values based on the +/// rounding mode bits RM in MSA Control and Status Register MSACSR. +/// The result is written to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftint_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftint_u_w(a: v4f32) -> v4u32 { + msa_ftint_u_w(a) +} + +/// Vector Floating-Point Convert to Unsigned Integer +/// +/// The elements in vector `a` (two 64-bit floating point numbers) +/// are rounded and converted to unsigned integer values based on the +/// rounding mode bits RM in MSA Control and Status Register MSACSR. +/// The result is written to vector (two unsigned 64-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftint_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftint_u_d(a: v2f64) -> v2u64 { + msa_ftint_u_d(a) +} + +/// Vector Floating-Point Convert to Fixed-Point +/// +/// The elements in vector `a` (four 32-bit floating point numbers) +/// and `b` (four 32-bit floating point numbers) are down-converted to a fixed-point +/// representation, i.e. from 64-bit floating-point to 32-bit Q31 fixed-point +/// representation, or from 32-bit floating-point to 16-bit Q15 fixed-point representation. +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftq.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftq_h(a: v4f32, b: v4f32) -> v8i16 { + msa_ftq_h(a, mem::transmute(b)) +} + +/// Vector Floating-Point Convert to Fixed-Point +/// +/// The elements in vector `a` (two 64-bit floating point numbers) +/// and `b` (two 64-bit floating point numbers) are down-converted to a fixed-point +/// representation, i.e. from 64-bit floating-point to 32-bit Q31 fixed-point +/// representation, or from 32-bit floating-point to 16-bit Q15 fixed-point representation. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftq.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftq_w(a: v2f64, b: v2f64) -> v4i32 { + msa_ftq_w(a, mem::transmute(b)) +} + +/// Vector Floating-Point Truncate and Convert to Signed Integer +/// +/// The elements in vector `a` (four 32-bit floating point numbers) +/// are truncated, i.e. rounded toward zero, to signed integer values. +/// The result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftrunc_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftrunc_s_w(a: v4f32) -> v4i32 { + msa_ftrunc_s_w(a) +} + +/// Vector Floating-Point Truncate and Convert to Signed Integer +/// +/// The elements in vector `a` (two 64-bit floating point numbers) +/// are truncated, i.e. rounded toward zero, to signed integer values. +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftrunc_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftrunc_s_d(a: v2f64) -> v2i64 { + msa_ftrunc_s_d(a) +} + +/// Vector Floating-Point Truncate and Convert to Unsigned Integer +/// +/// The elements in vector `a` (four 32-bit floating point numbers) +/// are truncated, i.e. rounded toward zero, to unsigned integer values. +/// The result is written to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftrunc_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftrunc_u_w(a: v4f32) -> v4u32 { + msa_ftrunc_u_w(a) +} + +/// Vector Floating-Point Truncate and Convert to Unsigned Integer +/// +/// The elements in vector `a` (two 64-bit floating point numbers) +/// are truncated, i.e. rounded toward zero, to unsigned integer values. +/// The result is written to vector (two unsigned 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ftrunc_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ftrunc_u_d(a: v2f64) -> v2u64 { + msa_ftrunc_u_d(a) +} + +/// Vector Signed Horizontal Add +/// +/// The sign-extended odd elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are added to the sign-extended even elements in vector `b` (sixteen signed 8-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hadd_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hadd_s_h(a: v16i8, b: v16i8) -> v8i16 { + msa_hadd_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Horizontal Add +/// +/// The sign-extended odd elements in vector `a` (eight signed 16-bit integer numbers) +/// are added to the sign-extended even elements in vector `b` (eight signed 16-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hadd_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hadd_s_w(a: v8i16, b: v8i16) -> v4i32 { + msa_hadd_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Horizontal Add +/// +/// The sign-extended odd elements in vector `a` (four signed 32-bit integer numbers) +/// are added to the sign-extended even elements in vector `b` (four signed 32-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hadd_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hadd_s_d(a: v4i32, b: v4i32) -> v2i64 { + msa_hadd_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Horizontal Add +/// +/// The zero-extended odd elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are added to the zero-extended even elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hadd_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hadd_u_h(a: v16u8, b: v16u8) -> v8u16 { + msa_hadd_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Horizontal Add +/// +/// The zero-extended odd elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are added to the zero-extended even elements in vector `b` (eight unsigned 16-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hadd_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hadd_u_w(a: v8u16, b: v8u16) -> v4u32 { + msa_hadd_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Horizontal Add +/// +/// The zero-extended odd elements in vector `a` (four unsigned 32-bit integer numbers) +/// are added to the zero-extended even elements in vector `b` (four unsigned 32-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (two unsigned 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hadd_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hadd_u_d(a: v4u32, b: v4u32) -> v2u64 { + msa_hadd_u_d(a, mem::transmute(b)) +} + +/// Vector Signed Horizontal Subtract +/// +/// The sign-extended even elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are subtracted from the sign-extended odd elements in vector `a` (sixteen signed 8-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hsub_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hsub_s_h(a: v16i8, b: v16i8) -> v8i16 { + msa_hsub_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Horizontal Subtract +/// +/// The sign-extended even elements in vector `b` (eight signed 16-bit integer numbers) +/// are subtracted from the sign-extended odd elements in vector `a` (eight signed 16-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hsub_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hsub_s_w(a: v8i16, b: v8i16) -> v4i32 { + msa_hsub_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Horizontal Subtract +/// +/// The sign-extended even elements in vector `b` (four signed 32-bit integer numbers) +/// are subtracted from the sign-extended odd elements in vector `a` (four signed 32-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (two signed 64-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hsub_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hsub_s_d(a: v4i32, b: v4i32) -> v2i64 { + msa_hsub_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Horizontal Subtract +/// +/// The zero-extended even elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// are subtracted from the zero-extended odd elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hsub_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hsub_u_h(a: v16u8, b: v16u8) -> v8i16 { + msa_hsub_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Horizontal Subtract +/// +/// The zero-extended even elements in vector `b` (eight unsigned 16-bit integer numbers) +/// are subtracted from the zero-extended odd elements in vector `a` (eight unsigned 16-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hsub_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hsub_u_w(a: v8u16, b: v8u16) -> v4i32 { + msa_hsub_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Horizontal Subtract +/// +/// The zero-extended even elements in vector `b` (four unsigned 32-bit integer numbers) +/// are subtracted from the zero-extended odd elements in vector `a` (four unsigned 32-bit integer numbers) +/// producing a result twice the size of the input operands. +/// The result is written to vector (two signed 64-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(hsub_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_hsub_u_d(a: v4u32, b: v4u32) -> v2i64 { + msa_hsub_u_d(a, mem::transmute(b)) +} + +/// Vector Interleave Even +/// +/// Even elements in vectors `a` (sixteen signed 8-bit integer numbers) +/// and vector `b` (sixteen signed 8-bit integer numbers) are copied to the result +/// (sixteen signed 8-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvev.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvev_b(a: v16i8, b: v16i8) -> v16i8 { + msa_ilvev_b(a, mem::transmute(b)) +} + +/// Vector Interleave Even +/// +/// Even elements in vectors `a` (eight signed 16-bit integer numbers) +/// and vector `b` (eight signed 16-bit integer numbers) are copied to the result +/// (eight signed 16-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvev.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvev_h(a: v8i16, b: v8i16) -> v8i16 { + msa_ilvev_h(a, mem::transmute(b)) +} + +/// Vector Interleave Even +/// +/// Even elements in vectors `a` (four signed 32-bit integer numbers) +/// and vector `b` (four signed 32-bit integer numbers) are copied to the result +/// (four signed 32-bit integer numbers) +/// alternating one element from `a` with one element from `b`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvev.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvev_w(a: v4i32, b: v4i32) -> v4i32 { + msa_ilvev_w(a, mem::transmute(b)) +} + +/// Vector Interleave Even +/// +/// Even elements in vectors `a` (two signed 64-bit integer numbers) +/// and vector `b` (two signed 64-bit integer numbers) are copied to the result +/// (two signed 64-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvev.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvev_d(a: v2i64, b: v2i64) -> v2i64 { + msa_ilvev_d(a, mem::transmute(b)) +} + +/// Vector Interleave Left +/// +/// The left half elements in vectors `a` (sixteen signed 8-bit integer numbers) +/// and vector `b` (sixteen signed 8-bit integer numbers) are copied to the result +/// (sixteen signed 8-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvl.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvl_b(a: v16i8, b: v16i8) -> v16i8 { + msa_ilvl_b(a, mem::transmute(b)) +} + +/// Vector Interleave Left +/// +/// The left half elements in vectors `a` (eight signed 16-bit integer numbers) +/// and vector `b` (eight signed 16-bit integer numbers) are copied to the result +/// (eight signed 16-bit integer numbers) +/// alternating one element from `a` with one element from `b`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvl.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvl_h(a: v8i16, b: v8i16) -> v8i16 { + msa_ilvl_h(a, mem::transmute(b)) +} + +/// Vector Interleave Left +/// +/// The left half elements in vectors `a` (four signed 32-bit integer numbers) +/// and vector `b` (four signed 32-bit integer numbers) are copied to the result +/// (four signed 32-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvl.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvl_w(a: v4i32, b: v4i32) -> v4i32 { + msa_ilvl_w(a, mem::transmute(b)) +} + +/// Vector Interleave Left +/// +/// The left half elements in vectors `a` (two signed 64-bit integer numbers) +/// and vector `b` (two signed 64-bit integer numbers) are copied to the result +/// (two signed 64-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvl.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvl_d(a: v2i64, b: v2i64) -> v2i64 { + msa_ilvl_d(a, mem::transmute(b)) +} + +/// Vector Interleave Odd +/// +/// Odd elements in vectors `a` (sixteen signed 8-bit integer numbers) +/// and vector `b` (sixteen signed 8-bit integer numbers) are copied to the result +/// (sixteen signed 8-bit integer numbers) +/// alternating one element from `a` with one element from `b`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvod.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvod_b(a: v16i8, b: v16i8) -> v16i8 { + msa_ilvod_b(a, mem::transmute(b)) +} + +/// Vector Interleave Odd +/// +/// Odd elements in vectors `a` (eight signed 16-bit integer numbers) +/// and vector `b` (eight signed 16-bit integer numbers) are copied to the result +/// (eight signed 16-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvod.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvod_h(a: v8i16, b: v8i16) -> v8i16 { + msa_ilvod_h(a, mem::transmute(b)) +} + +/// Vector Interleave Odd +/// +/// Odd elements in vectors `a` (four signed 32-bit integer numbers) +/// and vector `b` (four signed 32-bit integer numbers) are copied to the result +/// (four signed 32-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvod.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvod_w(a: v4i32, b: v4i32) -> v4i32 { + msa_ilvod_w(a, mem::transmute(b)) +} + +/// Vector Interleave Odd +/// +/// Odd elements in vectors `a` (two signed 64-bit integer numbers) +/// and vector `b` (two signed 64-bit integer numbers) are copied to the result +/// (two signed 64-bit integer numbers) +/// alternating one element from `a` with one element from `b`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvod.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvod_d(a: v2i64, b: v2i64) -> v2i64 { + msa_ilvod_d(a, mem::transmute(b)) +} + +/// Vector Interleave Right +/// +/// The right half elements in vectors `a` (sixteen signed 8-bit integer numbers) +/// and vector `b` (sixteen signed 8-bit integer numbers) are copied to the result +/// (sixteen signed 8-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvr.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvr_b(a: v16i8, b: v16i8) -> v16i8 { + msa_ilvr_b(a, mem::transmute(b)) +} + +/// Vector Interleave Right +/// +/// The right half elements in vectors `a` (eight signed 16-bit integer numbers) +/// and vector `b` (eight signed 16-bit integer numbers) are copied to the result +/// (eight signed 16-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvr.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvr_h(a: v8i16, b: v8i16) -> v8i16 { + msa_ilvr_h(a, mem::transmute(b)) +} + +/// Vector Interleave Right +/// +/// The right half elements in vectors `a` (four signed 32-bit integer numbers) +/// and vector `b` (four signed 32-bit integer numbers) are copied to the result +/// (four signed 32-bit integer numbers) +/// alternating one element from `a` with one element from `b`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvr.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvr_w(a: v4i32, b: v4i32) -> v4i32 { + msa_ilvr_w(a, mem::transmute(b)) +} + +/// Vector Interleave Right +/// +/// The right half elements in vectors `a` (two signed 64-bit integer numbers) +/// and vector `b` (two signed 64-bit integer numbers) are copied to the result +/// (two signed 64-bit integer numbers) +/// alternating one element from `a` with one element from `b`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ilvr.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ilvr_d(a: v2i64, b: v2i64) -> v2i64 { + msa_ilvr_d(a, mem::transmute(b)) +} + +/// GPR Insert Element +/// +/// Set element `imm4` in vector `a` (sixteen signed 8-bit integer numbers) to GPR `c` value. +/// All other elements in vector `a` are unchanged. If the source GPR is wider than the +/// destination data format, the destination's elements will be set to the least significant bits of the GPR. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insert.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insert_b(a: v16i8, c: i32) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + msa_insert_b(a, IMM4, c) +} + +/// GPR Insert Element +/// +/// Set element `imm3` in vector `a` (eight signed 16-bit integer numbers) to GPR `c` value. +/// All other elements in vector `a` are unchanged. If the source GPR is wider than the +/// destination data format, the destination's elements will be set to the least significant bits of the GPR. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insert.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insert_h(a: v8i16, c: i32) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + msa_insert_h(a, IMM3, c) +} + +/// GPR Insert Element +/// +/// Set element `imm2` in vector `a` (four signed 32-bit integer numbers) to GPR `c` value. +/// All other elements in vector `a` are unchanged. If the source GPR is wider than the +/// destination data format, the destination's elements will be set to the least significant bits of the GPR. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insert.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insert_w(a: v4i32, c: i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_insert_w(a, IMM2, c) +} + +/// GPR Insert Element +/// +/// Set element `imm1` in vector `a` (two signed 64-bit integer numbers) to GPR `c` value. +/// All other elements in vector `a` are unchanged. If the source GPR is wider than the +/// destination data format, the destination's elements will be set to the least significant bits of the GPR. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insert.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insert_d(a: v2i64, c: i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_insert_d(a, IMM1, c) +} + +/// Element Insert Element +/// +/// Set element `imm4` in the result vector `a` (sixteen signed 8-bit integer numbers) to element 0 +/// in vector `c` (sixteen signed 8-bit integer numbers) value. +/// All other elements in vector `a` are unchanged.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insve.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insve_b(a: v16i8, c: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + msa_insve_b(a, IMM4, c) +} + +/// Element Insert Element +/// +/// Set element `imm3` in the result vector `a` (eight signed 16-bit integer numbers) to element 0 +/// in vector `c` (eight signed 16-bit integer numbers) value. +/// All other elements in vector `a` are unchanged. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insve.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insve_h(a: v8i16, c: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + msa_insve_h(a, IMM3, c) +} + +/// Element Insert Element +/// +/// Set element `imm2` in the result vector `a` (four signed 32-bit integer numbers) to element 0 +/// in vector `c` (four signed 32-bit integer numbers) value. +/// All other elements in vector `a` are unchanged. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insve.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insve_w(a: v4i32, c: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_insve_w(a, IMM2, c) +} + +/// Element Insert Element +/// +/// Set element `imm1` in the result vector `a` (two signed 64-bit integer numbers) to element 0 +/// in vector `c` (two signed 64-bit integer numbers) value. +/// All other elements in vector `a` are unchanged.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(insve.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_insve_d(a: v2i64, c: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_insve_d(a, IMM1, c) +} + +/// Vector Load +/// +/// The WRLEN / 8 bytes at the effective memory location addressed by the base +/// `mem_addr` and the 10-bit signed immediate offset `imm_s10` are fetched and placed in +/// the vector (sixteen signed 8-bit integer numbers) value. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ld.b, imm_s10 = 0b1111111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ld_b(mem_addr: *mut u8) -> v16i8 { + static_assert_simm_bits!(IMM_S10, 10); + msa_ld_b(mem_addr, IMM_S10) +} + +/// Vector Load +/// +/// The WRLEN / 8 bytes at the effective memory location addressed by the base +/// `mem_addr` and the 11-bit signed immediate offset `imm_s11` are fetched and placed in +/// the vector (eight signed 16-bit integer numbers) value. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ld.h, imm_s11 = 0b11111111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ld_h(mem_addr: *mut u8) -> v8i16 { + static_assert_simm_bits!(IMM_S11, 11); + static_assert!(IMM_S11 % 2 == 0); + msa_ld_h(mem_addr, IMM_S11) +} + +/// Vector Load +/// +/// The WRLEN / 8 bytes at the effective memory location addressed by the base +/// `mem_addr` and the 12-bit signed immediate offset `imm_s12` are fetched and placed in +/// the vector (four signed 32-bit integer numbers) value.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ld.w, imm_s12 = 0b111111111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ld_w(mem_addr: *mut u8) -> v4i32 { + static_assert_simm_bits!(IMM_S12, 12); + static_assert!(IMM_S12 % 4 == 0); + msa_ld_w(mem_addr, IMM_S12) +} + +/// Vector Load +/// +/// The WRLEN / 8 bytes at the effective memory location addressed by the base +/// `mem_addr` and the 13-bit signed immediate offset `imm_s13` are fetched and placed in +/// the vector (two signed 64-bit integer numbers) value. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ld.d, imm_s13 = 0b1111111111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ld_d(mem_addr: *mut u8) -> v2i64 { + static_assert_simm_bits!(IMM_S13, 13); + static_assert!(IMM_S13 % 8 == 0); + msa_ld_d(mem_addr, IMM_S13) +} + +/// Immediate Load +/// +/// The signed immediate imm_s10 is replicated in all vector +/// (sixteen signed 8-bit integer numbers) elements. For byte elements, +/// only the least significant 8 bits of imm_s10 will be used. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ldi.b, imm_s10 = 0b1111111111))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ldi_b() -> v16i8 { + static_assert_simm_bits!(IMM_S10, 10); + msa_ldi_b(IMM_S10) +} + +/// Immediate Load +/// +/// The signed immediate imm_s10 is replicated in all vector +/// (eight signed 16-bit integer numbers) elements. For byte elements, +/// only the least significant 8 bits of imm_s10 will be used.
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ldi.h, imm_s10 = 0b1111111111))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ldi_h() -> v8i16 { + static_assert_simm_bits!(IMM_S10, 10); + msa_ldi_h(IMM_S10) +} + +/// Immediate Load +/// +/// The signed immediate imm_s10 is replicated in all vector +/// (four signed 32-bit integer numbers) elements. For byte elements, +/// only the least significant 8 bits of imm_s10 will be used. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ldi.w, imm_s10 = 0b1111111111))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ldi_w() -> v4i32 { + static_assert_simm_bits!(IMM_S10, 10); + msa_ldi_w(IMM_S10) +} + +/// Immediate Load +/// +/// The signed immediate imm_s10 is replicated in all vector +/// (two signed 64-bit integer numbers) elements. For byte elements, +/// only the least significant 8 bits of imm_s10 will be used. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ldi.d, imm_s10 = 0b1111111111))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ldi_d() -> v2i64 { + static_assert_simm_bits!(IMM_S10, 10); + msa_ldi_d(IMM_S10) +} + +/// Vector Fixed-Point Multiply and Add +/// +/// The products of fixed-point elements in `b` (eight signed 16-bit integer numbers) +/// by fixed-point elements in vector `c` (eight signed 16-bit integer numbers) +/// are added to the fixed-point elements in vector `a` (eight signed 16-bit integer numbers). +/// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. +/// The saturated fixed-point results are stored to vector `a`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(madd_q.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_madd_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_madd_q_h(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply and Add +/// +/// The products of fixed-point elements in `b` (four signed 32-bit integer numbers) +/// by fixed-point elements in vector `c` (four signed 32-bit integer numbers) +/// are added to the fixed-point elements in vector `a` (four signed 32-bit integer numbers). +/// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. +/// The saturated fixed-point results are stored to vector `a`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(madd_q.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_madd_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_madd_q_w(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply and Add Rounded +/// +/// The products of fixed-point elements in `b` (eight signed 16-bit integer numbers) +/// by fixed-point elements in vector `c` (eight signed 16-bit integer numbers) +/// are added to the fixed-point elements in vector `a` (eight signed 16-bit integer numbers). +/// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. +/// The rounded and saturated fixed-point results are stored to vector `a`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maddr_q.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maddr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_maddr_q_h(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply and Add Rounded +/// +/// The products of fixed-point elements in `b` (four signed 32-bit integer numbers) +/// by fixed-point elements in vector `c` (four signed 32-bit integer numbers) +/// are added to the fixed-point elements in vector `a` (four signed 32-bit integer numbers). +/// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. +/// The rounded and saturated fixed-point results are stored to vector `a`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maddr_q.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maddr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_maddr_q_w(a, mem::transmute(b), c) +} + +/// Vector Multiply and Add +/// +/// The integer elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are multiplied by integer elements in vector `c` (sixteen signed 8-bit integer numbers) +/// and added to the integer elements in vector `a` (sixteen signed 8-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maddv.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maddv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + msa_maddv_b(a, mem::transmute(b), c) +} + +/// Vector Multiply and Add +/// +/// The integer elements in vector `b` (eight signed 16-bit integer numbers) +/// are multiplied by integer elements in vector `c` (eight signed 16-bit integer numbers) +/// and added to the integer elements in vector `a` (eight signed 16-bit integer numbers). 
+/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maddv.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maddv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_maddv_h(a, mem::transmute(b), c) +} + +/// Vector Multiply and Add +/// +/// The integer elements in vector `b` (four signed 32-bit integer numbers) +/// are multiplied by integer elements in vector `c` (four signed 32-bit integer numbers) +/// and added to the integer elements in vector `a` (four signed 32-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maddv.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maddv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_maddv_w(a, mem::transmute(b), c) +} + +/// Vector Multiply and Add +/// +/// The integer elements in vector `b` (two signed 64-bit integer numbers) +/// are multiplied by integer elements in vector `c` (two signed 64-bit integer numbers) +/// and added to the integer elements in vector `a` (two signed 64-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maddv.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maddv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + msa_maddv_d(a, mem::transmute(b), c) +} + +/// Vector Maximum Based on Absolute Values +/// +/// The value with the largest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (sixteen signed 8-bit integer numbers) and +/// `b` (sixteen signed 8-bit integer numbers) are written to vector +/// (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_a.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_a_b(a: v16i8, b: v16i8) -> v16i8 { + msa_max_a_b(a, mem::transmute(b)) +} + +/// Vector Maximum Based on Absolute Values +/// +/// The value with the largest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (eight signed 16-bit integer numbers) and +/// `b` (eight signed 16-bit integer numbers) are written to vector +/// (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_a.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_a_h(a: v8i16, b: v8i16) -> v8i16 { + msa_max_a_h(a, mem::transmute(b)) +} + +/// Vector Maximum Based on Absolute Values +/// +/// The value with the largest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (four signed 32-bit integer numbers) and +/// `b` (four signed 32-bit integer numbers) are written to vector +/// (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_a.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_a_w(a: v4i32, b: v4i32) -> v4i32 { + msa_max_a_w(a, mem::transmute(b)) +} + +/// Vector Maximum Based on Absolute Values +/// +/// The value with the largest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (two signed 64-bit integer numbers) and +/// `b` (two signed 64-bit integer numbers) are written to vector +/// (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_a.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_a_d(a: v2i64, b: v2i64) -> v2i64 { + msa_max_a_d(a, mem::transmute(b)) +} + +/// Vector Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (sixteen signed 8-bit integer numbers) +/// and signed elements in vector `b` (sixteen signed 8-bit integer numbers) are written to vector +/// (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_max_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (eight signed 16-bit integer numbers) +/// and signed elements in vector `b` (eight signed 16-bit integer numbers) are written to vector +/// (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_max_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (four signed 32-bit integer numbers) +/// and signed elements in vector `b` (four signed 32-bit integer numbers) are written to vector +/// (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_max_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (two signed 64-bit integer numbers) +/// and signed elements in vector `b` (two signed 64-bit integer numbers) are written to vector +/// (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_max_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// and unsigned elements in vector `b` (sixteen unsigned 8-bit integer numbers) are written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_max_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) +/// and unsigned elements in vector `b` (eight unsigned 16-bit integer numbers) are written to vector +/// (eight unsigned 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_max_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (four unsigned 32-bit integer numbers) +/// and unsigned elements in vector `b` (four unsigned 32-bit integer numbers) are written to vector +/// (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_max_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (two unsigned 64-bit integer numbers) +/// and unsigned elements in vector `b` (two unsigned 64-bit integer numbers) are written to vector +/// (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(max_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_max_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_max_u_d(a, mem::transmute(b)) +} + +/// Immediate Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (sixteen signed 8-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_s.b, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_s_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + msa_maxi_s_b(a, IMM_S5) +} + +/// Immediate Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (eight signed 16-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_s.h, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_s_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + msa_maxi_s_h(a, IMM_S5) +} + +/// Immediate Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (four signed 32-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_s.w, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_s_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + msa_maxi_s_w(a, IMM_S5) +} + +/// Immediate Signed Maximum +/// +/// Maximum values between signed elements in vector `a` (two signed 64-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (two signed 64-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_s.d, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_s_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + msa_maxi_s_d(a, IMM_S5) +} + +/// Immediate Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_u.b, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_u_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM5, 5); + msa_maxi_u_b(a, IMM5) +} + +/// Immediate Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_u.h, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_u_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + msa_maxi_u_h(a, IMM5) +} + +/// Immediate Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (four unsigned 32-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_u.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_u_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_maxi_u_w(a, IMM5) +} + +/// Immediate Unsigned Maximum +/// +/// Maximum values between unsigned elements in vector `a` (two unsigned 64-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(maxi_u.d, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_maxi_u_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM5, 5); + msa_maxi_u_d(a, IMM5) +} + +/// Vector Minimum Based on Absolute Value +/// +/// The value with the smallest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (sixteen signed 8-bit integer numbers) and +/// `b` (sixteen signed 8-bit integer numbers) are written to vector +/// (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_a.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_a_b(a: v16i8, b: v16i8) -> v16i8 { + msa_min_a_b(a, mem::transmute(b)) +} + +/// Vector Minimum Based on Absolute Value +/// +/// The value with the smallest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (eight signed 16-bit integer numbers) and +/// `b` (eight signed 16-bit integer numbers) are written to vector +/// (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_a.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_a_h(a: v8i16, b: v8i16) -> v8i16 { + msa_min_a_h(a, mem::transmute(b)) +} + +/// Vector Minimum Based on Absolute Value +/// +/// The value with the smallest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (four signed 32-bit integer numbers) and +/// `b` (four signed 32-bit integer numbers) are written to vector +/// (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_a.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_a_w(a: v4i32, b: v4i32) -> v4i32 { + msa_min_a_w(a, mem::transmute(b)) +} + +/// Vector Minimum Based on Absolute Value +/// +/// The value with the smallest magnitude, i.e. absolute value, between corresponding +/// signed elements in vector `a` (two signed 64-bit integer numbers) and +/// `b` (two signed 64-bit integer numbers) are written to vector +/// (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_a.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_a_d(a: v2i64, b: v2i64) -> v2i64 { + msa_min_a_d(a, mem::transmute(b)) +} + +/// Vector Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (sixteen signed 8-bit integer numbers) +/// and signed elements in vector `b` (sixteen signed 8-bit integer numbers) are written to vector +/// (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_min_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (eight signed 16-bit integer numbers) +/// and signed elements in vector `b` (eight signed 16-bit integer numbers) are written to vector +/// (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_min_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (four signed 32-bit integer numbers) +/// and signed elements in vector `b` (four signed 32-bit integer numbers) are written to vector +/// (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_min_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (two signed 64-bit integer numbers) +/// and signed elements in vector `b` (two signed 64-bit integer numbers) are written to vector +/// (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_min_s_d(a, mem::transmute(b)) +} + +/// Immediate Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (sixteen signed 8-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_s.b, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_s_b(a: v16i8) -> v16i8 { + static_assert_simm_bits!(IMM_S5, 5); + msa_mini_s_b(a, IMM_S5) +} + +/// Immediate Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (eight signed 16-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_s.h, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_s_h(a: v8i16) -> v8i16 { + static_assert_simm_bits!(IMM_S5, 5); + msa_mini_s_h(a, IMM_S5) +} + +/// Immediate Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (four signed 32-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_s.w, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_s_w(a: v4i32) -> v4i32 { + static_assert_simm_bits!(IMM_S5, 5); + msa_mini_s_w(a, IMM_S5) +} + +/// Immediate Signed Minimum +/// +/// Minimum values between signed elements in vector `a` (two signed 64-bit integer numbers) +/// and the 5-bit signed immediate imm_s5 are written to vector +/// (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_s.d, imm_s5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_s_d(a: v2i64) -> v2i64 { + static_assert_simm_bits!(IMM_S5, 5); + msa_mini_s_d(a, IMM_S5) +} + +/// Vector Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// and unsigned elements in vector `b` (sixteen unsigned 8-bit integer numbers) are written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_min_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) +/// and unsigned elements in vector `b` (eight unsigned 16-bit integer numbers) are written to vector +/// (eight unsigned 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_min_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (four unsigned 32-bit integer numbers) +/// and unsigned elements in vector `b` (four unsigned 32-bit integer numbers) are written to vector +/// (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_min_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (two unsigned 64-bit integer numbers) +/// and unsigned elements in vector `b` (two unsigned 64-bit integer numbers) are written to vector +/// (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(min_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_min_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_min_u_d(a, mem::transmute(b)) +} + +/// Immediate Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (sixteen unsigned 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_u.b, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_u_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM5, 5); + msa_mini_u_b(a, IMM5) +} + +/// Immediate Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_u.h, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_u_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM5, 5); + msa_mini_u_h(a, IMM5) +} + +/// Immediate Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (four unsigned 32-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_u.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_u_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_mini_u_w(a, IMM5) +} + +/// Immediate Unsigned Minimum +/// +/// Minimum values between unsigned elements in vector `a` (two unsigned 64-bit integer numbers) +/// and the 5-bit unsigned immediate `imm5` are written to vector +/// (two unsigned 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mini_u.d, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mini_u_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM5, 5); + msa_mini_u_d(a, IMM5) +} + +/// Vector Signed Modulo +/// +/// The signed integer elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are divided by signed integer elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (sixteen signed 8-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_mod_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Modulo +/// +/// The signed integer elements in vector `a` (eight signed 16-bit integer numbers) +/// are divided by signed integer elements in vector `b` (eight signed 16-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (eight signed 16-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_mod_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Modulo +/// +/// The signed integer elements in vector `a` (four signed 32-bit integer numbers) +/// are divided by signed integer elements in vector `b` (four signed 32-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (four signed 32-bit integer numbers). 
If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_mod_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Modulo +/// +/// The signed integer elements in vector `a` (two signed 64-bit integer numbers) +/// are divided by signed integer elements in vector `b` (two signed 64-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (two signed 64-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_mod_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Modulo +/// +/// The unsigned integer elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (sixteen unsigned 8-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (sixteen unsigned 8-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_mod_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Modulo +/// +/// The unsigned integer elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (eight unsigned 16-bit integer numbers). 
+/// The remainder of the same sign as the dividend is written to vector +/// (eight unsigned 16-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_mod_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Modulo +/// +/// The unsigned integer elements in vector `a` (four unsigned 32-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (four unsigned 32-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (four unsigned 32-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_mod_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Modulo +/// +/// The unsigned integer elements in vector `a` (two unsigned 64-bit integer numbers) +/// are divided by unsigned integer elements in vector `b` (two unsigned 64-bit integer numbers). +/// The remainder of the same sign as the dividend is written to vector +/// (two unsigned 64-bit integer numbers). If a divisor element vector `b` is zero, +/// the result value is UNPREDICTABLE. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mod_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mod_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_mod_u_d(a, mem::transmute(b)) +} + +/// Vector Move +/// +/// Copy all WRLEN bits in vector `a` (eight signed 16-bit integer numbers) +/// to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(move.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_move_v(a: v16i8) -> v16i8 { + msa_move_v(a) +} + +/// Vector Fixed-Point Multiply and Subtract +/// +/// The product of fixed-point elements in vector `c` (eight signed 16-bit integer numbers) +/// by fixed-point elements in vector `b` (eight signed 16-bit integer numbers) +/// are subtracted from the fixed-point elements in vector `a` +/// (eight signed 16-bit integer numbers). The multiplication result is not saturated, +/// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. +/// The saturated fixed-point results are stored back to vector `a`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msub_q.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msub_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_msub_q_h(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply and Subtract +/// +/// The product of fixed-point elements in vector `c` (four signed 32-bit integer numbers) +/// by fixed-point elements in vector `b` (four signed 32-bit integer numbers) +/// are subtracted from the fixed-point elements in vector `a` +/// (four signed 32-bit integer numbers). The multiplication result is not saturated, +/// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. +/// The saturated fixed-point results are stored back to vector `a`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msub_q.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msub_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_msub_q_w(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply and Subtract Rounded +/// +/// The product of fixed-point elements in vector `c` (eight signed 16-bit integer numbers) +/// by fixed-point elements in vector `b` (eight signed 16-bit integer numbers) +/// are subtracted from the fixed-point elements in vector `a` +/// (eight signed 16-bit integer numbers). The multiplication result is not saturated, +/// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. +/// The rounded and saturated fixed-point results are stored back to vector `a`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msubr_q.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msubr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_msubr_q_h(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply and Subtract Rounded +/// +/// The product of fixed-point elements in vector `c` (four signed 32-bit integer numbers) +/// by fixed-point elements in vector `b` (four signed 32-bit integer numbers) +/// are subtracted from the fixed-point elements in vector `a` +/// (four signed 32-bit integer numbers). The multiplication result is not saturated, +/// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. +/// The rounded and saturated fixed-point results are stored back to vector `a`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msubr_q.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msubr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_msubr_q_w(a, mem::transmute(b), c) +} + +/// Vector Multiply and Subtract +/// +/// The integer elements in vector `c` (sixteen signed 8-bit integer numbers) +/// are multiplied by integer elements in vector `b` (sixteen signed 8-bit integer numbers) +/// and subtracted from the integer elements in vector `a` (sixteen signed 8-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msubv.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msubv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + msa_msubv_b(a, mem::transmute(b), c) +} + +/// Vector Multiply and Subtract +/// +/// The integer elements in vector `c` (eight signed 16-bit integer numbers) +/// are multiplied by integer elements in vector `b` (eight signed 16-bit integer numbers) +/// and subtracted from the integer elements in vector `a` (eight signed 16-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msubv.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msubv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_msubv_h(a, mem::transmute(b), c) +} + +/// Vector Multiply and Subtract +/// +/// The integer elements in vector `c` (four signed 32-bit integer numbers) +/// are multiplied by integer elements in vector `b` (four signed 32-bit integer numbers) +/// and subtracted from the integer elements in vector `a` (four signed 32-bit integer numbers). +/// The most significant half of the multiplication result is discarded. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msubv.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msubv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_msubv_w(a, mem::transmute(b), c) +} + +/// Vector Multiply and Subtract +/// +/// The integer elements in vector `c` (two signed 64-bit integer numbers) +/// are multiplied by integer elements in vector `b` (two signed 64-bit integer numbers) +/// and subtracted from the integer elements in vector `a` (two signed 64-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(msubv.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_msubv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + msa_msubv_d(a, mem::transmute(b), c) +} + +/// Vector Fixed-Point Multiply +/// +/// The fixed-point elements in vector `a` (eight signed 16-bit integer numbers) +/// multiplied by fixed-point elements in vector `b` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mul_q.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mul_q_h(a: v8i16, b: v8i16) -> v8i16 { + msa_mul_q_h(a, mem::transmute(b)) +} + +/// Vector Fixed-Point Multiply +/// +/// The fixed-point elements in vector `a` (four signed 32-bit integer numbers) +/// multiplied by fixed-point elements in vector `b` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mul_q.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mul_q_w(a: v4i32, b: v4i32) -> v4i32 { + msa_mul_q_w(a, mem::transmute(b)) +} + +/// Vector Fixed-Point Multiply Rounded +/// +/// The fixed-point elements in vector `a` (eight signed 16-bit integer numbers) +/// multiplied by fixed-point elements in vector `b` (eight signed 16-bit integer numbers). +/// The rounded result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mulr_q.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mulr_q_h(a: v8i16, b: v8i16) -> v8i16 { + msa_mulr_q_h(a, mem::transmute(b)) +} + +/// Vector Fixed-Point Multiply Rounded +/// +/// The fixed-point elements in vector `a` (four signed 32-bit integer numbers) +/// multiplied by fixed-point elements in vector `b` (four signed 32-bit integer numbers). +/// The rounded result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mulr_q.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mulr_q_w(a: v4i32, b: v4i32) -> v4i32 { + msa_mulr_q_w(a, mem::transmute(b)) +} + +/// Vector Multiply +/// +/// The integer elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are multiplied by integer elements in vector `b` (sixteen signed 8-bit integer numbers). +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// The most significant half of the multiplication result is discarded. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mulv.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mulv_b(a: v16i8, b: v16i8) -> v16i8 { + msa_mulv_b(a, mem::transmute(b)) +} + +/// Vector Multiply +/// +/// The integer elements in vector `a` (eight signed 16-bit integer numbers) +/// are multiplied by integer elements in vector `b` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mulv.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mulv_h(a: v8i16, b: v8i16) -> v8i16 { + msa_mulv_h(a, mem::transmute(b)) +} + +/// Vector Multiply +/// +/// The integer elements in vector `a` (four signed 32-bit integer numbers) +/// are multiplied by integer elements in vector `b` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). +/// The most significant half of the multiplication result is discarded. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mulv.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mulv_w(a: v4i32, b: v4i32) -> v4i32 { + msa_mulv_w(a, mem::transmute(b)) +} + +/// Vector Multiply +/// +/// The integer elements in vector `a` (two signed 64-bit integer numbers) +/// are multiplied by integer elements in vector `b` (two signed 64-bit integer numbers). +/// The result is written to vector (two signed 64-bit integer numbers). +/// The most significant half of the multiplication result is discarded. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(mulv.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_mulv_d(a: v2i64, b: v2i64) -> v2i64 { + msa_mulv_d(a, mem::transmute(b)) +} + +/// Vector Leading Ones Count +/// +/// The number of leading ones for elements in vector `a` (sixteen signed 8-bit integer numbers) +/// is stored to the elements in vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nloc.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nloc_b(a: v16i8) -> v16i8 { + msa_nloc_b(a) +} + +/// Vector Leading Ones Count +/// +/// The number of leading ones for elements in vector `a` (eight signed 16-bit integer numbers) +/// is stored to the elements in vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nloc.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nloc_h(a: v8i16) -> v8i16 { + msa_nloc_h(a) +} + +/// Vector Leading Ones Count +/// +/// The number of leading ones for elements in vector `a` (four signed 32-bit integer numbers) +/// is stored to the elements in vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nloc.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nloc_w(a: v4i32) -> v4i32 { + msa_nloc_w(a) +} + +/// Vector Leading Ones Count +/// +/// The number of leading ones for elements in vector `a` (two signed 64-bit integer numbers) +/// is stored to the elements in vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nloc.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nloc_d(a: v2i64) -> v2i64 { + msa_nloc_d(a) +} + +/// Vector Leading Zeros Count +/// +/// The number of leading zeros for elements in vector `a` (sixteen signed 8-bit integer numbers) +/// is stored to the elements in vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nlzc.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nlzc_b(a: v16i8) -> v16i8 { + msa_nlzc_b(a) +} + +/// Vector Leading Zeros Count +/// +/// The number of leading zeros for elements in vector `a` (eight signed 16-bit integer numbers) +/// is stored to the elements in vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nlzc.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nlzc_h(a: v8i16) -> v8i16 { + msa_nlzc_h(a) +} + +/// Vector Leading Zeros Count +/// +/// The number of leading zeros for elements in vector `a` (four signed 32-bit integer numbers) +/// is stored to the elements in vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nlzc.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nlzc_w(a: v4i32) -> v4i32 { + msa_nlzc_w(a) +} + +/// Vector Leading Zeros Count +/// +/// The number of leading zeros for elements in vector `a` (two signed 64-bit integer numbers) +/// is stored to the elements in vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nlzc.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nlzc_d(a: v2i64) -> v2i64 { + msa_nlzc_d(a) +} + +/// Vector Logical Negated Or +/// +/// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the corresponding bit of vector `b` (sixteen unsigned 8-bit integer numbers) +/// in a bitwise logical NOR operation. The result is written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nor.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nor_v(a: v16u8, b: v16u8) -> v16u8 { + msa_nor_v(a, mem::transmute(b)) +} + +/// Immediate Logical Negated Or +/// +/// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the 8-bit immediate `imm8` +/// in a bitwise logical NOR operation. The result is written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(nori.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_nori_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_nori_b(a, IMM8) +} + +/// Vector Logical Or +/// +/// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the corresponding bit of vector `b` (sixteen unsigned 8-bit integer numbers) +/// in a bitwise logical OR operation. The result is written to vector +/// (sixteen unsigned 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(or.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_or_v(a: v16u8, b: v16u8) -> v16u8 { + msa_or_v(a, mem::transmute(b)) +} + +/// Immediate Logical Or +/// +/// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the 8-bit immediate `imm8` +/// in a bitwise logical OR operation. The result is written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(ori.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_ori_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_ori_b(a, IMM8) +} + +/// Vector Pack Even +/// +/// Even elements in vectors `a` (sixteen signed 8-bit integer numbers) +/// are copied to the left half of the result vector and even elements in vector `b` +/// (sixteen signed 8-bit integer numbers) are copied to the right half of the result vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckev.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckev_b(a: v16i8, b: v16i8) -> v16i8 { + msa_pckev_b(a, mem::transmute(b)) +} + +/// Vector Pack Even +/// +/// Even elements in vectors `a` (eight signed 16-bit integer numbers) +/// are copied to the left half of the result vector and even elements in vector `b` +/// (eight signed 16-bit integer numbers) are copied to the right half of the result vector. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckev.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckev_h(a: v8i16, b: v8i16) -> v8i16 { + msa_pckev_h(a, mem::transmute(b)) +} + +/// Vector Pack Even +/// +/// Even elements in vectors `a` (four signed 32-bit integer numbers) +/// are copied to the left half of the result vector and even elements in vector `b` +/// (four signed 32-bit integer numbers) are copied to the right half of the result vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckev.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckev_w(a: v4i32, b: v4i32) -> v4i32 { + msa_pckev_w(a, mem::transmute(b)) +} + +/// Vector Pack Even +/// +/// Even elements in vectors `a` (two signed 64-bit integer numbers) +/// are copied to the left half of the result vector and even elements in vector `b` +/// (two signed 64-bit integer numbers) are copied to the right half of the result vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckev.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckev_d(a: v2i64, b: v2i64) -> v2i64 { + msa_pckev_d(a, mem::transmute(b)) +} + +/// Vector Pack Odd +/// +/// Odd elements in vectors `a` (sixteen signed 8-bit integer numbers) +/// are copied to the left half of the result vector and odd elements in vector `b` +/// (sixteen signed 8-bit integer numbers) are copied to the right half of the result vector. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckod.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckod_b(a: v16i8, b: v16i8) -> v16i8 { + msa_pckod_b(a, mem::transmute(b)) +} + +/// Vector Pack Odd +/// +/// Odd elements in vectors `a` (eight signed 16-bit integer numbers) +/// are copied to the left half of the result vector and odd elements in vector `b` +/// (eight signed 16-bit integer numbers) are copied to the right half of the result vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckod.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckod_h(a: v8i16, b: v8i16) -> v8i16 { + msa_pckod_h(a, mem::transmute(b)) +} + +/// Vector Pack Odd +/// +/// Odd elements in vectors `a` (four signed 32-bit integer numbers) +/// are copied to the left half of the result vector and odd elements in vector `b` +/// (four signed 32-bit integer numbers) are copied to the right half of the result vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckod.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckod_w(a: v4i32, b: v4i32) -> v4i32 { + msa_pckod_w(a, mem::transmute(b)) +} + +/// Vector Pack Odd +/// +/// Odd elements in vectors `a` (two signed 64-bit integer numbers) +/// are copied to the left half of the result vector and odd elements in vector `b` +/// (two signed 64-bit integer numbers) are copied to the right half of the result vector. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pckod.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pckod_d(a: v2i64, b: v2i64) -> v2i64 { + msa_pckod_d(a, mem::transmute(b)) +} + +/// Vector Population Count +/// +/// The number of bits set to 1 for elements in vector `a` (sixteen signed 8-bit integer numbers) +/// is stored to the elements in the result vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pcnt.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pcnt_b(a: v16i8) -> v16i8 { + msa_pcnt_b(a) +} + +/// Vector Population Count +/// +/// The number of bits set to 1 for elements in vector `a` (eight signed 16-bit integer numbers) +/// is stored to the elements in the result vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pcnt.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pcnt_h(a: v8i16) -> v8i16 { + msa_pcnt_h(a) +} + +/// Vector Population Count +/// +/// The number of bits set to 1 for elements in vector `a` (four signed 32-bit integer numbers) +/// is stored to the elements in the result vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pcnt.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pcnt_w(a: v4i32) -> v4i32 { + msa_pcnt_w(a) +} + +/// Vector Population Count +/// +/// The number of bits set to 1 for elements in vector `a` (two signed 64-bit integer numbers) +/// is stored to the elements in the result vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(pcnt.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_pcnt_d(a: v2i64) -> v2i64 { + msa_pcnt_d(a) +} + +/// Immediate Signed Saturate +/// +/// Signed elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are saturated to signed values of `imm3+1` bits without changing the data width. +/// The result is stored in the vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_s.b, imm4 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_s_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + msa_sat_s_b(a, IMM3) +} + +/// Immediate Signed Saturate +/// +/// Signed elements in vector `a` (eight signed 16-bit integer numbers) +/// are saturated to signed values of `imm4+1` bits without changing the data width. +/// The result is stored in the vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_s.h, imm3 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_s_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + msa_sat_s_h(a, IMM4) +} + +/// Immediate Signed Saturate +/// +/// Signed elements in vector `a` (four signed 32-bit integer numbers) +/// are saturated to signed values of `imm5+1` bits without changing the data width. +/// The result is stored in the vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_s.w, imm2 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_s_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_sat_s_w(a, IMM5) +} + +/// Immediate Signed Saturate +/// +/// Signed elements in vector `a` (two signed 64-bit integer numbers) +/// are saturated to signed values of `imm6+1` bits without changing the data width. +/// The result is stored in the vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_s.d, imm1 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_s_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + msa_sat_s_d(a, IMM6) +} + +/// Immediate Unsigned Saturate +/// +/// Unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) +/// are saturated to unsigned values of `imm3+1` bits without changing the data width. +/// The result is stored in the vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_u.b, imm4 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_u_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM3, 3); + msa_sat_u_b(a, IMM3) +} + +/// Immediate Unsigned Saturate +/// +/// Unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) +/// are saturated to unsigned values of `imm4+1` bits without changing the data width. +/// The result is stored in the vector (eight unsigned 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_u.h, imm3 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_u_h(a: v8u16) -> v8u16 { + static_assert_uimm_bits!(IMM4, 4); + msa_sat_u_h(a, IMM4) +} + +/// Immediate Unsigned Saturate +/// +/// Unsigned elements in vector `a` (four unsigned 32-bit integer numbers) +/// are saturated to unsigned values of `imm5+1` bits without changing the data width. +/// The result is stored in the vector (four unsigned 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_u.w, imm2 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_u_w(a: v4u32) -> v4u32 { + static_assert_uimm_bits!(IMM5, 5); + msa_sat_u_w(a, IMM5) +} + +/// Immediate Unsigned Saturate +/// +/// Unsigned elements in vector `a` (two unsigned 64-bit integer numbers) +/// are saturated to unsigned values of `imm6+1` bits without changing the data width. +/// The result is stored in the vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sat_u.d, imm1 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sat_u_d(a: v2u64) -> v2u64 { + static_assert_uimm_bits!(IMM6, 6); + msa_sat_u_d(a, IMM6) +} + +/// Immediate Set Shuffle Elements +/// +/// The set shuffle instruction works on 4-element sets. +/// All sets are shuffled in the same way: the element i82i+1..2i in `a` +/// (sixteen signed 8-bit integer numbers) is copied over the element i in result vector +/// (sixteen signed 8-bit integer numbers), where i is 0, 1, 2, 3. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(shf.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_shf_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM8, 8); + msa_shf_b(a, IMM8) +} + +/// Immediate Set Shuffle Elements +/// +/// The set shuffle instruction works on 4-element sets. +/// All sets are shuffled in the same way: the element i82i+1..2i in `a` +/// (eight signed 16-bit integer numbers) is copied over the element i in result vector +/// (eight signed 16-bit integer numbers), where i is 0, 1, 2, 3. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(shf.h, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_shf_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM8, 8); + msa_shf_h(a, IMM8) +} + +/// Immediate Set Shuffle Elements +/// +/// The set shuffle instruction works on 4-element sets. +/// All sets are shuffled in the same way: the element i82i+1..2i in `a` +/// (four signed 32-bit integer numbers) is copied over the element i in result vector +/// (four signed 32-bit integer numbers), where i is 0, 1, 2, 3. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(shf.w, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_shf_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM8, 8); + msa_shf_w(a, IMM8) +} + +/// GPR Columns Slide +/// +/// Vector registers `a` (sixteen signed 8-bit integer numbers) and `b` +/// (sixteen signed 8-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. 
first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by the number of columns given in GPR `c`. +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// GPR `c` value is interpreted modulo the number of columns in destination rectangle, +/// or equivalently, the number of data format df elements in the destination vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sld.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sld_b(a: v16i8, b: v16i8, c: i32) -> v16i8 { + msa_sld_b(a, mem::transmute(b), c) +} + +/// GPR Columns Slide +/// +/// Vector registers `a` (eight signed 16-bit integer numbers) and `b` +/// (eight signed 16-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by the number of columns given in GPR `c`. +/// The result is written to vector (eight signed 16-bit integer numbers). +/// GPR `c` value is interpreted modulo the number of columns in destination rectangle, +/// or equivalently, the number of data format df elements in the destination vector. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sld.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sld_h(a: v8i16, b: v8i16, c: i32) -> v8i16 { + msa_sld_h(a, mem::transmute(b), c) +} + +/// GPR Columns Slide +/// +/// Vector registers `a` (four signed 32-bit integer numbers) and `b` +/// (four signed 32-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by the number of columns given in GPR `c`. +/// The result is written to vector (four signed 32-bit integer numbers). +/// GPR `c` value is interpreted modulo the number of columns in destination rectangle, +/// or equivalently, the number of data format df elements in the destination vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sld.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sld_w(a: v4i32, b: v4i32, c: i32) -> v4i32 { + msa_sld_w(a, mem::transmute(b), c) +} + +/// GPR Columns Slide +/// +/// Vector registers `a` (two signed 64-bit integer numbers) and `b` +/// (two signed 64-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by the number of columns given in GPR `c`. +/// The result is written to vector (two signed 64-bit integer numbers). 
+/// GPR `c` value is interpreted modulo the number of columns in destination rectangle, +/// or equivalently, the number of data format df elements in the destination vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sld.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sld_d(a: v2i64, b: v2i64, c: i32) -> v2i64 { + msa_sld_d(a, mem::transmute(b), c) +} + +/// Immediate Columns Slide +/// +/// Vector registers `a` (sixteen signed 8-bit integer numbers) and `b` +/// (sixteen signed 8-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by `imm1` columns. +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sldi.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sldi_b(a: v16i8, b: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + msa_sldi_b(a, mem::transmute(b), IMM4) +} + +/// Immediate Columns Slide +/// +/// Vector registers `a` (eight signed 16-bit integer numbers) and `b` +/// (eight signed 16-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by `imm1` columns. 
+/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sldi.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sldi_h(a: v8i16, b: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + msa_sldi_h(a, mem::transmute(b), IMM3) +} + +/// Immediate Columns Slide +/// +/// Vector registers `a` (four signed 32-bit integer numbers) and `b` +/// (four signed 32-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by `imm1` columns. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sldi.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sldi_w(a: v4i32, b: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_sldi_w(a, mem::transmute(b), IMM2) +} + +/// Immediate Columns Slide +/// +/// Vector registers `a` (two signed 64-bit integer numbers) and `b` +/// (two signed 64-bit integer numbers) contain 2-dimensional byte arrays (rectangles) +/// stored row-wise with as many rows as bytes in integer data format df. +/// The two source rectangles `b` and `a` are concatenated horizontally in the order +/// they appear in the syntax, i.e. first `a` and then `b`. Place a new destination +/// rectangle over `b` and then slide it to the left over the concatenation of `a` and `b` +/// by `imm1` columns. 
+/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sldi.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sldi_d(a: v2i64, b: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_sldi_d(a, mem::transmute(b), IMM1) +} + +/// Vector Shift Left +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted left by the number of bits the elements in vector `b` +/// (sixteen signed 8-bit integer numbers) specify modulo the size of the +/// element in bits. The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sll.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sll_b(a: v16i8, b: v16i8) -> v16i8 { + msa_sll_b(a, mem::transmute(b)) +} + +/// Vector Shift Left +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted left by the number of bits the elements in vector `b` +/// (eight signed 16-bit integer numbers) specify modulo the size of the +/// element in bits. The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sll.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sll_h(a: v8i16, b: v8i16) -> v8i16 { + msa_sll_h(a, mem::transmute(b)) +} + +/// Vector Shift Left +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted left by the number of bits the elements in vector `b` +/// (four signed 32-bit integer numbers) specify modulo the size of the +/// element in bits. The result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sll.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sll_w(a: v4i32, b: v4i32) -> v4i32 { + msa_sll_w(a, mem::transmute(b)) +} + +/// Vector Shift Left +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted left by the number of bits the elements in vector `b` +/// (two signed 64-bit integer numbers) specify modulo the size of the +/// element in bits. The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sll.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sll_d(a: v2i64, b: v2i64) -> v2i64 { + msa_sll_d(a, mem::transmute(b)) +} + +/// Immediate Shift Left +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted left by `imm4` bits. +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(slli.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_slli_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + msa_slli_b(a, IMM4) +} + +/// Immediate Shift Left +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted left by `imm3` bits. +/// The result is written to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(slli.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_slli_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + msa_slli_h(a, IMM3) +} + +/// Immediate Shift Left +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted left by `imm2` bits. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(slli.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_slli_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_slli_w(a, IMM2) +} + +/// Immediate Shift Left +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted left by `imm1` bits. +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(slli.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_slli_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_slli_d(a, IMM1) +} + +/// GPR Element Splat +/// +/// Replicate vector `a` (sixteen signed 8-bit integer numbers) +/// element with index given by GPR `b` to all elements in vector +/// (sixteen signed 8-bit integer numbers) GPR `b` value is interpreted +/// modulo the number of data format df elements in the destination vector. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splat.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splat_b(a: v16i8, b: i32) -> v16i8 { + msa_splat_b(a, mem::transmute(b)) +} + +/// GPR Element Splat +/// +/// Replicate vector `a` (eight signed 16-bit integer numbers) +/// element with index given by GPR `b` to all elements in vector +/// (eight signed 16-bit integer numbers) GPR `b` value is interpreted +/// modulo the number of data format df elements in the destination vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splat.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splat_h(a: v8i16, b: i32) -> v8i16 { + msa_splat_h(a, mem::transmute(b)) +} + +/// GPR Element Splat +/// +/// Replicate vector `a` (four signed 32-bit integer numbers) +/// element with index given by GPR `b` to all elements in vector +/// (four signed 32-bit integer numbers) GPR `b` value is interpreted +/// modulo the number of data format df elements in the destination vector. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splat.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splat_w(a: v4i32, b: i32) -> v4i32 { + msa_splat_w(a, mem::transmute(b)) +} + +/// GPR Element Splat +/// +/// Replicate vector `a` (two signed 64-bit integer numbers) +/// element with index given by GPR `b` to all elements in vector +/// (two signed 64-bit integer numbers) GPR `b` value is interpreted +/// modulo the number of data format df elements in the destination vector. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splat.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splat_d(a: v2i64, b: i32) -> v2i64 { + msa_splat_d(a, mem::transmute(b)) +} + +/// Immediate Element Splat +/// +/// Replicate element `imm4` in vector `a` (sixteen signed 8-bit integer numbers) +/// to all elements in vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splati.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splati_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + msa_splati_b(a, IMM4) +} + +/// Immediate Element Splat +/// +/// Replicate element `imm3` in vector `a` (eight signed 16-bit integer numbers) +/// to all elements in vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splati.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splati_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + msa_splati_h(a, IMM3) +} + +/// Immediate Element Splat +/// +/// Replicate element `imm2` in vector `a` (four signed 32-bit integer numbers) +/// to all elements in vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splati.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splati_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_splati_w(a, IMM2) +} + +/// Immediate Element Splat +/// +/// Replicate element `imm1` in vector `a` (two signed 64-bit integer numbers) +/// to all elements in vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(splati.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_splati_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_splati_d(a, IMM1) +} + +/// Vector Shift Right Arithmetic +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (sixteen signed 8-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sra.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sra_b(a: v16i8, b: v16i8) -> v16i8 { + msa_sra_b(a, mem::transmute(b)) +} + +/// Vector Shift Right Arithmetic +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (eight signed 16-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sra.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sra_h(a: v8i16, b: v8i16) -> v8i16 { + msa_sra_h(a, mem::transmute(b)) +} + +/// Vector Shift Right Arithmetic +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (four signed 32-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sra.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sra_w(a: v4i32, b: v4i32) -> v4i32 { + msa_sra_w(a, mem::transmute(b)) +} + +/// Vector Shift Right Arithmetic +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (two signed 64-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(sra.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_sra_d(a: v2i64, b: v2i64) -> v2i64 { + msa_sra_d(a, mem::transmute(b)) +} + +/// Immediate Shift Right Arithmetic +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right arithmetic by `imm3` bits. +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srai.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srai_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + msa_srai_b(a, IMM3) +} + +/// Immediate Shift Right Arithmetic +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right arithmetic by `imm4` bits. +/// The result is written to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srai.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srai_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + msa_srai_h(a, IMM4) +} + +/// Immediate Shift Right Arithmetic +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right arithmetic by `imm5` bits. +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srai.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srai_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_srai_w(a, IMM5) +} + +/// Immediate Shift Right Arithmetic +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right arithmetic by `imm6` bits. +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srai.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srai_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + msa_srai_d(a, IMM6) +} + +/// Vector Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (sixteen signed 8-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srar.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srar_b(a: v16i8, b: v16i8) -> v16i8 { + msa_srar_b(a, mem::transmute(b)) +} + +/// Vector Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (eight signed 16-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srar.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srar_h(a: v8i16, b: v8i16) -> v8i16 { + msa_srar_h(a, mem::transmute(b)) +} + +/// Vector Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (four signed 32-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srar.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srar_w(a: v4i32, b: v4i32) -> v4i32 { + msa_srar_w(a, mem::transmute(b)) +} + +/// Vector Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right arithmetic by the number of bits the elements in vector `b` +/// (two signed 64-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srar.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srar_d(a: v2i64, b: v2i64) -> v2i64 { + msa_srar_d(a, mem::transmute(b)) +} + +/// Immediate Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right arithmetic by `imm3` bits.The most significant +/// discarded bit is added to the shifted value (for rounding) and +/// the result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srari.b, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srari_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM3, 3); + msa_srari_b(a, IMM3) +} + +/// Immediate Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right arithmetic by `imm4` bits.The most significant +/// discarded bit is added to the shifted value (for rounding) and +/// the result is written to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srari.h, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srari_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM4, 4); + msa_srari_h(a, IMM4) +} + +/// Immediate Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right arithmetic by `imm5` bits.The most significant +/// discarded bit is added to the shifted value (for rounding) and +/// the result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srari.w, imm5 = 0b11111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srari_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_srari_w(a, IMM5) +} + +/// Immediate Shift Right Arithmetic Rounded +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right arithmetic by `imm6` bits.The most significant +/// discarded bit is added to the shifted value (for rounding) and +/// the result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srari.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srari_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + msa_srari_d(a, IMM6) +} + +/// Vector Shift Right Logical +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (sixteen signed 8-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (sixteen signed 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srl.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srl_b(a: v16i8, b: v16i8) -> v16i8 { + msa_srl_b(a, mem::transmute(b)) +} + +/// Vector Shift Right Logical +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (eight signed 16-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srl.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srl_h(a: v8i16, b: v8i16) -> v8i16 { + msa_srl_h(a, mem::transmute(b)) +} + +/// Vector Shift Right Logical +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (four signed 32-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srl.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srl_w(a: v4i32, b: v4i32) -> v4i32 { + msa_srl_w(a, mem::transmute(b)) +} + +/// Vector Shift Right Logical +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (two signed 64-bit integer numbers) specify modulo the size of the +/// element in bits.The result is written to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srl.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srl_d(a: v2i64, b: v2i64) -> v2i64 { + msa_srl_d(a, mem::transmute(b)) +} + +/// Immediate Shift Right Logical +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right logical by `imm4` bits. +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srli.b, imm4 = 0b1111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srli_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM4, 4); + msa_srli_b(a, IMM4) +} + +/// Immediate Shift Right Logical +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right logical by `imm3` bits. +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srli.h, imm3 = 0b111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srli_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM3, 3); + msa_srli_h(a, IMM3) +} + +/// Immediate Shift Right Logical +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right logical by `imm2` bits. +/// The result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srli.w, imm2 = 0b11))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srli_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM2, 2); + msa_srli_w(a, IMM2) +} + +/// Immediate Shift Right Logical +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right logical by `imm1` bits. +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srli.d, imm1 = 0b1))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srli_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM1, 1); + msa_srli_d(a, IMM1) +} + +/// Vector Shift Right Logical Rounded +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (sixteen signed 8-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srlr.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srlr_b(a: v16i8, b: v16i8) -> v16i8 { + msa_srlr_b(a, mem::transmute(b)) +} + +/// Vector Shift Right Logical Rounded +/// +/// The elements in vector `a` (eight signed 16-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (eight signed 16-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (eight signed 16-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srlr.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srlr_h(a: v8i16, b: v8i16) -> v8i16 { + msa_srlr_h(a, mem::transmute(b)) +} + +/// Vector Shift Right Logical Rounded +/// +/// The elements in vector `a` (four signed 32-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (four signed 32-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srlr.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srlr_w(a: v4i32, b: v4i32) -> v4i32 { + msa_srlr_w(a, mem::transmute(b)) +} + +/// Vector Shift Right Logical Rounded +/// +/// The elements in vector `a` (two signed 64-bit integer numbers) +/// are shifted right logical by the number of bits the elements in vector `b` +/// (two signed 64-bit integer numbers) specify modulo the size of the +/// element in bits.The most significant discarded bit is added to the shifted +/// value (for rounding) and the result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srlr.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srlr_d(a: v2i64, b: v2i64) -> v2i64 { + msa_srlr_d(a, mem::transmute(b)) +} + +/// Immediate Shift Right Logical Rounded +/// +/// The elements in vector `a` (sixteen signed 8-bit integer numbers) +/// are shifted right logical by `imm6` bits.The most significant +/// discarded bit is added to the shifted value (for rounding) and +/// the result is written to vector (sixteen signed 8-bit integer numbers). 
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(srlri.b, imm3 = 0b111))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_srlri_b<const IMM3: i32>(a: v16i8) -> v16i8 {
+    static_assert_uimm_bits!(IMM3, 3);
+    msa_srlri_b(a, IMM3)
+}
+
+/// Immediate Shift Right Logical Rounded
+///
+/// The elements in vector `a` (eight signed 16-bit integer numbers)
+/// are shifted right logical by `imm4` bits. The most significant
+/// discarded bit is added to the shifted value (for rounding) and
+/// the result is written to vector (eight signed 16-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(srlri.h, imm4 = 0b1111))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_srlri_h<const IMM4: i32>(a: v8i16) -> v8i16 {
+    static_assert_uimm_bits!(IMM4, 4);
+    msa_srlri_h(a, IMM4)
+}
+
+/// Immediate Shift Right Logical Rounded
+///
+/// The elements in vector `a` (four signed 32-bit integer numbers)
+/// are shifted right logical by `imm5` bits. The most significant
+/// discarded bit is added to the shifted value (for rounding) and
+/// the result is written to vector (four signed 32-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(srlri.w, imm5 = 0b11111))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_srlri_w<const IMM5: i32>(a: v4i32) -> v4i32 {
+    static_assert_uimm_bits!(IMM5, 5);
+    msa_srlri_w(a, IMM5)
+}
+
+/// Immediate Shift Right Logical Rounded
+///
+/// The elements in vector `a` (two signed 64-bit integer numbers)
+/// are shifted right logical by `imm6` bits. The most significant
+/// discarded bit is added to the shifted value (for rounding) and
+/// the result is written to vector (two signed 64-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(srlri.d, imm6 = 0b111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_srlri_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM6, 6); + msa_srlri_d(a, IMM6) +} + +/// Vector Store +/// +/// The WRLEN / 8 bytes in vector `a` (sixteen signed 8-bit integer numbers) +/// are stored as elements of data format df at the effective memory location +/// addressed by the base `mem_addr` and the 10-bit signed immediate offset `imm_s10`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(st.b, imm_s10 = 0b1111111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_st_b(a: v16i8, mem_addr: *mut u8) -> () { + static_assert_simm_bits!(IMM_S10, 10); + msa_st_b(a, mem_addr, IMM_S10) +} + +/// Vector Store +/// +/// The WRLEN / 8 bytes in vector `a` (eight signed 16-bit integer numbers) +/// are stored as elements of data format df at the effective memory location +/// addressed by the base `mem_addr` and the 11-bit signed immediate offset `imm_s11`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(st.h, imm_s11 = 0b11111111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_st_h(a: v8i16, mem_addr: *mut u8) -> () { + static_assert_simm_bits!(IMM_S11, 11); + static_assert!(IMM_S11 % 2 == 0); + msa_st_h(a, mem_addr, IMM_S11) +} + +/// Vector Store +/// +/// The WRLEN / 8 bytes in vector `a` (four signed 32-bit integer numbers) +/// are stored as elements of data format df at the effective memory location +/// addressed by the base `mem_addr` and the 12-bit signed immediate offset `imm_s12`. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(st.w, imm_s12 = 0b111111111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_st_w(a: v4i32, mem_addr: *mut u8) -> () { + static_assert_simm_bits!(IMM_S12, 12); + static_assert!(IMM_S12 % 4 == 0); + msa_st_w(a, mem_addr, IMM_S12) +} + +/// Vector Store +/// +/// The WRLEN / 8 bytes in vector `a` (two signed 64-bit integer numbers) +/// are stored as elements of data format df at the effective memory location +/// addressed by the base `mem_addr` and the 13-bit signed immediate offset `imm_s13`. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(st.d, imm_s13 = 0b1111111111111))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_st_d(a: v2i64, mem_addr: *mut u8) -> () { + static_assert_simm_bits!(IMM_S13, 13); + static_assert!(IMM_S13 % 8 == 0); + msa_st_d(a, mem_addr, IMM_S13) +} + +/// Vector Signed Saturated Subtract of Signed Values +/// +/// The elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are subtracted from the elements in vector `a` (sixteen signed 8-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_s.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_s_b(a: v16i8, b: v16i8) -> v16i8 { + msa_subs_s_b(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Subtract of Signed Values +/// +/// The elements in vector `b` (eight signed 16-bit integer numbers) +/// are subtracted from the elements in vector `a` (eight signed 16-bit integer numbers). 
+/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_s.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_s_h(a: v8i16, b: v8i16) -> v8i16 { + msa_subs_s_h(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Subtract of Signed Values +/// +/// The elements in vector `b` (four signed 32-bit integer numbers) +/// are subtracted from the elements in vector `a` (four signed 32-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_s.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_s_w(a: v4i32, b: v4i32) -> v4i32 { + msa_subs_s_w(a, mem::transmute(b)) +} + +/// Vector Signed Saturated Subtract of Signed Values +/// +/// The elements in vector `b` (two signed 64-bit integer numbers) +/// are subtracted from the elements in vector `a` (two signed 64-bit integer numbers). +/// Signed arithmetic is performed and overflows clamp to the largest and/or smallest +/// representable signed values before writing the result to vector (two signed 64-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_s_d(a: v2i64, b: v2i64) -> v2i64 { + msa_subs_s_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Subtract of Unsigned Values +/// +/// The elements in vector `b` (sixteen unsigned 8-bit integer numbers) +/// are subtracted from the elements in vector `a` (sixteen unsigned 8-bit integer numbers). +/// Unsigned arithmetic is performed and under-flows clamp to 0 before writing +/// the result to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_u_b(a: v16u8, b: v16u8) -> v16u8 { + msa_subs_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Subtract of Unsigned Values +/// +/// The elements in vector `b` (eight unsigned 16-bit integer numbers) +/// are subtracted from the elements in vector `a` (eight unsigned 16-bit integer numbers). +/// Unsigned arithmetic is performed and under-flows clamp to 0 before writing +/// the result to vector (eight unsigned 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_u.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_u_h(a: v8u16, b: v8u16) -> v8u16 { + msa_subs_u_h(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Subtract of Unsigned Values +/// +/// The elements in vector `b` (four unsigned 32-bit integer numbers) +/// are subtracted from the elements in vector `a` (four unsigned 32-bit integer numbers). +/// Unsigned arithmetic is performed and under-flows clamp to 0 before writing +/// the result to vector (four unsigned 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_u.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_u_w(a: v4u32, b: v4u32) -> v4u32 { + msa_subs_u_w(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Subtract of Unsigned Values +/// +/// The elements in vector `b` (two unsigned 64-bit integer numbers) +/// are subtracted from the elements in vector `a` (two unsigned 64-bit integer numbers). +/// Unsigned arithmetic is performed and under-flows clamp to 0 before writing +/// the result to vector (two unsigned 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subs_u.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subs_u_d(a: v2u64, b: v2u64) -> v2u64 { + msa_subs_u_d(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Subtract of Signed from Unsigned +/// +/// The signed elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are subtracted from the unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers). +/// The signed result is unsigned saturated and written to +/// to vector (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subsus_u.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subsus_u_b(a: v16u8, b: v16i8) -> v16u8 { + msa_subsus_u_b(a, mem::transmute(b)) +} + +/// Vector Unsigned Saturated Subtract of Signed from Unsigned +/// +/// The signed elements in vector `b` (eight signed 16-bit integer numbers) +/// are subtracted from the unsigned elements in vector `a` (eight unsigned 16-bit integer numbers). +/// The signed result is unsigned saturated and written to +/// to vector (eight unsigned 16-bit integer numbers). 
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(subsus_u.h))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_subsus_u_h(a: v8u16, b: v8i16) -> v8u16 {
+    msa_subsus_u_h(a, mem::transmute(b))
+}
+
+/// Vector Unsigned Saturated Subtract of Signed from Unsigned
+///
+/// The signed elements in vector `b` (four signed 32-bit integer numbers)
+/// are subtracted from the unsigned elements in vector `a` (four unsigned 32-bit integer numbers).
+/// The signed result is unsigned saturated and written
+/// to vector (four unsigned 32-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(subsus_u.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_subsus_u_w(a: v4u32, b: v4i32) -> v4u32 {
+    msa_subsus_u_w(a, mem::transmute(b))
+}
+
+/// Vector Unsigned Saturated Subtract of Signed from Unsigned
+///
+/// The signed elements in vector `b` (two signed 64-bit integer numbers)
+/// are subtracted from the unsigned elements in vector `a` (two unsigned 64-bit integer numbers).
+/// The signed result is unsigned saturated and written
+/// to vector (two unsigned 64-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(subsus_u.d))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_subsus_u_d(a: v2u64, b: v2i64) -> v2u64 {
+    msa_subsus_u_d(a, mem::transmute(b))
+}
+
+/// Vector Signed Saturated Subtract of Unsigned Values
+///
+/// The unsigned elements in vector `b` (sixteen unsigned 8-bit integer numbers)
+/// are subtracted from the unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers).
+/// The signed result is signed saturated and written
+/// to vector (sixteen unsigned 8-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(subsuu_s.b))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_subsuu_s_b(a: v16u8, b: v16u8) -> v16i8 {
+    msa_subsuu_s_b(a, mem::transmute(b))
+}
+
+/// Vector Signed Saturated Subtract of Unsigned Values
+///
+/// The unsigned elements in vector `b` (eight unsigned 16-bit integer numbers)
+/// are subtracted from the unsigned elements in vector `a` (eight unsigned 16-bit integer numbers).
+/// The signed result is signed saturated and written
+/// to vector (eight unsigned 16-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(subsuu_s.h))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_subsuu_s_h(a: v8u16, b: v8u16) -> v8i16 {
+    msa_subsuu_s_h(a, mem::transmute(b))
+}
+
+/// Vector Signed Saturated Subtract of Unsigned Values
+///
+/// The unsigned elements in vector `b` (four unsigned 32-bit integer numbers)
+/// are subtracted from the unsigned elements in vector `a` (four unsigned 32-bit integer numbers).
+/// The signed result is signed saturated and written
+/// to vector (four unsigned 32-bit integer numbers).
+///
+#[inline]
+#[target_feature(enable = "msa")]
+#[cfg_attr(test, assert_instr(subsuu_s.w))]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn __msa_subsuu_s_w(a: v4u32, b: v4u32) -> v4i32 {
+    msa_subsuu_s_w(a, mem::transmute(b))
+}
+
+/// Vector Signed Saturated Subtract of Unsigned Values
+///
+/// The unsigned elements in vector `b` (two unsigned 64-bit integer numbers)
+/// are subtracted from the unsigned elements in vector `a` (two unsigned 64-bit integer numbers).
+/// The signed result is signed saturated and written
+/// to vector (two unsigned 64-bit integer numbers).
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subsuu_s.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subsuu_s_d(a: v2u64, b: v2u64) -> v2i64 { + msa_subsuu_s_d(a, mem::transmute(b)) +} + +/// Vector Subtract +/// +/// The elements in vector `b` (sixteen signed 8-bit integer numbers) +/// are subtracted from the elements in vector `a` (sixteen signed 8-bit integer numbers). +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subv.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subv_b(a: v16i8, b: v16i8) -> v16i8 { + msa_subv_b(a, mem::transmute(b)) +} + +/// Vector Subtract +/// +/// The elements in vector `b` (eight signed 16-bit integer numbers) +/// are subtracted from the elements in vector `a` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subv.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subv_h(a: v8i16, b: v8i16) -> v8i16 { + msa_subv_h(a, mem::transmute(b)) +} + +/// Vector Subtract +/// +/// The elements in vector `b` (four signed 32-bit integer numbers) +/// are subtracted from the elements in vector `a` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subv.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subv_w(a: v4i32, b: v4i32) -> v4i32 { + msa_subv_w(a, mem::transmute(b)) +} + +/// Vector Subtract +/// +/// The elements in vector `b` (two signed 64-bit integer numbers) +/// are subtracted from the elements in vector `a` (two signed 64-bit integer numbers). 
+/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subv.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subv_d(a: v2i64, b: v2i64) -> v2i64 { + msa_subv_d(a, mem::transmute(b)) +} + +/// Immediate Subtract +/// +/// The 5-bit immediate unsigned value `imm5` +/// are subtracted from the elements in vector `a` (sixteen signed 8-bit integer numbers). +/// The result is written to vector (sixteen signed 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subvi.b, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subvi_b(a: v16i8) -> v16i8 { + static_assert_uimm_bits!(IMM5, 5); + msa_subvi_b(a, IMM5) +} + +/// Immediate Subtract +/// +/// The 5-bit immediate unsigned value `imm5` +/// are subtracted from the elements in vector `a` (eight signed 16-bit integer numbers). +/// The result is written to vector (eight signed 16-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subvi.h, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subvi_h(a: v8i16) -> v8i16 { + static_assert_uimm_bits!(IMM5, 5); + msa_subvi_h(a, IMM5) +} + +/// Immediate Subtract +/// +/// The 5-bit immediate unsigned value `imm5` +/// are subtracted from the elements in vector `a` (four signed 32-bit integer numbers). +/// The result is written to vector (four signed 32-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subvi.w, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subvi_w(a: v4i32) -> v4i32 { + static_assert_uimm_bits!(IMM5, 5); + msa_subvi_w(a, IMM5) +} + +/// Immediate Subtract +/// +/// The 5-bit immediate unsigned value `imm5` +/// are subtracted from the elements in vector `a` (two signed 64-bit integer numbers). +/// The result is written to vector (two signed 64-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(subvi.d, imm5 = 0b10111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_subvi_d(a: v2i64) -> v2i64 { + static_assert_uimm_bits!(IMM5, 5); + msa_subvi_d(a, IMM5) +} + +/// Vector Data Preserving Shuffle +/// +/// The vector shuffle instructions selectively copy data elements from the +/// concatenation of vectors `b` (sixteen signed 8-bit integer numbers) +/// and `c` (sixteen signed 8-bit integer numbers) in to vector `a` +/// (sixteen signed 8-bit integer numbers) based on the corresponding control element in `a`. +/// The least significant 6 bits in `a` control elements modulo the number of elements in +/// the concatenated vectors `b`, `a` specify the index of the source element. +/// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination element is set to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(vshf.b))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_vshf_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { + msa_vshf_b(a, mem::transmute(b), c) +} + +/// Vector Data Preserving Shuffle +/// +/// The vector shuffle instructions selectively copy data elements from the +/// concatenation of vectors `b` (eight signed 16-bit integer numbers) +/// and `c` (eight signed 16-bit integer numbers) in to vector `a` +/// (eight signed 16-bit integer numbers) based on the corresponding control element in `a`. +/// The least significant 6 bits in `a` control elements modulo the number of elements in +/// the concatenated vectors `b`, `a` specify the index of the source element. +/// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination element is set to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(vshf.h))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_vshf_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { + msa_vshf_h(a, mem::transmute(b), c) +} + +/// Vector Data Preserving Shuffle +/// +/// The vector shuffle instructions selectively copy data elements from the +/// concatenation of vectors `b` (four signed 32-bit integer numbers) +/// and `c` (four signed 32-bit integer numbers) in to vector `a` +/// (four signed 32-bit integer numbers) based on the corresponding control element in `a`. +/// The least significant 6 bits in `a` control elements modulo the number of elements in +/// the concatenated vectors `b`, `a` specify the index of the source element. +/// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination element is set to 0. 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(vshf.w))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_vshf_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { + msa_vshf_w(a, mem::transmute(b), c) +} + +/// Vector Data Preserving Shuffle +/// +/// The vector shuffle instructions selectively copy data elements from the +/// concatenation of vectors `b` (two signed 64-bit integer numbers) +/// and `c` (two signed 64-bit integer numbers) in to vector `a` +/// (two signed 64-bit integer numbers) based on the corresponding control element in `a`. +/// The least significant 6 bits in `a` control elements modulo the number of elements in +/// the concatenated vectors `b`, `a` specify the index of the source element. +/// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination element is set to 0. +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(vshf.d))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_vshf_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { + msa_vshf_d(a, mem::transmute(b), c) +} + +/// Vector Logical Exclusive Or +/// +/// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the corresponding bit of vector `b` (sixteen unsigned 8-bit integer numbers) +/// in a bitwise logical XOR operation. The result is written to vector +/// (sixteen unsigned 8-bit integer numbers). +/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(xor.v))] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_xor_v(a: v16u8, b: v16u8) -> v16u8 { + msa_xor_v(a, mem::transmute(b)) +} + +/// Immediate Logical Exclusive Or +/// +/// Each byte of vector `a` (sixteen unsigned 8-bit integer numbers) +/// is combined with the 8-bit immediate `imm8` +/// in a bitwise logical XOR operation. The result is written to vector +/// (sixteen unsigned 8-bit integer numbers). 
+/// +#[inline] +#[target_feature(enable = "msa")] +#[cfg_attr(test, assert_instr(xori.b, imm8 = 0b11111111))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_mips", issue = "111198")] +pub unsafe fn __msa_xori_b(a: v16u8) -> v16u8 { + static_assert_uimm_bits!(IMM8, 8); + msa_xori_b(a, IMM8) +} + +#[cfg(test)] +mod tests { + use crate::{ + core_arch::{mips::msa::*, simd::*}, + mem, + }; + use std::{f32, f64}; + use stdarch_test::simd_test; + + #[simd_test(enable = "msa")] + unsafe fn test_msa_add_a_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -4, -3, -2, -1, + -4, -3, -2, -1, + -4, -3, -2, -1, + -4, -3, -2, -1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5 + ); + + assert_eq!( + r, + mem::transmute(__msa_add_a_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_add_a_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = i16x8::new(-4, -3, -2, -1, -4, -3, -2, -1); + #[rustfmt::skip] + let r = i16x8::new(5, 5, 5, 5, 5, 5, 5, 5); + + assert_eq!( + r, + mem::transmute(__msa_add_a_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_add_a_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(-4, -3, -2, -1); + #[rustfmt::skip] + let r = i32x4::new(5, 5, 5, 5); + + assert_eq!( + r, + mem::transmute(__msa_add_a_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_add_a_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(-4, -3); + #[rustfmt::skip] + let r = i64x2::new(5, 5); + + assert_eq!( + r, + mem::transmute(__msa_add_a_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_adds_a_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, i8::MAX, 100, i8::MAX, + 100, i8::MAX, 100, i8::MAX, + 100, i8::MAX, 100, i8::MAX, + 100, i8::MAX, 100, i8::MAX + ); + #[rustfmt::skip] + let b = i8x16::new( + -4, -3, -2, -100, + -4, -3, -2, -100, + -4, -3, -2, -100, + -4, -3, -2, -100 + ); + #[rustfmt::skip] + let r = i8x16::new( + 104, 127, 102, 127, + 104, 127, 102, 127, + 104, 127, 102, 127, + 104, 127, 102, 127 + ); + + assert_eq!( + r, + mem::transmute(__msa_adds_a_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_a_h() { + #[rustfmt::skip] + let a = i16x8::new( + 100, i16::MAX, 100, i16::MAX, + 100, i16::MAX, 100, i16::MAX + ); + #[rustfmt::skip] + let b = i16x8::new(-4, -3, -2, -1, -4, -3, -2, -1); + #[rustfmt::skip] + let r = i16x8::new( + 104, i16::MAX, 102, i16::MAX, + 104, i16::MAX, 102, i16::MAX + ); + + assert_eq!( + r, + mem::transmute(__msa_adds_a_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_a_w() { + #[rustfmt::skip] + let a = i32x4::new(100, i32::MAX, 100, i32::MAX); + #[rustfmt::skip] + let b = i32x4::new(-4, -3, -2, -1); + #[rustfmt::skip] + let r = i32x4::new(104, i32::MAX, 102, i32::MAX); + + assert_eq!( + r, + mem::transmute(__msa_adds_a_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_a_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MAX); + #[rustfmt::skip] + let b = i64x2::new(-4, -3); + #[rustfmt::skip] + let r = i64x2::new(104, i64::MAX); + + assert_eq!( + r, + mem::transmute(__msa_adds_a_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, i8::MIN, 100, i8::MAX, + 100, i8::MIN, 100, i8::MAX, + 100, i8::MIN, 100, i8::MAX, + 100, i8::MIN, 100, i8::MAX + ); + #[rustfmt::skip] + let b = i8x16::new( + -4, -3, -2, 100, + -4, 
-3, -2, 100, + -4, -3, -2, 100, + -4, -3, -2, 100 + ); + #[rustfmt::skip] + let r = i8x16::new( + 96, i8::MIN, 98, i8::MAX, + 96, i8::MIN, 98, i8::MAX, + 96, i8::MIN, 98, i8::MAX, + 96, i8::MIN, 98, i8::MAX + ); + + assert_eq!( + r, + mem::transmute(__msa_adds_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_s_h() { + #[rustfmt::skip] + let a = i16x8::new( + 100, i16::MIN, 100, i16::MAX, + 100, i16::MIN, 100, i16::MAX + ); + #[rustfmt::skip] + let b = i16x8::new(-4, -3, -2, 1, -4, -3, -2, 1); + #[rustfmt::skip] + let r = i16x8::new( + 96, i16::MIN, 98, i16::MAX, + 96, i16::MIN, 98, i16::MAX + ); + + assert_eq!( + r, + mem::transmute(__msa_adds_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_s_w() { + #[rustfmt::skip] + let a = i32x4::new(100, i32::MAX, 100, i32::MIN); + #[rustfmt::skip] + let b = i32x4::new(-4, 3, -2, -1); + #[rustfmt::skip] + let r = i32x4::new(96, i32::MAX, 98, i32::MIN); + + assert_eq!( + r, + mem::transmute(__msa_adds_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_s_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MIN); + #[rustfmt::skip] + let b = i64x2::new(-4, -3); + #[rustfmt::skip] + let r = i64x2::new(96, i64::MIN); + + assert_eq!( + r, + mem::transmute(__msa_adds_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX + ); + #[rustfmt::skip] + let b = u8x16::new( + 4, 3, 2, 100, + 4, 3, 2, 100, + 4, 3, 2, 100, + 4, 3, 2, 100 + ); + #[rustfmt::skip] + let r = u8x16::new( + 104, u8::MAX, 102, u8::MAX, + 104, u8::MAX, 102, u8::MAX, + 104, u8::MAX, 102, u8::MAX, + 104, u8::MAX, 102, u8::MAX + ); + + assert_eq!( + r, + 
mem::transmute(__msa_adds_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + 100, u16::MAX, 100, u16::MAX, + 100, u16::MAX, 100, u16::MAX + ); + #[rustfmt::skip] + let b = u16x8::new(4, 3, 2, 1, 4, 3, 2, 1); + #[rustfmt::skip] + let r = u16x8::new( + 104, u16::MAX, 102, u16::MAX, + 104, u16::MAX, 102, u16::MAX + ); + + assert_eq!( + r, + mem::transmute(__msa_adds_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_u_w() { + #[rustfmt::skip] + let a = u32x4::new(100, u32::MAX, 100, u32::MAX); + #[rustfmt::skip] + let b = u32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = u32x4::new(104, u32::MAX, 102, u32::MAX); + + assert_eq!( + r, + mem::transmute(__msa_adds_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_adds_u_d() { + #[rustfmt::skip] + let a = u64x2::new(100, u64::MAX); + #[rustfmt::skip] + let b = u64x2::new(4, 3); + #[rustfmt::skip] + let r = u64x2::new(104, u64::MAX); + + assert_eq!( + r, + mem::transmute(__msa_adds_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addv_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, i8::MIN, 100, i8::MAX, + 100, i8::MIN, 100, i8::MAX, + 100, i8::MIN, 100, i8::MAX, + 100, i8::MIN, 100, i8::MAX + ); + #[rustfmt::skip] + let b = i8x16::new( + -4, -3, -2, 100, + -4, -3, -2, 100, + -4, -3, -2, 100, + -4, -3, -2, 100 + ); + #[rustfmt::skip] + let r = i8x16::new( + 96, 125, 98, -29, + 96, 125, 98, -29, + 96, 125, 98, -29, + 96, 125, 98, -29 + ); + + assert_eq!( + r, + mem::transmute(__msa_addv_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addv_h() { + #[rustfmt::skip] + let a = i16x8::new( + 100, i16::MIN, 100, i16::MAX, + 100, i16::MIN, 100, i16::MAX + ); + #[rustfmt::skip] + let b = 
i16x8::new(-4, -3, -2, 1, -4, -3, -2, 1); + #[rustfmt::skip] + let r = i16x8::new(96, 32765, 98, -32768, 96, 32765, 98, -32768); + + assert_eq!( + r, + mem::transmute(__msa_addv_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addv_w() { + #[rustfmt::skip] + let a = i32x4::new(100, i32::MAX, 100, i32::MIN); + #[rustfmt::skip] + let b = i32x4::new(-4, 3, -2, -1); + #[rustfmt::skip] + let r = i32x4::new(96, -2147483646, 98, 2147483647); + + assert_eq!( + r, + mem::transmute(__msa_addv_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addv_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MIN); + #[rustfmt::skip] + let b = i64x2::new(-4, -3); + #[rustfmt::skip] + let r = i64x2::new(96, 9223372036854775805); + + assert_eq!( + r, + mem::transmute(__msa_addv_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addvi_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, i8::MAX, 100, i8::MAX, + 100, i8::MAX, 100, i8::MAX, + 100, i8::MAX, 100, i8::MAX, + 100, i8::MAX, 100, i8::MAX + ); + #[rustfmt::skip] + let r = i8x16::new( + 103, -126, 103, -126, + 103, -126, 103, -126, + 103, -126, 103, -126, + 103, -126, 103, -126 + ); + + assert_eq!(r, mem::transmute(__msa_addvi_b(mem::transmute(a), 67))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addvi_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 3276, -100, -127, + i16::MAX, 3276, -100, -127 + ); + #[rustfmt::skip] + let r = i16x8::new( + -32766, 3279, -97, -124, + -32766, 3279, -97, -124 + ); + + assert_eq!(r, mem::transmute(__msa_addvi_h(mem::transmute(a), 67))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addvi_w() { + #[rustfmt::skip] + let a = i32x4::new(100, i32::MAX, 100, i32::MIN); + #[rustfmt::skip] + let r = i32x4::new(103, -2147483646, 103, -2147483645); + + assert_eq!(r, 
mem::transmute(__msa_addvi_w(mem::transmute(a), 67))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_addvi_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MIN); + #[rustfmt::skip] + let r = i64x2::new(117, -9223372036854775791); + + assert_eq!(r, mem::transmute(__msa_addvi_d(mem::transmute(a), 17))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_and_v() { + #[rustfmt::skip] + let a = u8x16::new( + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX + ); + #[rustfmt::skip] + let b = u8x16::new( + 4, 3, 2, 100, + 4, 3, 2, 100, + 4, 3, 2, 100, + 4, 3, 2, 100 + ); + #[rustfmt::skip] + let r = u8x16::new( + 4, 3, 0, 100, + 4, 3, 0, 100, + 4, 3, 0, 100, + 4, 3, 0, 100 + ); + + assert_eq!( + r, + mem::transmute(__msa_and_v(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_andi_b() { + #[rustfmt::skip] + let a = u8x16::new( + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX, + 100, u8::MAX, 100, u8::MAX + ); + #[rustfmt::skip] + let r = u8x16::new( + 4, 5, 4, 5, + 4, 5, 4, 5, + 4, 5, 4, 5, + 4, 5, 4, 5 + ); + + assert_eq!(r, mem::transmute(__msa_andi_b(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -1, -2, -3, -4, + -1, -2, -3, -4, + -1, -2, -3, -4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = i8x16::new( + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5 + ); + + assert_eq!( + r, + mem::transmute(__msa_asub_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, -4); + #[rustfmt::skip] + let b = i16x8::new(-6, -7, -8, -9, -6, -7, -8, -9); + 
#[rustfmt::skip] + let r = i16x8::new(5, 5, 5, 5, 5, 5, 5, 5); + + assert_eq!( + r, + mem::transmute(__msa_asub_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-1, -2, -3, -4); + #[rustfmt::skip] + let b = i32x4::new(-6, -7, -8, -9); + #[rustfmt::skip] + let r = i32x4::new(5, 5, 5, 5); + + assert_eq!( + r, + mem::transmute(__msa_asub_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, -2); + #[rustfmt::skip] + let b = i64x2::new(-6, -7); + #[rustfmt::skip] + let r = i64x2::new(5, 5); + + assert_eq!( + r, + mem::transmute(__msa_asub_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5, + 5, 5, 5, 5 + ); + + assert_eq!( + r, + mem::transmute(__msa_asub_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(5, 5, 5, 5, 5, 5, 5, 5); + + assert_eq!( + r, + mem::transmute(__msa_asub_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_asub_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(5, 5, 5, 5); + + assert_eq!( + r, + mem::transmute(__msa_asub_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = 
"msa")] + unsafe fn test_msa_asub_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(5, 5); + + assert_eq!( + r, + mem::transmute(__msa_asub_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -1, -2, -3, -4, + -1, -2, -3, -4, + -1, -2, -3, -4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 6, -7, 8, -9, + 6, -7, 8, -9, + 6, -7, 8, -9, + 6, -7, 8, -9 + ); + #[rustfmt::skip] + let r = i8x16::new( + 2, -5, 2, -7, + 2, -5, 2, -7, + 2, -5, 2, -7, + 2, -5, 2, -7 + ); + + assert_eq!( + r, + mem::transmute(__msa_ave_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, -4); + #[rustfmt::skip] + let b = i16x8::new(6, -7, 8, -9, 6, -7, 8, -9); + #[rustfmt::skip] + let r = i16x8::new(2, -5, 2, -7, 2, -5, 2, -7); + + assert_eq!( + r, + mem::transmute(__msa_ave_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-1, -2, -3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, -7, 8, -9); + #[rustfmt::skip] + let r = i32x4::new(2, -5, 2, -7); + + assert_eq!( + r, + mem::transmute(__msa_ave_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, -2); + #[rustfmt::skip] + let b = i64x2::new(-6, -7); + #[rustfmt::skip] + let r = i64x2::new(-4, -5); + + assert_eq!( + r, + mem::transmute(__msa_ave_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let 
b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 3, 4, 5, 6, + 3, 4, 5, 6, + 3, 4, 5, 6, + 3, 4, 5, 6 + ); + + assert_eq!( + r, + mem::transmute(__msa_ave_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(3, 4, 5, 6, 3, 4, 5, 6); + + assert_eq!( + r, + mem::transmute(__msa_ave_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(3, 4, 5, 6); + + assert_eq!( + r, + mem::transmute(__msa_ave_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ave_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(3, 4); + + assert_eq!( + r, + mem::transmute(__msa_ave_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -1, -2, 3, -4, + -1, -2, 3, -4, + -1, -2, 3, -4, + -1, -2, 3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, 7, -8, -9, + -6, 7, -8, -9, + -6, 7, -8, -9, + -6, 7, -8, -9 + ); + #[rustfmt::skip] + let r = i8x16::new( + -3, 3, -2, -6, + -3, 3, -2, -6, + -3, 3, -2, -6, + -3, 3, -2, -6 + ); + + assert_eq!( + r, + mem::transmute(__msa_aver_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-1, -2, 3, -4, -1, -2, 3, -4); + #[rustfmt::skip] + let b = i16x8::new(-6, 7, -8, -9, -6, 7, -8, -9); + 
#[rustfmt::skip] + let r = i16x8::new(-3, 3, -2, -6, -3, 3, -2, -6); + + assert_eq!( + r, + mem::transmute(__msa_aver_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(-6, 7, -8, -9); + #[rustfmt::skip] + let r = i32x4::new(-3, 3, -2, -6); + + assert_eq!( + r, + mem::transmute(__msa_aver_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, -2); + #[rustfmt::skip] + let b = i64x2::new(-6, -7); + #[rustfmt::skip] + let r = i64x2::new(-3, -4); + + assert_eq!( + r, + mem::transmute(__msa_aver_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 4, 5, 6, 7, + 4, 5, 6, 7, + 4, 5, 6, 7, + 4, 5, 6, 7 + ); + + assert_eq!( + r, + mem::transmute(__msa_aver_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(4, 5, 6, 7, 4, 5, 6, 7); + + assert_eq!( + r, + mem::transmute(__msa_aver_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_aver_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(4, 5, 6, 7); + + assert_eq!( + r, + mem::transmute(__msa_aver_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + 
#[simd_test(enable = "msa")] + unsafe fn test_msa_aver_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(4, 5); + + assert_eq!( + r, + mem::transmute(__msa_aver_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclr_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 191, 27, 54, 1, + 191, 27, 54, 1, + 191, 27, 54, 1, + 191, 27, 54, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_bclr_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclr_h() { + #[rustfmt::skip] + let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(191, 27, 55, 1, 191, 27, 55, 1); + + assert_eq!( + r, + mem::transmute(__msa_bclr_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclr_w() { + #[rustfmt::skip] + let a = u32x4::new(255, 155, 55, 1); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(191, 27, 55, 1); + + assert_eq!( + r, + mem::transmute(__msa_bclr_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclr_d() { + #[rustfmt::skip] + let a = u64x2::new(255, 155); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(191, 27); + + assert_eq!( + r, + mem::transmute(__msa_bclr_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclri_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 
155, 55, 1 + ); + #[rustfmt::skip] + let r = u8x16::new( + 247, 147, 55, 1, + 247, 147, 55, 1, + 247, 147, 55, 1, + 247, 147, 55, 1 + ); + + assert_eq!(r, mem::transmute(__msa_bclri_b(mem::transmute(a), 3))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclri_h() { + #[rustfmt::skip] + let a = u16x8::new(2155, 1155, 155, 1, 2155, 1155, 155, 1); + #[rustfmt::skip] + let r = u16x8::new(107, 1155, 155, 1, 107, 1155, 155, 1); + + assert_eq!(r, mem::transmute(__msa_bclri_h(mem::transmute(a), 11))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclri_w() { + #[rustfmt::skip] + let a = u32x4::new(211111155, 111111155, 11111155, 1); + #[rustfmt::skip] + let r = u32x4::new(202722547, 102722547, 2722547, 1); + + assert_eq!(r, mem::transmute(__msa_bclri_w(mem::transmute(a), 23))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bclri_d() { + #[rustfmt::skip] + let a = u64x2::new(211111111155, 11111111111111155); + #[rustfmt::skip] + let r = u64x2::new(73672157683, 11110973672157683); + + assert_eq!(r, mem::transmute(__msa_bclri_d(mem::transmute(a), 37))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsl_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let c = u8x16::new( + 1, 3, 5, 9, + 1, 3, 5, 9, + 1, 3, 5, 9, + 1, 3, 5, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 63, 11, 11, 1, + 63, 11, 11, 1, + 63, 11, 11, 1, + 63, 11, 11, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsl_b( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsl_h() { + #[rustfmt::skip] + let a = u16x8::new( + 32767, 16384, 8192, 4096, + 32767, 16384, 8192, 4096 + ); + #[rustfmt::skip] + let b = u16x8::new( + 21656, 5273, 7081, 2985, + 21656, 5273, 7081, 2985 + ); + 
#[rustfmt::skip] + let c = u16x8::new( + 3, 7, 9, 13, + 15, 17, 21, 23 + ); + #[rustfmt::skip] + let r = u16x8::new( + 24575, 5120, 7040, 2984, + 21656, 0, 6144, 2816 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsl_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsl_w() { + #[rustfmt::skip] + let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); + #[rustfmt::skip] + let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); + #[rustfmt::skip] + let c = u32x4::new(11, 15, 31, 37); + #[rustfmt::skip] + let r = u32x4::new(1037041663, 259063808, 78219975, 1082130432); + + assert_eq!( + r, + mem::transmute(__msa_binsl_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsl_d() { + #[rustfmt::skip] + let a = u64x2::new(8006399338, 2882303762); + #[rustfmt::skip] + let b = u64x2::new(9223372036854775805, 536870912); + #[rustfmt::skip] + let c = u64x2::new(12, 48); + #[rustfmt::skip] + let r = u64x2::new(9221120245047489898, 536901394); + + assert_eq!( + r, + mem::transmute(__msa_binsl_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsli_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 7, 7, 11, 9, + 7, 7, 11, 9, + 7, 7, 11, 9, + 7, 7, 11, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsli_b(mem::transmute(a), mem::transmute(b), 5)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsli_h() { + #[rustfmt::skip] + let a = u16x8::new( + 32767, 16384, 8192, 4096, + 32767, 16384, 8192, 4096 + ); + #[rustfmt::skip] + let b = u16x8::new( + 21656, 
5273, 7081, 2985, + 21656, 5273, 7081, 2985 + ); + #[rustfmt::skip] + let r = u16x8::new( + 21659, 5272, 7080, 2984, + 21659, 5272, 7080, 2984 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsli_h(mem::transmute(a), mem::transmute(b), 13)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsli_w() { + #[rustfmt::skip] + let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); + #[rustfmt::skip] + let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); + #[rustfmt::skip] + let r = u32x4::new(1036386303, 259080192, 78217216, 1119485952); + + assert_eq!( + r, + mem::transmute(__msa_binsli_w(mem::transmute(a), mem::transmute(b), 17)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsli_d() { + #[rustfmt::skip] + let a = u64x2::new(8006399338, 2882303762); + #[rustfmt::skip] + let b = u64x2::new(9223372036854775805, 536870912); + #[rustfmt::skip] + let r = u64x2::new(9223372036854773098, 536901394); + + assert_eq!( + r, + mem::transmute(__msa_binsli_d(mem::transmute(a), mem::transmute(b), 48)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsr_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let c = u8x16::new( + 1, 3, 5, 9, + 1, 3, 5, 9, + 1, 3, 5, 9, + 1, 3, 5, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 254, 151, 8, 1, + 254, 151, 8, 1, + 254, 151, 8, 1, + 254, 151, 8, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsr_b( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsr_h() { + #[rustfmt::skip] + let a = u16x8::new( + 32767, 16384, 8192, 4096, + 32767, 16384, 8192, 4096 + ); + #[rustfmt::skip] + let b = u16x8::new( + 21656, 5273, 7081, 2985, + 21656, 5273, 7081, 2985 + ); + #[rustfmt::skip] + let c = 
u16x8::new( + 3, 7, 9, 13, + 15, 17, 21, 23 + ); + #[rustfmt::skip] + let r = u16x8::new( + 32760, 16537, 9129, 2985, + 21656, 16385, 8233, 4265 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsr_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsr_w() { + #[rustfmt::skip] + let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); + #[rustfmt::skip] + let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); + #[rustfmt::skip] + let c = u32x4::new(11, 15, 31, 37); + #[rustfmt::skip] + let r = u32x4::new(2147482168, 536900238, 78219975, 8388615); + + assert_eq!( + r, + mem::transmute(__msa_binsr_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsr_d() { + #[rustfmt::skip] + let a = u64x2::new(8006399338, 2882303762); + #[rustfmt::skip] + let b = u64x2::new(9223372036854775805, 536870912); + #[rustfmt::skip] + let c = u64x2::new(12, 48); + #[rustfmt::skip] + let r = u64x2::new(8006402045, 536870912); + + assert_eq!( + r, + mem::transmute(__msa_binsr_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsri_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 198, 135, 8, 9, + 198, 135, 8, 9, + 198, 135, 8, 9, + 198, 135, 8, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsri_b(mem::transmute(a), mem::transmute(b), 5)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsri_h() { + #[rustfmt::skip] + let a = u16x8::new( + 32767, 16384, 8192, 4096, + 32767, 16384, 8192, 4096 + ); + #[rustfmt::skip] + let b = u16x8::new( + 21656, 5273, 7081, 2985, + 21656, 5273, 7081, 
2985 + ); + #[rustfmt::skip] + let r = u16x8::new( + 21656, 21657, 7081, 2985, + 21656, 21657, 7081, 2985 + ); + + assert_eq!( + r, + mem::transmute(__msa_binsri_h(mem::transmute(a), mem::transmute(b), 13)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsri_w() { + #[rustfmt::skip] + let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); + #[rustfmt::skip] + let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); + #[rustfmt::skip] + let r = u32x4::new(2147338808, 536965774, 67209927, 8533447); + + assert_eq!( + r, + mem::transmute(__msa_binsri_w(mem::transmute(a), mem::transmute(b), 17)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_binsri_d() { + #[rustfmt::skip] + let a = u64x2::new(8006399338, 2882303762); + #[rustfmt::skip] + let b = u64x2::new(9223372036854775805, 536870912); + #[rustfmt::skip] + let r = u64x2::new(562949953421309, 536870912); + + assert_eq!( + r, + mem::transmute(__msa_binsri_d(mem::transmute(a), mem::transmute(b), 48)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bmnz_v() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + ); + #[rustfmt::skip] + let c = u8x16::new( + 3, 5, 7, 1, + 3, 5, 7, 1, + 3, 5, 7, 1, + 3, 5, 7, 1 + ); + #[rustfmt::skip] + let r = u8x16::new( + 254, 159, 48, 1, + 254, 159, 48, 1, + 254, 159, 48, 1, + 254, 159, 48, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_bmnz_v( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bmnzi_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 1, u8::MAX, 155, 55, + 1, u8::MAX, 155, 55, + 1, u8::MAX, 155, 55, + 1, u8::MAX, 
155, 55 + ); + #[rustfmt::skip] + let r = u8x16::new( + 249, 159, 51, 7, + 249, 159, 51, 7, + 249, 159, 51, 7, + 249, 159, 51, 7 + ); + + assert_eq!( + r, + mem::transmute(__msa_bmnzi_b(mem::transmute(a), mem::transmute(b), 7)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bmz_v() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let c = u8x16::new( + 3, 5, 7, 1, + 3, 5, 7, 1, + 3, 5, 7, 1, + 3, 5, 7, 1 + ); + #[rustfmt::skip] + let r = u8x16::new( + 7, 3, 15, 9, + 7, 3, 15, 9, + 7, 3, 15, 9, + 7, 3, 15, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_bmz_v( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bmzi_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1, + u8::MAX, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 1, 255, 155, 55, + 1, 255, 155, 55, + 1, 255, 155, 55, + 1, 255, 155, 55 + ); + #[rustfmt::skip] + let r = u8x16::new( + 7, 251, 159, 49, + 7, 251, 159, 49, + 7, 251, 159, 49, + 7, 251, 159, 49 + ); + + assert_eq!( + r, + mem::transmute(__msa_bmzi_b(mem::transmute(a), mem::transmute(b), 7)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bneg_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 191, 27, 54, 3, + 191, 27, 54, 3, + 191, 27, 54, 3, + 191, 27, 54, 3 + ); + + assert_eq!( + r, + mem::transmute(__msa_bneg_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bneg_h() { + 
#[rustfmt::skip] + let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(191, 27, 311, 513, 191, 27, 311, 513); + + assert_eq!( + r, + mem::transmute(__msa_bneg_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bneg_w() { + #[rustfmt::skip] + let a = u32x4::new(255, 155, 55, 1); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(191, 27, 311, 513); + + assert_eq!( + r, + mem::transmute(__msa_bneg_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bneg_d() { + #[rustfmt::skip] + let a = u64x2::new(255, 155); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(191, 27); + + assert_eq!( + r, + mem::transmute(__msa_bneg_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnegi_b() { + #[rustfmt::skip] + let a = u8x16::new( + 50, 100, 127, u8::MAX, + 50, 100, 127, u8::MAX, + 50, 100, 127, u8::MAX, + 50, 100, 127, u8::MAX + ); + #[rustfmt::skip] + let r = u8x16::new( + 34, 116, 111, 239, + 34, 116, 111, 239, + 34, 116, 111, 239, + 34, 116, 111, 239 + ); + + assert_eq!(r, mem::transmute(__msa_bnegi_b(mem::transmute(a), 4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnegi_h() { + #[rustfmt::skip] + let a = u16x8::new( + 32767, 3276, 100, 127, + 32767, 3276, 100, 127 + ); + #[rustfmt::skip] + let r = u16x8::new( + 30719, 1228, 2148, 2175, + 30719, 1228, 2148, 2175 + ); + + assert_eq!(r, mem::transmute(__msa_bnegi_h(mem::transmute(a), 11))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnegi_w() { + #[rustfmt::skip] + let a = u32x4::new(100, 2147483647, 100, 2147483648); + #[rustfmt::skip] + let r = u32x4::new(16777316, 2130706431, 16777316, 2164260864); + + assert_eq!(r, 
mem::transmute(__msa_bnegi_w(mem::transmute(a), 24))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnegi_d() { + #[rustfmt::skip] + let a = u64x2::new(100, 9223372036854775808); + #[rustfmt::skip] + let r = u64x2::new(4398046511204, 9223376434901286912); + + assert_eq!(r, mem::transmute(__msa_bnegi_d(mem::transmute(a), 42))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnz_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 1, 1, 1, + 1, 1, 1, 1, + 2, 2, 2, 2, + 4, 4, 0, 4, + ); + let r = 0 as i32; + + assert_eq!(r, mem::transmute(__msa_bnz_b(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnz_h() { + #[rustfmt::skip] + let a = u16x8::new( + 32767, 3276, 100, 127, + 32767, 0, 100, 127 + ); + let r = 0 as i32; + + assert_eq!(r, mem::transmute(__msa_bnz_h(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnz_w() { + #[rustfmt::skip] + let a = u32x4::new(100, 2147483647, 0, 2147483648); + let r = 0 as i32; + + assert_eq!(r, mem::transmute(__msa_bnz_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnz_d() { + #[rustfmt::skip] + let a = u64x2::new(100, 9223372036854775808); + #[rustfmt::skip] + let r = 1 as i32; + + assert_eq!(r, mem::transmute(__msa_bnz_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bnz_v() { + #[rustfmt::skip] + let a = u8x16::new( + 0, 0, 0, 1, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + let r = 1 as i32; + + assert_eq!(r, mem::transmute(__msa_bnz_v(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bsel_v() { + #[rustfmt::skip] + let a = u8x16::new( + 3, 5, 7, 1, + 3, 5, 7, 1, + 3, 5, 7, 1, + 3, 5, 7, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let c = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let r = 
u8x16::new( + 7, 3, 15, 9, + 7, 3, 15, 9, + 7, 3, 15, 9, + 7, 3, 15, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_bsel_v( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bseli_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 121, 29, 57, 9, + 121, 29, 57, 9, + 121, 29, 57, 9, + 121, 29, 57, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_bseli_b(mem::transmute(a), mem::transmute(b), 121)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bset_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 255, 155, 55, 3, + 255, 155, 55, 3, + 255, 155, 55, 3, + 255, 155, 55, 3 + ); + + assert_eq!( + r, + mem::transmute(__msa_bset_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bset_h() { + #[rustfmt::skip] + let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(255, 155, 311, 513, 255, 155, 311, 513); + + assert_eq!( + r, + mem::transmute(__msa_bset_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bset_w() { + #[rustfmt::skip] + let a = u32x4::new(255, 155, 55, 1); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(255, 155, 311, 513); + + assert_eq!( + r, + mem::transmute(__msa_bset_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_bset_d() { + #[rustfmt::skip] + let a = u64x2::new(255, 155); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(255, 155); + + assert_eq!( + r, + mem::transmute(__msa_bset_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bseti_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + #[rustfmt::skip] + let r = u8x16::new( + 255, 159, 55, 5, + 255, 159, 55, 5, + 255, 159, 55, 5, + 255, 159, 55, 5 + ); + + assert_eq!(r, mem::transmute(__msa_bseti_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bseti_h() { + #[rustfmt::skip] + let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); + #[rustfmt::skip] + let r = u16x8::new(255, 159, 55, 5, 255, 159, 55, 5); + + assert_eq!(r, mem::transmute(__msa_bseti_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bseti_w() { + #[rustfmt::skip] + let a = u32x4::new(255, 155, 55, 1); + #[rustfmt::skip] + let r = u32x4::new(255, 159, 55, 5); + + assert_eq!(r, mem::transmute(__msa_bseti_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bseti_d() { + #[rustfmt::skip] + let a = u64x2::new(255, 155); + #[rustfmt::skip] + let r = u64x2::new(255, 159); + + assert_eq!(r, mem::transmute(__msa_bseti_d(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bz_b() { + #[rustfmt::skip] + let a = u8x16::new( + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1, + 255, 155, 55, 1 + ); + let r = 0 as i32; + + assert_eq!(r, mem::transmute(__msa_bz_b(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bz_h() { + #[rustfmt::skip] + let a = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r = 1 as i32; + + assert_eq!(r, mem::transmute(__msa_bz_h(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bz_w() 
{ + #[rustfmt::skip] + let a = u32x4::new(255, 0, 55, 1); + let r = 1 as i32; + + assert_eq!(r, mem::transmute(__msa_bz_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bz_d() { + #[rustfmt::skip] + let a = u64x2::new(255, 0); + let r = 1 as i32; + + assert_eq!(r, mem::transmute(__msa_bz_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_bz_v() { + #[rustfmt::skip] + let a = u8x16::new( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 + ); + let r = 1 as i32; + + assert_eq!(r, mem::transmute(__msa_bz_v(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceq_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, 127, 55, 1, + -128, 127, 55, 1, + -128, 127, 55, 1, + -128, 127, 55, 1 + ); + #[rustfmt::skip] + let b = i8x16::new( + -128, 126, 55, 1, + -128, 126, 55, 1, + -128, 126, 55, 1, + -128, 126, 55, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + -1, 0, -1, -1, + -1, 0, -1, -1, + -1, 0, -1, -1, + -1, 0, -1, -1 + ); + + assert_eq!( + r, + mem::transmute(__msa_ceq_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceq_h() { + #[rustfmt::skip] + let a = i16x8::new(255, 155, 55, 1, 255, 155, 55, 1); + #[rustfmt::skip] + let b = i16x8::new(255, 155, 56, 1, 255, 155, 56, 1); + #[rustfmt::skip] + let r = i16x8::new(-1, -1, 0, -1, -1, -1, 0, -1); + + assert_eq!( + r, + mem::transmute(__msa_ceq_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceq_w() { + #[rustfmt::skip] + let a = i32x4::new(255, 155, 55, 1); + #[rustfmt::skip] + let b = i32x4::new(255, 156, 55, 1); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, -1, -1); + + assert_eq!( + r, + mem::transmute(__msa_ceq_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceq_d() { + #[rustfmt::skip] + let a = i64x2::new(255, 155); + #[rustfmt::skip] + let b = 
i64x2::new(255, 156); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_ceq_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceqi_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, -1, -4, 15, + 100, -1, -4, 15, + 100, -1, -4, 15, + 100, -1, -4, 15 + ); + #[rustfmt::skip] + let r = i8x16::new( + 0, 0, -1, 0, + 0, 0, -1, 0, + 0, 0, -1, 0, + 0, 0, -1, 0 + ); + + assert_eq!(r, mem::transmute(__msa_ceqi_b(mem::transmute(a), -4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceqi_h() { + #[rustfmt::skip] + let a = i16x8::new( + 32767, 3276, 100, -11, + 32767, 3276, 100, -11 + ); + #[rustfmt::skip] + let r = i16x8::new(0, 0, 0, -1, 0, 0, 0, -1); + + assert_eq!(r, mem::transmute(__msa_ceqi_h(mem::transmute(a), -11))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ceqi_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 3, 5, -3); + #[rustfmt::skip] + let r = i32x4::new(0, 0, -1, 0); + + assert_eq!(r, mem::transmute(__msa_ceqi_w(mem::transmute(a), 5))); + } + + // FIXME: https://reviews.llvm.org/D59884 + // If target type is i64, negative immediate loses the sign + // Test passes if 4294967293 is used instead -3 in vector `a` + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_ceqi_d() { + // #[rustfmt::skip] + // let a = i64x2::new(-3, 2); + // #[rustfmt::skip] + // let r = i64x2::new(-1, 0); + + // assert_eq!(r, mem::transmute(__msa_ceqi_d(mem::transmute(a), -3))); + // } + + // Can not be tested in user mode + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_cfcmsa() { + // let r = 5; + + // assert_eq!(r, mem::transmute(__msa_cfcmsa(5)); + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, 127, 55, 2, + -128, 127, 55, 2, + -128, 127, 55, 2, + -128, 127, 55, 2 + ); + #[rustfmt::skip] + let b = i8x16::new( + -128, 126, 55, 1, + -128, 126, 55, 1, + -128, 126, 55, 1, + 
-128, 126, 55, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + -1, 0, -1, 0, + -1, 0, -1, 0, + -1, 0, -1, 0, + -1, 0, -1, 0 + ); + + assert_eq!( + r, + mem::transmute(__msa_cle_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_s_h() { + #[rustfmt::skip] + let a = i16x8::new(255, 155, 55, 2, 255, 155, 55, 2); + #[rustfmt::skip] + let b = i16x8::new(255, 155, 56, 1, 255, 155, 56, 1); + #[rustfmt::skip] + let r = i16x8::new(-1, -1, -1, 0, -1, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_cle_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_s_w() { + #[rustfmt::skip] + let a = i32x4::new(255, 155, 55, 2); + #[rustfmt::skip] + let b = i32x4::new(255, 156, 55, 1); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_cle_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_s_d() { + #[rustfmt::skip] + let a = i64x2::new(255, 155); + #[rustfmt::skip] + let b = i64x2::new(255, 156); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_cle_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 127, 55, 2, + u8::MAX, 127, 55, 2, + u8::MAX, 127, 55, 2, + u8::MAX, 127, 55, 2 + ); + #[rustfmt::skip] + let b = u8x16::new( + u8::MAX, 126, 55, 1, + u8::MAX, 126, 55, 1, + u8::MAX, 126, 55, 1, + u8::MAX, 126, 55, 1 + ); + #[rustfmt::skip] + let r = i8x16::new(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_cle_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + u16::MAX, 155, 55, 2, + u16::MAX, 155, 55, 2 + ); + #[rustfmt::skip] + let b = 
u16x8::new( + u16::MAX, 155, 56, 1, + u16::MAX, 155, 56, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(-1, -1, -1, 0, -1, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_cle_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_u_w() { + #[rustfmt::skip] + let a = u32x4::new(u32::MAX, 155, 55, 2); + #[rustfmt::skip] + let b = u32x4::new(u32::MAX, 156, 55, 1); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_cle_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_cle_u_d() { + #[rustfmt::skip] + let a = u64x2::new(u64::MAX, 155); + #[rustfmt::skip] + let b = u64x2::new(u64::MAX, 156); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_cle_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -2, -127, 100, -127, + -2, -127, 100, -127, + -2, -127, 100, -127, + -2, -127, 100, -127 + ); + #[rustfmt::skip] + let r = i8x16::new(-1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1); + + assert_eq!(r, mem::transmute(__msa_clei_s_b(mem::transmute(a), -2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_s_h() { + #[rustfmt::skip] + let a = i16x8::new( + 32767, 3276, 10, -1, + 32767, 3276, 10, -1, + ); + #[rustfmt::skip] + let r = i16x8::new(0, 0, 0, -1, 0, 0, 0, -1); + + assert_eq!(r, mem::transmute(__msa_clei_s_h(mem::transmute(a), -1))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_s_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 2147483647, 6, 2147483647); + #[rustfmt::skip] + let r = i32x4::new(0, 0, -1, 0); + + assert_eq!(r, mem::transmute(__msa_clei_s_w(mem::transmute(a), 6))); + } + + // FIXME: https://reviews.llvm.org/D59884 + // If target type is i64, negative immediate loses the sign + // -3 is represented 
as 4294967293 + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_clei_s_d() { + // #[rustfmt::skip] + // let a = i64x2::new(-3, 11); + // #[rustfmt::skip] + // let r = i64x2::new(-1, 0); + + // assert_eq!(r, mem::transmute(__msa_clei_s_d(mem::transmute(a), -3))); + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 2, 127, 100, 127, + 2, 127, 100, 127, + 2, 127, 100, 127, + 2, 127, 100, 127, + ); + #[rustfmt::skip] + let r = i8x16::new( + -1, 0, 0, 0, + -1, 0, 0, 0, + -1, 0, 0, 0, + -1, 0, 0, 0 + ); + + assert_eq!(r, mem::transmute(__msa_clei_u_b(mem::transmute(a), 25))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + 1, 26, 15, 36, + 1, 26, 15, 36 + ); + #[rustfmt::skip] + let r = i16x8::new(-1, 0, -1, 0, -1, 0, -1, 0); + + assert_eq!(r, mem::transmute(__msa_clei_u_h(mem::transmute(a), 25))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_u_w() { + #[rustfmt::skip] + let a = u32x4::new(25, 32, 25, 32); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, -1, 0); + + assert_eq!(r, mem::transmute(__msa_clei_u_w(mem::transmute(a), 31))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clei_u_d() { + #[rustfmt::skip] + let a = u64x2::new(10, 26); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!(r, mem::transmute(__msa_clei_u_d(mem::transmute(a), 25))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, 127, 55, 2, + -128, 127, 55, 2, + -128, 127, 55, 2, + -128, 127, 55, 2 + ); + #[rustfmt::skip] + let b = i8x16::new( + -127, 126, 56, 1, + -127, 126, 56, 1, + -127, 126, 56, 1, + -127, 126, 56, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + -1, 0, -1, 0, + -1, 0, -1, 0, + -1, 0, -1, 0, + -1, 0, -1, 0 + ); + + assert_eq!( + r, + mem::transmute(__msa_clt_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = 
"msa")] + unsafe fn test_msa_clt_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-255, 155, 55, 2, -255, 155, 55, 2); + #[rustfmt::skip] + let b = i16x8::new(255, 156, 56, 1, 255, 156, 56, 1); + #[rustfmt::skip] + let r = i16x8::new(-1, -1, -1, 0, -1, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_clt_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-255, 155, 55, 2); + #[rustfmt::skip] + let b = i32x4::new(255, 156, 55, 1); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_clt_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-255, 155); + #[rustfmt::skip] + let b = i64x2::new(255, 156); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_clt_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 128, 127, 55, 2, + 128, 127, 55, 2, + 128, 127, 55, 2, + 128, 127, 55, 2 + ); + #[rustfmt::skip] + let b = u8x16::new( + 127, 126, 56, 1, + 127, 126, 56, 1, + 127, 126, 56, 1, + 127, 126, 56, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 0, 0, -1, 0, + 0, 0, -1, 0, + 0, 0, -1, 0, + 0, 0, -1, 0 + ); + + assert_eq!( + r, + mem::transmute(__msa_clt_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_u_h() { + #[rustfmt::skip] + let a = u16x8::new(255, 155, 55, 2, 255, 155, 55, 2); + #[rustfmt::skip] + let b = u16x8::new(255, 156, 56, 1, 255, 156, 56, 1); + #[rustfmt::skip] + let r = i16x8::new(0, -1, -1, 0, 0, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_clt_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_u_w() { + 
#[rustfmt::skip] + let a = u32x4::new(255, 155, 55, 2); + #[rustfmt::skip] + let b = u32x4::new(255, 156, 55, 1); + #[rustfmt::skip] + let r = i32x4::new(0, -1, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_clt_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clt_u_d() { + #[rustfmt::skip] + let a = u64x2::new(255, 155); + #[rustfmt::skip] + let b = u64x2::new(255, 156); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_clt_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clti_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + 2, -127, -5, 127, + 2, -127, -5, 127, + 2, -127, -5, 127, + 2, -127, -5, 127 + ); + #[rustfmt::skip] + let r = i8x16::new( + 0, -1, 0, 0, + 0, -1, 0, 0, + 0, -1, 0, 0, + 0, -1, 0, 0 + ); + + assert_eq!(r, mem::transmute(__msa_clti_s_b(mem::transmute(a), -5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clti_s_h() { + #[rustfmt::skip] + let a = i16x8::new( + -1024, 3276, 15, 127, + -1024, 3276, 15, 127 + ); + #[rustfmt::skip] + let r = i16x8::new(-1, 0, 0, 0, -1, 0, 0, 0); + + assert_eq!(r, mem::transmute(__msa_clti_s_h(mem::transmute(a), 15))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clti_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-15, 2147483647, -15, 2147483647); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, -1, 0); + + assert_eq!(r, mem::transmute(__msa_clti_s_w(mem::transmute(a), -10))); + } + + // FIXME: https://reviews.llvm.org/D59884 + // If target type is i64, negative immediate loses the sign + // -3 is represented as 4294967293 + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_clti_s_d() { + // #[rustfmt::skip] + // let a = i64x2::new(-5, -2); + // #[rustfmt::skip] + // let r = i64x2::new(-1, 0); + + // assert_eq!(r, mem::transmute(__msa_clti_s_d(mem::transmute(a), -3))); + // } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_clti_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 2, 127, 49, 127, + 2, 127, 49, 127, + 2, 127, 49, 127, + 2, 127, 49, 127, + ); + #[rustfmt::skip] + let r = i8x16::new( + -1, 0, 0, 0, + -1, 0, 0, 0, + -1, 0, 0, 0, + -1, 0, 0, 0 + ); + + assert_eq!(r, mem::transmute(__msa_clti_u_b(mem::transmute(a), 50))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clti_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + 327, 3276, 100, 127, + 327, 3276, 100, 127 + ); + #[rustfmt::skip] + let r = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + + assert_eq!(r, mem::transmute(__msa_clti_u_h(mem::transmute(a), 30))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clti_u_w() { + #[rustfmt::skip] + let a = u32x4::new(100, 2147483647, 100, 2147483647); + #[rustfmt::skip] + let r = i32x4::new(0, 0, 0, 0); + + assert_eq!(r, mem::transmute(__msa_clti_u_w(mem::transmute(a), 10))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_clti_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 9223372036854775807); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!(r, mem::transmute(__msa_clti_u_d(mem::transmute(a), 10))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -100, 127, 4, 127, + -100, 127, 4, 127, + -100, 127, 4, 127, + -100, 127, 4, 127 + ); + #[rustfmt::skip] + let r = -100 as i32; + + assert_eq!(r, mem::transmute(__msa_copy_s_b(mem::transmute(a), 12))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_s_h() { + #[rustfmt::skip] + let a = i16x8::new( + 32767, 3276, 100, 11, + 32767, 3276, 100, 11 + ); + #[rustfmt::skip] + let r = 32767 as i32; + + assert_eq!(r, mem::transmute(__msa_copy_s_h(mem::transmute(a), 4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_s_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 2147483647, 5, -2147483647); + let r = 2147483647 as i32; + + assert_eq!(r, mem::transmute(__msa_copy_s_w(mem::transmute(a), 1))); + 
} + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_s_d() { + #[rustfmt::skip] + let a = i64x2::new(3, 9223372036854775807); + #[rustfmt::skip] + let r = 9223372036854775807 as i64; + + assert_eq!(r, mem::transmute(__msa_copy_s_d(mem::transmute(a), 1))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_u_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, 127, 4, 127, + 100, 127, 4, 127, + 100, 127, 4, 127, + 100, 127, 4, 127 + ); + #[rustfmt::skip] + let r = 100 as u32; + + assert_eq!(r, mem::transmute(__msa_copy_u_b(mem::transmute(a), 12))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_u_h() { + #[rustfmt::skip] + let a = i16x8::new( + 32767, 3276, 100, 11, + 32767, 3276, 100, 11 + ); + #[rustfmt::skip] + let r = 32767 as u32; + + assert_eq!(r, mem::transmute(__msa_copy_u_h(mem::transmute(a), 4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_u_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 2147483647, 5, 2147483647); + #[rustfmt::skip] + let r = 2147483647 as u32; + + assert_eq!(r, mem::transmute(__msa_copy_u_w(mem::transmute(a), 1))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_copy_u_d() { + #[rustfmt::skip] + let a = i64x2::new(3, i64::MAX); + #[rustfmt::skip] + let r = 9223372036854775807 as u64; + + assert_eq!(r, mem::transmute(__msa_copy_u_d(mem::transmute(a), 1))); + } + + // Can not be tested in user mode + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_ctcmsa() { + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let b = i8x16::new( + -1, -2, -3, -4, + -1, -2, -3, -4, + -1, -2, -3, -4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let r = i8x16::new( + 6, 3, 2, 2, + 6, 3, 2, 2, + 6, 3, 2, 2, + 6, 3, 2, 2 + ); + + assert_eq!( + r, + mem::transmute(__msa_div_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + 
#[simd_test(enable = "msa")] + unsafe fn test_msa_div_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-6, -7, -8, -9, 6, 7, 8, 9); + #[rustfmt::skip] + let b = i16x8::new(-1, -2, -3, -4, -1, -2, -3, -4); + #[rustfmt::skip] + let r = i16x8::new(6, 3, 2, 2, -6, -3, -2, -2); + + assert_eq!( + r, + mem::transmute(__msa_div_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-6, -7, 8, 9); + #[rustfmt::skip] + let b = i32x4::new(-1, -2, -3, -4); + #[rustfmt::skip] + let r = i32x4::new(6, 3, -2, -2); + + assert_eq!( + r, + mem::transmute(__msa_div_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-6, 7); + #[rustfmt::skip] + let b = i64x2::new(-1, -2); + #[rustfmt::skip] + let r = i64x2::new(6, -3); + + assert_eq!( + r, + mem::transmute(__msa_div_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let b = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = u8x16::new( + 6, 3, 2, 2, + 6, 3, 2, 2, + 6, 3, 2, 2, + 6, 3, 2, 2 + ); + + assert_eq!( + r, + mem::transmute(__msa_div_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_u_h() { + #[rustfmt::skip] + let a = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let b = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let r = u16x8::new(6, 3, 2, 2, 6, 3, 2, 2); + + assert_eq!( + r, + mem::transmute(__msa_div_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_u_w() { + #[rustfmt::skip] + let a = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let b = 
u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = u32x4::new(6, 3, 2, 2); + + assert_eq!( + r, + mem::transmute(__msa_div_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_div_u_d() { + #[rustfmt::skip] + let a = u64x2::new(6, 7); + #[rustfmt::skip] + let b = u64x2::new(1, 2); + #[rustfmt::skip] + let r = u64x2::new(6, 3); + + assert_eq!( + r, + mem::transmute(__msa_div_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dotp_s_h() { + #[rustfmt::skip] + let a = i8x16::new( + -1, -2, -3, 4, + -1, -2, -3, -4, + -1, -2, -3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = i16x8::new(20, -12, 20, 60, 20, -12, 20, 60); + + assert_eq!( + r, + mem::transmute(__msa_dotp_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dotp_s_w() { + #[rustfmt::skip] + let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, 4); + #[rustfmt::skip] + let b = i16x8::new(-6, -7, -8, -9, -6, -7, -8, -9); + #[rustfmt::skip] + let r = i32x4::new(20, 60, 20, -12); + + assert_eq!( + r, + mem::transmute(__msa_dotp_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dotp_s_d() { + #[rustfmt::skip] + let a = i32x4::new(-1, -2, -3, 4); + #[rustfmt::skip] + let b = i32x4::new(-6, -7, -8, -9); + #[rustfmt::skip] + let r = i64x2::new(20, -12); + + assert_eq!( + r, + mem::transmute(__msa_dotp_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dotp_u_h() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u16x8::new(20, 60, 20, 60, 20, 60, 20, 
60); + + assert_eq!( + r, + mem::transmute(__msa_dotp_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dotp_u_w() { + #[rustfmt::skip] + let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(20, 60, 20, 60); + + assert_eq!( + r, + mem::transmute(__msa_dotp_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dotp_u_d() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u64x2::new(20, 60); + + assert_eq!( + r, + mem::transmute(__msa_dotp_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpadd_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, 4); + #[rustfmt::skip] + let b = i8x16::new( + -1, -2, -3, 4, + -1, -2, -3, -4, + -1, -2, -3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let c = i8x16::new( + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = i16x8::new(19, -14, 17, 56, 19, -14, 17, 64); + + assert_eq!( + r, + mem::transmute(__msa_dpadd_s_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpadd_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-1, -2, -3, -4); + #[rustfmt::skip] + let b = i16x8::new( + -1, -2, -3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let c = i16x8::new( + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = i32x4::new(19, -14, 17, 56); + + assert_eq!( + r, + mem::transmute(__msa_dpadd_s_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpadd_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, -2); + #[rustfmt::skip] + let b = i32x4::new(-1, 
-2, -3, 4); + #[rustfmt::skip] + let c = i32x4::new(-6, -7, -8, -9); + #[rustfmt::skip] + let r = i64x2::new(19, -14); + + assert_eq!( + r, + mem::transmute(__msa_dpadd_s_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpadd_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let c = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u16x8::new(21, 62, 23, 64, 21, 62, 23, 64); + + assert_eq!( + r, + mem::transmute(__msa_dpadd_u_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpadd_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let c = u16x8::new( + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u32x4::new(21, 62, 23, 64); + + assert_eq!( + r, + mem::transmute(__msa_dpadd_u_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpadd_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let b = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let c = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u64x2::new(21, 62); + + assert_eq!( + r, + mem::transmute(__msa_dpadd_u_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpsub_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, 4); + #[rustfmt::skip] + let b = i8x16::new( + -1, -2, -3, 4, + -1, -2, -3, -4, + -1, -2, -3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let c = i8x16::new( + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9, + 
-6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = i16x8::new(-21, 10, -23, -64, -21, 10, -23, -56); + + assert_eq!( + r, + mem::transmute(__msa_dpsub_s_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpsub_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-1, -2, -3, -4); + #[rustfmt::skip] + let b = i16x8::new( + -1, -2, -3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let c = i16x8::new( + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = i32x4::new(-21, 10, -23, -64); + + assert_eq!( + r, + mem::transmute(__msa_dpsub_s_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpsub_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, -2); + #[rustfmt::skip] + let b = i32x4::new(-1, -2, -3, 4); + #[rustfmt::skip] + let c = i32x4::new(-6, -7, -8, -9); + #[rustfmt::skip] + let r = i64x2::new(-21, 10); + + assert_eq!( + r, + mem::transmute(__msa_dpsub_s_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpsub_u_h() { + #[rustfmt::skip] + let a = i16x8::new(1, -2, 3, -4, -1, 2,-3, 4); + #[rustfmt::skip] + let b = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let c = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = i16x8::new(-19, -62, -17, -64, -21, -58, -23, -56); + + assert_eq!( + r, + mem::transmute(__msa_dpsub_u_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpsub_u_w() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = u16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let c = u16x8::new( + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = i32x4::new(-19, -62, -17, -64); + 
+ assert_eq!( + r, + mem::transmute(__msa_dpsub_u_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_dpsub_u_d() { + #[rustfmt::skip] + let a = i64x2::new(1, -2); + #[rustfmt::skip] + let b = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let c = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = i64x2::new(-19, -62); + + assert_eq!( + r, + mem::transmute(__msa_dpsub_u_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fadd_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, -4.4); + #[rustfmt::skip] + let b = f32x4::new(4.4, -3.3, 2.2, -1.1); + #[rustfmt::skip] + let r = f32x4::new(5.5, -5.5, 5.5, -5.5); + + assert_eq!( + r, + mem::transmute(__msa_fadd_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fadd_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(4.4, -3.3); + #[rustfmt::skip] + let r = f64x2::new(5.5, -5.5); + + assert_eq!( + r, + mem::transmute(__msa_fadd_d(mem::transmute(a), mem::transmute(b))) + ); + } + + // Only observed beahiour should be SIGFPE signal + // Can not be tested + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcaf_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, -4.4); + #[rustfmt::skip] + let b = f32x4::new(0.0, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, 0, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fcaf_w(mem::transmute(a), mem::transmute(b))) + ); + } + + // Only observed beahiour should be SIGFPE signal + // Can not be tested + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcaf_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(-2.2, 1.1); + #[rustfmt::skip] + let r = i64x2::new(0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fcaf_d(mem::transmute(a), 
mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fceq_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(-4.4, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fceq_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fceq_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fceq_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fclass_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(128, 8, 128, 2); + + assert_eq!(r, mem::transmute(__msa_fclass_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fclass_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let r = i64x2::new(128, 8); + + assert_eq!(r, mem::transmute(__msa_fclass_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcle_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(-4.4, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fcle_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcle_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcle_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fclt_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, 
f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(-4.4, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, -1, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fclt_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fclt_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fclt_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcne_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(-4.4, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fcne_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcne_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcne_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcor_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fcor_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcor_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fcor_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcueq_w() { + #[rustfmt::skip] + let a = 
f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, -1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcueq_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcueq_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcueq_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcule_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, -1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcule_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcule_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcule_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcult_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcult_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcult_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcult_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_fcun_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, 0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcun_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcun_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcun_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcune_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcune_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fcune_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(1.1, 1.1); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fcune_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fdiv_w() { + #[rustfmt::skip] + let a = f32x4::new(5.25, -20.2, 333.333, -425.0); + #[rustfmt::skip] + let b = f32x4::new(4.0, -2.1, 11.11, 8.2); + #[rustfmt::skip] + let r = f32x4::new(1.3125, 9.619048, 30.002972, -51.82927); + + assert_eq!( + r, + mem::transmute(__msa_fdiv_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fdiv_d() { + #[rustfmt::skip] + let a = f64x2::new(1111.11, -222222.2); + #[rustfmt::skip] + let b = f64x2::new(-4.85, 3.33); + #[rustfmt::skip] + let r = f64x2::new(-229.09484536082473, -66733.3933933934); + + assert_eq!( + r, + 
mem::transmute(__msa_fdiv_d(mem::transmute(a), mem::transmute(b))) + ); + } + + /*// FIXME: 16-bit floats + #[simd_test(enable = "msa")] + unsafe fn test_msa_fexdo_h() { + #[rustfmt::skip] + let a = f32x4::new(20.5, 2.3, 4.5, 5.4); + #[rustfmt::skip] + let b = f32x4::new(1.1, 1.0, 1.0, 1.0); + let r = i16x8::new(1, 9, 30, 51, 1, 9, 30, 51); + + assert_eq!(r, mem::transmute(__msa_fexdo_h(mem::transmute(a), mem::transmute(b)))); + }*/ + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fexdo_w() { + #[rustfmt::skip] + let a = f64x2::new(2000005.5, 2.3); + #[rustfmt::skip] + let b = f64x2::new(1235689784512.1, 2147483649998.5); + #[rustfmt::skip] + let r = f32x4::new( + 1235689800000.0, 2147483600000.0, + 2000005.5, 2.3 + ); + + assert_eq!( + r, + mem::transmute(__msa_fexdo_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fexp2_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, -4.4); + #[rustfmt::skip] + let b = i32x4::new(4, -3, 2, 1); + #[rustfmt::skip] + let r = f32x4::new(17.6, -0.275, 13.2, -8.8); + + assert_eq!( + r, + mem::transmute(__msa_fexp2_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fexp2_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = i64x2::new(-4, 3); + #[rustfmt::skip] + let r = f64x2::new(0.06875, -17.6); + + assert_eq!( + r, + mem::transmute(__msa_fexp2_d(mem::transmute(a), mem::transmute(b))) + ); + } + + // FIXME: 16-bit floats + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_fexupl_w() { + // #[rustfmt::skip] + // let a = f16x8(1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5); + // #[rustfmt::skip] + // let r = f32x4::new(5.5, 6.5, 7.5, 8.5); + + // assert_eq!(r, mem::transmute(__msa_fexupl_w(mem::transmute(a)))); + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fexupl_d() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 6.5, 7.5, 8.5); + #[rustfmt::skip] + let r = 
f64x2::new(7.5, 8.5); + + assert_eq!(r, mem::transmute(__msa_fexupl_d(mem::transmute(a)))); + } + + // FIXME: 16-bit floats + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_fexupr_w() { + // #[rustfmt::skip] + // let a = f16x8(1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5); + // #[rustfmt::skip] + // let r = f32x4::new(1.5, 2.5, 3.5, 4.5); + + // assert_eq!(r, mem::transmute(__msa_fexupr_w(mem::transmute(a)))); + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fexupr_d() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 6.5, 7.5, 8.5); + #[rustfmt::skip] + let r = f64x2::new(5.5, 6.5); + + assert_eq!(r, mem::transmute(__msa_fexupr_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffint_s_w() { + #[rustfmt::skip] + let a = i32x4::new(-1, 2, -3, 4); + #[rustfmt::skip] + let r = f32x4::new(-1.0, 2.0, -3.0, 4.0); + + assert_eq!(r, mem::transmute(__msa_ffint_s_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffint_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, 2); + #[rustfmt::skip] + let r = f64x2::new(-1.0, 2.0); + + assert_eq!(r, mem::transmute(__msa_ffint_s_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffint_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = f32x4::new(1.0, 2.0, 3.0, 4.0); + + assert_eq!(r, mem::transmute(__msa_ffint_u_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffint_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let r = f64x2::new(1.0, 2.0); + + assert_eq!(r, mem::transmute(__msa_ffint_u_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffql_w() { + #[rustfmt::skip] + let a = i16x8::new(11, 25, 33, 47, 11, 25, 33, 47); + #[rustfmt::skip] + let r = f32x4::new( + 0.00033569336, 0.00076293945, + 0.0010070801, 0.0014343262 + ); + + assert_eq!(r, mem::transmute(__msa_ffql_w(mem::transmute(a)))); + } + + 
#[simd_test(enable = "msa")] + unsafe fn test_msa_ffql_d() { + #[rustfmt::skip] + let a = i32x4::new(1111, 2222, 3333, 4444); + #[rustfmt::skip] + let r = f64x2::new( + 0.000001552049070596695, + 0.0000020693987607955933 + ); + + assert_eq!(r, mem::transmute(__msa_ffql_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffqr_w() { + #[rustfmt::skip] + let a = i16x8::new(12, 26, 34, 48, 11, 25, 33, 47); + #[rustfmt::skip] + let r = f32x4::new( + 0.00036621094, 0.00079345703, + 0.0010375977, 0.0014648438 + ); + + assert_eq!(r, mem::transmute(__msa_ffqr_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ffqr_d() { + #[rustfmt::skip] + let a = i32x4::new(1111, 2555, 3333, 475); + #[rustfmt::skip] + let r = f64x2::new( + 0.0000005173496901988983, + 0.0000011897645890712738 + ); + + assert_eq!(r, mem::transmute(__msa_ffqr_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fill_b() { + #[rustfmt::skip] + let r = i8x16::new( + 2, 2, 2, 2, + 2, 2, 2, 2, + 2, 2, 2, 2, + 2, 2, 2, 2 + ); + + assert_eq!(r, mem::transmute(__msa_fill_b(2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fill_h() { + #[rustfmt::skip] + let r = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + + assert_eq!(r, mem::transmute(__msa_fill_h(2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fill_w() { + #[rustfmt::skip] + let r = i32x4::new(2, 2, 2, 2); + + assert_eq!(r, mem::transmute(__msa_fill_w(2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fill_d() { + #[rustfmt::skip] + let r = i64x2::new(2, 2); + + assert_eq!(r, mem::transmute(__msa_fill_d(2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_flog2_w() { + #[rustfmt::skip] + let a = f32x4::new(8.0, 16.0, 32.0, 64.0); + #[rustfmt::skip] + let r = f32x4::new(3.0, 4.0, 5.0, 6.0); + + assert_eq!(r, mem::transmute(__msa_flog2_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_flog2_d() { + 
#[rustfmt::skip] + let a = f64x2::new(8.0, 16.0); + #[rustfmt::skip] + let r = f64x2::new(3.0, 4.0); + + assert_eq!(r, mem::transmute(__msa_flog2_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmadd_w() { + #[rustfmt::skip] + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + #[rustfmt::skip] + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + #[rustfmt::skip] + let c = f32x4::new(9.0, 10.0, 11.0, 12.0); + #[rustfmt::skip] + let r = f32x4::new(46.0, 62.0, 80.0, 100.0); + + assert_eq!( + r, + mem::transmute(__msa_fmadd_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmadd_d() { + #[rustfmt::skip] + let a = f64x2::new(1.0, 2.0); + #[rustfmt::skip] + let b = f64x2::new(3.0, 4.0); + #[rustfmt::skip] + let c = f64x2::new(5.0, 6.0); + #[rustfmt::skip] + let r = f64x2::new(16.0, 26.0); + + assert_eq!( + r, + mem::transmute(__msa_fmadd_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmax_w() { + #[rustfmt::skip] + let a = f32x4::new(1.0, -6.0, 7.0, 8.0); + #[rustfmt::skip] + let b = f32x4::new(5.0, -2.0, 3.0, 4.0); + #[rustfmt::skip] + let r = f32x4::new(5.0, -2.0, 7.0, 8.0); + + assert_eq!( + r, + mem::transmute(__msa_fmax_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmax_d() { + #[rustfmt::skip] + let a = f64x2::new(1.0, 4.0); + #[rustfmt::skip] + let b = f64x2::new(3.0, 2.0); + #[rustfmt::skip] + let r = f64x2::new(3.0, 4.0); + + assert_eq!( + r, + mem::transmute(__msa_fmax_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmax_a_w() { + #[rustfmt::skip] + let a = f32x4::new(1.0, -6.0, -7.0, -8.0); + #[rustfmt::skip] + let b = f32x4::new(5.0, -2.0, 3.0, 4.0); + #[rustfmt::skip] + let r = f32x4::new(5.0, -6.0, -7.0, -8.0); + + assert_eq!( + r, + 
mem::transmute(__msa_fmax_a_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmax_a_d() { + #[rustfmt::skip] + let a = f64x2::new(1.0, -4.0); + #[rustfmt::skip] + let b = f64x2::new(3.0, 2.0); + #[rustfmt::skip] + let r = f64x2::new(3.0, -4.0); + + assert_eq!( + r, + mem::transmute(__msa_fmax_a_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmin_w() { + #[rustfmt::skip] + let a = f32x4::new(1.0, -6.0, 7.0, 8.0); + #[rustfmt::skip] + let b = f32x4::new(5.0, -2.0, 3.0, 4.0); + #[rustfmt::skip] + let r = f32x4::new(1.0, -6.0, 3.0, 4.0); + + assert_eq!( + r, + mem::transmute(__msa_fmin_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmin_d() { + #[rustfmt::skip] + let a = f64x2::new(1.0, 4.0); + #[rustfmt::skip] + let b = f64x2::new(3.0, 2.0); + #[rustfmt::skip] + let r = f64x2::new(1.0, 2.0); + + assert_eq!( + r, + mem::transmute(__msa_fmin_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmin_a_w() { + #[rustfmt::skip] + let a = f32x4::new(1.0, -6.0, -7.0, -8.0); + #[rustfmt::skip] + let b = f32x4::new(5.0, -2.0, 3.0, 4.0); + #[rustfmt::skip] + let r = f32x4::new(1.0, -2.0, 3.0, 4.0); + + assert_eq!( + r, + mem::transmute(__msa_fmin_a_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmin_a_d() { + #[rustfmt::skip] + let a = f64x2::new(1.0, -4.0); + #[rustfmt::skip] + let b = f64x2::new(3.0, 2.0); + #[rustfmt::skip] + let r = f64x2::new(1.0, 2.0); + + assert_eq!( + r, + mem::transmute(__msa_fmin_a_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmsub_w() { + #[rustfmt::skip] + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + #[rustfmt::skip] + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + #[rustfmt::skip] + let c = f32x4::new(9.0, 10.0, 11.0, 12.0); + 
#[rustfmt::skip] + let r = f32x4::new(-44.0, -58.0, -74.0, -92.0); + + assert_eq!( + r, + mem::transmute(__msa_fmsub_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmsub_d() { + #[rustfmt::skip] + let a = f64x2::new(1.0, 2.0); + #[rustfmt::skip] + let b = f64x2::new(3.0, 4.0); + #[rustfmt::skip] + let c = f64x2::new(5.0, 6.0); + #[rustfmt::skip] + let r = f64x2::new(-14.0, -22.0); + + assert_eq!( + r, + mem::transmute(__msa_fmsub_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmul_w() { + #[rustfmt::skip] + let a = f32x4::new(1.1, -2.2, 3.3, 4.4); + #[rustfmt::skip] + let b = f32x4::new(4.4, 3.3, 2.2, -1.1); + #[rustfmt::skip] + let r = f32x4::new(4.84, -7.26, 7.26, -4.84); + + assert_eq!( + r, + mem::transmute(__msa_fmul_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fmul_d() { + #[rustfmt::skip] + let a = f64x2::new(1.1, -2.2); + #[rustfmt::skip] + let b = f64x2::new(4.0, -3.3); + #[rustfmt::skip] + let r = f64x2::new(4.4, 7.26); + + assert_eq!( + r, + mem::transmute(__msa_fmul_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_frint_w() { + #[rustfmt::skip] + let a = f32x4::new(2.6, -2.7, 1.3, -1.7); + #[rustfmt::skip] + let r = f32x4::new(3.0, -3.0, 1.0, -2.0); + + assert_eq!(r, mem::transmute(__msa_frint_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_frint_d() { + #[rustfmt::skip] + let a = f64x2::new(2.6, 1.3); + #[rustfmt::skip] + let r = f64x2::new(3.0, 1.0); + + assert_eq!(r, mem::transmute(__msa_frint_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_frcp_w() { + #[rustfmt::skip] + let a = f32x4::new(2.6, -2.7, 1.3, -1.7); + #[rustfmt::skip] + let r = f32x4::new( + 0.3846154, -0.37037036, + 0.7692308, -0.58823526 
+ ); + + assert_eq!(r, mem::transmute(__msa_frcp_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_frcp_d() { + #[rustfmt::skip] + let a = f64x2::new(2.6, 1.3); + #[rustfmt::skip] + let r = f64x2::new(0.3846153846153846, 0.7692307692307692); + + assert_eq!(r, mem::transmute(__msa_frcp_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_frsqrt_w() { + #[rustfmt::skip] + let a = f32x4::new(2.6, 2.7, 1.3, 1.7); + #[rustfmt::skip] + let r = f32x4::new( + 0.6201737, 0.6085806, + 0.87705797, 0.766965 + ); + + assert_eq!(r, mem::transmute(__msa_frsqrt_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_frsqrt_d() { + #[rustfmt::skip] + let a = f64x2::new(2.6, 1.3); + #[rustfmt::skip] + let r = f64x2::new(0.6201736729460422, 0.8770580193070292); + + assert_eq!(r, mem::transmute(__msa_frsqrt_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsaf_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 5.5, 5.5, 5.5); + #[rustfmt::skip] + let b = f32x4::new(-5.5, 5.5, 5.5, 5.5); + #[rustfmt::skip] + let r = i32x4::new(0, 0, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsaf_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsaf_d() { + #[rustfmt::skip] + let a = f64x2::new(-125.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(125.5, 3.3); + #[rustfmt::skip] + let r = i64x2::new(0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsaf_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fseq_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, -3.3, f32::NAN, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(5.5, -3.3, f32::NAN, 1.1); + #[rustfmt::skip] + let r = i32x4::new(0, -1, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fseq_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fseq_d() { + 
#[rustfmt::skip] + let a = f64x2::new(-125.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(125.5, 5.5); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fseq_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsle_w() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 5.5, 5.5, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(-5.5, 3.3, 5.5, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, 0, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsle_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsle_d() { + #[rustfmt::skip] + let a = f64x2::new(-125.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(125.5, 3.3); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsle_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fslt_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 5.5, 5.5, 5.5); + #[rustfmt::skip] + let b = f32x4::new(5.5, 3.3, 5.5, 1.1); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fslt_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fslt_d() { + #[rustfmt::skip] + let a = f64x2::new(-125.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(125.5, 3.3); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fslt_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsne_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 5.5, 5.5, 5.5); + #[rustfmt::skip] + let b = f32x4::new(5.5, 3.3, 5.5, 1.1); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsne_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsne_d() { + 
#[rustfmt::skip] + let a = f64x2::new(-125.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(125.5, 5.5); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsne_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsor_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, f32::NAN, 5.5, 5.5); + #[rustfmt::skip] + let b = f32x4::new(5.5, 3.3, 5.5, 1.1); + #[rustfmt::skip] + let r = i32x4::new(-1, 0, -1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsor_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsor_d() { + #[rustfmt::skip] + let a = f64x2::new(-125.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(125.5, f64::NAN); + #[rustfmt::skip] + let r = i64x2::new(-1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsor_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsqrt_w() { + #[rustfmt::skip] + let a = f32x4::new(9.0, 81.0, 1089.0, 10000.0); + #[rustfmt::skip] + let r = f32x4::new(3.0, 9.0, 33.0, 100.0); + + assert_eq!(r, mem::transmute(__msa_fsqrt_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsqrt_d() { + #[rustfmt::skip] + let a = f64x2::new(81.0, 10000.0); + #[rustfmt::skip] + let r = f64x2::new(9.0, 100.0); + + assert_eq!(r, mem::transmute(__msa_fsqrt_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsub_w() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 6.5, 7.5, 8.5); + #[rustfmt::skip] + let b = f32x4::new(1.25, 1.75, 2.25, 2.75); + #[rustfmt::skip] + let r = f32x4::new(4.25, 4.75, 5.25, 5.75); + + assert_eq!( + r, + mem::transmute(__msa_fsub_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsub_d() { + #[rustfmt::skip] + let a = f64x2::new(555.5, 55.5); + #[rustfmt::skip] + let b = f64x2::new(4.25, 3.25); + #[rustfmt::skip] + let r = 
f64x2::new(551.25, 52.25); + + assert_eq!( + r, + mem::transmute(__msa_fsub_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsueq_w() { + #[rustfmt::skip] + let a = f32x4::new(5.5, f32::NAN, 5.5, 5.5); + #[rustfmt::skip] + let b = f32x4::new(5.5, 5.5, -5.5, 5.5); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsueq_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsueq_d() { + #[rustfmt::skip] + let a = f64x2::new(-5.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(5.5, f64::NAN); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsueq_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsule_w() { + #[rustfmt::skip] + let a = f32x4::new(5.7, 5.8, 5.9, f32::NAN); + #[rustfmt::skip] + let b = f32x4::new(5.6, 5.9, 5.9, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, -1, -1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsule_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsule_d() { + #[rustfmt::skip] + let a = f64x2::new(5.5, 5.5); + #[rustfmt::skip] + let b = f64x2::new(5.5, 5.5); + #[rustfmt::skip] + let r = i64x2::new(-1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsule_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsult_w() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 5.5, 5.5, 5.5); + #[rustfmt::skip] + let b = f32x4::new(5.6, f32::NAN, 2.2, 1.1); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsult_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsult_d() { + #[rustfmt::skip] + let a = f64x2::new(5.5, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(4.4, 3.3); + 
#[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsult_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsun_w() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 5.5, f32::NAN, 5.5); + #[rustfmt::skip] + let b = f32x4::new(4.4, 3.3, 2.2, f32::NAN); + #[rustfmt::skip] + let r = i32x4::new(0, 0, -1, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsun_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsun_d() { + #[rustfmt::skip] + let a = f64x2::new(5.5, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(4.4, 3.3); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsun_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsune_w() { + #[rustfmt::skip] + let a = f32x4::new(5.5, 5.5, f32::NAN, 5.5); + #[rustfmt::skip] + let b = f32x4::new(4.4, 3.3, 2.2, 5.5); + #[rustfmt::skip] + let r = i32x4::new(-1, -1, -1, 0); + + assert_eq!( + r, + mem::transmute(__msa_fsune_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_fsune_d() { + #[rustfmt::skip] + let a = f64x2::new(5.5, f64::NAN); + #[rustfmt::skip] + let b = f64x2::new(5.5, 3.3); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_fsune_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftint_s_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); + #[rustfmt::skip] + let r = i32x4::new(-6, 76, -1001, 1219); + + assert_eq!(r, mem::transmute(__msa_ftint_s_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftint_s_d() { + #[rustfmt::skip] + let a = f64x2::new(-5.5, 25656.4); + #[rustfmt::skip] + let r = i64x2::new(-6, 25656); + + assert_eq!(r, 
mem::transmute(__msa_ftint_s_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftint_u_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); + #[rustfmt::skip] + let r = u32x4::new(0, 76, 0, 1219); + + assert_eq!(r, mem::transmute(__msa_ftint_u_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftint_u_d() { + #[rustfmt::skip] + let a = f64x2::new(5.5, -25656.4); + #[rustfmt::skip] + let r = u64x2::new(6, 0); + + assert_eq!(r, mem::transmute(__msa_ftint_u_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftq_h() { + #[rustfmt::skip] + let a = f32x4::new(0.00001, 0.0002, 0.00001, -0.0002); + #[rustfmt::skip] + let b = f32x4::new(0.0001, -0.002, 0.0001, 0.002); + #[rustfmt::skip] + let r = i16x8::new(3, -66, 3, 66, 0, 7, 0, -7); + + assert_eq!( + r, + mem::transmute(__msa_ftq_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftq_w() { + #[rustfmt::skip] + let a = f64x2::new(0.00001, -0.0002); + #[rustfmt::skip] + let b = f64x2::new(0.00000045, 0.000015); + #[rustfmt::skip] + let r = i32x4::new(966, 32212, 21475, -429497); + + assert_eq!( + r, + mem::transmute(__msa_ftq_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftrunc_s_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); + #[rustfmt::skip] + let r = i32x4::new(-5, 75, -1000, 1219); + + assert_eq!(r, mem::transmute(__msa_ftrunc_s_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftrunc_s_d() { + #[rustfmt::skip] + let a = f64x2::new(-5.5, 25656.4); + #[rustfmt::skip] + let r = i64x2::new(-5, 25656); + + assert_eq!(r, mem::transmute(__msa_ftrunc_s_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftrunc_u_w() { + #[rustfmt::skip] + let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); + #[rustfmt::skip] + let 
r = u32x4::new(0, 75, 0, 1219); + + assert_eq!(r, mem::transmute(__msa_ftrunc_u_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ftrunc_u_d() { + #[rustfmt::skip] + let a = f64x2::new(5.5, -25656.4); + #[rustfmt::skip] + let r = u64x2::new(5, 0); + + assert_eq!(r, mem::transmute(__msa_ftrunc_u_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hadd_s_h() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(6, 6, 2, -2, 6, 6, 2, -2); + + assert_eq!( + r, + mem::transmute(__msa_hadd_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hadd_s_w() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i32x4::new(6, 6, 2, -2); + + assert_eq!( + r, + mem::transmute(__msa_hadd_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hadd_s_d() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i64x2::new(2, -2); + + assert_eq!( + r, + mem::transmute(__msa_hadd_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hadd_u_h() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = u16x8::new(6, 6, 6, 6, 6, 6, 6, 6); + + assert_eq!( + r, + mem::transmute(__msa_hadd_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hadd_u_w() { 
+ #[rustfmt::skip] + let a = u16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = u32x4::new(6, 6, 6, 6); + + assert_eq!( + r, + mem::transmute(__msa_hadd_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hadd_u_d() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = u64x2::new(6, 6); + + assert_eq!( + r, + mem::transmute(__msa_hadd_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hsub_s_h() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(-2, 2, -6, -6, -2, 2, -6, -6); + + assert_eq!( + r, + mem::transmute(__msa_hsub_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hsub_s_w() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i32x4::new(-2, 2, -6, -6); + + assert_eq!( + r, + mem::transmute(__msa_hsub_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hsub_s_d() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i64x2::new(-6, -6); + + assert_eq!( + r, + mem::transmute(__msa_hsub_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hsub_u_h() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 4, 3, 2, 1, + 4, 3, 
2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(-2, 2, -2, 2, -2, 2, -2, 2); + + assert_eq!( + r, + mem::transmute(__msa_hsub_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hsub_u_w() { + #[rustfmt::skip] + let a = u16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i32x4::new(-2, 2, -2, 2); + + assert_eq!( + r, + mem::transmute(__msa_hsub_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_hsub_u_d() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i64x2::new(-2, 2); + + assert_eq!( + r, + mem::transmute(__msa_hsub_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvev_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 4, 1, 2, 3, + 4, 1, 2, 3, + 4, 1, 2, 3, + 4, 1, 2, 3 + ); + + assert_eq!( + r, + mem::transmute(__msa_ilvev_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvev_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(4, 1, 2, 3, 4, 1, 2, 3); + + assert_eq!( + r, + mem::transmute(__msa_ilvev_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvev_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(4, 1, 2, 3); + + assert_eq!( + r, + 
mem::transmute(__msa_ilvev_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvev_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(4, 3); + #[rustfmt::skip] + let r = i64x2::new(4, 1); + + assert_eq!( + r, + mem::transmute(__msa_ilvev_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvl_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = i8x16::new( + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 8, 9, 7, 10, + 6, 11, 5, 12, + 4, 13, 3, 14, + 2, 15, 1, 16 + ); + + assert_eq!( + r, + mem::transmute(__msa_ilvl_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvl_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let b = i16x8::new( + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(4, 5, 3, 6, 2, 7, 1, 8); + + assert_eq!( + r, + mem::transmute(__msa_ilvl_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvl_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(2, 3, 1, 4); + + assert_eq!( + r, + mem::transmute(__msa_ilvl_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvl_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(2, 1); + #[rustfmt::skip] + let r = i64x2::new(1, 2); + + assert_eq!( + r, + mem::transmute(__msa_ilvl_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvod_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 
5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = i8x16::new( + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 15, 2, 13, 4, + 11, 6, 9, 8, + 7, 10, 5, 12, + 3, 14, 1, 16 + ); + + assert_eq!( + r, + mem::transmute(__msa_ilvod_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvod_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let b = i16x8::new( + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(7, 2, 5, 4, 3, 6, 1, 8); + + assert_eq!( + r, + mem::transmute(__msa_ilvod_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvod_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(3, 2, 1, 4); + + assert_eq!( + r, + mem::transmute(__msa_ilvod_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvod_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(2, 1); + #[rustfmt::skip] + let r = i64x2::new(1, 2); + + assert_eq!( + r, + mem::transmute(__msa_ilvod_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvr_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = i8x16::new( + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 16, 1, 15, 2, + 14, 3, 13, 4, + 12, 5, 11, 6, + 10, 7, 9, 8 + ); + + assert_eq!( + r, + mem::transmute(__msa_ilvr_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvr_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 
); + #[rustfmt::skip] + let b = i16x8::new( + 8, 7, 6, 5, + 4, 3, 2, 1, + ); + #[rustfmt::skip] + let r = i16x8::new(8, 1, 7, 2, 6, 3, 5, 4); + + assert_eq!( + r, + mem::transmute(__msa_ilvr_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvr_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(4, 1, 3, 2); + + assert_eq!( + r, + mem::transmute(__msa_ilvr_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ilvr_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(2, 1); + #[rustfmt::skip] + let r = i64x2::new(2, 1); + + assert_eq!( + r, + mem::transmute(__msa_ilvr_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insert_b() { + #[rustfmt::skip] + let a = i8x16::new( + -100, 127, 4, 127, + -100, 127, 4, 127, + -100, 127, 4, 127, + -100, 127, 4, 127 + ); + #[rustfmt::skip] + let r = i8x16::new( + -100, 127, 4, 127, + -100, 127, 4, 127, + -100, 127, 4, 127, + 5, 127, 4, 127 + ); + + assert_eq!(r, mem::transmute(__msa_insert_b(mem::transmute(a), 12, 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insert_h() { + #[rustfmt::skip] + let a = i16x8::new( + 32767, 3276, 100, 11, + 32767, 3276, 100, 11 + ); + #[rustfmt::skip] + let r = i16x8::new( + 32767, 3276, 100, 11, + 5, 3276, 100, 11 + ); + + assert_eq!(r, mem::transmute(__msa_insert_h(mem::transmute(a), 4, 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insert_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 2147483647, 5, -2147483647); + #[rustfmt::skip] + let r = i32x4::new(100, 7, 5, -2147483647); + + assert_eq!(r, mem::transmute(__msa_insert_w(mem::transmute(a), 1, 7))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insert_d() { + #[rustfmt::skip] + let a = i64x2::new(3, 
i64::MAX); + #[rustfmt::skip] + let r = i64x2::new(3, 100); + + assert_eq!(r, mem::transmute(__msa_insert_d(mem::transmute(a), 1, 100))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insve_b() { + #[rustfmt::skip] + let a = i8x16::new( + -100, i8::MAX, 4, i8::MAX, + -100, i8::MAX, 4, i8::MAX, + -100, i8::MAX, 4, i8::MAX, + -100, i8::MAX, 4, i8::MAX + ); + #[rustfmt::skip] + let b = i8x16::new( + 5, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i8x16::new( + -100, 127, 4, 127, + -100, 127, 4, 127, + -100, 127, 4, 127, + 5, 127, 4, 127 + ); + + assert_eq!( + r, + mem::transmute(__msa_insve_b(mem::transmute(a), 12, mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insve_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 3276, 100, 11, + i16::MAX, 3276, 100, 11 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i16x8::new( + 32767, 3276, 100, 11, + 1, 3276, 100, 11 + ); + + assert_eq!( + r, + mem::transmute(__msa_insve_h(mem::transmute(a), 4, mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insve_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 2147483647, 5, -2147483647); + #[rustfmt::skip] + let b = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = i32x4::new(100, 2147483647, 5, 1); + + assert_eq!( + r, + mem::transmute(__msa_insve_w(mem::transmute(a), 3, mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_insve_d() { + #[rustfmt::skip] + let a = i64x2::new(3, i64::MAX); + #[rustfmt::skip] + let b = i64x2::new(1, 2); + #[rustfmt::skip] + let r = i64x2::new(3, 1); + + assert_eq!( + r, + mem::transmute(__msa_insve_d(mem::transmute(a), 1, mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ld_b() { + #[rustfmt::skip] + let mut a : [i8; 32] = [ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 
22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ]; + let p = &mut a[4] as *mut _ as *mut u8; + #[rustfmt::skip] + let r = i8x16::new( + 13, 14, 15, 16, + 17, 18, 19, 20, + 21, 22, 23, 24, + 25, 26, 27, 28 + ); + + assert_eq!(r, mem::transmute(__msa_ld_b(p, 9))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ld_h() { + #[rustfmt::skip] + let mut a : [i16; 16] = [ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ]; + let p = &mut a[4] as *mut _ as *mut u8; + #[rustfmt::skip] + let r = i16x8::new(3, 4, 5, 6, 7, 8, 9, 10); + + assert_eq!(r, mem::transmute(__msa_ld_h(p, -2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ld_w() { + #[rustfmt::skip] + let mut a : [i32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let p = &mut a[3] as *mut _ as *mut u8; + #[rustfmt::skip] + let r = i32x4::new(2, 3, 4, 5); + + assert_eq!(r, mem::transmute(__msa_ld_w(p, -4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ld_d() { + #[rustfmt::skip] + let mut a : [i64; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let p = &mut a[4] as *mut _ as *mut u8; + #[rustfmt::skip] + let r = i64x2::new(0, 1); + + assert_eq!(r, mem::transmute(__msa_ld_d(p, -32))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ldi_b() { + #[rustfmt::skip] + let r = i8x16::new( + -20, -20, -20, -20, + -20, -20, -20, -20, + -20, -20, -20, -20, + -20, -20, -20, -20 + ); + + assert_eq!(r, mem::transmute(__msa_ldi_b(-20))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ldi_h() { + #[rustfmt::skip] + let r = i16x8::new( + 255, 255, 255, 255, + 255, 255, 255, 255 + ); + + assert_eq!(r, mem::transmute(__msa_ldi_h(255))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ldi_w() { + #[rustfmt::skip] + let r = i32x4::new(-509, -509, -509, -509); + + assert_eq!(r, mem::transmute(__msa_ldi_w(-509))); + } + + // FIXME: https://reviews.llvm.org/D59884 + // If target type is i64, negative immediate loses the sign + // Test passes if 4294967185 is used instead -111 in vector `r` + // 
#[simd_test(enable = "msa")] + // unsafe fn test_msa_ldi_d() { + // let r = i64x2::new(-111, -111); + + // assert_eq!(r, mem::transmute(__msa_ldi_d(-111))); + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_madd_q_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 1024, i16::MIN, -1024, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024 + ); + #[rustfmt::skip] + let c = i16x8::new( + i16::MAX, i16::MAX, 1, -1, + 33, 66, 99, 132 + ); + #[rustfmt::skip] + let r = i16x8::new(32767, 2047, -32768, -1025, 2, 4, 6, 8); + + assert_eq!( + r, + mem::transmute(__msa_madd_q_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_madd_q_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MAX, i32::MIN, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(102401, 102401, 102401, 102401); + #[rustfmt::skip] + let c = i32x4::new(10240, 20480, 30720, 40960); + #[rustfmt::skip] + let r = i32x4::new(2147483647, -2147483648, 2, 3); + + assert_eq!( + r, + mem::transmute(__msa_madd_q_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maddr_q_h() { + #[rustfmt::skip] + let a = i16x8::new( + 32767, 1024, -32768, -1024, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024 + ); + #[rustfmt::skip] + let c = i16x8::new( + 32767, 32767, 32767, 32767, + 33, 66, 99, 132 + ); + #[rustfmt::skip] + let r = i16x8::new(32767, 2048, -31744, 0, 2, 4, 6, 8); + + assert_eq!( + r, + mem::transmute(__msa_maddr_q_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maddr_q_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MAX, i32::MIN, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(102401, 102401, 102401, 102401); + #[rustfmt::skip] + let c = 
i32x4::new(10240, 20480, 30720, 40960); + #[rustfmt::skip] + let r = i32x4::new(2147483647, -2147483647, 2, 4); + + assert_eq!( + r, + mem::transmute(__msa_maddr_q_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maddv_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 5, 6, 7, 8, + 5, 6, 7, 8, + 5, 6, 7, 8, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let c = i8x16::new( + 9, 10, 11, 12, + 9, 10, 11, 12, + 9, 10, 11, 12, + 9, 10, 11, 12 + ); + #[rustfmt::skip] + let r = i8x16::new( + 46, 62, 80, 100, + 46, 62, 80, 100, + 46, 62, 80, 100, + 46, 62, 80, 100 + ); + + assert_eq!( + r, + mem::transmute(__msa_maddv_b( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maddv_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = i16x8::new(5, 6, 7, 8, 5, 6, 7, 8); + #[rustfmt::skip] + let c = i16x8::new(9, 10, 11, 12, 9, 10, 11, 12); + #[rustfmt::skip] + let r = i16x8::new(46, 62, 80, 100, 46, 62, 80, 100); + + assert_eq!( + r, + mem::transmute(__msa_maddv_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maddv_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(3, 4, 3, 4); + #[rustfmt::skip] + let c = i32x4::new(5, 6, 5, 6); + #[rustfmt::skip] + let r = i32x4::new(16, 26, 16, 26); + + assert_eq!( + r, + mem::transmute(__msa_maddv_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maddv_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(3, 4); + #[rustfmt::skip] + let c = i64x2::new(5, 6); + #[rustfmt::skip] + let r = 
i64x2::new(16, 26); + + assert_eq!( + r, + mem::transmute(__msa_maddv_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_a_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + 6, 7, 8, 9, + -6, -7, -8, -9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = i8x16::new( + -6, -7, -8, -9, + 6, 7, 8, 9, + -6, -7, -8, -9, + 6, 7, 8, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_max_a_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_a_h() { + #[rustfmt::skip] + let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + #[rustfmt::skip] + let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); + #[rustfmt::skip] + let r = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); + + assert_eq!( + r, + mem::transmute(__msa_max_a_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_a_w() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = i32x4::new(6, 7, 8, 9); + + assert_eq!( + r, + mem::transmute(__msa_max_a_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_a_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, 2); + #[rustfmt::skip] + let b = i64x2::new(6, -7); + #[rustfmt::skip] + let r = i64x2::new(6, -7); + + assert_eq!( + r, + mem::transmute(__msa_max_a_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + 6, 7, 8, 9, + -6, -7, -8, -9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = i8x16::new( + 1, 2, 3, 4, + 6, 7, 
8, 9, + 1, 2, 3, 4, + 6, 7, 8, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_max_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_s_h() { + #[rustfmt::skip] + let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + #[rustfmt::skip] + let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); + #[rustfmt::skip] + let r = i16x8::new(1, 7, 3, 9, 1, 7, 3, 9); + + assert_eq!( + r, + mem::transmute(__msa_max_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_s_w() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = i32x4::new(6, 7, 8, 9); + + assert_eq!( + r, + mem::transmute(__msa_max_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, 2); + #[rustfmt::skip] + let b = i64x2::new(6, -7); + #[rustfmt::skip] + let r = i64x2::new(6, 2); + + assert_eq!( + r, + mem::transmute(__msa_max_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + + assert_eq!( + r, + mem::transmute(__msa_max_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + + assert_eq!( + r, + mem::transmute(__msa_max_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = 
"msa")] + unsafe fn test_msa_max_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(6, 7, 8, 9); + + assert_eq!( + r, + mem::transmute(__msa_max_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_max_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(6, 7); + + assert_eq!( + r, + mem::transmute(__msa_max_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maxi_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, -20, -6, 8, + 1, -20, -6, 8, + 1, -20, -6, 8, + 1, -20, -6, 8 + ); + #[rustfmt::skip] + let r = i8x16::new( + 1, -16, -6, 8, + 1, -16, -6, 8, + 1, -16, -6, 8, + 1, -16, -6, 8 + ); + + assert_eq!(r, mem::transmute(__msa_maxi_s_b(mem::transmute(a), -16))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maxi_s_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 3, -60, -8, 1, 3, -6, -8); + #[rustfmt::skip] + let r = i16x8::new(15, 15, 15, 15, 15, 15, 15, 15); + + assert_eq!(r, mem::transmute(__msa_maxi_s_h(mem::transmute(a), 15))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maxi_s_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 3, -6, -8); + #[rustfmt::skip] + let r = i32x4::new(1, 3, -5, -5); + + assert_eq!(r, mem::transmute(__msa_maxi_s_w(mem::transmute(a), -5))); + } + + // FIXME: https://reviews.llvm.org/D59884 + // If target type is i64, negative immediate loses the sign + // Test passes if 4294967293 is used instead -3 in vector `r` + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_maxi_s_d() { + // #[rustfmt::skip] + // let a = i64x2::new(1, -8); + // #[rustfmt::skip] + // let r = i64x2::new(-3, -3); + + // assert_eq!(r, mem::transmute(__msa_maxi_s_d(mem::transmute(a), -3))); + // } + + #[simd_test(enable = "msa")] + unsafe 
fn test_msa_maxi_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 3, 6, 8, + 1, 3, 6, 8, + 1, 3, 6, 8, + 1, 3, 6, 8 + ); + #[rustfmt::skip] + let r = u8x16::new( + 5, 5, 6, 8, + 5, 5, 6, 8, + 5, 5, 6, 8, + 5, 5, 6, 8 + ); + + assert_eq!(r, mem::transmute(__msa_maxi_u_b(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maxi_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 3, 6, 8, 1, 3, 6, 8); + #[rustfmt::skip] + let r = u16x8::new(5, 5, 6, 8, 5, 5, 6, 8); + + assert_eq!(r, mem::transmute(__msa_maxi_u_h(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maxi_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 3, 6, 8); + #[rustfmt::skip] + let r = u32x4::new(5, 5, 6, 8); + + assert_eq!(r, mem::transmute(__msa_maxi_u_w(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_maxi_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 8); + #[rustfmt::skip] + let r = u64x2::new(5, 8); + + assert_eq!(r, mem::transmute(__msa_maxi_u_d(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_a_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + 6, 7, 8, 9, + -6, -7, -8, -9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + + assert_eq!( + r, + mem::transmute(__msa_min_a_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_a_h() { + #[rustfmt::skip] + let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + #[rustfmt::skip] + let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); + #[rustfmt::skip] + let r = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + + assert_eq!( + r, + mem::transmute(__msa_min_a_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_a_w() { + 
#[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = i32x4::new(1, -2, 3, -4); + + assert_eq!( + r, + mem::transmute(__msa_min_a_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_a_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, 2); + #[rustfmt::skip] + let b = i64x2::new(6, -7); + #[rustfmt::skip] + let r = i64x2::new(-1, 2); + + assert_eq!( + r, + mem::transmute(__msa_min_a_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + 6, 7, 8, 9, + -6, -7, -8, -9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = i8x16::new( + -6, -7, -8, -9, + -1, -2, -3, -4, + -6, -7, -8, -9, + -1, -2, -3, -4 + ); + + assert_eq!( + r, + mem::transmute(__msa_min_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_s_h() { + #[rustfmt::skip] + let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + #[rustfmt::skip] + let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); + #[rustfmt::skip] + let r = i16x8::new(-6, -2, -8, -4, -6, -2, -8, -4); + + assert_eq!( + r, + mem::transmute(__msa_min_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_s_w() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = i32x4::new(1, -2, 3, -4); + + assert_eq!( + r, + mem::transmute(__msa_min_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_s_d() { + #[rustfmt::skip] + let a = i64x2::new(-1, 2); + #[rustfmt::skip] + let b = i64x2::new(6, -7); + #[rustfmt::skip] + let r = i64x2::new(-1, -7); + + 
assert_eq!( + r, + mem::transmute(__msa_min_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let r = i8x16::new( + -10, -10, -10, -10, + -10, -10, -10, -10, + -10, -10, -10, -10, + -10, -10, -10, -10 + ); + + assert_eq!(r, mem::transmute(__msa_mini_s_b(mem::transmute(a), -10))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_s_h() { + #[rustfmt::skip] + let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + #[rustfmt::skip] + let r = i16x8::new(-3, -3, -3, -4, -3, -3, -3, -4); + + assert_eq!(r, mem::transmute(__msa_mini_s_h(mem::transmute(a), -3))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_s_w() { + #[rustfmt::skip] + let a = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let r = i32x4::new(-3, -3, -3, -4); + + assert_eq!(r, mem::transmute(__msa_mini_s_w(mem::transmute(a), -3))); + } + + // FIXME: https://reviews.llvm.org/D59884 + // If target type is i64, negative immediate loses the sign + // -3 is represented as 4294967293 + // #[simd_test(enable = "msa")] + // unsafe fn test_msa_mini_s_d() { + // #[rustfmt::skip] + // let a = i64x2::new(-3, 2); + // #[rustfmt::skip] + // let r = i64x2::new(-1, -3); + + // assert_eq!(r, mem::transmute(__msa_mini_s_d(mem::transmute(a), -3))); + // } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + + assert_eq!( + r, + mem::transmute(__msa_min_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_u_h() { + #[rustfmt::skip] + let a 
= u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4,); + + assert_eq!( + r, + mem::transmute(__msa_min_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(1, 2, 3, 4,); + + assert_eq!( + r, + mem::transmute(__msa_min_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_min_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(1, 2,); + + assert_eq!( + r, + mem::transmute(__msa_min_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 3, 6, 8, + 1, 3, 6, 8, + 1, 3, 6, 8, + 1, 3, 6, 8 + ); + #[rustfmt::skip] + let r = u8x16::new( + 1, 3, 5, 5, + 1, 3, 5, 5, + 1, 3, 5, 5, + 1, 3, 5, 5 + ); + + assert_eq!(r, mem::transmute(__msa_mini_u_b(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_u_h() { + #[rustfmt::skip] + let a = u16x8::new(1, 3, 6, 8, 1, 3, 6, 8); + #[rustfmt::skip] + let r = u16x8::new(1, 3, 5, 5, 1, 3, 5, 5); + + assert_eq!(r, mem::transmute(__msa_mini_u_h(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_u_w() { + #[rustfmt::skip] + let a = u32x4::new(1, 3, 6, 8); + #[rustfmt::skip] + let r = u32x4::new(1, 3, 5, 5); + + assert_eq!(r, mem::transmute(__msa_mini_u_w(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mini_u_d() { + #[rustfmt::skip] + let a = u64x2::new(1, 8); + #[rustfmt::skip] + let r = u64x2::new(1, 5); + + assert_eq!(r, 
mem::transmute(__msa_mini_u_d(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + -6, -7, -8, -9, + 6, 7, 8, 9, + -6, -7, -8, -9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let b = i8x16::new( + 1, 2, 3, 4, + -1, -2, -3, -4, + 1, 2, 3, 4, + -1, -2, -3, -4 + ); + #[rustfmt::skip] + let r = i8x16::new( + 0, -1, -2, -1, + 0, 1, 2, 1, + 0, -1, -2, -1, + 0, 1, 2, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_mod_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_s_h() { + #[rustfmt::skip] + let a = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); + #[rustfmt::skip] + let b = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); + #[rustfmt::skip] + let r = i16x8::new(0, 1, -2, 1, 0, 1, -2, 1); + + assert_eq!( + r, + mem::transmute(__msa_mod_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_s_w() { + #[rustfmt::skip] + let a = i32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let b = i32x4::new(1, -2, 3, -4); + #[rustfmt::skip] + let r = i32x4::new(0, 1, 2, 1); + + assert_eq!( + r, + mem::transmute(__msa_mod_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_s_d() { + #[rustfmt::skip] + let a = i64x2::new(6, -7); + #[rustfmt::skip] + let b = i64x2::new(-1, 2); + #[rustfmt::skip] + let r = i64x2::new(0, -1); + + assert_eq!( + r, + mem::transmute(__msa_mod_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let b = u8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = u8x16::new( + 0, 1, 2, 1, + 0, 1, 2, 1, + 0, 1, 2, 1, + 0, 1, 2, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_mod_u_b(mem::transmute(a), 
mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_u_h() { + #[rustfmt::skip] + let a = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let b = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let r = u16x8::new(0, 1, 2, 1, 0, 1, 2, 1); + + assert_eq!( + r, + mem::transmute(__msa_mod_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_u_w() { + #[rustfmt::skip] + let a = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let b = u32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = u32x4::new(0, 1, 2, 1); + + assert_eq!( + r, + mem::transmute(__msa_mod_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mod_u_d() { + #[rustfmt::skip] + let a = u64x2::new(6, 7); + #[rustfmt::skip] + let b = u64x2::new(1, 2); + #[rustfmt::skip] + let r = u64x2::new(0, 1); + + assert_eq!( + r, + mem::transmute(__msa_mod_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_move_v() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 1, 2, 3, 4, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let r = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 1, 2, 3, 4, + 5, 6, 7, 8 + ); + + assert_eq!(r, mem::transmute(__msa_move_v(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msub_q_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1024, -1024, 1024, -1024, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1025, 1025, 1025, 1025, + 1025, 1025, 1025, 1025 + ); + #[rustfmt::skip] + let c = i16x8::new( + 1024, 2048, 3072, 4096, + 1024, 2048, 3072, 4096 + ); + #[rustfmt::skip] + let r = i16x8::new(991, -1089, 927, -1153, -32, -63, -94, -125); + + assert_eq!( + r, + mem::transmute(__msa_msub_q_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msub_q_w() { + 
#[rustfmt::skip] + let a = i32x4::new(2147483647, -2147483647, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(10240, 10240, 10240, 10240); + #[rustfmt::skip] + let c = i32x4::new(10240, 20480, 30720, 40960); + #[rustfmt::skip] + let r = i32x4::new(2147483646, -2147483648, 0, 1); + + assert_eq!( + r, + mem::transmute(__msa_msub_q_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msubr_q_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1024, -1024, 1024, -1024, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1025, 1025, 1025, 1025, + 1025, 1025, 1025, 1025 + ); + #[rustfmt::skip] + let c = i16x8::new( + 1024, 2048, 3072, 4096, + 1024, 2048, 3072, 4096 + ); + #[rustfmt::skip] + let r = i16x8::new(992, -1088, 928, -1152, -31, -62, -93, -124); + + assert_eq!( + r, + mem::transmute(__msa_msubr_q_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msubr_q_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MAX, -2147483647, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(10240, 10240, 10240, 10240); + #[rustfmt::skip] + let c = i32x4::new(10240, 20480, 30720, 40960); + #[rustfmt::skip] + let r = i32x4::new(2147483647, -2147483647, 1, 2); + + assert_eq!( + r, + mem::transmute(__msa_msubr_q_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msubv_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 5, 6, 7, 8, + 5, 6, 7, 8, + 5, 6, 7, 8, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let c = i8x16::new( + 9, 10, 11, 12, + 9, 10, 11, 12, + 9, 10, 11, 12, + 9, 10, 11, 12 + ); + #[rustfmt::skip] + let r = i8x16::new( + -44, -58, -74, -92, + -44, -58, -74, -92, + -44, -58, -74, -92, + -44, -58, -74, -92 + ); + + assert_eq!( + r, + 
mem::transmute(__msa_msubv_b( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msubv_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = i16x8::new(5, 6, 7, 8, 5, 6, 7, 8); + #[rustfmt::skip] + let c = i16x8::new(9, 10, 11, 12, 9, 10, 11, 12); + #[rustfmt::skip] + let r = i16x8::new(-44, -58, -74, -92, -44, -58, -74, -92); + + assert_eq!( + r, + mem::transmute(__msa_msubv_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msubv_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(3, 4, 3, 4); + #[rustfmt::skip] + let c = i32x4::new(5, 6, 5, 6); + #[rustfmt::skip] + let r = i32x4::new(-14, -22, -14, -22); + + assert_eq!( + r, + mem::transmute(__msa_msubv_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_msubv_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(3, 4); + #[rustfmt::skip] + let c = i64x2::new(5, 6); + #[rustfmt::skip] + let r = i64x2::new(-14, -22); + + assert_eq!( + r, + mem::transmute(__msa_msubv_d( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mul_q_h() { + #[rustfmt::skip] + let a = i16x8::new( + 12500, -20, -300, 400, + 12500, 20, 300, 400 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1250, 10240, -7585, 8456, + 1250, 10240, -7585, 8456 + ); + #[rustfmt::skip] + let r = i16x8::new(476, -7, 69, 103, 476, 6, -70, 103); + + assert_eq!( + r, + mem::transmute(__msa_mul_q_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mul_q_w() { + #[rustfmt::skip] + let a = i32x4::new( + i32::MAX, i32::MAX, + i32::MIN, i32::MIN + ); + #[rustfmt::skip] + 
let b = i32x4::new(30, 60, 30, 60); + #[rustfmt::skip] + let r = i32x4::new(29, 59, -30, -60); + + assert_eq!( + r, + mem::transmute(__msa_mul_q_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mulr_q_h() { + #[rustfmt::skip] + let a = i16x8::new( + 12500, -20, -300, 400, + 12500, 20, 300, 400 + ); + #[rustfmt::skip] + let b = i16x8::new( + 1250, 10240, -7585, 8456, + 1250, 10240, -7585, 8456 + ); + #[rustfmt::skip] + let r = i16x8::new(477, -6, 69, 103, 477, 6, -69, 103); + + assert_eq!( + r, + mem::transmute(__msa_mulr_q_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mulr_q_w() { + #[rustfmt::skip] + let a = i32x4::new( + i32::MAX, i32::MAX, + i32::MIN, i32::MIN + ); + #[rustfmt::skip] + let b = i32x4::new(30, 60, 30, 60); + #[rustfmt::skip] + let r = i32x4::new(30, 60, -30, -60); + + assert_eq!( + r, + mem::transmute(__msa_mulr_q_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mulv_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = i8x16::new( + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 16, 30, 42, 52, + 60, 66, 70, 72, + 72, 70, 66, 60, + 52, 42, 30, 16 + ); + + assert_eq!( + r, + mem::transmute(__msa_mulv_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mulv_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let b = i16x8::new( + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(8, 14, 18, 20, 20, 18, 14, 8); + + assert_eq!( + r, + mem::transmute(__msa_mulv_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mulv_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 
4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(4, 6, 6, 4); + + assert_eq!( + r, + mem::transmute(__msa_mulv_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_mulv_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(2, 1); + #[rustfmt::skip] + let r = i64x2::new(2, 2); + + assert_eq!( + r, + mem::transmute(__msa_mulv_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nloc_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, -64, -32, -16, + -8, -4, -2, -1, + 1, 2, 4, 8, + 16, 32, 64, 127 + ); + #[rustfmt::skip] + let r = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 0, 0, 0, 0, + 0, 0, 0, 0 + ); + + assert_eq!(r, mem::transmute(__msa_nloc_b(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nloc_h() { + #[rustfmt::skip] + let a = i16x8::new( + -32768, -16384, -8192, -4096, + 4096, 8192, 16384, 32767 + ); + #[rustfmt::skip] + let r = i16x8::new(1, 2, 3, 4, 0, 0, 0, 0); + + assert_eq!(r, mem::transmute(__msa_nloc_h(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nloc_w() { + #[rustfmt::skip] + let a = i32x4::new( + i32::MIN, -1073741824, + 1073741824, i32::MAX + ); + #[rustfmt::skip] + let r = i32x4::new(1, 2, 0, 0); + + assert_eq!(r, mem::transmute(__msa_nloc_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nloc_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MIN, i64::MAX); + #[rustfmt::skip] + let r = i64x2::new(1, 0); + + assert_eq!(r, mem::transmute(__msa_nloc_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nlzc_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let r = i8x16::new( + 7, 6, 6, 5, + 5, 5, 5, 4, + 4, 4, 4, 4, + 4, 4, 4, 3 + ); + + assert_eq!(r, 
mem::transmute(__msa_nlzc_b(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nlzc_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 5, 6, 7, 8 + ); + #[rustfmt::skip] + let r = i16x8::new(15, 14, 14, 13, 13, 13, 13, 12); + + assert_eq!(r, mem::transmute(__msa_nlzc_h(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nlzc_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = i32x4::new(31, 30, 30, 29); + + assert_eq!(r, mem::transmute(__msa_nlzc_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nlzc_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let r = i64x2::new(63, 62); + + assert_eq!(r, mem::transmute(__msa_nlzc_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nor_v() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let r = u8x16::new( + 254, 253, 252, 251, + 250, 249, 248, 247, + 246, 245, 244, 243, + 242, 241, 240, 239 + ); + + assert_eq!( + r, + mem::transmute(__msa_nor_v(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_nori_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let r = u8x16::new( + 250, 249, 248, 251, + 250, 249, 248, 243, + 242, 241, 240, 243, + 242, 241, 240, 235 + ); + + assert_eq!(r, mem::transmute(__msa_nori_b(mem::transmute(a), 4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_or_v() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + 
#[rustfmt::skip] + let r = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + + assert_eq!( + r, + mem::transmute(__msa_or_v(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_ori_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let r = u8x16::new( + 5, 6, 7, 4, + 5, 6, 7, 12, + 13, 14, 15, 12, + 13, 14, 15, 20 + ); + + assert_eq!(r, mem::transmute(__msa_ori_b(mem::transmute(a), 4))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckev_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 4, 2, 4, 2, + 4, 2, 4, 2, + 1, 3, 1, 3, + 1, 3, 1, 3 + ); + + assert_eq!( + r, + mem::transmute(__msa_pckev_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckev_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = i16x8::new(4, 3, 2, 1, 4, 3, 2, 1); + #[rustfmt::skip] + let r = i16x8::new(4, 2, 4, 2, 1, 3, 1, 3); + + assert_eq!( + r, + mem::transmute(__msa_pckev_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckev_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(4, 2, 1, 3); + + assert_eq!( + r, + mem::transmute(__msa_pckev_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckev_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(4, 3); + #[rustfmt::skip] + let r = i64x2::new(4, 1); + + assert_eq!( + r, + mem::transmute(__msa_pckev_d(mem::transmute(a), 
mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckod_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 3, 1, 3, 1, + 3, 1, 3, 1, + 2, 4, 2, 4, + 2, 4, 2, 4 + ); + + assert_eq!( + r, + mem::transmute(__msa_pckod_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckod_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = i16x8::new(4, 3, 2, 1, 4, 3, 2, 1); + #[rustfmt::skip] + let r = i16x8::new(3, 1, 3, 1, 2, 4, 2, 4); + + assert_eq!( + r, + mem::transmute(__msa_pckod_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckod_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(3, 1, 2, 4); + + assert_eq!( + r, + mem::transmute(__msa_pckod_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pckod_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(4, 3); + #[rustfmt::skip] + let r = i64x2::new(3, 2); + + assert_eq!( + r, + mem::transmute(__msa_pckod_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pcnt_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, -64, -32, -16, + -8, -4, -2, -1, + 1, 2, 4, 8, + 16, 32, 64, 127 + ); + #[rustfmt::skip] + let r = i8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 1, 1, 1, 1, + 1, 1, 1, 7 + ); + + assert_eq!(r, mem::transmute(__msa_pcnt_b(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pcnt_h() { + #[rustfmt::skip] + let a = i16x8::new( + -32768, -16384, -8192, -4096, + 4096, 8192, 
16384, 32767 + ); + #[rustfmt::skip] + let r = i16x8::new(1, 2, 3, 4, 1, 1, 1, 15); + + assert_eq!(r, mem::transmute(__msa_pcnt_h(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pcnt_w() { + #[rustfmt::skip] + let a = i32x4::new( + i32::MIN, -1073741824, + 1073741824, i32::MAX + ); + #[rustfmt::skip] + let r = i32x4::new(1, 2, 1, 31); + + assert_eq!(r, mem::transmute(__msa_pcnt_w(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_pcnt_d() { + #[rustfmt::skip] + let a = i64x2::new(-2147483648, 2147483647); + #[rustfmt::skip] + let r = i64x2::new(33, 31); + + assert_eq!(r, mem::transmute(__msa_pcnt_d(mem::transmute(a)))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + i8::MAX, 105, 30, 1, + i8::MAX, 105, 30, 1, + i8::MAX, 105, 30, 1, + i8::MAX, 105, 30, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 3, 3, 3, 1, + 3, 3, 3, 1, + 3, 3, 3, 1, + 3, 3, 3, 1 + ); + + assert_eq!(r, mem::transmute(__msa_sat_s_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_s_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 1155, 155, 1, + i16::MAX, 1155, 155, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(127, 127, 127, 1, 127, 127, 127, 1); + + assert_eq!(r, mem::transmute(__msa_sat_s_h(mem::transmute(a), 7))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_s_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MAX, 111111155, i32::MAX, 1); + #[rustfmt::skip] + let r = i32x4::new(131071, 131071, 131071, 1); + + assert_eq!(r, mem::transmute(__msa_sat_s_w(mem::transmute(a), 17))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_s_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MAX, 1); + #[rustfmt::skip] + let r = i64x2::new(137438953471, 1); + + assert_eq!(r, mem::transmute(__msa_sat_s_d(mem::transmute(a), 37))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_u_b() { + 
#[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 105, 30, 1, + u8::MAX, 105, 30, 1, + u8::MAX, 105, 30, 1, + u8::MAX, 105, 30, 1 + ); + #[rustfmt::skip] + let r = u8x16::new( + 7, 7, 7, 1, + 7, 7, 7, 1, + 7, 7, 7, 1, + 7, 7, 7, 1 + ); + + assert_eq!(r, mem::transmute(__msa_sat_u_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + u16::MAX, 1155, 155, 1, + u16::MAX, 1155, 155, 1 + ); + #[rustfmt::skip] + let r = u16x8::new(255, 255, 155, 1, 255, 255, 155, 1); + + assert_eq!(r, mem::transmute(__msa_sat_u_h(mem::transmute(a), 7))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_u_w() { + #[rustfmt::skip] + let a = u32x4::new(u32::MAX, 111111155, u32::MAX, 1); + #[rustfmt::skip] + let r = u32x4::new(262143, 262143, 262143, 1); + + assert_eq!(r, mem::transmute(__msa_sat_u_w(mem::transmute(a), 17))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sat_u_d() { + #[rustfmt::skip] + let a = u64x2::new(u64::MAX, 1); + #[rustfmt::skip] + let r = u64x2::new(274877906943, 1); + + assert_eq!(r, mem::transmute(__msa_sat_u_d(mem::transmute(a), 37))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_shf_b() { + #[rustfmt::skip] + let a = i8x16::new( + 11, 12, 3, 4, + 11, 12, 3, 4, + 11, 12, 3, 4, + 11, 12, 3, 4 + ); + #[rustfmt::skip] + let r = i8x16::new( + 11, 3, 4, 12, + 11, 3, 4, 12, + 11, 3, 4, 12, + 11, 3, 4, 12 + ); + + assert_eq!(r, mem::transmute(__msa_shf_b(mem::transmute(a), 120))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_shf_h() { + #[rustfmt::skip] + let a = i16x8::new( + 11, 12, 13, 14, + 11, 12, 13, 14 + ); + #[rustfmt::skip] + let r = i16x8::new(11, 14, 12, 13, 11, 14, 12, 13); + + assert_eq!(r, mem::transmute(__msa_shf_h(mem::transmute(a), 156))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_shf_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = i32x4::new(1, 3, 2, 4); + + 
assert_eq!(r, mem::transmute(__msa_shf_w(mem::transmute(a), 216))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sld_b() { + #[rustfmt::skip] + let a = i8x16::new( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15 + ); + #[rustfmt::skip] + let b = i8x16::new( + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31 + ); + #[rustfmt::skip] + let r = i8x16::new( + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 0, + 1, 2, 3, 4 + ); + + assert_eq!( + r, + mem::transmute(__msa_sld_b(mem::transmute(a), mem::transmute(b), 5)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sld_h() { + #[rustfmt::skip] + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + #[rustfmt::skip] + let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + // let c = 5 as i32; + let r = i16x8::new(9, 10, 11, 0, 13, 14, 15, 4); + + assert_eq!( + r, + mem::transmute(__msa_sld_h(mem::transmute(a), mem::transmute(b), 2)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sld_w() { + #[rustfmt::skip] + let a = i32x4::new(0, 1, 2, 3); + #[rustfmt::skip] + let b = i32x4::new(4, 5, 6, 7); + #[rustfmt::skip] + let r = i32x4::new(4, 5, 6, 7); + + assert_eq!( + r, + mem::transmute(__msa_sld_w(mem::transmute(a), mem::transmute(b), 4)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sld_d() { + #[rustfmt::skip] + let a = i64x2::new(0, 1); + #[rustfmt::skip] + let b = i64x2::new(2, 3); + #[rustfmt::skip] + let r = i64x2::new(2, 3); + + assert_eq!( + r, + mem::transmute(__msa_sld_d(mem::transmute(a), mem::transmute(b), 2)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sldi_b() { + #[rustfmt::skip] + let a = i8x16::new( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15 + ); + #[rustfmt::skip] + let b = i8x16::new( + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31 + ); + #[rustfmt::skip] + let r = i8x16::new( + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 0, + 1, 2, 3, 4 + ); + + assert_eq!( + r, 
+ mem::transmute(__msa_sldi_b(mem::transmute(a), mem::transmute(b), 5)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sldi_h() { + #[rustfmt::skip] + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + #[rustfmt::skip] + let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + // let c = 5 as i32; + let r = i16x8::new(9, 10, 11, 0, 13, 14, 15, 4); + + assert_eq!( + r, + mem::transmute(__msa_sldi_h(mem::transmute(a), mem::transmute(b), 2)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sldi_w() { + #[rustfmt::skip] + let a = i32x4::new(0, 1, 2, 3); + #[rustfmt::skip] + let b = i32x4::new(4, 5, 6, 7); + #[rustfmt::skip] + let r = i32x4::new(4, 5, 6, 7); + + assert_eq!( + r, + mem::transmute(__msa_sldi_w(mem::transmute(a), mem::transmute(b), 4)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sldi_d() { + #[rustfmt::skip] + let a = i64x2::new(0, 1); + #[rustfmt::skip] + let b = i64x2::new(2, 3); + #[rustfmt::skip] + let r = i64x2::new(2, 3); + + assert_eq!( + r, + mem::transmute(__msa_sldi_d(mem::transmute(a), mem::transmute(b), 2)) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sll_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 16, 16, 12, 8, + 16, 16, 12, 8, + 16, 16, 12, 8, + 16, 16, 12, 8 + ); + + assert_eq!( + r, + mem::transmute(__msa_sll_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sll_h() { + #[rustfmt::skip] + let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + #[rustfmt::skip] + let b = i16x8::new(4, 3, 2, 1, 4, 3, 2, 1); + #[rustfmt::skip] + let r = i16x8::new(16, 16, 12, 8, 16, 16, 12, 8); + + assert_eq!( + r, + mem::transmute(__msa_sll_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sll_w() { + 
#[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(16, 16, 12, 8); + + assert_eq!( + r, + mem::transmute(__msa_sll_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sll_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(4, 3); + #[rustfmt::skip] + let r = i64x2::new(16, 16); + + assert_eq!( + r, + mem::transmute(__msa_sll_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_slli_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i8x16::new( + 4, 8, 12, 16, + 4, 8, 12, 16, + 4, 8, 12, 16, + 4, 8, 12, 16 + ); + + assert_eq!(r, mem::transmute(__msa_slli_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_slli_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i16x8::new(4, 8, 12, 16, 4, 8, 12, 16); + + assert_eq!(r, mem::transmute(__msa_slli_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_slli_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = i32x4::new(4, 8, 12, 16); + + assert_eq!(r, mem::transmute(__msa_slli_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_slli_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let r = i64x2::new(2, 4); + + assert_eq!(r, mem::transmute(__msa_slli_d(mem::transmute(a), 1))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splat_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i8x16::new( + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4 + ); + + assert_eq!(r, 
mem::transmute(__msa_splat_b(mem::transmute(a), 3))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splat_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + ); + #[rustfmt::skip] + let r = i16x8::new(4, 4, 4, 4, 4, 4, 4, 4); + + assert_eq!(r, mem::transmute(__msa_splat_h(mem::transmute(a), 3))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splat_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = i32x4::new(4, 4, 4, 4); + + assert_eq!(r, mem::transmute(__msa_splat_w(mem::transmute(a), 3))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splat_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let r = i64x2::new(2, 2); + + assert_eq!(r, mem::transmute(__msa_splat_d(mem::transmute(a), 3))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splati_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i8x16::new( + 3, 3, 3, 3, + 3, 3, 3, 3, + 3, 3, 3, 3, + 3, 3, 3, 3 + ); + + assert_eq!(r, mem::transmute(__msa_splati_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splati_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + ); + #[rustfmt::skip] + let r = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + + assert_eq!(r, mem::transmute(__msa_splati_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splati_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let r = i32x4::new(3, 3, 3, 3); + + assert_eq!(r, mem::transmute(__msa_splati_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_splati_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let r = i64x2::new(2, 2); + + assert_eq!(r, mem::transmute(__msa_splati_d(mem::transmute(a), 1))); + } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_sra_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, -64, -32, -16, + -8, -4, -2, -1, + 1, 2, 4, 8, + 16, 32, 64, 127 + ); + #[rustfmt::skip] + let b = i8x16::new( + 8, 7, 6, 5, + 4, 3, 2, 1, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + -128, -1, -1, -1, + -1, -1, -1, -1, + 1, 0, 0, 0, + 1, 4, 16, 63 + ); + + assert_eq!( + r, + mem::transmute(__msa_sra_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sra_h() { + #[rustfmt::skip] + let a = i16x8::new( + -32768, -16384, -8192, -4096, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 15, 14, 13, 12, + 12, 13, 14, 15 + ); + #[rustfmt::skip] + let r = i16x8::new( + -1, -1, -1, -1, + 0, 0, 0, 0 + ); + + assert_eq!( + r, + mem::transmute(__msa_sra_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sra_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MIN, -1073741824, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(16, 15, 16, 15); + #[rustfmt::skip] + let r = i32x4::new(-32768, -32768, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_sra_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_sra_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MIN, i64::MAX); + #[rustfmt::skip] + let b = i64x2::new(32, 31); + #[rustfmt::skip] + let r = i64x2::new(-2147483648, 4294967295); + + assert_eq!( + r, + mem::transmute(__msa_sra_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srai_b() { + #[rustfmt::skip] + let a = i8x16::new( + i8::MAX, 125, 55, 1, + i8::MAX, 125, 55, 1, + i8::MAX, 125, 55, 1, + i8::MAX, 125, 55, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 31, 31, 13, 0, + 31, 31, 13, 0, + 31, 31, 13, 0, + 31, 31, 13, 0 + ); + + assert_eq!(r, mem::transmute(__msa_srai_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_srai_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 125, 55, 1, + i16::MAX, 125, 55, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(8191, 31, 13, 0, 8191, 31, 13, 0); + + assert_eq!(r, mem::transmute(__msa_srai_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srai_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MAX, 125, 55, 1); + let r = i32x4::new(536870911, 31, 13, 0); + + assert_eq!(r, mem::transmute(__msa_srai_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srai_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MAX, 55); + #[rustfmt::skip] + let r = i64x2::new(2305843009213693951, 13); + + assert_eq!(r, mem::transmute(__msa_srai_d(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srar_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, -64, -32, -16, + -8, -4, -2, -1, + 1, 2, 4, 8, + 16, 32, 64, 127 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + -8, -8, -8, -8, + 0, 0, 0, 0, + 1, 0, 0, 0, + 1, 4, 16, 64 + ); + + assert_eq!( + r, + mem::transmute(__msa_srar_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srar_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MIN, -16384, -8192, -4096, + 150, 50, 25, 15 + ); + #[rustfmt::skip] + let b = i16x8::new( + 4, 3, 2, 1, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let r = i16x8::new( + -2048, -2048, -2048, -2048, + 75, 13, 3, 1 + ); + + assert_eq!( + r, + mem::transmute(__msa_srar_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srar_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MIN, -1073741824, 100, 50); + #[rustfmt::skip] + let b = i32x4::new(16, 15, 1, 2); + #[rustfmt::skip] + let r = i32x4::new(-32768, -32768, 50, 13); + + assert_eq!( + r, + 
mem::transmute(__msa_srar_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srar_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MIN, i64::MAX); + #[rustfmt::skip] + let b = i64x2::new(32, 31); + #[rustfmt::skip] + let r = i64x2::new(-2147483648, 4294967296); + + assert_eq!( + r, + mem::transmute(__msa_srar_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srari_b() { + #[rustfmt::skip] + let a = i8x16::new( + 125, i8::MAX, 55, 1, + 125, i8::MAX, 55, 1, + 125, i8::MAX, 55, 1, + 125, i8::MAX, 55, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 31, 32, 14, 0, + 31, 32, 14, 0, + 31, 32, 14, 0, + 31, 32, 14, 0 + ); + + assert_eq!(r, mem::transmute(__msa_srari_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srari_h() { + #[rustfmt::skip] + let a = i16x8::new(2155, 1155, 155, 1, 2155, 1155, 155, 1); + #[rustfmt::skip] + let r = i16x8::new(539, 289, 39, 0, 539, 289, 39, 0); + + assert_eq!(r, mem::transmute(__msa_srari_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srari_w() { + #[rustfmt::skip] + let a = i32x4::new(211111155, 111111155, 11111155, 1); + #[rustfmt::skip] + let r = i32x4::new(52777789, 27777789, 2777789, 0); + + assert_eq!(r, mem::transmute(__msa_srari_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srari_d() { + #[rustfmt::skip] + let a = i64x2::new(211111111155, 111111111155); + #[rustfmt::skip] + let r = i64x2::new(52777777789, 27777777789); + + assert_eq!(r, mem::transmute(__msa_srari_d(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srl_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, -64, -32, -16, + -8, -4, -2, -1, + 1, 2, 4, 8, + 16, 32, 64, 127 + ); + #[rustfmt::skip] + let b = i8x16::new( + 8, 7, 6, 5, + 4, 3, 2, 1, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( 
+ -128, 1, 3, 7, + 15, 31, 63, 127, + 1, 0, 0, 0, + 1, 4, 16, 63 + ); + + assert_eq!( + r, + mem::transmute(__msa_srl_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srl_h() { + #[rustfmt::skip] + let a = i16x8::new( + -32768, -16384, -8192, -4096, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 15, 14, 13, 12, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(1, 3, 7, 15, 0, 0, 0, 2); + + assert_eq!( + r, + mem::transmute(__msa_srl_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srl_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MIN, -1073741824, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(16, 15, 16, 15); + #[rustfmt::skip] + let r = i32x4::new(32768, 98304, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_srl_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srl_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MIN, i64::MAX); + #[rustfmt::skip] + let b = i64x2::new(32, 31); + #[rustfmt::skip] + let r = i64x2::new(2147483648, 4294967295); + + assert_eq!( + r, + mem::transmute(__msa_srl_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srli_b() { + #[rustfmt::skip] + let a = i8x16::new( + 25, 50, 100, 127, + 25, 50, 100, 127, + 25, 50, 100, 127, + 25, 50, 100, 127 + ); + #[rustfmt::skip] + let r = i8x16::new( + 6, 12, 25, 31, + 6, 12, 25, 31, + 6, 12, 25, 31, + 6, 12, 25, 31 + ); + + assert_eq!(r, mem::transmute(__msa_srli_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srli_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 3276, 100, 127, + i16::MAX, 3276, 100, 127 + ); + #[rustfmt::skip] + let r = i16x8::new( + 8191, 819, 25, 31, + 8191, 819, 25, 31 + ); + + assert_eq!(r, mem::transmute(__msa_srli_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe 
fn test_msa_srli_w() { + #[rustfmt::skip] + let a = i32x4::new(100, i32::MAX, 100, i32::MAX); + #[rustfmt::skip] + let r = i32x4::new(25, 536870911, 25, 536870911); + + assert_eq!(r, mem::transmute(__msa_srli_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srli_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MAX); + #[rustfmt::skip] + let r = i64x2::new(50, 4611686018427387903); + + assert_eq!(r, mem::transmute(__msa_srli_d(mem::transmute(a), 1))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlr_b() { + #[rustfmt::skip] + let a = i8x16::new( + -128, -64, -32, -16, + -8, -4, -2, -1, + 1, 2, 4, 8, + 16, 32, 64, 127 + ); + #[rustfmt::skip] + let b = i8x16::new( + 8, 7, 6, 5, + 4, 3, 2, 1, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + -128, 2, 4, 8, + 16, 32, 64, -128, + 1, 0, 0, 0, + 1, 4, 16, 64 + ); + + assert_eq!( + r, + mem::transmute(__msa_srlr_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlr_h() { + #[rustfmt::skip] + let a = i16x8::new( + -32768, -16384, -8192, -4096, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 15, 14, 13, 12, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i16x8::new(1, 3, 7, 15, 0, 0, 1, 2); + + assert_eq!( + r, + mem::transmute(__msa_srlr_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlr_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MIN, -1073741824, 1, 2); + #[rustfmt::skip] + let b = i32x4::new(16, 15, 16, 15); + let r = i32x4::new(32768, 98304, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_srlr_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlr_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MIN, i64::MAX); + #[rustfmt::skip] + let b = i64x2::new(32, 31); + #[rustfmt::skip] + let r = i64x2::new(2147483648, 4294967296); + + assert_eq!( + r, + 
mem::transmute(__msa_srlr_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlri_b() { + #[rustfmt::skip] + let a = i8x16::new( + 25, 50, 100, i8::MAX, + 25, 50, 100, i8::MAX, + 25, 50, 100, i8::MAX, + 25, 50, 100, i8::MAX + ); + #[rustfmt::skip] + let r = i8x16::new( + 6, 13, 25, 32, + 6, 13, 25, 32, + 6, 13, 25, 32, + 6, 13, 25, 32 + ); + + assert_eq!(r, mem::transmute(__msa_srlri_b(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlri_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 3276, 100, 127, + i16::MAX, 3276, 100, 127 + ); + let r = i16x8::new(8192, 819, 25, 32, 8192, 819, 25, 32); + + assert_eq!(r, mem::transmute(__msa_srlri_h(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlri_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 150, 200, i32::MAX); + #[rustfmt::skip] + let r = i32x4::new(25, 38, 50, 536870912); + + assert_eq!(r, mem::transmute(__msa_srlri_w(mem::transmute(a), 2))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_srlri_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MAX); + #[rustfmt::skip] + let r = i64x2::new(50, 4611686018427387904); + + assert_eq!(r, mem::transmute(__msa_srlri_d(mem::transmute(a), 1))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_st_b() { + #[rustfmt::skip] + let a = i8x16::new( + 13, 14, 15, 16, + 17, 18, 19, 20, + 21, 22, 23, 24, + 25, 26, 27, 28 + ); + #[rustfmt::skip] + let mut arr : [i8; 16] = [ + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 + ]; + #[rustfmt::skip] + let r : [i8; 16] = [ + 13, 14, 15, 16, + 17, 18, 19, 20, + 21, 22, 23, 24, + 25, 26, 27, 28 + ]; + __msa_st_b(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); + assert_eq!(arr, r); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_st_h() { + #[rustfmt::skip] + let a = i16x8::new(13, 14, 15, 16, 17, 18, 19, 20); + let mut arr: [i16; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; + 
#[rustfmt::skip] + let r : [i16; 8] = [13, 14, 15, 16, 17, 18, 19, 20]; + __msa_st_h(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); + assert_eq!(arr, r); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_st_w() { + #[rustfmt::skip] + let a = i32x4::new(13, 14, 15, 16); + let mut arr: [i32; 4] = [0, 0, 0, 0]; + #[rustfmt::skip] + let r : [i32; 4] = [13, 14, 15, 16]; + __msa_st_w(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); + assert_eq!(arr, r); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_st_d() { + #[rustfmt::skip] + let a = i64x2::new(13, 14); + let mut arr: [i64; 2] = [0, 0]; + #[rustfmt::skip] + let r : [i64; 2] = [13, 14]; + __msa_st_d(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); + assert_eq!(arr, r); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_s_b() { + #[rustfmt::skip] + let a = i8x16::new( + i8::MIN, -2, -3, -4, + i8::MIN, -2, -3, -4, + i8::MIN, -2, -3, -4, + i8::MIN, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 6, -7, 8, -9, + 6, -7, 8, -9, + 6, -7, 8, -9, + 6, -7, 8, -9 + ); + #[rustfmt::skip] + let r = i8x16::new( + i8::MIN, 5, -11, 5, + i8::MIN, 5, -11, 5, + i8::MIN, 5, -11, 5, + i8::MIN, 5, -11, 5 + ); + + assert_eq!( + r, + mem::transmute(__msa_subs_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_s_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MIN, -2, -3, -4, + i16::MIN, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i16x8::new(6, -7, 8, -9, 6, -7, 8, -9); + #[rustfmt::skip] + let r = i16x8::new( + i16::MIN, 5, -11, 5, + i16::MIN, 5, -11, 5 + ); + + assert_eq!( + r, + mem::transmute(__msa_subs_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_s_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MIN, -2, -3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, -7, 8, -9); + #[rustfmt::skip] + let r = i32x4::new(i32::MIN, 5, -11, 5); + + assert_eq!( + r, + 
mem::transmute(__msa_subs_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_s_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MIN, -2); + #[rustfmt::skip] + let b = i64x2::new(6, -7); + #[rustfmt::skip] + let r = i64x2::new(i64::MIN, 5); + + assert_eq!( + r, + mem::transmute(__msa_subs_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9, + 6, 7, 8, 9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 249, 0, 0, 0, + 249, 0, 0, 0, + 249, 0, 0, 0, + 249, 0, 0, 0 + ); + + assert_eq!( + r, + mem::transmute(__msa_subs_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + u16::MAX, 2, 3, 4, + u16::MAX, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); + #[rustfmt::skip] + let r = u16x8::new(65529, 0, 0, 0, 65529, 0, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_subs_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_u_w() { + #[rustfmt::skip] + let a = u32x4::new(u32::MAX, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 9); + #[rustfmt::skip] + let r = u32x4::new(4294967289, 0, 0, 0); + + assert_eq!( + r, + mem::transmute(__msa_subs_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subs_u_d() { + #[rustfmt::skip] + let a = u64x2::new(u64::MAX, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = u64x2::new(18446744073709551609, 0); + + assert_eq!( + r, + mem::transmute(__msa_subs_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable 
= "msa")] + unsafe fn test_msa_subsus_u_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9, + -6, -7, -8, -9 + ); + #[rustfmt::skip] + let r = u8x16::new( + 255, 9, 11, 13, + 255, 9, 11, 13, + 255, 9, 11, 13, + 255, 9, 11, 13 + ); + + assert_eq!( + r, + mem::transmute(__msa_subsus_u_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsus_u_h() { + #[rustfmt::skip] + let a = u16x8::new( + u16::MAX, 2, 3, 4, + u16::MAX, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new(-6, -7, -8, -9, -6, -7, -8, -9); + #[rustfmt::skip] + let r = u16x8::new(65535, 9, 11, 13, 65535, 9, 11, 13); + + assert_eq!( + r, + mem::transmute(__msa_subsus_u_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsus_u_w() { + #[rustfmt::skip] + let a = u32x4::new(u32::MAX, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(-6, -7, -8, -9); + #[rustfmt::skip] + let r = u32x4::new(4294967295, 9, 11, 13); + + assert_eq!( + r, + mem::transmute(__msa_subsus_u_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsus_u_d() { + #[rustfmt::skip] + let a = u64x2::new(u64::MAX, 2); + #[rustfmt::skip] + let b = i64x2::new(-6, -7); + #[rustfmt::skip] + let r = u64x2::new(18446744073709551615, 9); + + assert_eq!( + r, + mem::transmute(__msa_subsus_u_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsuu_s_b() { + #[rustfmt::skip] + let a = u8x16::new( + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4, + u8::MAX, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u8x16::new( + 6, 7, 8, u8::MAX, + 6, 7, 8, u8::MAX, + 6, 7, 8, u8::MAX, + 6, 7, 8, u8::MAX + ); + #[rustfmt::skip] + let r = i8x16::new( + 127, -5, -5, -128, + 127, 
-5, -5, -128, + 127, -5, -5, -128, + 127, -5, -5, -128 + ); + + assert_eq!( + r, + mem::transmute(__msa_subsuu_s_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsuu_s_h() { + #[rustfmt::skip] + let a = u16x8::new( + u16::MAX, 2, 3, + 4, u16::MAX, 2, 3, 4 + ); + #[rustfmt::skip] + let b = u16x8::new(6, 7, 8, 65535, 6, 7, 8, 65535); + #[rustfmt::skip] + let r = i16x8::new(32767, -5, -5, -32768, 32767, -5, -5, -32768); + + assert_eq!( + r, + mem::transmute(__msa_subsuu_s_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsuu_s_w() { + #[rustfmt::skip] + let a = u32x4::new(u32::MAX, 2, 3, 4); + #[rustfmt::skip] + let b = u32x4::new(6, 7, 8, 4294967295); + #[rustfmt::skip] + let r = i32x4::new(2147483647, -5, -5, -2147483648); + + assert_eq!( + r, + mem::transmute(__msa_subsuu_s_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subsuu_s_d() { + #[rustfmt::skip] + let a = u64x2::new(u64::MAX, 2); + #[rustfmt::skip] + let b = u64x2::new(6, 7); + #[rustfmt::skip] + let r = i64x2::new(i64::MAX, -5); + + assert_eq!( + r, + mem::transmute(__msa_subsuu_s_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subv_b() { + #[rustfmt::skip] + let a = i8x16::new( + i8::MIN, -2, -3, -4, + i8::MIN, -2, -3, -4, + i8::MIN, -2, -3, -4, + i8::MIN, -2, -3, -4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 6, -7, 8, -9, + 6, -7, 8, -9, + 6, -7, 8, -9, + 6, -7, 8, -9 + ); + #[rustfmt::skip] + let r = i8x16::new( + 122, 5, -11, 5, + 122, 5, -11, 5, + 122, 5, -11, 5, + 122, 5, -11, 5 + ); + + assert_eq!( + r, + mem::transmute(__msa_subv_b(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subv_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MIN, -2, -3, -4, + i16::MIN, -2, -3, -4 + ); + #[rustfmt::skip] + let b = 
i16x8::new(6, -7, 8, -9, 6, -7, 8, -9); + #[rustfmt::skip] + let r = i16x8::new(32762, 5, -11, 5, 32762, 5, -11, 5); + + assert_eq!( + r, + mem::transmute(__msa_subv_h(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subv_w() { + #[rustfmt::skip] + let a = i32x4::new(i32::MIN, -2, -3, -4); + #[rustfmt::skip] + let b = i32x4::new(6, -7, 8, -9); + #[rustfmt::skip] + let r = i32x4::new(2147483642, 5, -11, 5); + + assert_eq!( + r, + mem::transmute(__msa_subv_w(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subv_d() { + #[rustfmt::skip] + let a = i64x2::new(i64::MAX, -2); + #[rustfmt::skip] + let b = i64x2::new(6, -7); + #[rustfmt::skip] + let r = i64x2::new(9223372036854775801, 5); + + assert_eq!( + r, + mem::transmute(__msa_subv_d(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subvi_b() { + #[rustfmt::skip] + let a = i8x16::new( + 100, i8::MAX, 50, i8::MIN, + 100, i8::MAX, 50, i8::MIN, + 100, i8::MAX, 50, i8::MIN, + 100, i8::MAX, 50, i8::MIN + ); + #[rustfmt::skip] + let r = i8x16::new( + 95, 122, 45, 123, + 95, 122, 45, 123, + 95, 122, 45, 123, + 95, 122, 45, 123 + ); + + assert_eq!(r, mem::transmute(__msa_subvi_b(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subvi_h() { + #[rustfmt::skip] + let a = i16x8::new( + i16::MAX, 3276, -100, i16::MIN, + i16::MAX, 3276, -100, i16::MIN + ); + #[rustfmt::skip] + let r = i16x8::new( + 32762, 3271, -105, 32763, + 32762, 3271, -105, 32763 + ); + + assert_eq!(r, mem::transmute(__msa_subvi_h(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_subvi_w() { + #[rustfmt::skip] + let a = i32x4::new(100, 150, 200, i32::MAX); + #[rustfmt::skip] + let r = i32x4::new(95, 145, 195, 2147483642); + + assert_eq!(r, mem::transmute(__msa_subvi_w(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn 
test_msa_subvi_d() { + #[rustfmt::skip] + let a = i64x2::new(100, i64::MAX); + #[rustfmt::skip] + let r = i64x2::new(95, 9223372036854775802); + + assert_eq!(r, mem::transmute(__msa_subvi_d(mem::transmute(a), 5))); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_vshf_b() { + #[rustfmt::skip] + let a = i8x16::new( + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let c = i8x16::new( + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = i8x16::new( + 3, 2, 1, 4, + 3, 2, 1, 4, + 3, 2, 1, 4, + 3, 2, 1, 4 + ); + + assert_eq!( + r, + mem::transmute(__msa_vshf_b( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_vshf_h() { + #[rustfmt::skip] + let a = i16x8::new( + 1, 2, 3, 4, + 1, 2, 3, 4 + ); + #[rustfmt::skip] + let b = i16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let c = i16x8::new( + 4, 3, 2, 1, + 4, 3, 2, 1 + ); + let r = i16x8::new(3, 2, 1, 4, 3, 2, 1, 4); + + assert_eq!( + r, + mem::transmute(__msa_vshf_h( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_vshf_w() { + #[rustfmt::skip] + let a = i32x4::new(1, 2, 3, 4); + #[rustfmt::skip] + let b = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let c = i32x4::new(4, 3, 2, 1); + #[rustfmt::skip] + let r = i32x4::new(3, 2, 1, 4); + + assert_eq!( + r, + mem::transmute(__msa_vshf_w( + mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_vshf_d() { + #[rustfmt::skip] + let a = i64x2::new(1, 2); + #[rustfmt::skip] + let b = i64x2::new(4, 3); + #[rustfmt::skip] + let c = i64x2::new(4, 3); + #[rustfmt::skip] + let r = i64x2::new(3, 4); + + assert_eq!( + r, + mem::transmute(__msa_vshf_d( + 
mem::transmute(a), + mem::transmute(b), + mem::transmute(c) + )) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_xor_v() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let b = u8x16::new( + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + ); + #[rustfmt::skip] + let r = u8x16::new( + 17, 13, 13, 9, + 9, 13, 13, 1, + 1, 13, 13, 9, + 9, 13, 13, 17 + ); + + assert_eq!( + r, + mem::transmute(__msa_xor_v(mem::transmute(a), mem::transmute(b))) + ); + } + + #[simd_test(enable = "msa")] + unsafe fn test_msa_xori_b() { + #[rustfmt::skip] + let a = u8x16::new( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + #[rustfmt::skip] + let r = u8x16::new( + 5, 6, 7, 0, + 1, 2, 3, 12, + 13, 14, 15, 8, + 9, 10, 11, 20 + ); + + assert_eq!(r, mem::transmute(__msa_xori_b(mem::transmute(a), 4))); + } +} diff --git a/library/stdarch/crates/core_arch/src/mod.rs b/library/stdarch/crates/core_arch/src/mod.rs new file mode 100644 index 000000000000..f6e959efd47c --- /dev/null +++ b/library/stdarch/crates/core_arch/src/mod.rs @@ -0,0 +1,343 @@ +//! `core_arch` + +#![allow(unknown_lints, unnecessary_transmutes)] + +#[macro_use] +mod macros; + +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))] +mod riscv_shared; + +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + doc +))] +mod arm_shared; + +mod simd; + +#[doc = include_str!("core_arch_docs.md")] +#[stable(feature = "simd_arch", since = "1.27.0")] +pub mod arch { + /// Platform-specific intrinsics for the `x86` platform. + /// + /// See the [module documentation](../index.html) for more details. 
+ #[cfg(any(target_arch = "x86", doc))] + #[doc(cfg(target_arch = "x86"))] + #[stable(feature = "simd_x86", since = "1.27.0")] + pub mod x86 { + #[stable(feature = "simd_x86", since = "1.27.0")] + pub use crate::core_arch::x86::*; + } + + /// Platform-specific intrinsics for the `x86_64` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "x86_64", doc))] + #[doc(cfg(target_arch = "x86_64"))] + #[stable(feature = "simd_x86", since = "1.27.0")] + pub mod x86_64 { + #[stable(feature = "simd_x86", since = "1.27.0")] + pub use crate::core_arch::x86::*; + #[stable(feature = "simd_x86", since = "1.27.0")] + pub use crate::core_arch::x86_64::*; + } + + /// Platform-specific intrinsics for the `arm` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "arm", doc))] + #[doc(cfg(target_arch = "arm"))] + #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] + pub mod arm { + #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] + pub use crate::core_arch::arm::*; + } + + /// Platform-specific intrinsics for the `aarch64` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))] + #[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))] + #[stable(feature = "neon_intrinsics", since = "1.59.0")] + pub mod aarch64 { + #[stable(feature = "neon_intrinsics", since = "1.59.0")] + pub use crate::core_arch::aarch64::*; + } + + /// Platform-specific intrinsics for the `riscv32` platform. + /// + /// See the [module documentation](../index.html) for more details. 
+ #[cfg(any(target_arch = "riscv32", doc))] + #[doc(cfg(any(target_arch = "riscv32")))] + #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] + pub mod riscv32 { + pub use crate::core_arch::riscv_shared::*; + pub use crate::core_arch::riscv32::*; + } + + /// Platform-specific intrinsics for the `riscv64` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "riscv64", doc))] + #[doc(cfg(any(target_arch = "riscv64")))] + #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] + pub mod riscv64 { + pub use crate::core_arch::riscv64::*; + // RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05). + // Module `riscv_shared` includes instructions available under all RISC-V platforms, + // i.e. RISC-V RV32 instructions. + pub use crate::core_arch::riscv_shared::*; + } + + /// Platform-specific intrinsics for the `wasm32` platform. + /// + /// This module provides intrinsics specific to the WebAssembly + /// architecture. Here you'll find intrinsics specific to WebAssembly that + /// aren't otherwise surfaced somewhere in a cross-platform abstraction of + /// `std`, and you'll also find functions for leveraging WebAssembly + /// proposals such as [atomics] and [simd]. + /// + /// Intrinsics in the `wasm32` module are modeled after the WebAssembly + /// instructions that they represent. Most functions are named after the + /// instruction they intend to correspond to, and the arguments/results + /// correspond to the type signature of the instruction itself. Stable + /// WebAssembly instructions are [documented online][instrdoc]. + /// + /// [instrdoc]: https://webassembly.github.io/spec/core/valid/instructions.html + /// + /// If a proposal is not yet stable in WebAssembly itself then the functions + /// within this function may be unstable and require the nightly channel of + /// Rust to use. 
As the proposal itself stabilizes the intrinsics in this + /// module should stabilize as well. + /// + /// [atomics]: https://github.com/webassembly/threads + /// [simd]: https://github.com/webassembly/simd + /// + /// See the [module documentation](../index.html) for general information + /// about the `arch` module and platform intrinsics. + /// + /// ## Atomics + /// + /// The [threads proposal][atomics] for WebAssembly adds a number of + /// instructions for dealing with multithreaded programs. Most instructions + /// added in the [atomics] proposal are exposed in Rust through the + /// `std::sync::atomic` module. Some instructions, however, don't have + /// direct equivalents in Rust so they're exposed here instead. + /// + /// Note that the instructions added in the [atomics] proposal can work in + /// either a context with a shared wasm memory and without. These intrinsics + /// are always available in the standard library, but you likely won't be + /// able to use them too productively unless you recompile the standard + /// library (and all your code) with `-Ctarget-feature=+atomics`. + /// + /// It's also worth pointing out that multi-threaded WebAssembly and its + /// story in Rust is still in a somewhat "early days" phase as of the time + /// of this writing. Pieces should mostly work but it generally requires a + /// good deal of manual setup. At this time it's not as simple as "just call + /// `std::thread::spawn`", but it will hopefully get there one day! + /// + /// ## SIMD + /// + /// The [simd proposal][simd] for WebAssembly added a new `v128` type for a + /// 128-bit SIMD register. It also added a large array of instructions to + /// operate on the `v128` type to perform data processing. Using SIMD on + /// wasm is intended to be similar to as you would on `x86_64`, for example. 
+ /// You'd write a function such as: + /// + /// ```rust,ignore + /// #[cfg(target_arch = "wasm32")] + /// #[target_feature(enable = "simd128")] + /// unsafe fn uses_simd() { + /// use std::arch::wasm32::*; + /// // ... + /// } + /// ``` + /// + /// Unlike `x86_64`, however, WebAssembly does not currently have dynamic + /// detection at runtime as to whether SIMD is supported (this is one of the + /// motivators for the [conditional sections][condsections] and [feature + /// detection] proposals, but that is still pretty early days). This means + /// that your binary will either have SIMD and can only run on engines + /// which support SIMD, or it will not have SIMD at all. For compatibility + /// the standard library itself does not use any SIMD internally. + /// Determining how best to ship your WebAssembly binary with SIMD is + /// largely left up to you as it can be pretty nuanced depending on + /// your situation. + /// + /// [condsections]: https://github.com/webassembly/conditional-sections + /// [feature detection]: https://github.com/WebAssembly/feature-detection + /// + /// To enable SIMD support at compile time you need to do one of two things: + /// + /// * First you can annotate functions with `#[target_feature(enable = + /// "simd128")]`. This causes just that one function to have SIMD support + /// available to it, and intrinsics will get inlined as usual in this + /// situation. + /// + /// * Second you can compile your program with `-Ctarget-feature=+simd128`. + /// This compilation flag blanket enables SIMD support for your entire + /// compilation. Note that this does not include the standard library + /// unless you [recompile the standard library][buildstd]. + /// + /// [buildstd]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#build-std + /// + /// If you enable SIMD via either of these routes then you'll have a + /// WebAssembly binary that uses SIMD instructions, and you'll need to ship + /// that accordingly. 
Also note that if you call SIMD intrinsics but don't + /// enable SIMD via either of these mechanisms, you'll still have SIMD + /// generated in your program. This means to generate a binary without SIMD + /// you'll need to avoid both options above plus calling into any intrinsics + /// in this module. + #[cfg(any(target_arch = "wasm32", doc))] + #[doc(cfg(target_arch = "wasm32"))] + #[stable(feature = "simd_wasm32", since = "1.33.0")] + pub mod wasm32 { + #[stable(feature = "simd_wasm32", since = "1.33.0")] + pub use crate::core_arch::wasm32::*; + } + + /// Platform-specific intrinsics for the `wasm64` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "wasm64", doc))] + #[doc(cfg(target_arch = "wasm64"))] + #[unstable(feature = "simd_wasm64", issue = "90599")] + pub mod wasm64 { + #[unstable(feature = "simd_wasm64", issue = "90599")] + pub use crate::core_arch::wasm32::*; + } + + /// Platform-specific intrinsics for the `wasm` target family. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_family = "wasm", doc))] + #[doc(cfg(target_family = "wasm"))] + #[unstable(feature = "simd_wasm64", issue = "90599")] + pub mod wasm { + #[unstable(feature = "simd_wasm64", issue = "90599")] + pub use crate::core_arch::wasm32::*; + } + + /// Platform-specific intrinsics for the `mips` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "mips", doc))] + #[doc(cfg(target_arch = "mips"))] + #[unstable(feature = "stdarch_mips", issue = "111198")] + pub mod mips { + pub use crate::core_arch::mips::*; + } + + /// Platform-specific intrinsics for the `mips64` platform. + /// + /// See the [module documentation](../index.html) for more details. 
+ #[cfg(any(target_arch = "mips64", doc))] + #[doc(cfg(target_arch = "mips64"))] + #[unstable(feature = "stdarch_mips", issue = "111198")] + pub mod mips64 { + pub use crate::core_arch::mips::*; + } + + /// Platform-specific intrinsics for the `PowerPC` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "powerpc", doc))] + #[doc(cfg(target_arch = "powerpc"))] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub mod powerpc { + pub use crate::core_arch::powerpc::*; + } + + /// Platform-specific intrinsics for the `PowerPC64` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "powerpc64", doc))] + #[doc(cfg(target_arch = "powerpc64"))] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub mod powerpc64 { + pub use crate::core_arch::powerpc64::*; + } + + /// Platform-specific intrinsics for the `NVPTX` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "nvptx64", doc))] + #[doc(cfg(target_arch = "nvptx64"))] + #[unstable(feature = "stdarch_nvptx", issue = "111199")] + pub mod nvptx { + pub use crate::core_arch::nvptx::*; + } + + /// Platform-specific intrinsics for the `loongarch` platform. + /// + /// See the [module documentation](../index.html) for more details. + #[cfg(any(target_arch = "loongarch64", doc))] + #[doc(cfg(target_arch = "loongarch64"))] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub mod loongarch64 { + pub use crate::core_arch::loongarch64::*; + } + + /// Platform-specific intrinsics for the `s390x` platform. + /// + /// See the [module documentation](../index.html) for more details. 
+ #[cfg(any(target_arch = "s390x", doc))] + #[doc(cfg(target_arch = "s390x"))] + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub mod s390x { + pub use crate::core_arch::s390x::*; + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] +#[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))] +mod x86; +#[cfg(any(target_arch = "x86_64", doc))] +#[doc(cfg(target_arch = "x86_64"))] +mod x86_64; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))] +#[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))] +mod aarch64; +#[cfg(any(target_arch = "arm", doc))] +#[doc(cfg(any(target_arch = "arm")))] +mod arm; + +#[cfg(any(target_arch = "riscv32", doc))] +#[doc(cfg(any(target_arch = "riscv32")))] +mod riscv32; + +#[cfg(any(target_arch = "riscv64", doc))] +#[doc(cfg(any(target_arch = "riscv64")))] +mod riscv64; + +#[cfg(any(target_family = "wasm", doc))] +#[doc(cfg(target_family = "wasm"))] +mod wasm32; + +#[cfg(any(target_arch = "mips", target_arch = "mips64", doc))] +#[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))] +mod mips; + +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", doc))] +#[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod powerpc; + +#[cfg(any(target_arch = "powerpc64", doc))] +#[doc(cfg(target_arch = "powerpc64"))] +mod powerpc64; + +#[cfg(any(target_arch = "nvptx64", doc))] +#[doc(cfg(target_arch = "nvptx64"))] +mod nvptx; + +#[cfg(any(target_arch = "loongarch64", doc))] +#[doc(cfg(target_arch = "loongarch64"))] +mod loongarch64; + +#[cfg(any(target_arch = "s390x", doc))] +#[doc(cfg(target_arch = "s390x"))] +mod s390x; diff --git a/library/stdarch/crates/core_arch/src/nvptx/mod.rs b/library/stdarch/crates/core_arch/src/nvptx/mod.rs new file mode 100644 index 000000000000..8d16dfb53d43 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/nvptx/mod.rs @@ -0,0 +1,236 @@ +//! NVPTX intrinsics (experimental) +//! +//! 
These intrinsics form the foundation of the CUDA +//! programming model. +//! +//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also +//! the [LLVM NVPTX Backend documentation][llvm_docs]. +//! +//! [cuda_c]: +//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html +//! [llvm_docs]: +//! https://llvm.org/docs/NVPTXUsage.html + +use crate::ffi::c_void; + +mod packed; + +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub use packed::*; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.nvvm.barrier0"] + fn syncthreads() -> (); + #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"] + fn block_dim_x() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"] + fn block_dim_y() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"] + fn block_dim_z() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"] + fn block_idx_x() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"] + fn block_idx_y() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"] + fn block_idx_z() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"] + fn grid_dim_x() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"] + fn grid_dim_y() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"] + fn grid_dim_z() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"] + fn thread_idx_x() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"] + fn thread_idx_y() -> i32; + #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"] + fn thread_idx_z() -> i32; +} + +/// Synchronizes all threads in the block. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _syncthreads() -> () { + syncthreads() +} + +/// x-th thread-block dimension. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _block_dim_x() -> i32 { + block_dim_x() +} + +/// y-th thread-block dimension. 
+#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _block_dim_y() -> i32 { + block_dim_y() +} + +/// z-th thread-block dimension. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _block_dim_z() -> i32 { + block_dim_z() +} + +/// x-th thread-block index. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _block_idx_x() -> i32 { + block_idx_x() +} + +/// y-th thread-block index. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _block_idx_y() -> i32 { + block_idx_y() +} + +/// z-th thread-block index. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _block_idx_z() -> i32 { + block_idx_z() +} + +/// x-th block-grid dimension. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _grid_dim_x() -> i32 { + grid_dim_x() +} + +/// y-th block-grid dimension. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _grid_dim_y() -> i32 { + grid_dim_y() +} + +/// z-th block-grid dimension. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _grid_dim_z() -> i32 { + grid_dim_z() +} + +/// x-th thread index. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _thread_idx_x() -> i32 { + thread_idx_x() +} + +/// y-th thread index. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _thread_idx_y() -> i32 { + thread_idx_y() +} + +/// z-th thread index. +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn _thread_idx_z() -> i32 { + thread_idx_z() +} + +/// Generates the trap instruction `TRAP` +#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn trap() -> ! { + crate::intrinsics::abort() +} + +// Basic CUDA syscall declarations. 
+unsafe extern "C" { + /// Print formatted output from a kernel to a host-side output stream. + /// + /// Syscall arguments: + /// * `status`: The status value that is returned by `vprintf`. + /// * `format`: A pointer to the format specifier input (uses common `printf` format). + /// * `valist`: A pointer to the valist input. + /// + /// ``` + /// #[repr(C)] + /// struct PrintArgs(f32, f32, f32, i32); + /// + /// vprintf( + /// "int(%f + %f) = int(%f) = %d\n".as_ptr(), + /// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)), + /// ); + /// ``` + /// + /// Sources: + /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output), + /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). + #[unstable(feature = "stdarch_nvptx", issue = "111199")] + pub fn vprintf(format: *const u8, valist: *const c_void) -> i32; + + /// Allocate memory dynamically from a fixed-size heap in global memory. + /// + /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes + /// from the device heap and returns a pointer to the allocated memory + /// or `NULL` if insufficient memory exists to fulfill the request. + /// + /// The returned pointer is guaranteed to be aligned to a 16-byte boundary. + /// + /// The memory allocated by a given CUDA thread via `malloc()` remains allocated + /// for the lifetime of the CUDA context, or until it is explicitly released + /// by a call to `free()`. It can be used by any other CUDA threads + /// even from subsequent kernel launches. + /// + /// Sources: + /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations), + /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). + // FIXME(denzp): assign `malloc` and `nothrow` attributes. 
+ #[unstable(feature = "stdarch_nvptx", issue = "111199")] + pub fn malloc(size: usize) -> *mut c_void; + + /// Free previously dynamically allocated memory. + /// + /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`, + /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL, + /// the call to `free()` is ignored. + /// + /// Any CUDA thread may free memory allocated by another thread, but care should be taken + /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()` + /// with the same `ptr` has undefined behavior. + /// + /// Sources: + /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations), + /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). + // FIXME(denzp): assign `nothrow` attribute. + #[unstable(feature = "stdarch_nvptx", issue = "111199")] + pub fn free(ptr: *mut c_void); + + // Internal declaration of the syscall. Exported variant has + // the `char_size` parameter set to `1` (single char size in bytes). + fn __assertfail( + message: *const u8, + file: *const u8, + line: u32, + function: *const u8, + char_size: usize, + ); +} + +/// Syscall to be used whenever the *assert expression produces a `false` value*. +/// +/// Syscall arguments: +/// * `message`: The pointer to the string that should be output. +/// * `file`: The pointer to the file name string associated with the assert. +/// * `line`: The line number associated with the assert. +/// * `function`: The pointer to the function name string associated with the assert. +/// +/// Source: +/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). 
+#[inline] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) { + __assertfail(message, file, line, function, 1) +} diff --git a/library/stdarch/crates/core_arch/src/nvptx/packed.rs b/library/stdarch/crates/core_arch/src/nvptx/packed.rs new file mode 100644 index 000000000000..856aeea4b686 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/nvptx/packed.rs @@ -0,0 +1,139 @@ +//! NVPTX Packed data types (SIMD) +//! +//! Packed Data Types is what PTX calls SIMD types. See [PTX ISA (Packed Data Types)](https://docs.nvidia.com/cuda/parallel-thread-execution/#packed-data-types) for a full reference. + +// Note: #[assert_instr] tests are not actually being run on nvptx due to being a `no_std` target incapable of running tests. Something like FileCheck would be appropriate for verifying the correct instruction is used. + +use crate::intrinsics::simd::*; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.minimum.v2f16"] + fn llvm_f16x2_minimum(a: f16x2, b: f16x2) -> f16x2; + #[link_name = "llvm.maximum.v2f16"] + fn llvm_f16x2_maximum(a: f16x2, b: f16x2) -> f16x2; +} + +types! 
{ + #![unstable(feature = "stdarch_nvptx", issue = "111199")] + + /// PTX-specific 32-bit wide floating point (f16 x 2) vector type + pub struct f16x2(2 x f16); + +} + +/// Add two values, round to nearest even +/// +/// +/// +/// Corresponds to the CUDA C intrinsics: +/// - [`__hadd2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g921c795176eaa31265bd80ef4fe4b8e6) +/// - [`__hadd2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g6cd8ddb2c3d670e1a10c3eb2e7644f82) +#[inline] +#[cfg_attr(test, assert_instr(add.rn.f16x22))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_add(a: f16x2, b: f16x2) -> f16x2 { + simd_add(a, b) +} + +/// Subtract two values, round to nearest even +/// +/// +/// +/// Corresponds to the CUDA C intrinsics: +/// - [`__hsub2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1ga5536c9c3d853d8c8b9de60e18b41e54) +/// - [`__hsub2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g8adc164c68d553354f749f0f0645a874) +#[inline] +#[cfg_attr(test, assert_instr(sub.rn.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_sub(a: f16x2, b: f16x2) -> f16x2 { + simd_sub(a, b) +} + +/// Multiply two values, round to nearest even +/// +/// +/// +/// Corresponds to the CUDA C intrinsics: +/// - [`__hmul2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g70de3f2ee48babe4e0969397ac17708e) +/// - [`__hmul2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g99f8fe23a4b4c6898d6faf999afaa76e) +#[inline] +#[cfg_attr(test, 
assert_instr(mul.rn.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_mul(a: f16x2, b: f16x2) -> f16x2 { + simd_mul(a, b) +} + +/// Fused multiply-add, round to nearest even +/// +/// +/// +/// Corresponds to the CUDA C intrinsics: +/// - [`__fma2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab) +/// - [`__fma2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab) +#[inline] +#[cfg_attr(test, assert_instr(fma.rn.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_fma(a: f16x2, b: f16x2, c: f16x2) -> f16x2 { + simd_fma(a, b, c) +} + +/// Arithmetic negate +/// +/// +/// +/// Corresponds to the CUDA C intrinsic [`__hmin2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g9e17a33f96061804166f3fbd395422b6) +#[inline] +#[cfg_attr(test, assert_instr(neg.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_neg(a: f16x2) -> f16x2 { + simd_neg(a) +} + +/// Find the minimum of two values +/// +/// +/// +/// Corresponds to the CUDA C intrinsic [`__hmin2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g9e17a33f96061804166f3fbd395422b6) +#[inline] +#[cfg_attr(test, assert_instr(min.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_min(a: f16x2, b: f16x2) -> f16x2 { + simd_fmin(a, b) +} + +/// Find the minimum of two values, NaNs pass through. 
+/// +/// +/// +/// Corresponds to the CUDA C intrinsic [`__hmin2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g8bb8f58e9294cc261d2f42c4d5aecd6b) +#[inline] +#[cfg_attr(test, assert_instr(min.NaN.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_min_nan(a: f16x2, b: f16x2) -> f16x2 { + llvm_f16x2_minimum(a, b) +} + +/// Find the maximum of two values +/// +/// +/// +/// Corresponds to the CUDA C intrinsic [`__hmax2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g59fc7fc7975d8127b202444a05e57e3d) +#[inline] +#[cfg_attr(test, assert_instr(max.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_max(a: f16x2, b: f16x2) -> f16x2 { + simd_fmax(a, b) +} + +/// Find the maximum of two values, NaNs pass through. +/// +/// +/// +/// Corresponds to the CUDA C intrinsic [`__hmax2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g41623db7850e3074fd9daa80a14c3897) +#[inline] +#[cfg_attr(test, assert_instr(max.NaN.f16x2))] +#[unstable(feature = "stdarch_nvptx", issue = "111199")] +pub unsafe fn f16x2_max_nan(a: f16x2, b: f16x2) -> f16x2 { + llvm_f16x2_maximum(a, b) +} diff --git a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs new file mode 100644 index 000000000000..2deeb53c2099 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs @@ -0,0 +1,6692 @@ +//! PowerPC AltiVec intrinsics. +//! +//! AltiVec is a brandname trademarked by Freescale (previously Motorola) for +//! the standard `Category:Vector` part of the Power ISA v.2.03 specification. +//! This Category is also known as VMX (used by IBM), and "Velocity Engine" (a +//! brand name previously used by Apple). +//! 
+//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA +//! NVlink)] and [POWER ISA v3.0B (for POWER9)]. +//! +//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u +//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv + +#![allow(non_camel_case_types)] + +use crate::{core_arch::simd::*, intrinsics::simd::*, mem, mem::transmute}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +use super::macros::*; + +types! { + #![unstable(feature = "stdarch_powerpc", issue = "111145")] + + /// PowerPC-specific 128-bit wide vector of sixteen packed `i8` + pub struct vector_signed_char(16 x i8); + /// PowerPC-specific 128-bit wide vector of sixteen packed `u8` + pub struct vector_unsigned_char(16 x u8); + + /// PowerPC-specific 128-bit wide vector mask of sixteen packed elements + pub struct vector_bool_char(16 x i8); + /// PowerPC-specific 128-bit wide vector of eight packed `i16` + pub struct vector_signed_short(8 x i16); + /// PowerPC-specific 128-bit wide vector of eight packed `u16` + pub struct vector_unsigned_short(8 x u16); + /// PowerPC-specific 128-bit wide vector mask of eight packed elements + pub struct vector_bool_short(8 x i16); + // pub struct vector_pixel(???); + /// PowerPC-specific 128-bit wide vector of four packed `i32` + pub struct vector_signed_int(4 x i32); + /// PowerPC-specific 128-bit wide vector of four packed `u32` + pub struct vector_unsigned_int(4 x u32); + /// PowerPC-specific 128-bit wide vector mask of four packed elements + pub struct vector_bool_int(4 x i32); + /// PowerPC-specific 128-bit wide vector of four packed `f32` + pub struct vector_float(4 x f32); +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.ppc.altivec.lvx"] + fn lvx(p: *const i8) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.lvebx"] + fn lvebx(p: *const i8) -> vector_signed_char; + #[link_name = 
"llvm.ppc.altivec.lvehx"] + fn lvehx(p: *const i8) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.lvewx"] + fn lvewx(p: *const i8) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.lvxl"] + fn lvxl(p: *const i8) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.stvx"] + fn stvx(a: vector_signed_int, p: *const i8); + + #[link_name = "llvm.ppc.altivec.stvebx"] + fn stvebx(a: vector_signed_char, p: *const i8); + #[link_name = "llvm.ppc.altivec.stvehx"] + fn stvehx(a: vector_signed_short, p: *const i8); + #[link_name = "llvm.ppc.altivec.stvewx"] + fn stvewx(a: vector_signed_int, p: *const i8); + + #[link_name = "llvm.ppc.altivec.stvxl"] + fn stvxl(a: vector_signed_int, p: *const i8); + + #[link_name = "llvm.ppc.altivec.vperm"] + fn vperm( + a: vector_signed_int, + b: vector_signed_int, + c: vector_unsigned_char, + ) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vmhaddshs"] + fn vmhaddshs( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_short, + ) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vmhraddshs"] + fn vmhraddshs( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_short, + ) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vmsumuhs"] + fn vmsumuhs( + a: vector_unsigned_short, + b: vector_unsigned_short, + c: vector_unsigned_int, + ) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vmsumshs"] + fn vmsumshs( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_int, + ) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vmsumubm"] + fn vmsumubm( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_int, + ) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vmsummbm"] + fn vmsummbm( + a: vector_signed_char, + b: vector_unsigned_char, + c: vector_signed_int, + ) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vmsumuhm"] + fn vmsumuhm( + a: vector_unsigned_short, + b: vector_unsigned_short, + c: 
vector_unsigned_int, + ) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vmsumshm"] + fn vmsumshm( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_int, + ) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vnmsubfp"] + fn vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float; + #[link_name = "llvm.ppc.altivec.vsum2sws"] + fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vsum4ubs"] + fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vsum4sbs"] + fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vsum4shs"] + fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vmuleub"] + fn vmuleub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.ppc.altivec.vmulesb"] + fn vmulesb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vmuleuh"] + fn vmuleuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vmulesh"] + fn vmulesh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vmuloub"] + fn vmuloub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.ppc.altivec.vmulosb"] + fn vmulosb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vmulouh"] + fn vmulouh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.ppc.altivec.vmulosh"] + fn vmulosh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; + + #[link_name = "llvm.smax.v16i8"] + fn vmaxsb(a: vector_signed_char, b: vector_signed_char) -> 
vector_signed_char; + #[link_name = "llvm.smax.v8i16"] + fn vmaxsh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.smax.v4i32"] + fn vmaxsw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.umax.v16i8"] + fn vmaxub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.umax.v8i16"] + fn vmaxuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.umax.v4i32"] + fn vmaxuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.smin.v16i8"] + fn vminsb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.smin.v8i16"] + fn vminsh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.smin.v4i32"] + fn vminsw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.umin.v16i8"] + fn vminub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.umin.v8i16"] + fn vminuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.umin.v4i32"] + fn vminuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vsubsbs"] + fn vsubsbs(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.ppc.altivec.vsubshs"] + fn vsubshs(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vsubsws"] + fn vsubsws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vsububs"] + fn vsububs(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.vsubuhs"] + fn vsubuhs(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + 
#[link_name = "llvm.ppc.altivec.vsubuws"] + fn vsubuws(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vsubcuw"] + fn vsubcuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vaddcuw"] + fn vaddcuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vaddsbs"] + fn vaddsbs(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.ppc.altivec.vaddshs"] + fn vaddshs(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vaddsws"] + fn vaddsws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vaddubs"] + fn vaddubs(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.vadduhs"] + fn vadduhs(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.ppc.altivec.vadduws"] + fn vadduws(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vavgsb"] + fn vavgsb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.ppc.altivec.vavgsh"] + fn vavgsh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vavgsw"] + fn vavgsw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vavgub"] + fn vavgub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.vavguh"] + fn vavguh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.ppc.altivec.vavguw"] + fn vavguw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vcmpbfp"] + fn vcmpbfp(a: 
vector_float, b: vector_float) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vcmpequb"] + fn vcmpequb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_bool_char; + #[link_name = "llvm.ppc.altivec.vcmpequh"] + fn vcmpequh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_bool_short; + #[link_name = "llvm.ppc.altivec.vcmpequw"] + fn vcmpequw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_bool_int; + + #[link_name = "llvm.ppc.altivec.vcmpneb"] + fn vcmpneb(a: vector_signed_char, b: vector_signed_char) -> vector_bool_char; + #[link_name = "llvm.ppc.altivec.vcmpneh"] + fn vcmpneh(a: vector_signed_short, b: vector_signed_short) -> vector_bool_short; + #[link_name = "llvm.ppc.altivec.vcmpnew"] + fn vcmpnew(a: vector_signed_int, b: vector_signed_int) -> vector_bool_int; + + #[link_name = "llvm.ppc.altivec.vcmpgefp"] + fn vcmpgefp(a: vector_float, b: vector_float) -> vector_bool_int; + + #[link_name = "llvm.ppc.altivec.vcmpgtub"] + fn vcmpgtub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_bool_char; + #[link_name = "llvm.ppc.altivec.vcmpgtuh"] + fn vcmpgtuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_bool_short; + #[link_name = "llvm.ppc.altivec.vcmpgtuw"] + fn vcmpgtuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_bool_int; + + #[link_name = "llvm.ppc.altivec.vcmpgtsb"] + fn vcmpgtsb(a: vector_signed_char, b: vector_signed_char) -> vector_bool_char; + #[link_name = "llvm.ppc.altivec.vcmpgtsh"] + fn vcmpgtsh(a: vector_signed_short, b: vector_signed_short) -> vector_bool_short; + #[link_name = "llvm.ppc.altivec.vcmpgtsw"] + fn vcmpgtsw(a: vector_signed_int, b: vector_signed_int) -> vector_bool_int; + + #[link_name = "llvm.ppc.altivec.vexptefp"] + fn vexptefp(a: vector_float) -> vector_float; + + #[link_name = "llvm.ppc.altivec.vcmpequb.p"] + fn vcmpequb_p(cr: i32, a: vector_unsigned_char, b: vector_unsigned_char) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpequh.p"] + fn vcmpequh_p(cr: 
i32, a: vector_unsigned_short, b: vector_unsigned_short) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpequw.p"] + fn vcmpequw_p(cr: i32, a: vector_unsigned_int, b: vector_unsigned_int) -> i32; + + #[link_name = "llvm.ppc.altivec.vcmpeqfp.p"] + fn vcmpeqfp_p(cr: i32, a: vector_float, b: vector_float) -> i32; + + #[link_name = "llvm.ppc.altivec.vcmpgtub.p"] + fn vcmpgtub_p(cr: i32, a: vector_unsigned_char, b: vector_unsigned_char) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpgtuh.p"] + fn vcmpgtuh_p(cr: i32, a: vector_unsigned_short, b: vector_unsigned_short) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpgtuw.p"] + fn vcmpgtuw_p(cr: i32, a: vector_unsigned_int, b: vector_unsigned_int) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpgtsb.p"] + fn vcmpgtsb_p(cr: i32, a: vector_signed_char, b: vector_signed_char) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpgtsh.p"] + fn vcmpgtsh_p(cr: i32, a: vector_signed_short, b: vector_signed_short) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpgtsw.p"] + fn vcmpgtsw_p(cr: i32, a: vector_signed_int, b: vector_signed_int) -> i32; + + #[link_name = "llvm.ppc.altivec.vcmpgefp.p"] + fn vcmpgefp_p(cr: i32, a: vector_float, b: vector_float) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpgtfp.p"] + fn vcmpgtfp_p(cr: i32, a: vector_float, b: vector_float) -> i32; + #[link_name = "llvm.ppc.altivec.vcmpbfp.p"] + fn vcmpbfp_p(cr: i32, a: vector_float, b: vector_float) -> i32; + + #[link_name = "llvm.ppc.altivec.vcfsx"] + fn vcfsx(a: vector_signed_int, b: i32) -> vector_float; + #[link_name = "llvm.ppc.altivec.vcfux"] + fn vcfux(a: vector_unsigned_int, b: i32) -> vector_float; + + #[link_name = "llvm.ppc.altivec.vctsxs"] + fn vctsxs(a: vector_float, b: i32) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vctuxs"] + fn vctuxs(a: vector_float, b: i32) -> vector_unsigned_int; + + #[link_name = "llvm.ppc.altivec.vpkshss"] + fn vpkshss(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char; + #[link_name = 
"llvm.ppc.altivec.vpkshus"] + fn vpkshus(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.vpkuhus"] + fn vpkuhus(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.vpkswss"] + fn vpkswss(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vpkswus"] + fn vpkswus(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short; + #[link_name = "llvm.ppc.altivec.vpkuwus"] + fn vpkuwus(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_short; + + #[link_name = "llvm.ppc.altivec.vupkhsb"] + fn vupkhsb(a: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vupklsb"] + fn vupklsb(a: vector_signed_char) -> vector_signed_short; + + #[link_name = "llvm.ppc.altivec.vupkhsh"] + fn vupkhsh(a: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vupklsh"] + fn vupklsh(a: vector_signed_short) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.mfvscr"] + fn mfvscr() -> vector_unsigned_short; + + #[link_name = "llvm.ppc.altivec.vlogefp"] + fn vlogefp(a: vector_float) -> vector_float; + + #[link_name = "llvm.ppc.altivec.vsl"] + fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vslo"] + fn vslo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vsrab"] + fn vsrab(a: vector_signed_char, b: vector_unsigned_char) -> vector_signed_char; + #[link_name = "llvm.ppc.altivec.vsrah"] + fn vsrah(a: vector_signed_short, b: vector_unsigned_short) -> vector_signed_short; + #[link_name = "llvm.ppc.altivec.vsraw"] + fn vsraw(a: vector_signed_int, b: vector_unsigned_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vsr"] + fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.ppc.altivec.vsro"] + fn 
vsro(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.ppc.altivec.vslv"] + fn vslv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.ppc.altivec.vsrv"] + fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + + #[link_name = "llvm.fshl.v16i8"] + fn fshlb( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_char, + ) -> vector_unsigned_char; + #[link_name = "llvm.fshl.v8i16"] + fn fshlh( + a: vector_unsigned_short, + b: vector_unsigned_short, + c: vector_unsigned_short, + ) -> vector_unsigned_short; + #[link_name = "llvm.fshl.v4i32"] + fn fshlw( + a: vector_unsigned_int, + b: vector_unsigned_int, + c: vector_unsigned_int, + ) -> vector_unsigned_int; + + #[link_name = "llvm.nearbyint.v4f32"] + fn vrfin(a: vector_float) -> vector_float; +} + +impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4 } + +impl_neg! { i8x16 : 0 } +impl_neg! { i16x8 : 0 } +impl_neg! { i32x4 : 0 } +impl_neg! { f32x4 : 0f32 } + +#[macro_use] +mod sealed { + use super::*; + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorInsert { + type Scalar; + unsafe fn vec_insert(self, s: Self::Scalar) -> Self; + } + + const fn idx_in_vec() -> u32 { + IDX & (16 / crate::mem::size_of::() as u32) + } + + macro_rules! impl_vec_insert { + ($ty:ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorInsert for t_t_l!($ty) { + type Scalar = $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_insert(self, s: Self::Scalar) -> Self { + simd_insert(self, const { idx_in_vec::() }, s) + } + } + }; + } + + impl_vec_insert! { i8 } + impl_vec_insert! { u8 } + impl_vec_insert! { i16 } + impl_vec_insert! { u16 } + impl_vec_insert! { i32 } + impl_vec_insert! { u32 } + impl_vec_insert! 
{ f32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorExtract { + type Scalar; + unsafe fn vec_extract(self) -> Self::Scalar; + } + + macro_rules! impl_vec_extract { + ($ty:ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorExtract for t_t_l!($ty) { + type Scalar = $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_extract(self) -> Self::Scalar { + simd_extract(self, const { idx_in_vec::() }) + } + } + }; + } + + impl_vec_extract! { i8 } + impl_vec_extract! { u8 } + impl_vec_extract! { i16 } + impl_vec_extract! { u16 } + impl_vec_extract! { i32 } + impl_vec_extract! { u32 } + impl_vec_extract! { f32 } + + macro_rules! impl_vec_cmp { + ([$Trait:ident $m:ident] ($b:ident, $h:ident, $w:ident)) => { + impl_vec_cmp! { [$Trait $m] ($b, $b, $h, $h, $w, $w) } + }; + ([$Trait:ident $m:ident] ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> vector_bool_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> vector_bool_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> vector_bool_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> vector_bool_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> vector_bool_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> vector_bool_int } + } + } + + macro_rules! impl_vec_any_all { + ([$Trait:ident $m:ident] ($b:ident, $h:ident, $w:ident)) => { + impl_vec_any_all! 
{ [$Trait $m] ($b, $b, $h, $h, $w, $w) } + }; + ([$Trait:ident $m:ident] ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> bool } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> bool } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> bool } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> bool } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> bool } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> bool } + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorLd { + type Result; + unsafe fn vec_ld(self, off: isize) -> Self::Result; + unsafe fn vec_ldl(self, off: isize) -> Self::Result; + } + + macro_rules! impl_vec_ld { + ($fun:ident $fun_lru:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(lvx))] + pub unsafe fn $fun(off: isize, p: *const $ty) -> t_t_l!($ty) { + let addr = (p as *const i8).offset(off); + transmute(lvx(addr)) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(lvxl))] + pub unsafe fn $fun_lru(off: isize, p: *const $ty) -> t_t_l!($ty) { + let addr = (p as *const i8).offset(off); + transmute(lvxl(addr)) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorLd for *const $ty { + type Result = t_t_l!($ty); + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_ld(self, off: isize) -> Self::Result { + $fun(off, self) + } + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_ldl(self, off: isize) -> Self::Result { + $fun_lru(off, self) + } + } + }; + } + + impl_vec_ld! { vec_ld_u8 vec_ldl_u8 u8 } + impl_vec_ld! { vec_ld_i8 vec_ldl_i8 i8 } + + impl_vec_ld! { vec_ld_u16 vec_ldl_u16 u16 } + impl_vec_ld! 
{ vec_ld_i16 vec_ldl_i16 i16 } + + impl_vec_ld! { vec_ld_u32 vec_ldl_u32 u32 } + impl_vec_ld! { vec_ld_i32 vec_ldl_i32 i32 } + + impl_vec_ld! { vec_ld_f32 vec_ldl_f32 f32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorLde { + type Result; + unsafe fn vec_lde(self, a: isize) -> Self::Result; + } + + macro_rules! impl_vec_lde { + ($fun:ident $instr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) { + let addr = b.byte_offset(a).cast::(); + transmute($instr(addr)) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorLde for *const $ty { + type Result = t_t_l!($ty); + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_lde(self, a: isize) -> Self::Result { + $fun(a, self) + } + } + }; + } + + impl_vec_lde! { vec_lde_u8 lvebx u8 } + impl_vec_lde! { vec_lde_i8 lvebx i8 } + + impl_vec_lde! { vec_lde_u16 lvehx u16 } + impl_vec_lde! { vec_lde_i16 lvehx i16 } + + impl_vec_lde! { vec_lde_u32 lvewx u32 } + impl_vec_lde! { vec_lde_i32 lvewx i32 } + + impl_vec_lde! { vec_lde_f32 lvewx f32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSt { + type Target; + unsafe fn vec_st(self, off: isize, p: Self::Target); + unsafe fn vec_stl(self, off: isize, p: Self::Target); + } + + macro_rules! 
impl_vec_st { + ($fun:ident $fun_lru:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(stvx))] + pub unsafe fn $fun(a: t_t_l!($ty), off: isize, p: *const $ty) { + let addr = (p as *const i8).offset(off); + stvx(transmute(a), addr) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(stvxl))] + pub unsafe fn $fun_lru(a: t_t_l!($ty), off: isize, p: *const $ty) { + let addr = (p as *const i8).offset(off as isize); + stvxl(transmute(a), addr) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSt for t_t_l!($ty) { + type Target = *const $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_st(self, off: isize, p: Self::Target) { + $fun(self, off, p) + } + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_stl(self, off: isize, p: Self::Target) { + $fun(self, off, p) + } + } + }; + } + + impl_vec_st! { vec_st_u8 vec_stl_u8 u8 } + impl_vec_st! { vec_st_i8 vec_stl_i8 i8 } + + impl_vec_st! { vec_st_u16 vec_stl_u16 u16 } + impl_vec_st! { vec_st_i16 vec_stl_i16 i16 } + + impl_vec_st! { vec_st_u32 vec_stl_u32 u32 } + impl_vec_st! { vec_st_i32 vec_stl_i32 i32 } + + impl_vec_st! { vec_st_f32 vec_stl_f32 f32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSte { + type Target; + unsafe fn vec_ste(self, off: isize, p: Self::Target); + } + + macro_rules! 
impl_vec_ste { + ($fun:ident $instr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $fun(a: t_t_l!($ty), off: isize, p: *const $ty) { + let addr = (p as *const i8).offset(off); + $instr(transmute(a), addr) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSte for t_t_l!($ty) { + type Target = *const $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_ste(self, off: isize, p: Self::Target) { + $fun(self, off, p) + } + } + }; + } + + impl_vec_ste! { vec_ste_u8 stvebx u8 } + impl_vec_ste! { vec_ste_i8 stvebx i8 } + + impl_vec_ste! { vec_ste_u16 stvehx u16 } + impl_vec_ste! { vec_ste_i16 stvehx i16 } + + impl_vec_ste! { vec_ste_u32 stvewx u32 } + impl_vec_ste! { vec_ste_i32 stvewx i32 } + + impl_vec_ste! { vec_ste_f32 stvewx f32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXl { + type Result; + unsafe fn vec_xl(self, a: isize) -> Self::Result; + } + + macro_rules! impl_vec_xl { + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, not(target_feature = "power9-altivec")), + assert_instr($notpwr9) + )] + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] + pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) { + let addr = (b as *const u8).offset(a); + + let mut r = mem::MaybeUninit::uninit(); + + crate::ptr::copy_nonoverlapping( + addr, + r.as_mut_ptr() as *mut u8, + mem::size_of::(), + ); + + r.assume_init() + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXl for *const $ty { + type Result = t_t_l!($ty); + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_xl(self, a: isize) -> Self::Result { + $fun(a, self) + } + } + }; + } + + impl_vec_xl! { vec_xl_i8 lxvd2x / lxv i8 } + impl_vec_xl! { vec_xl_u8 lxvd2x / lxv u8 } + impl_vec_xl! 
{ vec_xl_i16 lxvd2x / lxv i16 } + impl_vec_xl! { vec_xl_u16 lxvd2x / lxv u16 } + impl_vec_xl! { vec_xl_i32 lxvd2x / lxv i32 } + impl_vec_xl! { vec_xl_u32 lxvd2x / lxv u32 } + impl_vec_xl! { vec_xl_f32 lxvd2x / lxv f32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXst { + type Out; + unsafe fn vec_xst(self, a: isize, p: Self::Out); + } + + macro_rules! impl_vec_xst { + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, not(target_feature = "power9-altivec")), + assert_instr($notpwr9) + )] + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] + pub unsafe fn $fun(s: t_t_l!($ty), a: isize, b: *mut $ty) { + let addr = (b as *mut u8).offset(a); + + crate::ptr::copy_nonoverlapping( + &s as *const _ as *const u8, + addr, + mem::size_of::(), + ); + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXst for t_t_l!($ty) { + type Out = *mut $ty; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_xst(self, a: isize, b: Self::Out) { + $fun(self, a, b) + } + } + }; + } + + impl_vec_xst! { vec_xst_i8 stxvd2x / stxv i8 } + impl_vec_xst! { vec_xst_u8 stxvd2x / stxv u8 } + impl_vec_xst! { vec_xst_i16 stxvd2x / stxv i16 } + impl_vec_xst! { vec_xst_u16 stxvd2x / stxv u16 } + impl_vec_xst! { vec_xst_i32 stxvd2x / stxv i32 } + impl_vec_xst! { vec_xst_u32 stxvd2x / stxv u32 } + impl_vec_xst! { vec_xst_f32 stxvd2x / stxv f32 } + + test_impl! { vec_floor(a: vector_float) -> vector_float [ simd_floor, vrfim / xvrspim ] } + + test_impl! { vec_vexptefp(a: vector_float) -> vector_float [ vexptefp, vexptefp ] } + + test_impl! { vec_vcmpgtub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_bool_char [ vcmpgtub, vcmpgtub ] } + test_impl! { vec_vcmpgtuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_bool_short [ vcmpgtuh, vcmpgtuh ] } + test_impl! 
{ vec_vcmpgtuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_bool_int [ vcmpgtuw, vcmpgtuw ] } + + test_impl! { vec_vcmpgtsb(a: vector_signed_char, b: vector_signed_char) -> vector_bool_char [ vcmpgtsb, vcmpgtsb ] } + test_impl! { vec_vcmpgtsh(a: vector_signed_short, b: vector_signed_short) -> vector_bool_short [ vcmpgtsh, vcmpgtsh ] } + test_impl! { vec_vcmpgtsw(a: vector_signed_int, b: vector_signed_int) -> vector_bool_int [ vcmpgtsw, vcmpgtsw ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCmpGt { + type Result; + unsafe fn vec_cmpgt(self, b: Other) -> Self::Result; + } + + impl_vec_cmp! { [VectorCmpGt vec_cmpgt] ( vec_vcmpgtub, vec_vcmpgtsb, vec_vcmpgtuh, vec_vcmpgtsh, vec_vcmpgtuw, vec_vcmpgtsw ) } + + test_impl! { vec_vcmpgefp(a: vector_float, b: vector_float) -> vector_bool_int [ vcmpgefp, vcmpgefp ] } + + test_impl! { vec_vcmpequb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_bool_char [ vcmpequb, vcmpequb ] } + test_impl! { vec_vcmpequh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_bool_short [ vcmpequh, vcmpequh ] } + test_impl! { vec_vcmpequw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_bool_int [ vcmpequw, vcmpequw ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCmpEq { + type Result; + unsafe fn vec_cmpeq(self, b: Other) -> Self::Result; + } + + impl_vec_cmp! { [VectorCmpEq vec_cmpeq] (vec_vcmpequb, vec_vcmpequh, vec_vcmpequw) } + + macro_rules! impl_cmpne { + ($fun:ident ($ty:ident) -> $r:ident $([ $pwr9:ident ])? ) => { + #[inline] + #[target_feature(enable = "altivec")] + $( #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] )? + unsafe fn $fun(a: $ty, b: $ty) -> $r { + $( if cfg!(target_feature = "power9-altivec") { + transmute($pwr9(transmute(a), transmute(b))) + } else )? { + let zero = transmute(i32x4::new(0, 0, 0, 0)); + vec_nor(vec_cmpeq(a, b), zero) + } + } + }; + } + + impl_cmpne! 
{ vec_vcmpneb(vector_signed_char) -> vector_bool_char [ vcmpneb ] } + impl_cmpne! { vec_vcmpneh(vector_signed_short) -> vector_bool_short [ vcmpneh ] } + impl_cmpne! { vec_vcmpnew(vector_signed_int) -> vector_bool_int [ vcmpnew ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCmpNe { + type Result; + unsafe fn vec_cmpne(self, b: Other) -> Self::Result; + } + + impl_vec_cmp! { [VectorCmpNe vec_cmpne] (vec_vcmpneb, vec_vcmpneh, vec_vcmpnew) } + + test_impl! { vec_vcmpbfp(a: vector_float, b: vector_float) -> vector_signed_int [vcmpbfp, vcmpbfp] } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequb.))] + unsafe fn vcmpequb_all(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpequb_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequb.))] + unsafe fn vcmpequb_any(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpequb_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequh.))] + unsafe fn vcmpequh_all(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpequh_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequh.))] + unsafe fn vcmpequh_any(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpequh_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequw.))] + unsafe fn vcmpequw_all(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpequw_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequw.))] + unsafe fn vcmpequw_any(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpequw_p(1, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAllEq { + type Result; + unsafe fn 
vec_all_eq(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAllEq vec_all_eq] (vcmpequb_all, vcmpequh_all, vcmpequw_all) } + + // TODO: vsx encoding + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpeqfp.))] + unsafe fn vcmpeqfp_all(a: vector_float, b: vector_float) -> bool { + vcmpeqfp_p(2, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAllEq for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_all_eq(self, b: vector_float) -> Self::Result { + vcmpeqfp_all(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAnyEq { + type Result; + unsafe fn vec_any_eq(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAnyEq vec_any_eq] (vcmpequb_any, vcmpequh_any, vcmpequw_any) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpeqfp.))] + unsafe fn vcmpeqfp_any(a: vector_float, b: vector_float) -> bool { + vcmpeqfp_p(1, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAnyEq for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_any_eq(self, b: vector_float) -> Self::Result { + vcmpeqfp_any(self, b) + } + } + + // All/Any GreaterEqual + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsb.))] + unsafe fn vcmpgesb_all(a: vector_signed_char, b: vector_signed_char) -> bool { + vcmpgtsb_p(0, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsb.))] + unsafe fn vcmpgesb_any(a: vector_signed_char, b: vector_signed_char) -> bool { + vcmpgtsb_p(3, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsh.))] + unsafe fn vcmpgesh_all(a: vector_signed_short, b: vector_signed_short) -> bool { + vcmpgtsh_p(0, b, a) != 0 + } + + #[inline] + #[target_feature(enable = 
"altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsh.))] + unsafe fn vcmpgesh_any(a: vector_signed_short, b: vector_signed_short) -> bool { + vcmpgtsh_p(3, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsw.))] + unsafe fn vcmpgesw_all(a: vector_signed_int, b: vector_signed_int) -> bool { + vcmpgtsw_p(0, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsw.))] + unsafe fn vcmpgesw_any(a: vector_signed_int, b: vector_signed_int) -> bool { + vcmpgtsw_p(3, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtub.))] + unsafe fn vcmpgeub_all(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpgtub_p(0, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtub.))] + unsafe fn vcmpgeub_any(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpgtub_p(3, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuh.))] + unsafe fn vcmpgeuh_all(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpgtuh_p(0, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuh.))] + unsafe fn vcmpgeuh_any(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpgtuh_p(3, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuw.))] + unsafe fn vcmpgeuw_all(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpgtuw_p(0, b, a) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuw.))] + unsafe fn vcmpgeuw_any(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpgtuw_p(3, b, a) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAllGe { + type Result; + unsafe fn 
vec_all_ge(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAllGe vec_all_ge] ( + vcmpgeub_all, vcmpgesb_all, + vcmpgeuh_all, vcmpgesh_all, + vcmpgeuw_all, vcmpgesw_all + ) } + + // TODO: vsx encoding + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgefp.))] + unsafe fn vcmpgefp_all(a: vector_float, b: vector_float) -> bool { + vcmpgefp_p(2, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAllGe for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_all_ge(self, b: vector_float) -> Self::Result { + vcmpgefp_all(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAnyGe { + type Result; + unsafe fn vec_any_ge(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAnyGe vec_any_ge] ( + vcmpgeub_any, vcmpgesb_any, + vcmpgeuh_any, vcmpgesh_any, + vcmpgeuw_any, vcmpgesw_any + ) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgefp.))] + unsafe fn vcmpgefp_any(a: vector_float, b: vector_float) -> bool { + vcmpgefp_p(1, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAnyGe for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_any_ge(self, b: vector_float) -> Self::Result { + vcmpgefp_any(self, b) + } + } + + // All/Any Greater Than + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsb.))] + unsafe fn vcmpgtsb_all(a: vector_signed_char, b: vector_signed_char) -> bool { + vcmpgtsb_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsb.))] + unsafe fn vcmpgtsb_any(a: vector_signed_char, b: vector_signed_char) -> bool { + vcmpgtsb_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsh.))] + unsafe fn vcmpgtsh_all(a: vector_signed_short, b: 
vector_signed_short) -> bool { + vcmpgtsh_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsh.))] + unsafe fn vcmpgtsh_any(a: vector_signed_short, b: vector_signed_short) -> bool { + vcmpgtsh_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsw.))] + unsafe fn vcmpgtsw_all(a: vector_signed_int, b: vector_signed_int) -> bool { + vcmpgtsw_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtsw.))] + unsafe fn vcmpgtsw_any(a: vector_signed_int, b: vector_signed_int) -> bool { + vcmpgtsw_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtub.))] + unsafe fn vcmpgtub_all(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpgtub_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtub.))] + unsafe fn vcmpgtub_any(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpgtub_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuh.))] + unsafe fn vcmpgtuh_all(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpgtuh_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuh.))] + unsafe fn vcmpgtuh_any(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpgtuh_p(1, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuw.))] + unsafe fn vcmpgtuw_all(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpgtuw_p(2, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtuw.))] + unsafe fn vcmpgtuw_any(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpgtuw_p(1, a, b) != 0 + } + + 
#[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAllGt { + type Result; + unsafe fn vec_all_gt(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAllGt vec_all_gt] ( + vcmpgtub_all, vcmpgtsb_all, + vcmpgtuh_all, vcmpgtsh_all, + vcmpgtuw_all, vcmpgtsw_all + ) } + + // TODO: vsx encoding + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtfp.))] + unsafe fn vcmpgtfp_all(a: vector_float, b: vector_float) -> bool { + vcmpgtfp_p(2, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAllGt for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_all_gt(self, b: vector_float) -> Self::Result { + vcmpgtfp_all(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAnyGt { + type Result; + unsafe fn vec_any_gt(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAnyGt vec_any_gt] ( + vcmpgtub_any, vcmpgtsb_any, + vcmpgtuh_any, vcmpgtsh_any, + vcmpgtuw_any, vcmpgtsw_any + ) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpgtfp.))] + unsafe fn vcmpgtfp_any(a: vector_float, b: vector_float) -> bool { + vcmpgtfp_p(1, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAnyGt for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_any_gt(self, b: vector_float) -> Self::Result { + vcmpgtfp_any(self, b) + } + } + + // All/Any Elements Not Equal + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequb.))] + unsafe fn vcmpneub_all(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpequb_p(0, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequb.))] + unsafe fn vcmpneub_any(a: vector_unsigned_char, b: vector_unsigned_char) -> bool { + vcmpequb_p(3, a, b) != 0 + } + + #[inline] + 
#[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequh.))] + unsafe fn vcmpneuh_all(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpequh_p(0, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequh.))] + unsafe fn vcmpneuh_any(a: vector_unsigned_short, b: vector_unsigned_short) -> bool { + vcmpequh_p(3, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequw.))] + unsafe fn vcmpneuw_all(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpequw_p(0, a, b) != 0 + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpequw.))] + unsafe fn vcmpneuw_any(a: vector_unsigned_int, b: vector_unsigned_int) -> bool { + vcmpequw_p(3, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAllNe { + type Result; + unsafe fn vec_all_ne(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! { [VectorAllNe vec_all_ne] (vcmpneub_all, vcmpneuh_all, vcmpneuw_all) } + + // TODO: vsx encoding + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpeqfp.))] + unsafe fn vcmpnefp_all(a: vector_float, b: vector_float) -> bool { + vcmpeqfp_p(0, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAllNe for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_all_ne(self, b: vector_float) -> Self::Result { + vcmpnefp_all(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAnyNe { + type Result; + unsafe fn vec_any_ne(self, b: Other) -> Self::Result; + } + + impl_vec_any_all! 
{ [VectorAnyNe vec_any_ne] (vcmpneub_any, vcmpneuh_any, vcmpneuw_any) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcmpeqfp.))] + unsafe fn vcmpnefp_any(a: vector_float, b: vector_float) -> bool { + vcmpeqfp_p(3, a, b) != 0 + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAnyNe for vector_float { + type Result = bool; + #[inline] + unsafe fn vec_any_ne(self, b: vector_float) -> Self::Result { + vcmpnefp_any(self, b) + } + } + + test_impl! { vec_vceil(a: vector_float) -> vector_float [simd_ceil, vrfip / xvrspip ] } + + test_impl! { vec_vavgsb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vavgsb, vavgsb ] } + test_impl! { vec_vavgsh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vavgsh, vavgsh ] } + test_impl! { vec_vavgsw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vavgsw, vavgsw ] } + test_impl! { vec_vavgub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vavgub, vavgub ] } + test_impl! { vec_vavguh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vavguh, vavguh ] } + test_impl! { vec_vavguw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vavguw, vavguw ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAvg { + type Result; + unsafe fn vec_avg(self, b: Other) -> Self::Result; + } + + impl_vec_trait! 
{ [VectorAvg vec_avg] 2 (vec_vavgub, vec_vavgsb, vec_vavguh, vec_vavgsh, vec_vavguw, vec_vavgsw) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature = "vsx")), assert_instr(vandc))] + #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxlandc))] + unsafe fn andc(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a = transmute(a); + let b = transmute(b); + transmute(simd_and(simd_xor(u8x16::splat(0xff), b), a)) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAndc { + type Result; + unsafe fn vec_andc(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAndc vec_andc]+ 2b (andc) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature = "vsx")), assert_instr(vorc))] + #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxlorc))] + unsafe fn orc(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a = transmute(a); + let b = transmute(b); + transmute(simd_or(simd_xor(u8x16::splat(0xff), b), a)) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorOrc { + type Result; + unsafe fn vec_orc(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorOrc vec_orc]+ 2b (orc) } + + test_impl! { vec_vand(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ simd_and, vand / xxland ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAnd { + type Result; + unsafe fn vec_and(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAnd vec_and] ~(simd_and) } + + test_impl! { vec_vaddsbs(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vaddsbs, vaddsbs ] } + test_impl! { vec_vaddshs(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vaddshs, vaddshs ] } + test_impl! 
{ vec_vaddsws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vaddsws, vaddsws ] } + test_impl! { vec_vaddubs(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vaddubs, vaddubs ] } + test_impl! { vec_vadduhs(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vadduhs, vadduhs ] } + test_impl! { vec_vadduws(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vadduws, vadduws ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAdds { + type Result; + unsafe fn vec_adds(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAdds vec_adds] ~(vaddubs, vaddsbs, vadduhs, vaddshs, vadduws, vaddsws) } + + test_impl! { vec_vaddcuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vaddcuw, vaddcuw] } + + test_impl! { vec_vsubsbs(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vsubsbs, vsubsbs ] } + test_impl! { vec_vsubshs(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vsubshs, vsubshs ] } + test_impl! { vec_vsubsws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vsubsws, vsubsws ] } + test_impl! { vec_vsububs(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vsububs, vsububs ] } + test_impl! { vec_vsubuhs(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vsubuhs, vsubuhs ] } + test_impl! { vec_vsubuws(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vsubuws, vsubuws ] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSubs { + type Result; + unsafe fn vec_subs(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorSubs vec_subs] ~(vsububs, vsubsbs, vsubuhs, vsubshs, vsubuws, vsubsws) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAbs { + unsafe fn vec_abs(self) -> Self; + } + + macro_rules! 
impl_abs { + ($name:ident, $ty: ident) => { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) { + v.vec_max(-v) + } + + impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) } + }; + } + + impl_abs! { vec_abs_i8, i8x16 } + impl_abs! { vec_abs_i16, i16x8 } + impl_abs! { vec_abs_i32, i32x4 } + + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_abs_f32(v: vector_float) -> vector_float { + let v: u32x4 = transmute(v); + + transmute(simd_and(v, u32x4::splat(0x7FFFFFFF))) + } + + impl_vec_trait! { [VectorAbs vec_abs] vec_abs_f32 (vector_float) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAbss { + unsafe fn vec_abss(self) -> Self; + } + + macro_rules! impl_abss { + ($name:ident, $ty: ident) => { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) { + let zero: s_t_l!($ty) = transmute(0u8.vec_splats()); + v.vec_max(zero.vec_subs(v)) + } + + impl_vec_trait! { [VectorAbss vec_abss] $name (s_t_l!($ty)) } + }; + } + + impl_abss! { vec_abss_i8, i8x16 } + impl_abss! { vec_abss_i16, i16x8 } + impl_abss! 
{ vec_abss_i32, i32x4 } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vspltb, IMM4 = 15))] + unsafe fn vspltb(a: vector_signed_char) -> vector_signed_char { + static_assert_uimm_bits!(IMM4, 4); + simd_shuffle(a, a, const { u32x16::from_array([IMM4; 16]) }) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsplth, IMM3 = 7))] + unsafe fn vsplth(a: vector_signed_short) -> vector_signed_short { + static_assert_uimm_bits!(IMM3, 3); + simd_shuffle(a, a, const { u32x8::from_array([IMM3; 8]) }) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature = "vsx")), assert_instr(vspltw, IMM2 = 3))] + #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxspltw, IMM2 = 3))] + unsafe fn vspltw(a: vector_signed_int) -> vector_signed_int { + static_assert_uimm_bits!(IMM2, 2); + simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) }) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSplat { + unsafe fn vec_splat(self) -> Self; + } + + macro_rules! impl_vec_splat { + ($ty:ty, $fun:ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSplat for $ty { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_splat(self) -> Self { + transmute($fun::(transmute(self))) + } + } + }; + } + + impl_vec_splat! { vector_signed_char, vspltb } + impl_vec_splat! { vector_unsigned_char, vspltb } + impl_vec_splat! { vector_bool_char, vspltb } + impl_vec_splat! { vector_signed_short, vsplth } + impl_vec_splat! { vector_unsigned_short, vsplth } + impl_vec_splat! { vector_bool_short, vsplth } + impl_vec_splat! { vector_signed_int, vspltw } + impl_vec_splat! { vector_unsigned_int, vspltw } + impl_vec_splat! { vector_bool_int, vspltw } + + macro_rules! 
splat { + ($name:ident, $v:ident, $r:ident [$instr_altivec:ident / $instr_pwr9:ident, $doc:literal]) => { + #[doc = $doc] + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, not(target_feature = "vsx")), + assert_instr($instr_altivec, IMM5 = 1) + )] + #[cfg_attr( + all(test, target_feature = "power9-vector"), + assert_instr($instr_pwr9, IMM5 = 1) + )] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn $name() -> s_t_l!($r) { + static_assert_simm_bits!(IMM5, 5); + transmute($r::splat(IMM5 as $v)) + } + }; + ($name:ident, $v:ident, $r:ident [$instr:ident, $doc:literal]) => { + splat! { $name, $v, $r [$instr / $instr, $doc] } + }; + } + + macro_rules! splats { + ($name:ident, $v:ident, $r:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $name(v: $v) -> s_t_l!($r) { + transmute($r::splat(v)) + } + }; + } + + splats! { splats_u8, u8, u8x16 } + splats! { splats_u16, u16, u16x8 } + splats! { splats_u32, u32, u32x4 } + splats! { splats_i8, i8, i8x16 } + splats! { splats_i16, i16, i16x8 } + splats! { splats_i32, i32, i32x4 } + splats! { splats_f32, f32, f32x4 } + + test_impl! { vec_splats_u8 (v: u8) -> vector_unsigned_char [splats_u8, vspltb] } + test_impl! { vec_splats_u16 (v: u16) -> vector_unsigned_short [splats_u16, vsplth] } + test_impl! { vec_splats_u32 (v: u32) -> vector_unsigned_int [splats_u32, vspltw / xxspltw / mtvsrws] } + test_impl! { vec_splats_i8 (v: i8) -> vector_signed_char [splats_i8, vspltb] } + test_impl! { vec_splats_i16 (v: i16) -> vector_signed_short [splats_i16, vsplth] } + test_impl! { vec_splats_i32 (v: i32) -> vector_signed_int [splats_i32, vspltw / xxspltw / mtvsrws] } + test_impl! { vec_splats_f32 (v: f32) -> vector_float [splats_f32, vspltw / xxspltw / mtvsrws] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSplats { + type Result; + unsafe fn vec_splats(self) -> Self::Result; + } + + macro_rules! 
impl_vec_splats { + ($(($fn:ident ($ty:ty) -> $r:ty)),*) => { + $( + impl_vec_trait!{ [VectorSplats vec_splats] $fn ($ty) -> $r } + )* + } + } + + impl_vec_splats! { + (vec_splats_u8 (u8) -> vector_unsigned_char), + (vec_splats_i8 (i8) -> vector_signed_char), + (vec_splats_u16 (u16) -> vector_unsigned_short), + (vec_splats_i16 (i16) -> vector_signed_short), + (vec_splats_u32 (u32) -> vector_unsigned_int), + (vec_splats_i32 (i32) -> vector_signed_int), + (vec_splats_f32 (f32) -> vector_float) + } + + test_impl! { vec_vsububm (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [simd_sub, vsububm] } + test_impl! { vec_vsubuhm (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [simd_sub, vsubuhm] } + test_impl! { vec_vsubuwm (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [simd_sub, vsubuwm] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSub { + type Result; + unsafe fn vec_sub(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorSub vec_sub] ~(simd_sub, simd_sub, simd_sub, simd_sub, simd_sub, simd_sub) } + impl_vec_trait! { [VectorSub vec_sub] simd_sub(vector_float, vector_float) -> vector_float } + + test_impl! { vec_vsubcuw (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vsubcuw, vsubcuw] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSubc { + type Result; + unsafe fn vec_subc(self, b: Other) -> Self::Result; + } + + impl_vec_trait! {[VectorSubc vec_subc]+ vec_vsubcuw(vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait! {[VectorSubc vec_subc]+ vec_vsubcuw(vector_signed_int, vector_signed_int) -> vector_signed_int } + + test_impl! { vec_vminsb (a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [vminsb, vminsb] } + test_impl! 
{ vec_vminsh (a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [vminsh, vminsh] } + test_impl! { vec_vminsw (a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [vminsw, vminsw] } + + test_impl! { vec_vminub (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vminub, vminub] } + test_impl! { vec_vminuh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vminuh, vminuh] } + test_impl! { vec_vminuw (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vminuw, vminuw] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMin { + type Result; + unsafe fn vec_min(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorMin vec_min] ~(vminub, vminsb, vminuh, vminsh, vminuw, vminsw) } + + test_impl! { vec_vmaxsb (a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [vmaxsb, vmaxsb] } + test_impl! { vec_vmaxsh (a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [vmaxsh, vmaxsh] } + test_impl! { vec_vmaxsw (a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [vmaxsw, vmaxsw] } + + test_impl! { vec_vmaxub (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vmaxub, vmaxub] } + test_impl! { vec_vmaxuh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vmaxuh, vmaxuh] } + test_impl! { vec_vmaxuw (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vmaxuw, vmaxuw] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMax { + type Result; + unsafe fn vec_max(self, b: Other) -> Self::Result; + } + + impl_vec_trait! 
{ [VectorMax vec_max] ~(vmaxub, vmaxsb, vmaxuh, vmaxsh, vmaxuw, vmaxsw) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmuleub))] + unsafe fn vec_vmuleub( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_short { + vmuleub(a, b) + } + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmulesb))] + unsafe fn vec_vmulesb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short { + vmulesb(a, b) + } + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmuleuh))] + unsafe fn vec_vmuleuh( + a: vector_unsigned_short, + b: vector_unsigned_short, + ) -> vector_unsigned_int { + vmuleuh(a, b) + } + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmulesh))] + unsafe fn vec_vmulesh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int { + vmulesh(a, b) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMul { + unsafe fn vec_mul(self, b: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmuluwm))] + unsafe fn vec_vmuluwm(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + transmute(simd_mul::(transmute(a), transmute(b))) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(xvmulsp))] + unsafe fn vec_xvmulsp(a: vector_float, b: vector_float) -> vector_float { + transmute(simd_mul::(transmute(a), transmute(b))) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMul for vector_signed_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mul(self, b: Self) -> Self { + vec_vmuluwm(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMul for vector_unsigned_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mul(self, b: Self) -> Self { + 
transmute(simd_mul::(transmute(self), transmute(b))) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMul for vector_float { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mul(self, b: Self) -> Self { + vec_xvmulsp(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMule { + unsafe fn vec_mule(self, b: Self) -> Result; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMule for vector_unsigned_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mule(self, b: Self) -> vector_unsigned_short { + vmuleub(self, b) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMule for vector_signed_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mule(self, b: Self) -> vector_signed_short { + vmulesb(self, b) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMule for vector_unsigned_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mule(self, b: Self) -> vector_unsigned_int { + vmuleuh(self, b) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMule for vector_signed_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mule(self, b: Self) -> vector_signed_int { + vmulesh(self, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmuloub))] + unsafe fn vec_vmuloub( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_short { + vmuloub(a, b) + } + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmulosb))] + unsafe fn vec_vmulosb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short { + vmulosb(a, b) + } + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmulouh))] + unsafe fn vec_vmulouh( + a: vector_unsigned_short, + 
b: vector_unsigned_short, + ) -> vector_unsigned_int { + vmulouh(a, b) + } + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmulosh))] + unsafe fn vec_vmulosh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int { + vmulosh(a, b) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMulo { + unsafe fn vec_mulo(self, b: Self) -> Result; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMulo for vector_unsigned_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mulo(self, b: Self) -> vector_unsigned_short { + vmuloub(self, b) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMulo for vector_signed_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mulo(self, b: Self) -> vector_signed_short { + vmulosb(self, b) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMulo for vector_unsigned_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mulo(self, b: Self) -> vector_unsigned_int { + vmulouh(self, b) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMulo for vector_signed_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mulo(self, b: Self) -> vector_signed_int { + vmulosh(self, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum4ubs))] + unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int { + vsum4ubs(a, b) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum4sbs))] + unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int { + vsum4sbs(a, b) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum4shs))] + unsafe fn vec_vsum4shs(a: vector_signed_short, b: 
vector_signed_int) -> vector_signed_int { + vsum4shs(a, b) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSum4s { + unsafe fn vec_sum4s(self, b: Other) -> Other; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSum4s for vector_unsigned_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int { + vsum4ubs(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSum4s for vector_signed_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int { + vsum4sbs(self, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSum4s for vector_signed_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int { + vsum4shs(self, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsum2sws))] + unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + vsum2sws(a, b) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vnmsubfp))] + unsafe fn vec_vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + vnmsubfp(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(xvmaddasp))] + pub unsafe fn vec_vmaddfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_fma(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmsumubm))] + unsafe fn vec_vmsumubm( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_int, + ) -> vector_unsigned_int { + vmsumubm(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmsummbm))] + 
unsafe fn vec_vmsummbm( + a: vector_signed_char, + b: vector_unsigned_char, + c: vector_signed_int, + ) -> vector_signed_int { + vmsummbm(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmsumuhm))] + unsafe fn vec_vmsumuhm( + a: vector_unsigned_short, + b: vector_unsigned_short, + c: vector_unsigned_int, + ) -> vector_unsigned_int { + vmsumuhm(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmsumshm))] + unsafe fn vec_vmsumshm( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_int, + ) -> vector_signed_int { + vmsumshm(a, b, c) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMsum { + unsafe fn vec_msum(self, b: B, c: Other) -> Other; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMsum for vector_unsigned_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_msum( + self, + b: vector_unsigned_char, + c: vector_unsigned_int, + ) -> vector_unsigned_int { + vmsumubm(self, b, c) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMsum for vector_signed_char { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_msum( + self, + b: vector_unsigned_char, + c: vector_signed_int, + ) -> vector_signed_int { + vmsummbm(self, b, c) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMsum for vector_unsigned_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_msum( + self, + b: vector_unsigned_short, + c: vector_unsigned_int, + ) -> vector_unsigned_int { + vmsumuhm(self, b, c) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMsum for vector_signed_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_msum( + self, + b: vector_signed_short, + c: vector_signed_int, + ) -> vector_signed_int { + vmsumshm(self, b, c) 
+ } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmsumuhs))] + unsafe fn vec_vmsumuhs( + a: vector_unsigned_short, + b: vector_unsigned_short, + c: vector_unsigned_int, + ) -> vector_unsigned_int { + vmsumuhs(a, b, c) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmsumshs))] + unsafe fn vec_vmsumshs( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_int, + ) -> vector_signed_int { + vmsumshs(a, b, c) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMsums { + unsafe fn vec_msums(self, b: Self, c: Other) -> Other; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMsums for vector_unsigned_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_msums(self, b: Self, c: vector_unsigned_int) -> vector_unsigned_int { + vmsumuhs(self, b, c) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMsums for vector_signed_short { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_msums(self, b: Self, c: vector_signed_int) -> vector_signed_int { + vmsumshs(self, b, c) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vperm))] + unsafe fn vec_vperm( + a: vector_signed_int, + b: vector_signed_int, + c: vector_unsigned_char, + ) -> vector_signed_int { + vperm(a, b, c) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPerm { + unsafe fn vec_vperm(self, b: Self, c: vector_unsigned_char) -> Self; + } + + macro_rules! vector_perm { + {$impl: ident} => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorPerm for $impl { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_vperm(self, b: Self, c: vector_unsigned_char) -> Self { + transmute(vec_vperm(transmute(self), transmute(b), c)) + } + } + } + } + + vector_perm! 
{ vector_signed_char } + vector_perm! { vector_unsigned_char } + vector_perm! { vector_bool_char } + + vector_perm! { vector_signed_short } + vector_perm! { vector_unsigned_short } + vector_perm! { vector_bool_short } + + vector_perm! { vector_signed_int } + vector_perm! { vector_unsigned_int } + vector_perm! { vector_bool_int } + + vector_perm! { vector_float } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAdd { + type Result; + unsafe fn vec_add(self, other: Other) -> Self::Result; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vaddubm))] + pub unsafe fn vec_add_bc_sc(a: vector_bool_char, b: vector_signed_char) -> vector_signed_char { + simd_add(transmute(a), b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_bool_char { + type Result = vector_signed_char; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_signed_char) -> Self::Result { + vec_add_bc_sc(self, other) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_signed_char { + type Result = vector_signed_char; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_bool_char) -> Self::Result { + other.vec_add(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vaddubm))] + pub unsafe fn vec_add_sc_sc( + a: vector_signed_char, + b: vector_signed_char, + ) -> vector_signed_char { + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_signed_char { + type Result = vector_signed_char; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_signed_char) -> Self::Result { + vec_add_sc_sc(self, other) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vaddubm))] + pub unsafe fn vec_add_bc_uc( + 
a: vector_bool_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + simd_add(transmute(a), b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_bool_char { + type Result = vector_unsigned_char; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_unsigned_char) -> Self::Result { + vec_add_bc_uc(self, other) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_unsigned_char { + type Result = vector_unsigned_char; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_bool_char) -> Self::Result { + other.vec_add(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vaddubm))] + pub unsafe fn vec_add_uc_uc( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_unsigned_char { + type Result = vector_unsigned_char; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_unsigned_char) -> Self::Result { + vec_add_uc_uc(self, other) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduhm))] + pub unsafe fn vec_add_bs_ss( + a: vector_bool_short, + b: vector_signed_short, + ) -> vector_signed_short { + let a: i16x8 = transmute(a); + let a: vector_signed_short = simd_cast(a); + simd_add(a, b) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_bool_short { + type Result = vector_signed_short; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_signed_short) -> Self::Result { + vec_add_bs_ss(self, other) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_signed_short { + type Result = vector_signed_short; + 
#[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_bool_short) -> Self::Result { + other.vec_add(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduhm))] + pub unsafe fn vec_add_ss_ss( + a: vector_signed_short, + b: vector_signed_short, + ) -> vector_signed_short { + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_signed_short { + type Result = vector_signed_short; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_signed_short) -> Self::Result { + vec_add_ss_ss(self, other) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduhm))] + pub unsafe fn vec_add_bs_us( + a: vector_bool_short, + b: vector_unsigned_short, + ) -> vector_unsigned_short { + let a: i16x8 = transmute(a); + let a: vector_unsigned_short = simd_cast(a); + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_bool_short { + type Result = vector_unsigned_short; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_unsigned_short) -> Self::Result { + vec_add_bs_us(self, other) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_unsigned_short { + type Result = vector_unsigned_short; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_bool_short) -> Self::Result { + other.vec_add(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduhm))] + pub unsafe fn vec_add_us_us( + a: vector_unsigned_short, + b: vector_unsigned_short, + ) -> vector_unsigned_short { + simd_add(a, b) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_unsigned_short { + type Result = vector_unsigned_short; + #[inline] + 
#[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_unsigned_short) -> Self::Result { + vec_add_us_us(self, other) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduwm))] + pub unsafe fn vec_add_bi_si(a: vector_bool_int, b: vector_signed_int) -> vector_signed_int { + let a: i32x4 = transmute(a); + let a: vector_signed_int = simd_cast(a); + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_bool_int { + type Result = vector_signed_int; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_signed_int) -> Self::Result { + vec_add_bi_si(self, other) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_signed_int { + type Result = vector_signed_int; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_bool_int) -> Self::Result { + other.vec_add(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduwm))] + pub unsafe fn vec_add_si_si(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_signed_int { + type Result = vector_signed_int; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_signed_int) -> Self::Result { + vec_add_si_si(self, other) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduwm))] + pub unsafe fn vec_add_bi_ui(a: vector_bool_int, b: vector_unsigned_int) -> vector_unsigned_int { + let a: i32x4 = transmute(a); + let a: vector_unsigned_int = simd_cast(a); + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_bool_int { + type Result = vector_unsigned_int; + #[inline] + #[target_feature(enable = 
"altivec")] + unsafe fn vec_add(self, other: vector_unsigned_int) -> Self::Result { + vec_add_bi_ui(self, other) + } + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_unsigned_int { + type Result = vector_unsigned_int; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_bool_int) -> Self::Result { + other.vec_add(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vadduwm))] + pub unsafe fn vec_add_ui_ui( + a: vector_unsigned_int, + b: vector_unsigned_int, + ) -> vector_unsigned_int { + simd_add(a, b) + } + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_unsigned_int { + type Result = vector_unsigned_int; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_unsigned_int) -> Self::Result { + vec_add_ui_ui(self, other) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(xvaddsp))] + pub unsafe fn vec_add_float_float(a: vector_float, b: vector_float) -> vector_float { + simd_add(a, b) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdd for vector_float { + type Result = vector_float; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_add(self, other: vector_float) -> Self::Result { + vec_add_float_float(self, other) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorAdde { + unsafe fn vec_adde(self, b: Self, c: Self) -> Self; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorAdde for vector_unsigned_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_adde(self, b: Self, c: Self) -> Self { + let mask: vector_unsigned_int = transmute(u32x4::new(1, 1, 1, 1)); + let carry = vec_and(c, mask); + vec_add(vec_add(self, b), carry) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = 
"111145")] + impl VectorAdde for vector_signed_int { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_adde(self, b: Self, c: Self) -> Self { + let mask: vector_signed_int = transmute(i32x4::new(1, 1, 1, 1)); + let carry = vec_and(c, mask); + vec_add(vec_add(self, b), carry) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMladd { + type Result; + unsafe fn vec_mladd(self, b: Other, c: Other) -> Self::Result; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vmladduhm))] + unsafe fn mladd( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_short, + ) -> vector_signed_short { + let a: i16x8 = transmute(a); + let b: i16x8 = transmute(b); + let c: i16x8 = transmute(c); + transmute(simd_add(simd_mul(a, b), c)) + } + + macro_rules! vector_mladd { + ($a: ident, $bc: ident, $d: ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMladd<$bc> for $a { + type Result = $d; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mladd(self, b: $bc, c: $bc) -> Self::Result { + let a = transmute(self); + let b = transmute(b); + let c = transmute(c); + + transmute(mladd(a, b, c)) + } + } + }; + } + + vector_mladd! { vector_unsigned_short, vector_unsigned_short, vector_unsigned_short } + vector_mladd! { vector_unsigned_short, vector_signed_short, vector_signed_short } + vector_mladd! { vector_signed_short, vector_unsigned_short, vector_signed_short } + vector_mladd! { vector_signed_short, vector_signed_short, vector_signed_short } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorOr { + type Result; + unsafe fn vec_or(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorOr vec_or] ~(simd_or) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXor { + type Result; + unsafe fn vec_xor(self, b: Other) -> Self::Result; + } + + impl_vec_trait! 
{ [VectorXor vec_xor] ~(simd_xor) } + + macro_rules! vector_vnor { + ($fun:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature = "vsx")), assert_instr(vnor))] + #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxlnor))] + pub unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + let o = vec_splats(!0 as $ty); + vec_xor(vec_or(a, b), o) + } + }; + } + + vector_vnor! { vec_vnorsb i8 } + vector_vnor! { vec_vnorsh i16 } + vector_vnor! { vec_vnorsw i32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorNor { + type Result; + unsafe fn vec_nor(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorNor vec_nor]+ 2b (vec_vnorsb, vec_vnorsh, vec_vnorsw) } + + macro_rules! vector_vnand { + ($fun:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature = "vsx")), assert_instr(vnand))] + #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxlnand))] + pub unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + let o = vec_splats(!0 as $ty); + vec_xor(vec_and(a, b), o) + } + }; + } + + vector_vnand! { vec_vnandsb i8 } + vector_vnand! { vec_vnandsh i16 } + vector_vnand! { vec_vnandsw i32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorNand { + type Result; + unsafe fn vec_nand(self, b: Other) -> Self::Result; + } + + impl_vec_trait! 
{ [VectorNand vec_nand]+ 2b (vec_vnandsb, vec_vnandsh, vec_vnandsw) } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature = "vsx")), assert_instr(vsel))] + #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxsel))] + pub unsafe fn vec_vsel( + a: vector_signed_char, + b: vector_signed_char, + c: vector_signed_char, + ) -> vector_signed_char { + let a: i8x16 = transmute(a); + let b: i8x16 = transmute(b); + let c: i8x16 = transmute(c); + let not_c = simd_xor(c, i8x16::splat(!0)); + + transmute(simd_or(simd_and(a, not_c), simd_and(b, c))) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSel { + unsafe fn vec_sel(self, b: Self, c: Mask) -> Self; + } + + macro_rules! vector_sel { + ($ty: ty, $m: ty) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSel<$m> for $ty { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sel(self, b: Self, c: $m) -> Self { + let a = transmute(self); + let b = transmute(b); + let c = transmute(c); + + transmute(vec_vsel(a, b, c)) + } + } + }; + ($ty: ident) => { + vector_sel! { $ty, t_b!{ $ty } } + vector_sel! { $ty, t_u!{ $ty } } + vector_sel! { t_u!{ $ty }, t_b!{ $ty } } + vector_sel! { t_u!{ $ty }, t_u!{ $ty } } + vector_sel! { t_b!{ $ty }, t_b!{ $ty } } + vector_sel! { t_b!{ $ty }, t_u!{ $ty } } + }; + (- $ty: ident) => { + vector_sel! { $ty, t_b!{ $ty } } + vector_sel! { $ty, t_u!{ $ty } } + }; + } + + vector_sel! { vector_signed_char } + vector_sel! { vector_signed_short } + vector_sel! { vector_signed_int } + vector_sel! 
{- vector_float } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcfsx, IMM5 = 1))] + unsafe fn vec_ctf_i32(a: vector_signed_int) -> vector_float { + static_assert_uimm_bits!(IMM5, 5); + vcfsx(a, IMM5) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vcfux, IMM5 = 1))] + unsafe fn vec_ctf_u32(a: vector_unsigned_int) -> vector_float { + static_assert_uimm_bits!(IMM5, 5); + vcfux(a, IMM5) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCtf { + unsafe fn vec_ctf(self) -> vector_float; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorCtf for vector_signed_int { + unsafe fn vec_ctf(self) -> vector_float { + vec_ctf_i32::(self) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorCtf for vector_unsigned_int { + unsafe fn vec_ctf(self) -> vector_float { + vec_ctf_u32::(self) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, target_endian = "little"), assert_instr(vmrghb))] + #[cfg_attr(all(test, target_endian = "big"), assert_instr(vmrglb))] + unsafe fn vec_vmrglb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let mergel_perm = transmute(u8x16::new( + 0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, + 0x0F, 0x1F, + )); + vec_perm(a, b, mergel_perm) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, target_endian = "little"), assert_instr(vmrghh))] + #[cfg_attr(all(test, target_endian = "big"), assert_instr(vmrglh))] + unsafe fn vec_vmrglh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short { + let mergel_perm = transmute(u8x16::new( + 0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, + 0x1E, 0x1F, + )); + vec_perm(a, b, mergel_perm) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, 
target_endian = "little", not(target_feature = "vsx")), + assert_instr(vmrghw) + )] + #[cfg_attr( + all(test, target_endian = "little", target_feature = "vsx"), + assert_instr(xxmrghw) + )] + #[cfg_attr( + all(test, target_endian = "big", not(target_feature = "vsx")), + assert_instr(vmrglw) + )] + #[cfg_attr( + all(test, target_endian = "big", target_feature = "vsx"), + assert_instr(xxmrglw) + )] + unsafe fn vec_vmrglw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + let mergel_perm = transmute(u8x16::new( + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, + 0x1E, 0x1F, + )); + vec_perm(a, b, mergel_perm) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, target_endian = "little"), assert_instr(vmrglb))] + #[cfg_attr(all(test, target_endian = "big"), assert_instr(vmrghb))] + unsafe fn vec_vmrghb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let mergel_perm = transmute(u8x16::new( + 0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, + 0x07, 0x17, + )); + vec_perm(a, b, mergel_perm) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, target_endian = "little"), assert_instr(vmrglh))] + #[cfg_attr(all(test, target_endian = "big"), assert_instr(vmrghh))] + unsafe fn vec_vmrghh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short { + let mergel_perm = transmute(u8x16::new( + 0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, + 0x16, 0x17, + )); + vec_perm(a, b, mergel_perm) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, target_endian = "little", not(target_feature = "vsx")), + assert_instr(vmrglw) + )] + #[cfg_attr( + all(test, target_endian = "little", target_feature = "vsx"), + assert_instr(xxmrglw) + )] + #[cfg_attr( + all(test, target_endian = "big", not(target_feature = "vsx")), + assert_instr(vmrghw) + )] + 
#[cfg_attr( + all(test, target_endian = "big", target_feature = "vsx"), + assert_instr(xxmrghw) + )] + unsafe fn vec_vmrghw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + let mergel_perm = transmute(u8x16::new( + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, + 0x16, 0x17, + )); + vec_perm(a, b, mergel_perm) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMergeh { + type Result; + unsafe fn vec_mergeh(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorMergeh vec_mergeh]+ 2b (vec_vmrghb, vec_vmrghh, vec_vmrghw) } + impl_vec_trait! { [VectorMergeh vec_mergeh]+ vec_vmrghw (vector_float, vector_float) -> vector_float } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMergel { + type Result; + unsafe fn vec_mergel(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorMergel vec_mergel]+ 2b (vec_vmrglb, vec_vmrglh, vec_vmrglw) } + impl_vec_trait! 
{ [VectorMergel vec_mergel]+ vec_vmrglw (vector_float, vector_float) -> vector_float } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkuhum))] + unsafe fn vec_vpkuhum(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char { + let pack_perm = if cfg!(target_endian = "little") { + transmute(u8x16::new( + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, + 0x1C, 0x1E, + )) + } else { + transmute(u8x16::new( + 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, + 0x1D, 0x1F, + )) + }; + + transmute(vec_perm(a, b, pack_perm)) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkuwum))] + unsafe fn vec_vpkuwum(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short { + let pack_perm = if cfg!(target_endian = "little") { + transmute(u8x16::new( + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, + 0x1C, 0x1D, + )) + } else { + transmute(u8x16::new( + 0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, + 0x1E, 0x1F, + )) + }; + + transmute(vec_perm(a, b, pack_perm)) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPack { + type Result; + unsafe fn vec_pack(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPack vec_pack]+ vec_vpkuhum (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPack vec_pack]+ vec_vpkuhum (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPack vec_pack]+ vec_vpkuhum (vector_bool_short, vector_bool_short) -> vector_bool_char } + impl_vec_trait! { [VectorPack vec_pack]+ vec_vpkuwum (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! 
{ [VectorPack vec_pack]+ vec_vpkuwum (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPack vec_pack]+ vec_vpkuwum (vector_bool_int, vector_bool_int) -> vector_bool_short } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkshss))] + unsafe fn vec_vpkshss(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char { + if cfg!(target_endian = "little") { + vpkshss(b, a) + } else { + vpkshss(a, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkshus))] + unsafe fn vec_vpkshus(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char { + if cfg!(target_endian = "little") { + vpkshus(b, a) + } else { + vpkshus(a, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkuhus))] + unsafe fn vec_vpkuhus( + a: vector_unsigned_short, + b: vector_unsigned_short, + ) -> vector_unsigned_char { + if cfg!(target_endian = "little") { + vpkuhus(b, a) + } else { + vpkuhus(a, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkswss))] + unsafe fn vec_vpkswss(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short { + if cfg!(target_endian = "little") { + vpkswss(b, a) + } else { + vpkswss(a, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkswus))] + unsafe fn vec_vpkswus(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short { + if cfg!(target_endian = "little") { + vpkswus(b, a) + } else { + vpkswus(a, b) + } + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vpkuwus))] + unsafe fn vec_vpkuwus(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_short { + if cfg!(target_endian = "little") { + vpkuwus(b, a) + } else { + vpkuwus(a, b) + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub 
trait VectorPacks { + type Result; + unsafe fn vec_packs(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPacks vec_packs] vec_vpkshss (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPacks vec_packs] vec_vpkuhus (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacks vec_packs] vec_vpkswss (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPacks vec_packs] vec_vpkuwus (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacksu { + type Result; + unsafe fn vec_packsu(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPacksu vec_packsu] vec_vpkshus (vector_signed_short, vector_signed_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vec_vpkuhus (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vec_vpkswus (vector_signed_int, vector_signed_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vec_vpkuwus (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + + macro_rules! impl_vec_unpack { + ($fun:ident ($a:ident) -> $r:ident [$little:ident, $big:ident]) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, target_endian = "little"), assert_instr($little))] + #[cfg_attr(all(test, target_endian = "big"), assert_instr($big))] + unsafe fn $fun(a: $a) -> $r { + if cfg!(target_endian = "little") { + $little(a) + } else { + $big(a) + } + } + }; + } + + impl_vec_unpack! { vec_vupkhsb (vector_signed_char) -> vector_signed_short [vupklsb, vupkhsb] } + impl_vec_unpack! { vec_vupklsb (vector_signed_char) -> vector_signed_short [vupkhsb, vupklsb] } + impl_vec_unpack! 
{ vec_vupkhsh (vector_signed_short) -> vector_signed_int [vupklsh, vupkhsh] } + impl_vec_unpack! { vec_vupklsh (vector_signed_short) -> vector_signed_int [vupkhsh, vupklsh] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorUnpackh { + type Result; + unsafe fn vec_unpackh(self) -> Self::Result; + } + + impl_vec_trait! { [VectorUnpackh vec_unpackh] vec_vupkhsb (vector_signed_char) -> vector_signed_short } + impl_vec_trait! { [VectorUnpackh vec_unpackh]+ vec_vupkhsb (vector_bool_char) -> vector_bool_short } + impl_vec_trait! { [VectorUnpackh vec_unpackh] vec_vupkhsh (vector_signed_short) -> vector_signed_int } + impl_vec_trait! { [VectorUnpackh vec_unpackh]+ vec_vupkhsh (vector_bool_short) -> vector_bool_int } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorUnpackl { + type Result; + unsafe fn vec_unpackl(self) -> Self::Result; + } + + impl_vec_trait! { [VectorUnpackl vec_unpackl] vec_vupklsb (vector_signed_char) -> vector_signed_short } + impl_vec_trait! { [VectorUnpackl vec_unpackl]+ vec_vupklsb (vector_bool_char) -> vector_bool_short } + impl_vec_trait! { [VectorUnpackl vec_unpackl] vec_vupklsh (vector_signed_short) -> vector_signed_int } + impl_vec_trait! { [VectorUnpackl vec_unpackl]+ vec_vupklsh (vector_bool_short) -> vector_bool_int } + + macro_rules! 
impl_vec_shift { + ([$Trait:ident $m:ident] ($b:ident, $h:ident, $w:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_unsigned_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_unsigned_int) -> vector_signed_int } + }; + } + + macro_rules! impl_shift { + ($fun:ident $intr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($fun))] + unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + let a = transmute(a); + let b = simd_rem( + transmute(b), + ::splat(mem::size_of::<$ty>() as $ty * $ty::BITS as $ty), + ); + + transmute($intr(a, b)) + } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSl { + type Result; + unsafe fn vec_sl(self, b: Other) -> Self::Result; + } + + impl_shift! { vslb simd_shl u8 } + impl_shift! { vslh simd_shl u16 } + impl_shift! { vslw simd_shl u32 } + + impl_vec_shift! { [VectorSl vec_sl] (vslb, vslh, vslw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSr { + type Result; + unsafe fn vec_sr(self, b: Other) -> Self::Result; + } + + impl_shift! { vsrb simd_shr u8 } + impl_shift! { vsrh simd_shr u16 } + impl_shift! { vsrw simd_shr u32 } + + impl_vec_shift! { [VectorSr vec_sr] (vsrb, vsrh, vsrw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSra { + type Result; + unsafe fn vec_sra(self, b: Other) -> Self::Result; + } + + impl_vec_shift! 
{ [VectorSra vec_sra] (vsrab, vsrah, vsraw) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSld { + unsafe fn vec_sld(self, b: Self) -> Self; + unsafe fn vec_sldw(self, b: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(vsldoi, UIMM4 = 1))] + unsafe fn vsldoi( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + static_assert_uimm_bits!(UIMM4, 4); + let d = UIMM4 as u8; + if cfg!(target_endian = "little") { + let perm = u8x16::new( + 16 - d, + 17 - d, + 18 - d, + 19 - d, + 20 - d, + 21 - d, + 22 - d, + 23 - d, + 24 - d, + 25 - d, + 26 - d, + 27 - d, + 28 - d, + 29 - d, + 30 - d, + 31 - d, + ); + + vec_perm(b, a, transmute(perm)) + } else { + let perm = u8x16::new( + d, + d + 1, + d + 2, + d + 3, + d + 4, + d + 5, + d + 6, + d + 7, + d + 8, + d + 9, + d + 10, + d + 11, + d + 12, + d + 13, + d + 14, + d + 15, + ); + vec_perm(a, b, transmute(perm)) + } + } + + // TODO: collapse the two once generic_const_exprs are usable. + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr(xxsldwi, UIMM2 = 1))] + unsafe fn xxsldwi( + a: vector_unsigned_char, + b: vector_unsigned_char, + ) -> vector_unsigned_char { + static_assert_uimm_bits!(UIMM2, 2); + let d = (UIMM2 << 2) as u8; + if cfg!(target_endian = "little") { + let perm = u8x16::new( + 16 - d, + 17 - d, + 18 - d, + 19 - d, + 20 - d, + 21 - d, + 22 - d, + 23 - d, + 24 - d, + 25 - d, + 26 - d, + 27 - d, + 28 - d, + 29 - d, + 30 - d, + 31 - d, + ); + + vec_perm(b, a, transmute(perm)) + } else { + let perm = u8x16::new( + d, + d + 1, + d + 2, + d + 3, + d + 4, + d + 5, + d + 6, + d + 7, + d + 8, + d + 9, + d + 10, + d + 11, + d + 12, + d + 13, + d + 14, + d + 15, + ); + vec_perm(a, b, transmute(perm)) + } + } + + macro_rules! 
impl_vec_sld { + ($($ty:ident),+) => { $( + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sld(self, b: Self) -> Self { + transmute(vsldoi::(transmute(self), transmute(b))) + } + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sldw(self, b: Self) -> Self { + transmute(xxsldwi::(transmute(self), transmute(b))) + } + } + )+ }; + } + + impl_vec_sld! { vector_bool_char, vector_signed_char, vector_unsigned_char } + impl_vec_sld! { vector_bool_short, vector_signed_short, vector_unsigned_short } + impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int } + impl_vec_sld! { vector_float } + + macro_rules! impl_vec_shift_long { + ([$Trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSll { + type Result; + unsafe fn vec_sll(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSll vec_sll] (vsl) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSrl { + type Result; + unsafe fn vec_srl(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSrl vec_srl] (vsr) } + + macro_rules! 
impl_vec_shift_octect { + ([$Trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_signed_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_signed_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_signed_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_signed_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_signed_char) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_float, vector_signed_char) -> vector_float } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $f (vector_float, vector_unsigned_char) -> vector_float } + }; + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSlo { + type Result; + unsafe fn vec_slo(self, b: Other) -> Self::Result; + } + + impl_vec_shift_octect! { [VectorSlo vec_slo] (vslo) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorSro { + type Result; + unsafe fn vec_sro(self, b: Other) -> Self::Result; + } + + impl_vec_shift_octect! { [VectorSro vec_sro] (vsro) } + + test_impl! 
{ vec_vcntlzb(a: vector_signed_char) -> vector_signed_char [simd_ctlz, vclzb] } + test_impl! { vec_vcntlzh(a: vector_signed_short) -> vector_signed_short [simd_ctlz, vclzh] } + test_impl! { vec_vcntlzw(a: vector_signed_int) -> vector_signed_int [simd_ctlz, vclzw] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorCntlz { + unsafe fn vec_cntlz(self) -> Self; + } + + macro_rules! impl_vec_cntlz { + ($fun:ident ($a:ty)) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorCntlz for $a { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_cntlz(self) -> Self { + transmute($fun(transmute(self))) + } + } + }; + } + + impl_vec_cntlz! { vec_vcntlzb(vector_signed_char) } + impl_vec_cntlz! { vec_vcntlzb(vector_unsigned_char) } + impl_vec_cntlz! { vec_vcntlzh(vector_signed_short) } + impl_vec_cntlz! { vec_vcntlzh(vector_unsigned_short) } + impl_vec_cntlz! { vec_vcntlzw(vector_signed_int) } + impl_vec_cntlz! { vec_vcntlzw(vector_unsigned_int) } + + macro_rules! impl_vrl { + ($fun:ident $intr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($fun))] + unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + transmute($intr(transmute(a), transmute(a), transmute(b))) + } + }; + } + + impl_vrl! { vrlb fshlb u8 } + impl_vrl! { vrlh fshlh u16 } + impl_vrl! { vrlw fshlw u32 } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorRl { + type Shift; + unsafe fn vec_rl(self, b: Self::Shift) -> Self; + } + + macro_rules! impl_vec_rl { + ($fun:ident ($a:ident)) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorRl for $a { + type Shift = t_u!($a); + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_rl(self, b: Self::Shift) -> Self { + transmute($fun(transmute(self), b)) + } + } + }; + } + + impl_vec_rl! { vrlb(vector_signed_char) } + impl_vec_rl! 
{ vrlh(vector_signed_short) } + impl_vec_rl! { vrlw(vector_signed_int) } + impl_vec_rl! { vrlb(vector_unsigned_char) } + impl_vec_rl! { vrlh(vector_unsigned_short) } + impl_vec_rl! { vrlw(vector_unsigned_int) } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorRound { + unsafe fn vec_round(self) -> Self; + } + + test_impl! { vec_vrfin(a: vector_float) -> vector_float [vrfin, xvrspic] } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorRound for vector_float { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_round(self) -> Self { + vec_vrfin(self) + } + } +} + +/// Vector Insert +/// +/// ## Purpose +/// Returns a copy of vector b with element c replaced by the value of a. +/// +/// ## Result value +/// r contains a copy of vector b with element c replaced by the value of a. +/// This function uses modular arithmetic on c to determine the element number. +/// For example, if c is out of range, the compiler uses c modulo the number of +/// elements in the vector to determine the element position. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_insert(a: T, b: ::Scalar) -> T +where + T: sealed::VectorInsert, +{ + a.vec_insert::(b) +} + +/// Vector Extract +/// +/// ## Purpose +/// Returns the value of the bth element of vector a. +/// +/// ## Result value +/// The value of each element of r is the element of a at position b modulo the number of +/// elements of a. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_extract(a: T) -> ::Scalar +where + T: sealed::VectorExtract, +{ + a.vec_extract::() +} + +/// Vector Merge Low +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mergel(a: T, b: U) -> >::Result +where + T: sealed::VectorMergel, +{ + a.vec_mergel(b) +} + +/// Vector Merge High +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mergeh(a: T, b: U) -> >::Result +where + T: sealed::VectorMergeh, +{ + a.vec_mergeh(b) +} + +/// Vector Pack +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_pack(a: T, b: U) -> >::Result +where + T: sealed::VectorPack, +{ + a.vec_pack(b) +} + +/// Vector Pack Saturated +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_packs(a: T, b: U) -> >::Result +where + T: sealed::VectorPacks, +{ + a.vec_packs(b) +} + +/// Vector Pack Saturated Unsigned +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_packsu(a: T, b: U) -> >::Result +where + T: sealed::VectorPacksu, +{ + a.vec_packsu(b) +} + +/// Vector Unpack High +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_unpackh(a: T) -> ::Result +where + T: sealed::VectorUnpackh, +{ + a.vec_unpackh() +} + +/// Vector Unpack Low +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_unpackl(a: T) -> ::Result +where + T: sealed::VectorUnpackl, +{ + a.vec_unpackl() +} + +/// Vector Shift Left +#[inline] +#[target_feature(enable = "altivec")] 
+#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sl(a: T, b: U) -> >::Result +where + T: sealed::VectorSl, +{ + a.vec_sl(b) +} + +/// Vector Shift Right +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sr(a: T, b: U) -> >::Result +where + T: sealed::VectorSr, +{ + a.vec_sr(b) +} + +/// Vector Shift Right Algebraic +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sra(a: T, b: U) -> >::Result +where + T: sealed::VectorSra, +{ + a.vec_sra(b) +} + +/// Vector Shift Left Double +/// +/// ## Endian considerations +/// +/// This intrinsic is not endian-neutral, so uses of vec_sld in +/// big-endian code must be rewritten for little-endian targets. +/// +/// Historically, vec_sld could be used to shift by amounts not a multiple of the element size +/// for most types, in which case the purpose of the shift is difficult to determine and difficult +/// to automatically rewrite efficiently for little endian. +/// +/// So the concatenation of a and b is done in big-endian fashion (left to right), and the shift is +/// always to the left. This will generally produce surprising results for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sld(a: T, b: T) -> T +where + T: sealed::VectorSld, +{ + a.vec_sld::(b) +} + +/// Vector Shift Left Double by Words +/// +/// ## Endian considerations +/// +/// This intrinsic is not endian-neutral, so uses of vec_sldw in +/// big-endian code must be rewritten for little-endian targets. +/// +/// The concatenation of a and b is done in big-endian fashion (left to right), and the shift is +/// always to the left. This will generally produce surprising results for little- endian targets. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sldw(a: T, b: T) -> T +where + T: sealed::VectorSld, +{ + a.vec_sldw::(b) +} + +/// Vector Shift Left Long +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_sll in big-endian +/// code must be rewritten for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sll(a: T, b: U) -> >::Result +where + T: sealed::VectorSll, +{ + a.vec_sll(b) +} + +/// Vector Shift Right Long +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_srl in big-endian +/// code must be rewritten for little-endian targets. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_srl(a: T, b: U) -> >::Result +where + T: sealed::VectorSrl, +{ + a.vec_srl(b) +} + +/// Vector Shift Left by Octets +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_slo in big-endian code must be rewritten +/// for little-endian targets. The shift count is in element 15 of b for big-endian, but in element +/// 0 of b for little-endian. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_slo(a: T, b: U) -> >::Result +where + T: sealed::VectorSlo, +{ + a.vec_slo(b) +} + +/// Vector Shift Right by Octets +/// +/// ## Endian considerations +/// This intrinsic is not endian-neutral, so uses of vec_sro in big-endian code must be rewritten +/// for little-endian targets. The shift count is in element 15 of b for big-endian, but in element +/// 0 of b for little-endian. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sro(a: T, b: U) -> >::Result +where + T: sealed::VectorSro, +{ + a.vec_sro(b) +} + +/// Vector Shift Left Variable +/// +/// ## Result value +/// Let v be a 17-byte vector formed from a in bytes `[0:15]` and a zero byte in element 16. +/// Then each byte element i of r is determined as follows. The start bit sb is +/// obtained from bits 5:7 of byte element i of b. Then the contents of bits sb:sb+7 of the +/// halfword in byte elements i:i+1 of v are placed into byte element i of r. +/// +/// ## Endian considerations +/// All bit and byte element numbers are specified in big-endian order. This intrinsic is not +/// endian-neutral. +#[inline] +#[target_feature(enable = "power9-altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_slv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char { + vslv(a, b) +} + +/// Vector Shift Right Variable +/// +/// ## Result value +/// Let v be a 17-byte vector formed from a zero byte in element 0 and the elements of +/// a in bytes `[1:16]`. Then each byte element i of r is determined as follows. The start bit sb is +/// obtained from bits 5:7 of byte element i of b. Then the contents of bits (8 – sb):(15 – sb) of +/// the halfword in byte elements i:i+1 of v are placed into byte element i of r. +/// +/// ## Endian considerations +/// All bit and byte element numbers are specified in big-endian order. This intrinsic is not +/// endian-neutral. +#[inline] +#[target_feature(enable = "power9-altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_srv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char { + vsrv(a, b) +} + +/// Vector Load Indexed. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_ld(off: isize, p: T) -> ::Result +where + T: sealed::VectorLd, +{ + p.vec_ld(off) +} + +/// Vector Load Indexed Least Recently Used. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_ldl(off: isize, p: T) -> ::Result +where + T: sealed::VectorLd, +{ + p.vec_ldl(off) +} + +/// Vector Load Element Indexed. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_lde(off: isize, p: T) -> ::Result +where + T: sealed::VectorLde, +{ + p.vec_lde(off) +} + +/// Vector Store Indexed +/// +/// ## Purpose +/// Stores a 16-byte vector into memory at the address specified by a displacement and a +/// pointer, ignoring the four low-order bits of the calculated address. +/// +/// ## Operation +/// A memory address is obtained by adding b and c, and masking off the four low-order +/// bits of the result. The 16-byte vector in a is stored to the resultant memory address. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_st(a: T, off: isize, c: ::Target) +where + T: sealed::VectorSt, +{ + a.vec_st(off, c) +} + +/// Vector Store Indexed Least Recently Used +/// +/// ## Purpose +/// Stores a 16-byte vector into memory at the address specified by a displacement and +/// a pointer, ignoring the four low-order bits of the calculated address, and marking the cache +/// line containing the address as least frequently used. +/// +/// ## Operation +/// A memory address is obtained by adding b and c, and masking off the four +/// low-order bits of the result. The 16-byte vector in a is stored to the resultant memory +/// address, and the containing cache line is marked as least frequently used. 
+/// +/// ## Notes +/// This intrinsic can be used to indicate the last access to a portion of memory, as a hint to the +/// data cache controller that the associated cache line can be replaced without performance loss. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_stl(a: T, off: isize, c: ::Target) +where + T: sealed::VectorSt, +{ + a.vec_stl(off, c) +} + +/// Vector Store Element Indexed +/// +/// ## Purpose +/// Stores a single element from a 16-byte vector into memory at the address specified by +/// a displacement and a pointer, aligned to the element size. +/// +/// ## Operation +/// The integer value b is added to the pointer value c. The resulting address is +/// rounded down to the nearest address that is a multiple of es, where es is 1 for char pointers, +/// 2 for short pointers, and 4 for float or int pointers. An element offset eo is calculated by +/// taking the resultant address modulo 16. The vector element of a at offset eo is stored to the +/// resultant address. +/// +/// ## Notes +/// Be careful to note that the address (b+c) is aligned to an element boundary. Do not attempt +/// to store unaligned data with this intrinsic. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_ste(a: T, off: isize, c: ::Target) +where + T: sealed::VectorSte, +{ + a.vec_ste(off, c) +} + +/// VSX Unaligned Load +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xl(off: isize, p: T) -> ::Result +where + T: sealed::VectorXl, +{ + p.vec_xl(off) +} + +/// VSX Unaligned Store +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xst(v: T, off: isize, p: ::Out) +where + T: sealed::VectorXst, +{ + v.vec_xst(off, p) +} + +/// Vector Base-2 Logarithm Estimate +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr(vlogefp))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_loge(a: vector_float) -> vector_float { + vlogefp(a) +} + +/// Vector floor. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_floor(a: vector_float) -> vector_float { + sealed::vec_floor(a) +} + +/// Vector expte. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_expte(a: vector_float) -> vector_float { + sealed::vec_vexptefp(a) +} + +/// Vector cmplt. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmplt(a: U, b: T) -> >::Result +where + T: sealed::VectorCmpGt, +{ + vec_cmpgt(b, a) +} + +/// Vector cmple. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmple(a: vector_float, b: vector_float) -> vector_bool_int { + vec_cmpge(b, a) +} + +/// Vector cmpgt. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpgt(a: T, b: U) -> >::Result +where + T: sealed::VectorCmpGt, +{ + a.vec_cmpgt(b) +} + +/// Vector cmpge. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpge(a: vector_float, b: vector_float) -> vector_bool_int { + sealed::vec_vcmpgefp(a, b) +} + +/// Vector cmpeq. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpeq(a: T, b: U) -> >::Result +where + T: sealed::VectorCmpEq, +{ + a.vec_cmpeq(b) +} + +/// Vector Compare Not Equal +/// +/// ## Result value +/// For each element of r, the value of each bit is 1 if the corresponding elements +/// of a and b are not equal. Otherwise, the value of each bit is 0. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpne(a: T, b: U) -> >::Result +where + T: sealed::VectorCmpNe, +{ + a.vec_cmpne(b) +} + +/// Vector cmpb. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cmpb(a: vector_float, b: vector_float) -> vector_signed_int { + sealed::vec_vcmpbfp(a, b) +} + +/// Vector ceil. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_ceil(a: vector_float) -> vector_float { + sealed::vec_vceil(a) +} + +/// Vector avg. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_avg(a: T, b: U) -> >::Result +where + T: sealed::VectorAvg, +{ + a.vec_avg(b) +} + +/// Vector andc. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_andc(a: T, b: U) -> >::Result +where + T: sealed::VectorAndc, +{ + a.vec_andc(b) +} + +/// Vector OR with Complement +/// +/// ## Purpose +/// Performs a bitwise OR of the first vector with the bitwise-complemented second vector. +/// +/// ## Result value +/// r is the bitwise OR of a and the bitwise complement of b. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_orc(a: T, b: U) -> >::Result +where + T: sealed::VectorOrc, +{ + a.vec_orc(b) +} + +/// Vector and. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_and(a: T, b: U) -> >::Result +where + T: sealed::VectorAnd, +{ + a.vec_and(b) +} + +/// Vector or. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_or(a: T, b: U) -> >::Result +where + T: sealed::VectorOr, +{ + a.vec_or(b) +} + +/// Vector NAND +/// +/// ## Purpose +/// Performs a bitwise NAND of two vectors. +/// +/// ## Result value +/// r is the bitwise NAND of a and b. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_nand(a: T, b: U) -> >::Result +where + T: sealed::VectorNand, +{ + a.vec_nand(b) +} + +/// Vector nor. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_nor(a: T, b: U) -> >::Result +where + T: sealed::VectorNor, +{ + a.vec_nor(b) +} + +/// Vector xor. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xor(a: T, b: U) -> >::Result +where + T: sealed::VectorXor, +{ + a.vec_xor(b) +} + +/// Vector adds. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_adds(a: T, b: U) -> >::Result +where + T: sealed::VectorAdds, +{ + a.vec_adds(b) +} + +/// Vector addc. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_addc(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int { + sealed::vec_vaddcuw(a, b) +} + +/// Vector abs. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_abs(a: T) -> T +where + T: sealed::VectorAbs, +{ + a.vec_abs() +} + +/// Vector abss. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_abss(a: T) -> T +where + T: sealed::VectorAbss, +{ + a.vec_abss() +} + +/// Vector Rotate Left +/// +/// ## Purpose +/// Rotates each element of a vector left by a given number of bits. +/// +/// ## Result value +/// Each element of r is obtained by rotating the corresponding element of a left by +/// the number of bits specified by the corresponding element of b. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_rl(a: T, b: ::Shift) -> T +where + T: sealed::VectorRl, +{ + a.vec_rl(b) +} + +/// Vector Round +/// +/// ## Purpose +/// Returns a vector containing the rounded values of the corresponding elements of the +/// source vector. +/// +/// ## Result value +/// Each element of r contains the value of the corresponding element of a, rounded +/// to the nearest representable floating-point integer, using IEEE round-to-nearest +/// rounding. +/// The current floating-point rounding mode is ignored. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_round(a: T) -> T +where + T: sealed::VectorRound, +{ + a.vec_round() +} + +/// Vector Splat +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_splat(a: T) -> T +where + T: sealed::VectorSplat, +{ + a.vec_splat::() +} + +splat! { vec_splat_u8, u8, u8x16 [vspltisb / xxspltib, "Vector Splat to Unsigned Byte"] } +splat! { vec_splat_s8, i8, i8x16 [vspltisb / xxspltib, "Vector Splat to Signed Byte"] } +splat! { vec_splat_u16, u16, u16x8 [vspltish, "Vector Splat to Unsigned Halfword"] } +splat! { vec_splat_s16, i16, i16x8 [vspltish, "Vector Splat to Signed Halfword"] } +splat! { vec_splat_u32, u32, u32x4 [vspltisw, "Vector Splat to Unsigned Word"] } +splat! { vec_splat_s32, i32, i32x4 [vspltisw, "Vector Splat to Signed Word"] } + +/// Vector splats. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_splats(a: T) -> ::Result +where + T: sealed::VectorSplats, +{ + a.vec_splats() +} + +/// Vector sub. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sub(a: T, b: U) -> >::Result +where + T: sealed::VectorSub, +{ + a.vec_sub(b) +} + +/// Vector Subtract Carryout +/// +/// ## Purpose +/// Returns a vector wherein each element contains the carry produced by subtracting the +/// corresponding elements of the two source vectors. +/// +/// ## Result value +/// The value of each element of r is the complement of the carry produced by subtract- ing the +/// value of the corresponding element of b from the value of the corresponding element of a. The +/// value is 0 if a borrow occurred, or 1 if no borrow occurred. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_subc(a: T, b: U) -> >::Result +where + T: sealed::VectorSubc, +{ + a.vec_subc(b) +} + +/// Vector subs. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_subs(a: T, b: U) -> >::Result +where + T: sealed::VectorSubs, +{ + a.vec_subs(b) +} + +/// Vector min. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_min(a: T, b: U) -> >::Result +where + T: sealed::VectorMin, +{ + a.vec_min(b) +} + +/// Vector max. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_max(a: T, b: U) -> >::Result +where + T: sealed::VectorMax, +{ + a.vec_max(b) +} + +/// Move From Vector Status and Control Register. +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr(mfvscr))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mfvscr() -> vector_unsigned_short { + mfvscr() +} + +/// Vector add. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_add(a: T, b: U) -> >::Result +where + T: sealed::VectorAdd, +{ + a.vec_add(b) +} + +/// Vector Add Extended +/// +/// ## Result value +/// The value of each element of r is produced by adding the corresponding elements of +/// a and b with a carry specified in the corresponding element of c (1 if there is a carry, 0 +/// otherwise). 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_adde(a: T, b: T, c: T) -> T +where + T: sealed::VectorAdde, +{ + a.vec_adde(b, c) +} + +/// Vector Convert to Floating-Point +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_ctf(a: T) -> vector_float +where + T: sealed::VectorCtf, +{ + a.vec_ctf::() +} + +/// Vector Convert to Signed Integer +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr(vctsxs, IMM5 = 1))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cts(a: vector_float) -> vector_signed_int { + static_assert_uimm_bits!(IMM5, 5); + + vctsxs(a, IMM5) +} + +/// Vector Convert to Unsigned Integer +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr(vctuxs, IMM5 = 1))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_ctu(a: vector_float) -> vector_unsigned_int { + static_assert_uimm_bits!(IMM5, 5); + + vctuxs(a, IMM5) +} + +/// Endian-biased intrinsics +#[cfg(target_endian = "little")] +mod endian { + use super::*; + /// Vector permute. 
+ #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T + where + T: sealed::VectorPerm, + { + // vperm has big-endian bias + // + // Xor the mask and flip the arguments + let d = transmute(u8x16::new( + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + )); + let c = simd_xor(c, d); + + b.vec_vperm(a, c) + } + + /// Vector Sum Across Partial (1/2) Saturated + #[inline] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + #[target_feature(enable = "altivec")] + pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + // vsum2sws has big-endian bias + // + // swap the even b elements with the odd ones + let flip = transmute(u8x16::new( + 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, + )); + let b = vec_perm(b, b, flip); + let c = vsum2sws(a, b); + + vec_perm(c, c, flip) + } + + // Even and Odd are swapped in little-endian + /// Vector Multiply Even + #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_mule(a: T, b: T) -> U + where + T: sealed::VectorMulo, + { + a.vec_mulo(b) + } + /// Vector Multiply Odd + #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_mulo(a: T, b: T) -> U + where + T: sealed::VectorMule, + { + a.vec_mule(b) + } +} + +/// Vector Multiply +/// +/// ## Purpose +/// Compute the products of corresponding elements of two vectors. +/// +/// ## Result value +/// Each element of r receives the product of the corresponding elements of a and b. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mul(a: T, b: T) -> T +where + T: sealed::VectorMul, +{ + a.vec_mul(b) +} + +/// Vector Multiply Add Saturated +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr(vmhaddshs))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_madds( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_short, +) -> vector_signed_short { + vmhaddshs(a, b, c) +} + +/// Vector Multiply Low and Add Unsigned Half Word +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mladd(a: T, b: U, c: U) -> >::Result +where + T: sealed::VectorMladd, +{ + a.vec_mladd(b, c) +} + +/// Vector Multiply Round and Add Saturated +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr(vmhraddshs))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mradds( + a: vector_signed_short, + b: vector_signed_short, + c: vector_signed_short, +) -> vector_signed_short { + vmhraddshs(a, b, c) +} + +/// Vector Multiply Sum +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_msum(a: T, b: B, c: U) -> U +where + T: sealed::VectorMsum, +{ + a.vec_msum(b, c) +} + +/// Vector Multiply Sum Saturated +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_msums(a: T, b: T, c: U) -> U +where + T: sealed::VectorMsums, +{ + a.vec_msums(b, c) +} + +/// Vector Multiply Add +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + sealed::vec_vmaddfp(a, b, c) +} + +/// Vector Negative Multiply Subtract +#[inline] 
+#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + vnmsubfp(a, b, c) +} + +/// Vector Select +/// +/// ## Purpose +/// Returns a vector selecting bits from two source vectors depending on the corresponding +/// bit values of a third source vector. +/// +/// ## Result value +/// Each bit of r has the value of the corresponding bit of a if the corresponding +/// bit of c is 0. Otherwise, the bit of r has the value of the corresponding bit of b. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sel(a: T, b: T, c: U) -> T +where + T: sealed::VectorSel, +{ + a.vec_sel(b, c) +} + +/// Vector Sum Across Partial (1/4) Saturated +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_sum4s(a: T, b: U) -> U +where + T: sealed::VectorSum4s, +{ + a.vec_sum4s(b) +} + +/// Vector All Elements Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_eq(a: T, b: U) -> >::Result +where + T: sealed::VectorAllEq, +{ + a.vec_all_eq(b) +} + +/// Vector All Elements Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_eq(a: T, b: U) -> >::Result +where + T: sealed::VectorAnyEq, +{ + a.vec_any_eq(b) +} + +/// Vector All Elements Greater or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_ge(a: T, b: U) -> >::Result +where + T: sealed::VectorAllGe, +{ + a.vec_all_ge(b) +} + +/// Vector Any Element Greater or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn 
vec_any_ge(a: T, b: U) -> >::Result +where + T: sealed::VectorAnyGe, +{ + a.vec_any_ge(b) +} + +/// Vector All Elements Greater Than +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_gt(a: T, b: U) -> >::Result +where + T: sealed::VectorAllGt, +{ + a.vec_all_gt(b) +} + +/// Vector Any Element Greater Than +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_gt(a: T, b: U) -> >::Result +where + T: sealed::VectorAnyGt, +{ + a.vec_any_gt(b) +} + +/// Vector All In +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpbfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_in(a: vector_float, b: vector_float) -> bool { + vcmpbfp_p(0, a, b) != 0 +} + +/// Vector All Elements Less Than or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_le(a: U, b: T) -> >::Result +where + T: sealed::VectorAllGe, +{ + b.vec_all_ge(a) +} + +/// Vector Any Element Less Than or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_le(a: U, b: T) -> >::Result +where + T: sealed::VectorAnyGe, +{ + b.vec_any_ge(a) +} + +/// Vector All Elements Less Than +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_lt(a: U, b: T) -> >::Result +where + T: sealed::VectorAllGt, +{ + b.vec_all_gt(a) +} + +/// Vector Any Element Less Than +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_lt(a: U, b: T) -> >::Result +where + T: sealed::VectorAnyGt, +{ + b.vec_any_gt(a) +} + +/// All Elements Not a Number +#[inline] +#[target_feature(enable = 
"altivec")] +#[cfg_attr(test, assert_instr("vcmpeqfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_nan(a: vector_float) -> bool { + vcmpeqfp_p(0, a, a) != 0 +} + +/// Any Elements Not a Number +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpeqfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_nan(a: vector_float) -> bool { + vcmpeqfp_p(3, a, a) != 0 +} + +/// Vector All Elements Not Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_ne(a: T, b: U) -> >::Result +where + T: sealed::VectorAllNe, +{ + a.vec_all_ne(b) +} + +/// Vector Any Elements Not Equal +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_ne(a: T, b: U) -> >::Result +where + T: sealed::VectorAnyNe, +{ + a.vec_any_ne(b) +} + +/// All Elements Not Greater Than or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgefp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_nge(a: vector_float, b: vector_float) -> bool { + vcmpgefp_p(0, a, b) != 0 +} + +/// All Elements Not Greater Than +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgtfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_ngt(a: vector_float, b: vector_float) -> bool { + vcmpgtfp_p(0, a, b) != 0 +} + +/// All Elements Not Less Than or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgefp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_nle(a: vector_float, b: vector_float) -> bool { + vcmpgefp_p(0, b, a) != 0 +} + +/// All Elements Not Less Than +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, 
assert_instr("vcmpgtfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_nlt(a: vector_float, b: vector_float) -> bool { + vcmpgtfp_p(0, b, a) != 0 +} + +/// All Elements Numeric +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgefp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_all_numeric(a: vector_float) -> bool { + vcmpgefp_p(2, a, a) != 0 +} + +/// Any Elements Not Greater Than or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgefp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_nge(a: vector_float, b: vector_float) -> bool { + vcmpgefp_p(3, a, b) != 0 +} + +/// Any Elements Not Greater Than +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgtfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_ngt(a: vector_float, b: vector_float) -> bool { + vcmpgtfp_p(3, a, b) != 0 +} + +/// Any Elements Not Less Than or Equal +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgefp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_nle(a: vector_float, b: vector_float) -> bool { + vcmpgefp_p(3, b, a) != 0 +} + +/// Any Elements Not Less Than +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgtfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_nlt(a: vector_float, b: vector_float) -> bool { + vcmpgtfp_p(3, b, a) != 0 +} + +/// Any Elements Numeric +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpgefp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_numeric(a: vector_float) -> bool { + vcmpgefp_p(1, a, a) != 0 +} + +/// Vector Count Leading Zeros +/// +/// ## Purpose +/// Returns a vector 
containing the number of most-significant bits equal to zero of each +/// corresponding element of the source vector. +/// +/// ## Result value +/// The value of each element of r is set to the number of leading zeros of the +/// corresponding element of a. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_cntlz(a: T) -> T +where + T: sealed::VectorCntlz, +{ + a.vec_cntlz() +} + +/// Any Element Out of Bounds +#[inline] +#[target_feature(enable = "altivec")] +#[cfg_attr(test, assert_instr("vcmpeqfp."))] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_any_out(a: vector_float) -> bool { + vcmpeqfp_p(1, a, a) != 0 +} + +#[cfg(target_endian = "big")] +mod endian { + use super::*; + /// Vector permute. + #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T + where + T: sealed::VectorPerm, + { + a.vec_vperm(b, c) + } + + /// Vector Sum Across Partial (1/2) Saturated + #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + vsum2sws(a, b) + } + + /// Vector Multiply Even + #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_mule(a: T, b: T) -> U + where + T: sealed::VectorMule, + { + a.vec_mule(b) + } + /// Vector Multiply Odd + #[inline] + #[target_feature(enable = "altivec")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub unsafe fn vec_mulo(a: T, b: T) -> U + where + T: sealed::VectorMulo, + { + a.vec_mulo(b) + } +} + +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub use self::endian::*; + +#[cfg(test)] +mod tests { + use super::*; + + use std::mem::transmute; + + use 
crate::core_arch::simd::*; + use stdarch_test::simd_test; + + macro_rules! test_vec_2 { + { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! { $name, $fn, $ty -> $ty, [$($a),+], [$($b),+], [$($d),+] } + }; + { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + let b: s_t_l!($ty) = transmute($ty::new($($b),+)); + + let d = $ty_out::new($($d),+); + let r : $ty_out = transmute($fn(a, b)); + assert_eq!(d, r); + } + }; + { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], $d:expr } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + let b: s_t_l!($ty) = transmute($ty::new($($b),+)); + + let r : $ty_out = transmute($fn(a, b)); + assert_eq!($d, r); + } + } + } + + macro_rules! test_vec_1 { + { $name: ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: vector_float = transmute(f32x4::new($($a),+)); + + let d: vector_float = transmute(f32x4::new($($d),+)); + let r = transmute(vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON))); + let e = m32x4::new(true, true, true, true); + assert_eq!(e, r); + } + }; + { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($d:expr),+] } => { + test_vec_1! 
{ $name, $fn, $ty -> $ty, [$($a),+], [$($d),+] } + }; + { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($d:expr),+] } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + + let d = $ty_out::new($($d),+); + let r : $ty_out = transmute($fn(a)); + assert_eq!(d, r); + } + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_ld() { + let pat = [ + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + u8x16::new( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ), + ]; + + for off in 0..16 { + let v: u8x16 = transmute(vec_ld(0, (pat.as_ptr() as *const u8).offset(off))); + assert_eq!( + v, + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + ); + } + for off in 16..32 { + let v: u8x16 = transmute(vec_ld(0, (pat.as_ptr() as *const u8).offset(off))); + assert_eq!( + v, + u8x16::new( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + ) + ); + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_xl() { + let pat = [ + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + u8x16::new( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ), + ]; + + for off in 0..16 { + let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off))); + for i in 0..16 { + let v = val.extract(i); + assert_eq!(off as usize + i, v as usize); + } + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_xst() { + let v: vector_unsigned_char = transmute(u8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + )); + + for off in 0..16 { + let mut buf = [0u8; 32]; + vec_xst(v, 0, (buf.as_mut_ptr() as *mut u8).offset(off)); + for i in 0..16 { + assert_eq!(i as u8, buf[off as usize..][i]); + } + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_ldl() { + let pat = [ + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + u8x16::new( + 16, 
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ), + ]; + + for off in 0..16 { + let v: u8x16 = transmute(vec_ldl(0, (pat.as_ptr() as *const u8).offset(off))); + assert_eq!( + v, + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + ); + } + for off in 16..32 { + let v: u8x16 = transmute(vec_ldl(0, (pat.as_ptr() as *const u8).offset(off))); + assert_eq!( + v, + u8x16::new( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + ) + ); + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_lde_u8() { + let pat = [u8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + )]; + for off in 0..16 { + let v: u8x16 = transmute(vec_lde(off, pat.as_ptr() as *const u8)); + assert_eq!(off as u8, v.extract(off as _)); + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_lde_u16() { + let pat = [u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)]; + for off in 0..8 { + let v: u16x8 = transmute(vec_lde(off * 2, pat.as_ptr() as *const u16)); + assert_eq!(off as u16, v.extract(off as _)); + } + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_lde_u32() { + let pat = [u32x4::new(0, 1, 2, 3)]; + for off in 0..4 { + let v: u32x4 = transmute(vec_lde(off * 4, pat.as_ptr() as *const u32)); + assert_eq!(off as u32, v.extract(off as _)); + } + } + + test_vec_1! { test_vec_floor, vec_floor, f32x4, + [1.1, 1.9, -0.5, -0.9], + [1.0, 1.0, -1.0, -1.0] + } + + test_vec_1! { test_vec_expte, vec_expte, f32x4, + [0.0, 2.0, 2.0, -1.0], + ~[1.0, 4.0, 4.0, 0.5] + } + + test_vec_2! { test_vec_cmpgt_i8, vec_cmpgt, i8x16 -> m8x16, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! 
{ test_vec_cmpgt_u8, vec_cmpgt, u8x16 -> m8x16, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpgt_i16, vec_cmpgt, i16x8 -> m16x8, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + [true, false, true, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpgt_u16, vec_cmpgt, u16x8 -> m16x8, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + [true, true, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpgt_i32, vec_cmpgt, i32x4 -> m32x4, + [1, -1, 0, 0], + [0, -1, 0, 1], + [true, false, false, false] + } + + test_vec_2! { test_vec_cmpgt_u32, vec_cmpgt, u32x4 -> m32x4, + [1, 255, 0, 0], + [0, 255, 0, 1], + [true, false, false, false] + } + + test_vec_2! { test_vec_cmpge, vec_cmpge, f32x4 -> m32x4, + [0.1, -0.1, 0.0, 0.99], + [0.1, 0.0, 0.1, 1.0], + [true, false, false, false] + } + + test_vec_2! { test_vec_cmpeq_i8, vec_cmpeq, i8x16 -> m8x16, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [false, false, false, false, true, true, true, true, true, true, true, true, true, true, true, true] + } + + test_vec_2! { test_vec_cmpeq_u8, vec_cmpeq, u8x16 -> m8x16, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [false, false, false, false, true, true, true, true, true, true, true, true, true, true, true, true] + } + + test_vec_2! { test_vec_cmpeq_i16, vec_cmpeq, i16x8 -> m16x8, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + [false, false, false, false, true, true, true, true] + } + + test_vec_2! { test_vec_cmpeq_u16, vec_cmpeq, u16x8 -> m16x8, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + [false, false, false, false, true, true, true, true] + } + + test_vec_2! 
{ test_vec_cmpeq_i32, vec_cmpeq, i32x4 -> m32x4, + [1, -1, 0, 0], + [0, -1, 0, 1], + [false, true, true, false] + } + + test_vec_2! { test_vec_cmpeq_u32, vec_cmpeq, u32x4 -> m32x4, + [1, 255, 0, 0], + [0, 255, 0, 1], + [false, true, true, false] + } + + test_vec_2! { test_vec_cmpne_i8, vec_cmpne, i8x16 -> m8x16, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_u8, vec_cmpne, u8x16 -> m8x16, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_i16, vec_cmpne, i16x8 -> m16x8, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_u16, vec_cmpne, u16x8 -> m16x8, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + [true, true, true, true, false, false, false, false] + } + + test_vec_2! { test_vec_cmpne_i32, vec_cmpne, i32x4 -> m32x4, + [1, -1, 0, 0], + [0, -1, 0, 1], + [true, false, false, true] + } + + test_vec_2! { test_vec_cmpne_u32, vec_cmpne, u32x4 -> m32x4, + [1, 255, 0, 0], + [0, 255, 0, 1], + [true, false, false, true] + } + + test_vec_2! { test_vec_all_eq_i8_false, vec_all_eq, i8x16 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_eq_u8_false, vec_all_eq, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_eq_i16_false, vec_all_eq, i16x8 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + false + } + + test_vec_2! 
{ test_vec_all_eq_u16_false, vec_all_eq, u16x8 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_eq_i32_false, vec_all_eq, i32x4 -> bool, + [1, -1, 0, 0], + [0, -1, 0, 1], + false + } + + test_vec_2! { test_vec_all_eq_u32_false, vec_all_eq, u32x4 -> bool, + [1, 255, 0, 0], + [0, 255, 0, 1], + false + } + + test_vec_2! { test_vec_all_eq_i8_true, vec_all_eq, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_eq_u8_true, vec_all_eq, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_eq_i16_true, vec_all_eq, i16x8 -> bool, + [1, -1, 1, 0, 0, 0, 0, 0], + [1, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_eq_u16_true, vec_all_eq, u16x8 -> bool, + [1, 255, 1, 0, 0, 0, 0, 0], + [1, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_eq_i32_true, vec_all_eq, i32x4 -> bool, + [1, -1, 0, 1], + [1, -1, 0, 1], + true + } + + test_vec_2! { test_vec_all_eq_u32_true, vec_all_eq, u32x4 -> bool, + [1, 255, 0, 1], + [1, 255, 0, 1], + true + } + + test_vec_2! { test_vec_any_eq_i8_false, vec_any_eq, i8x16 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_eq_u8_false, vec_any_eq, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_eq_i16_false, vec_any_eq, i16x8 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_eq_u16_false, vec_any_eq, u16x8 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 1, 1, 1, 1], + false + } + + test_vec_2! 
{ test_vec_any_eq_i32_false, vec_any_eq, i32x4 -> bool, + [1, -1, 0, 0], + [0, -2, 1, 1], + false + } + + test_vec_2! { test_vec_any_eq_u32_false, vec_any_eq, u32x4 -> bool, + [1, 2, 1, 0], + [0, 255, 0, 1], + false + } + + test_vec_2! { test_vec_any_eq_i8_true, vec_any_eq, i8x16 -> bool, + [1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_eq_u8_true, vec_any_eq, u8x16 -> bool, + [0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_eq_i16_true, vec_any_eq, i16x8 -> bool, + [0, -1, 1, 0, 0, 0, 0, 0], + [1, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_eq_u16_true, vec_any_eq, u16x8 -> bool, + [0, 255, 1, 0, 0, 0, 0, 0], + [1, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_eq_i32_true, vec_any_eq, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 1], + true + } + + test_vec_2! { test_vec_any_eq_u32_true, vec_any_eq, u32x4 -> bool, + [0, 255, 0, 1], + [1, 255, 0, 1], + true + } + + test_vec_2! { test_vec_all_ge_i8_false, vec_all_ge, i8x16 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ge_u8_false, vec_all_ge, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ge_i16_false, vec_all_ge, i16x8 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ge_u16_false, vec_all_ge, u16x8 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ge_i32_false, vec_all_ge, i32x4 -> bool, + [1, -1, 0, 0], + [0, -1, 0, 1], + false + } + + test_vec_2! 
{ test_vec_all_ge_u32_false, vec_all_ge, u32x4 -> bool, + [1, 255, 0, 0], + [0, 255, 1, 1], + false + } + + test_vec_2! { test_vec_all_ge_i8_true, vec_all_ge, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ge_u8_true, vec_all_ge, u8x16 -> bool, + [1, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ge_i16_true, vec_all_ge, i16x8 -> bool, + [1, -1, 42, 0, 0, 0, 0, 0], + [1, -5, 2, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ge_u16_true, vec_all_ge, u16x8 -> bool, + [42, 255, 1, 0, 0, 0, 0, 0], + [2, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ge_i32_true, vec_all_ge, i32x4 -> bool, + [1, -1, 0, 1], + [0, -1, 0, 1], + true + } + + test_vec_2! { test_vec_all_ge_u32_true, vec_all_ge, u32x4 -> bool, + [1, 255, 0, 1], + [1, 254, 0, 0], + true + } + + test_vec_2! { test_vec_any_ge_i8_false, vec_any_ge, i8x16 -> bool, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_ge_u8_false, vec_any_ge, u8x16 -> bool, + [1, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [42, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_ge_i16_false, vec_any_ge, i16x8 -> bool, + [1, -1, -2, 0, 0, 0, 0, 0], + [2, 0, -1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_ge_u16_false, vec_any_ge, u16x8 -> bool, + [1, 2, 0, 0, 0, 0, 0, 0], + [2, 42, 255, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_ge_i32_false, vec_any_ge, i32x4 -> bool, + [1, -1, 0, 0], + [2, 0, 1, 1], + false + } + + test_vec_2! { test_vec_any_ge_u32_false, vec_any_ge, u32x4 -> bool, + [1, 2, 1, 0], + [4, 255, 4, 1], + false + } + + test_vec_2! 
{ test_vec_any_ge_i8_true, vec_any_ge, i8x16 -> bool, + [1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ge_u8_true, vec_any_ge, u8x16 -> bool, + [0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ge_i16_true, vec_any_ge, i16x8 -> bool, + [0, -1, 1, 0, 0, 0, 0, 0], + [1, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ge_u16_true, vec_any_ge, u16x8 -> bool, + [0, 255, 1, 0, 0, 0, 0, 0], + [1, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ge_i32_true, vec_any_ge, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 1], + true + } + + test_vec_2! { test_vec_any_ge_u32_true, vec_any_ge, u32x4 -> bool, + [0, 255, 0, 1], + [1, 255, 0, 1], + true + } + + test_vec_2! { test_vec_all_gt_i8_false, vec_all_gt, i8x16 -> bool, + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_gt_u8_false, vec_all_gt, u8x16 -> bool, + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_gt_i16_false, vec_all_gt, i16x8 -> bool, + [1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_gt_u16_false, vec_all_gt, u16x8 -> bool, + [1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_gt_i32_false, vec_all_gt, i32x4 -> bool, + [1, -1, 0, 0], + [0, -1, 0, 1], + false + } + + test_vec_2! { test_vec_all_gt_u32_false, vec_all_gt, u32x4 -> bool, + [1, 255, 0, 0], + [0, 255, 1, 1], + false + } + + test_vec_2! { test_vec_all_gt_i8_true, vec_all_gt, i8x16 -> bool, + [2, 1, -1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], + true + } + + test_vec_2! 
{ test_vec_all_gt_u8_true, vec_all_gt, u8x16 -> bool, + [1, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_gt_i16_true, vec_all_gt, i16x8 -> bool, + [1, -1, 42, 1, 1, 1, 1, 1], + [0, -5, 2, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_gt_u16_true, vec_all_gt, u16x8 -> bool, + [42, 255, 1, 1, 1, 1, 1, 1], + [2, 254, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_gt_i32_true, vec_all_gt, i32x4 -> bool, + [1, -1, 1, 1], + [0, -2, 0, 0], + true + } + + test_vec_2! { test_vec_all_gt_u32_true, vec_all_gt, u32x4 -> bool, + [1, 255, 1, 1], + [0, 254, 0, 0], + true + } + + test_vec_2! { test_vec_any_gt_i8_false, vec_any_gt, i8x16 -> bool, + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_gt_u8_false, vec_any_gt, u8x16 -> bool, + [1, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [42, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_gt_i16_false, vec_any_gt, i16x8 -> bool, + [1, -1, -2, 0, 0, 0, 0, 0], + [2, 0, -1, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_gt_u16_false, vec_any_gt, u16x8 -> bool, + [1, 2, 0, 0, 0, 0, 0, 0], + [2, 42, 255, 1, 1, 1, 1, 1], + false + } + + test_vec_2! { test_vec_any_gt_i32_false, vec_any_gt, i32x4 -> bool, + [1, -1, 0, 0], + [2, 0, 1, 1], + false + } + + test_vec_2! { test_vec_any_gt_u32_false, vec_any_gt, u32x4 -> bool, + [1, 2, 1, 0], + [4, 255, 4, 1], + false + } + + test_vec_2! { test_vec_any_gt_i8_true, vec_any_gt, i8x16 -> bool, + [1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_gt_u8_true, vec_any_gt, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! 
{ test_vec_any_gt_i16_true, vec_any_gt, i16x8 -> bool, + [1, -1, 1, 0, 0, 0, 0, 0], + [0, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_gt_u16_true, vec_any_gt, u16x8 -> bool, + [1, 255, 1, 0, 0, 0, 0, 0], + [0, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_gt_i32_true, vec_any_gt, i32x4 -> bool, + [1, -1, 0, 1], + [0, -1, 0, 1], + true + } + + test_vec_2! { test_vec_any_gt_u32_true, vec_any_gt, u32x4 -> bool, + [1, 255, 0, 1], + [0, 255, 0, 1], + true + } + + test_vec_2! { test_vec_all_in_true, vec_all_in, f32x4 -> bool, + [0.0, -0.1, 0.0, 0.0], + [0.1, 0.2, 0.0, 0.0], + true + } + + test_vec_2! { test_vec_all_in_false, vec_all_in, f32x4 -> bool, + [0.5, 0.4, -0.5, 0.8], + [0.1, 0.4, -0.5, 0.8], + false + } + + test_vec_2! { test_vec_all_le_i8_false, vec_all_le, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_le_u8_false, vec_all_le, u8x16 -> bool, + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_le_i16_false, vec_all_le, i16x8 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0], + [1, -1, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_le_u16_false, vec_all_le, u16x8 -> bool, + [0, 0, 255, 1, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_le_i32_false, vec_all_le, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 0], + false + } + + test_vec_2! { test_vec_all_le_u32_false, vec_all_le, u32x4 -> bool, + [0, 255, 1, 1], + [1, 255, 0, 0], + false + } + + test_vec_2! { test_vec_all_le_i8_true, vec_all_le, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! 
{ test_vec_all_le_u8_true, vec_all_le, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_le_i16_true, vec_all_le, i16x8 -> bool, + [1, -5, 2, 0, 0, 0, 0, 0], + [1, -1, 42, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_le_u16_true, vec_all_le, u16x8 -> bool, + [2, 255, 1, 0, 0, 0, 0, 0], + [42, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_le_i32_true, vec_all_le, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 1], + true + } + + test_vec_2! { test_vec_all_le_u32_true, vec_all_le, u32x4 -> bool, + [1, 254, 0, 0], + [1, 255, 0, 1], + true + } + + test_vec_2! { test_vec_any_le_i8_false, vec_any_le, i8x16 -> bool, + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_le_u8_false, vec_any_le, u8x16 -> bool, + [42, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_le_i16_false, vec_any_le, i16x8 -> bool, + [2, 0, -1, 1, 1, 1, 1, 1], + [1, -1, -2, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_le_u16_false, vec_any_le, u16x8 -> bool, + [2, 42, 255, 1, 1, 1, 1, 1], + [1, 2, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_le_i32_false, vec_any_le, i32x4 -> bool, + [2, 0, 1, 1], + [1, -1, 0, 0], + false + } + + test_vec_2! { test_vec_any_le_u32_false, vec_any_le, u32x4 -> bool, + [4, 255, 4, 1], + [1, 2, 1, 0], + false + } + + test_vec_2! { test_vec_any_le_i8_true, vec_any_le, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_le_u8_true, vec_any_le, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! 
{ test_vec_any_le_i16_true, vec_any_le, i16x8 -> bool, + [1, -1, 1, 0, 0, 0, 0, 0], + [0, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_le_u16_true, vec_any_le, u16x8 -> bool, + [1, 255, 1, 0, 0, 0, 0, 0], + [0, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_le_i32_true, vec_any_le, i32x4 -> bool, + [1, -1, 0, 1], + [0, -1, 0, 1], + true + } + + test_vec_2! { test_vec_any_le_u32_true, vec_any_le, u32x4 -> bool, + [1, 255, 0, 1], + [0, 255, 0, 1], + true + } + + test_vec_2! { test_vec_all_lt_i8_false, vec_all_lt, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_lt_u8_false, vec_all_lt, u8x16 -> bool, + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_lt_i16_false, vec_all_lt, i16x8 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_lt_u16_false, vec_all_lt, u16x8 -> bool, + [0, 0, 255, 1, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_lt_i32_false, vec_all_lt, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 0], + false + } + + test_vec_2! { test_vec_all_lt_u32_false, vec_all_lt, u32x4 -> bool, + [0, 255, 1, 1], + [1, 255, 0, 0], + false + } + + test_vec_2! { test_vec_all_lt_i8_true, vec_all_lt, i8x16 -> bool, + [0, 0, -2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], + [2, 1, -1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_lt_u8_true, vec_all_lt, u8x16 -> bool, + [0, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + true + } + + test_vec_2! { test_vec_all_lt_i16_true, vec_all_lt, i16x8 -> bool, + [0, -5, 2, 0, 0, 0, 0, 0], + [1, -1, 42, 1, 1, 1, 1, 1], + true + } + + test_vec_2! 
{ test_vec_all_lt_u16_true, vec_all_lt, u16x8 -> bool, + [2, 254, 0, 0, 0, 0, 0, 0], + [42, 255, 1, 1, 1, 1, 1, 1], + true + } + + test_vec_2! { test_vec_all_lt_i32_true, vec_all_lt, i32x4 -> bool, + [0, -2, 0, 0], + [1, -1, 1, 1], + true + } + + test_vec_2! { test_vec_all_lt_u32_true, vec_all_lt, u32x4 -> bool, + [0, 254, 0, 0], + [1, 255, 1, 1], + true + } + + test_vec_2! { test_vec_any_lt_i8_false, vec_any_lt, i8x16 -> bool, + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_lt_u8_false, vec_any_lt, u8x16 -> bool, + [42, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_lt_i16_false, vec_any_lt, i16x8 -> bool, + [2, 0, -1, 1, 1, 1, 1, 1], + [1, -1, -2, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_lt_u16_false, vec_any_lt, u16x8 -> bool, + [2, 42, 255, 1, 1, 1, 1, 1], + [1, 2, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_lt_i32_false, vec_any_lt, i32x4 -> bool, + [2, 0, 1, 1], + [1, -1, 0, 0], + false + } + + test_vec_2! { test_vec_any_lt_u32_false, vec_any_lt, u32x4 -> bool, + [4, 255, 4, 1], + [1, 2, 1, 0], + false + } + + test_vec_2! { test_vec_any_lt_i8_true, vec_any_lt, i8x16 -> bool, + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_lt_u8_true, vec_any_lt, u8x16 -> bool, + [0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_lt_i16_true, vec_any_lt, i16x8 -> bool, + [0, -1, 1, 0, 0, 0, 0, 0], + [1, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_lt_u16_true, vec_any_lt, u16x8 -> bool, + [0, 255, 1, 0, 0, 0, 0, 0], + [1, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! 
{ test_vec_any_lt_i32_true, vec_any_lt, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 1], + true + } + + test_vec_2! { test_vec_any_lt_u32_true, vec_any_lt, u32x4 -> bool, + [0, 255, 0, 1], + [1, 255, 0, 1], + true + } + + test_vec_2! { test_vec_all_ne_i8_false, vec_all_ne, i8x16 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ne_u8_false, vec_all_ne, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 255, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ne_i16_false, vec_all_ne, i16x8 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0], + [0, -1, 1, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ne_u16_false, vec_all_ne, u16x8 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0], + [0, 255, 0, 1, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_all_ne_i32_false, vec_all_ne, i32x4 -> bool, + [1, -1, 0, 0], + [0, -1, 0, 1], + false + } + + test_vec_2! { test_vec_all_ne_u32_false, vec_all_ne, u32x4 -> bool, + [1, 255, 0, 0], + [0, 255, 0, 1], + false + } + + test_vec_2! { test_vec_all_ne_i8_true, vec_all_ne, i8x16 -> bool, + [0, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ne_u8_true, vec_all_ne, u8x16 -> bool, + [0, 254, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ne_i16_true, vec_all_ne, i16x8 -> bool, + [2, -2, 0, 1, 1, 1, 1, 1], + [1, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_all_ne_u16_true, vec_all_ne, u16x8 -> bool, + [0, 254, 1, 1, 0, 0, 1, 0], + [1, 255, 0, 0, 1, 1, 0, 1], + true + } + + test_vec_2! { test_vec_all_ne_i32_true, vec_all_ne, i32x4 -> bool, + [0, -2, 0, 0], + [1, -1, 1, 1], + true + } + + test_vec_2! 
{ test_vec_all_ne_u32_true, vec_all_ne, u32x4 -> bool, + [1, 255, 0, 0], + [0, 254, 1, 1], + true + } + + test_vec_2! { test_vec_any_ne_i8_false, vec_any_ne, i8x16 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_ne_u8_false, vec_any_ne, u8x16 -> bool, + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_ne_i16_false, vec_any_ne, i16x8 -> bool, + [1, -1, 0, 0, 0, 0, 0, 0], + [1, -1, 0, 0, 0, 0, 0, 0], + false + } + + test_vec_2! { test_vec_any_ne_u16_false, vec_any_ne, u16x8 -> bool, + [1, 255, 1, 1, 1, 1, 1, 0], + [1, 255, 1, 1, 1, 1, 1, 0], + false + } + + test_vec_2! { test_vec_any_ne_i32_false, vec_any_ne, i32x4 -> bool, + [0, -1, 1, 1], + [0, -1, 1, 1], + false + } + + test_vec_2! { test_vec_any_ne_u32_false, vec_any_ne, u32x4 -> bool, + [1, 2, 1, 255], + [1, 2, 1, 255], + false + } + + test_vec_2! { test_vec_any_ne_i8_true, vec_any_ne, i8x16 -> bool, + [1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ne_u8_true, vec_any_ne, u8x16 -> bool, + [0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ne_i16_true, vec_any_ne, i16x8 -> bool, + [0, -1, 1, 0, 0, 0, 0, 0], + [1, -1, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ne_u16_true, vec_any_ne, u16x8 -> bool, + [0, 255, 1, 0, 0, 0, 0, 0], + [1, 255, 1, 0, 0, 0, 0, 0], + true + } + + test_vec_2! { test_vec_any_ne_i32_true, vec_any_ne, i32x4 -> bool, + [0, -1, 0, 1], + [1, -1, 0, 1], + true + } + + test_vec_2! 
{ test_vec_any_ne_u32_true, vec_any_ne, u32x4 -> bool, + [0, 255, 0, 1], + [1, 255, 0, 1], + true + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_cmpb() { + let a: vector_float = transmute(f32x4::new(0.1, 0.5, 0.6, 0.9)); + let b: vector_float = transmute(f32x4::new(-0.1, 0.5, -0.6, 0.9)); + let d = i32x4::new( + -0b10000000000000000000000000000000, + 0, + -0b10000000000000000000000000000000, + 0, + ); + + assert_eq!(d, transmute(vec_cmpb(a, b))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_ceil() { + let a: vector_float = transmute(f32x4::new(0.1, 0.5, 0.6, 0.9)); + let d = f32x4::new(1.0, 1.0, 1.0, 1.0); + + assert_eq!(d, transmute(vec_ceil(a))); + } + + test_vec_2! { test_vec_andc, vec_andc, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b10000000], + [0b11001100, 0b00001100, 0b11000000, 0b01001100] } + + test_vec_2! { test_vec_and, vec_and, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [0b00000000, 0b11000000, 0b00001100, 0b00000000] } + + macro_rules! test_vec_avg { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! {$name, vec_avg, $ty, [$($a),+], [$($b),+], [$($d),+] } + } + } + + test_vec_avg! { test_vec_avg_i32x4, i32x4, + [i32::MIN, i32::MAX, 1, -1], + [-1, 1, 1, -1], + [-1073741824, 1073741824, 1, -1] } + + test_vec_avg! { test_vec_avg_u32x4, u32x4, + [u32::MAX, 0, 1, 2], + [2, 1, 0, 0], + [2147483649, 1, 1, 1] } + + test_vec_avg! { test_vec_avg_i16x8, i16x8, + [i16::MIN, i16::MAX, 1, -1, 0, 0, 0, 0], + [-1, 1, 1, -1, 0, 0, 0, 0], + [-16384, 16384, 1, -1, 0, 0, 0, 0] } + + test_vec_avg! { test_vec_avg_u16x8, u16x8, + [u16::MAX, 0, 1, 2, 0, 0, 0, 0], + [2, 1, 0, 0, 0, 0, 0, 0], + [32769, 1, 1, 1, 0, 0, 0, 0] } + + test_vec_avg! 
{ test_vec_avg_i8x16, i8x16, + [i8::MIN, i8::MAX, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [-1, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [-64, 64, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + + test_vec_avg! { test_vec_avg_u8x16, u8x16, + [u8::MAX, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [129, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + + macro_rules! test_vec_adds { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! {$name, vec_adds, $ty, [$($a),+], [$($b),+], [$($d),+] } + } + } + + test_vec_adds! { test_vec_adds_i32x4, i32x4, + [i32::MIN, i32::MAX, 1, -1], + [-1, 1, 1, -1], + [i32::MIN, i32::MAX, 2, -2] } + + test_vec_adds! { test_vec_adds_u32x4, u32x4, + [u32::MAX, 0, 1, 2], + [2, 1, 0, 0], + [u32::MAX, 1, 1, 2] } + + test_vec_adds! { test_vec_adds_i16x8, i16x8, + [i16::MIN, i16::MAX, 1, -1, 0, 0, 0, 0], + [-1, 1, 1, -1, 0, 0, 0, 0], + [i16::MIN, i16::MAX, 2, -2, 0, 0, 0, 0] } + + test_vec_adds! { test_vec_adds_u16x8, u16x8, + [u16::MAX, 0, 1, 2, 0, 0, 0, 0], + [2, 1, 0, 0, 0, 0, 0, 0], + [u16::MAX, 1, 1, 2, 0, 0, 0, 0] } + + test_vec_adds! { test_vec_adds_i8x16, i8x16, + [i8::MIN, i8::MAX, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [-1, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [i8::MIN, i8::MAX, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + + test_vec_adds! { test_vec_adds_u8x16, u8x16, + [u8::MAX, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [u8::MAX, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + + test_vec_2! { test_vec_addc, vec_addc, u32x4, [u32::MAX, 0, 0, 0], [1, 1, 1, 1], [1, 0, 0, 0] } + + macro_rules! 
test_vec_abs { + { $name: ident, $ty: ident, $a: expr, $d: expr } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a = vec_splats($a); + let a: s_t_l!($ty) = vec_abs(a); + let d = $ty::splat($d); + assert_eq!(d, transmute(a)); + } + } + } + + test_vec_abs! { test_vec_abs_i8, i8x16, -42i8, 42i8 } + test_vec_abs! { test_vec_abs_i16, i16x8, -42i16, 42i16 } + test_vec_abs! { test_vec_abs_i32, i32x4, -42i32, 42i32 } + test_vec_abs! { test_vec_abs_f32, f32x4, -42f32, 42f32 } + + macro_rules! test_vec_abss { + { $name: ident, $ty: ident, $a: expr, $d: expr } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a = vec_splats($a); + let a: s_t_l!($ty) = vec_abss(a); + let d = $ty::splat($d); + assert_eq!(d, transmute(a)); + } + } + } + + test_vec_abss! { test_vec_abss_i8, i8x16, -127i8, 127i8 } + test_vec_abss! { test_vec_abss_i16, i16x8, -42i16, 42i16 } + test_vec_abss! { test_vec_abss_i32, i32x4, -42i32, 42i32 } + + macro_rules! test_vec_splats { + { $name: ident, $ty: ident, $a: expr } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: s_t_l!($ty) = vec_splats($a); + let d = $ty::splat($a); + assert_eq!(d, transmute(a)); + } + } + } + + test_vec_splats! { test_vec_splats_u8, u8x16, 42u8 } + test_vec_splats! { test_vec_splats_u16, u16x8, 42u16 } + test_vec_splats! { test_vec_splats_u32, u32x4, 42u32 } + test_vec_splats! { test_vec_splats_i8, i8x16, 42i8 } + test_vec_splats! { test_vec_splats_i16, i16x8, 42i16 } + test_vec_splats! { test_vec_splats_i32, i32x4, 42i32 } + test_vec_splats! { test_vec_splats_f32, f32x4, 42f32 } + + macro_rules! test_vec_splat { + { $name: ident, $fun: ident, $ty: ident, $a: expr, $b: expr} => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a = $fun::<$a>(); + let d = $ty::splat($b); + assert_eq!(d, transmute(a)); + } + } + } + + test_vec_splat! { test_vec_splat_u8, vec_splat_u8, u8x16, -1, u8::MAX } + test_vec_splat! 
{ test_vec_splat_u16, vec_splat_u16, u16x8, -1, u16::MAX } + test_vec_splat! { test_vec_splat_u32, vec_splat_u32, u32x4, -1, u32::MAX } + test_vec_splat! { test_vec_splat_s8, vec_splat_s8, i8x16, -1, -1 } + test_vec_splat! { test_vec_splat_s16, vec_splat_s16, i16x8, -1, -1 } + test_vec_splat! { test_vec_splat_s32, vec_splat_s32, i32x4, -1, -1 } + + macro_rules! test_vec_sub { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! {$name, vec_sub, $ty, [$($a),+], [$($b),+], [$($d),+] } + } + } + + test_vec_sub! { test_vec_sub_f32x4, f32x4, + [-1.0, 0.0, 1.0, 2.0], + [2.0, 1.0, -1.0, -2.0], + [-3.0, -1.0, 2.0, 4.0] } + + test_vec_sub! { test_vec_sub_i32x4, i32x4, + [-1, 0, 1, 2], + [2, 1, -1, -2], + [-3, -1, 2, 4] } + + test_vec_sub! { test_vec_sub_u32x4, u32x4, + [0, 0, 1, 2], + [2, 1, 0, 0], + [4294967294, 4294967295, 1, 2] } + + test_vec_sub! { test_vec_sub_i16x8, i16x8, + [-1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2], + [-3, -1, 2, 4, -3, -1, 2, 4] } + + test_vec_sub! { test_vec_sub_u16x8, u16x8, + [0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0], + [65534, 65535, 1, 2, 65534, 65535, 1, 2] } + + test_vec_sub! { test_vec_sub_i8x16, i8x16, + [-1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2], + [-3, -1, 2, 4, -3, -1, 2, 4, -3, -1, 2, 4, -3, -1, 2, 4] } + + test_vec_sub! { test_vec_sub_u8x16, u8x16, + [0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0], + [254, 255, 1, 2, 254, 255, 1, 2, 254, 255, 1, 2, 254, 255, 1, 2] } + + macro_rules! test_vec_subs { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! {$name, vec_subs, $ty, [$($a),+], [$($b),+], [$($d),+] } + } + } + + test_vec_subs! { test_vec_subs_i32x4, i32x4, + [-1, 0, 1, 2], + [2, 1, -1, -2], + [-3, -1, 2, 4] } + + test_vec_subs! 
{ test_vec_subs_u32x4, u32x4, + [0, 0, 1, 2], + [2, 1, 0, 0], + [0, 0, 1, 2] } + + test_vec_subs! { test_vec_subs_i16x8, i16x8, + [-1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2], + [-3, -1, 2, 4, -3, -1, 2, 4] } + + test_vec_subs! { test_vec_subs_u16x8, u16x8, + [0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0], + [0, 0, 1, 2, 0, 0, 1, 2] } + + test_vec_subs! { test_vec_subs_i8x16, i8x16, + [-1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2], + [-3, -1, 2, 4, -3, -1, 2, 4, -3, -1, 2, 4, -3, -1, 2, 4] } + + test_vec_subs! { test_vec_subs_u8x16, u8x16, + [0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0], + [0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2] } + + macro_rules! test_vec_min { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + let b: s_t_l!($ty) = transmute($ty::new($($b),+)); + + let d = $ty::new($($d),+); + let r : $ty = transmute(vec_min(a, b)); + assert_eq!(d, r); + } + } + } + + test_vec_min! { test_vec_min_i32x4, i32x4, + [-1, 0, 1, 2], + [2, 1, -1, -2], + [-1, 0, -1, -2] } + + test_vec_min! { test_vec_min_u32x4, u32x4, + [0, 0, 1, 2], + [2, 1, 0, 0], + [0, 0, 0, 0] } + + test_vec_min! { test_vec_min_i16x8, i16x8, + [-1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2], + [-1, 0, -1, -2, -1, 0, -1, -2] } + + test_vec_min! { test_vec_min_u16x8, u16x8, + [0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0] } + + test_vec_min! { test_vec_min_i8x16, i8x16, + [-1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2], + [-1, 0, -1, -2, -1, 0, -1, -2, -1, 0, -1, -2, -1, 0, -1, -2] } + + test_vec_min! 
{ test_vec_min_u8x16, u8x16, + [0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + + macro_rules! test_vec_max { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + let b: s_t_l!($ty) = transmute($ty::new($($b),+)); + + let d = $ty::new($($d),+); + let r : $ty = transmute(vec_max(a, b)); + assert_eq!(d, r); + } + } + } + + test_vec_max! { test_vec_max_i32x4, i32x4, + [-1, 0, 1, 2], + [2, 1, -1, -2], + [2, 1, 1, 2] } + + test_vec_max! { test_vec_max_u32x4, u32x4, + [0, 0, 1, 2], + [2, 1, 0, 0], + [2, 1, 1, 2] } + + test_vec_max! { test_vec_max_i16x8, i16x8, + [-1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2], + [2, 1, 1, 2, 2, 1, 1, 2] } + + test_vec_max! { test_vec_max_u16x8, u16x8, + [0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0], + [2, 1, 1, 2, 2, 1, 1, 2] } + + test_vec_max! { test_vec_max_i8x16, i8x16, + [-1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2], + [2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2] } + + test_vec_max! { test_vec_max_u8x16, u8x16, + [0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0], + [2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2] } + + macro_rules! test_vec_perm { + {$name:ident, + $shorttype:ident, $longtype:ident, + [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: $longtype = transmute($shorttype::new($($a),+)); + let b: $longtype = transmute($shorttype::new($($b),+)); + let c: vector_unsigned_char = transmute(u8x16::new($($c),+)); + let d = $shorttype::new($($d),+); + + let r: $shorttype = transmute(vec_perm(a, b, c)); + assert_eq!(d, r); + } + } + } + + test_vec_perm! 
{test_vec_perm_u8x16, + u8x16, vector_unsigned_char, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} + test_vec_perm! {test_vec_perm_i8x16, + i8x16, vector_signed_char, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} + + test_vec_perm! {test_vec_perm_m8x16, + m8x16, vector_bool_char, + [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], + [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]} + test_vec_perm! {test_vec_perm_u16x8, + u16x8, vector_unsigned_short, + [0, 1, 2, 3, 4, 5, 6, 7], + [10, 11, 12, 13, 14, 15, 16, 17], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 10, 1, 11, 2, 12, 3, 13]} + test_vec_perm! {test_vec_perm_i16x8, + i16x8, vector_signed_short, + [0, 1, 2, 3, 4, 5, 6, 7], + [10, 11, 12, 13, 14, 15, 16, 17], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 10, 1, 11, 2, 12, 3, 13]} + test_vec_perm! 
{test_vec_perm_m16x8, + m16x8, vector_bool_short, + [false, false, false, false, false, false, false, false], + [true, true, true, true, true, true, true, true], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [false, true, false, true, false, true, false, true]} + + test_vec_perm! {test_vec_perm_u32x4, + u32x4, vector_unsigned_int, + [0, 1, 2, 3], + [10, 11, 12, 13], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [0, 10, 1, 11]} + test_vec_perm! {test_vec_perm_i32x4, + i32x4, vector_signed_int, + [0, 1, 2, 3], + [10, 11, 12, 13], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [0, 10, 1, 11]} + test_vec_perm! {test_vec_perm_m32x4, + m32x4, vector_bool_int, + [false, false, false, false], + [true, true, true, true], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [false, true, false, true]} + test_vec_perm! 
{test_vec_perm_f32x4, + f32x4, vector_float, + [0.0, 1.0, 2.0, 3.0], + [1.0, 1.1, 1.2, 1.3], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [0.0, 1.0, 1.0, 1.1]} + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_madds() { + let a: vector_signed_short = transmute(i16x8::new( + 0 * 256, + 1 * 256, + 2 * 256, + 3 * 256, + 4 * 256, + 5 * 256, + 6 * 256, + 7 * 256, + )); + let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); + + let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21); + + assert_eq!(d, transmute(vec_madds(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_madd_float() { + let a: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let b: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let c: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let d = f32x4::new( + 0.1 * 0.1 + 0.1, + 0.2 * 0.2 + 0.2, + 0.3 * 0.3 + 0.3, + 0.4 * 0.4 + 0.4, + ); + + assert_eq!(d, transmute(vec_madd(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_nmsub_float() { + let a: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let b: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let c: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); + let d = f32x4::new( + -(0.1 * 0.1 - 0.1), + -(0.2 * 0.2 - 0.2), + -(0.3 * 0.3 - 0.3), + -(0.4 * 0.4 - 0.4), + ); + assert_eq!(d, transmute(vec_nmsub(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mradds() { + let a: vector_signed_short = transmute(i16x8::new( + 0 * 256, + 1 * 256, + 2 * 256, + 3 * 256, + 4 * 256, + 5 * 256, + 6 * 256, + 7 * 256, + )); + let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::MAX - 1)); + + let d = 
i16x8::new(0, 3, 6, 9, 12, 15, 18, i16::MAX); + + assert_eq!(d, transmute(vec_mradds(a, b, c))); + } + + macro_rules! test_vec_mladd { + {$name:ident, $sa:ident, $la:ident, $sbc:ident, $lbc:ident, $sd:ident, + [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { + #[simd_test(enable = "altivec")] + unsafe fn $name() { + let a: $la = transmute($sa::new($($a),+)); + let b: $lbc = transmute($sbc::new($($b),+)); + let c = transmute($sbc::new($($c),+)); + let d = $sd::new($($d),+); + + assert_eq!(d, transmute(vec_mladd(a, b, c))); + } + } + } + + test_vec_mladd! { test_vec_mladd_u16x8_u16x8, u16x8, vector_unsigned_short, u16x8, vector_unsigned_short, u16x8, + [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] + } + test_vec_mladd! { test_vec_mladd_u16x8_i16x8, u16x8, vector_unsigned_short, i16x8, vector_unsigned_short, i16x8, + [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] + } + test_vec_mladd! { test_vec_mladd_i16x8_u16x8, i16x8, vector_signed_short, u16x8, vector_unsigned_short, i16x8, + [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] + } + test_vec_mladd! 
{ test_vec_mladd_i16x8_i16x8, i16x8, vector_signed_short, i16x8, vector_unsigned_short, i16x8, + [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_msum_unsigned_char() { + let a: vector_unsigned_char = + transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let b: vector_unsigned_char = transmute(u8x16::new( + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + )); + let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); + let d = u32x4::new( + (0 + 1 + 2 + 3) * 255 + 0, + (4 + 5 + 6 + 7) * 255 + 1, + (0 + 1 + 2 + 3) * 255 + 2, + (4 + 5 + 6 + 7) * 255 + 3, + ); + + assert_eq!(d, transmute(vec_msum(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_msum_signed_char() { + let a: vector_signed_char = transmute(i8x16::new( + 0, -1, 2, -3, 1, -1, 1, -1, 0, 1, 2, 3, 4, -5, -6, -7, + )); + let b: vector_unsigned_char = + transmute(i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); + let c: vector_signed_int = transmute(u32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + (0 - 1 + 2 - 3) + 0, + (0) + 1, + (0 + 1 + 2 + 3) + 2, + (4 - 5 - 6 - 7) + 3, + ); + + assert_eq!(d, transmute(vec_msum(a, b, c))); + } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_msum_unsigned_short() { + let a: vector_unsigned_short = transmute(u16x8::new( + 0 * 256, + 1 * 256, + 2 * 256, + 3 * 256, + 4 * 256, + 5 * 256, + 6 * 256, + 7 * 256, + )); + let b: vector_unsigned_short = + transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); + let d = u32x4::new( + (0 + 1) * 256 * 256 + 0, + (2 + 3) * 256 * 256 + 1, + (4 + 5) * 256 * 256 + 2, + (6 + 7) * 256 * 256 + 3, + ); + + assert_eq!(d, transmute(vec_msum(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_msum_signed_short() { + let a: 
vector_signed_short = transmute(i16x8::new( + 0 * 256, + -1 * 256, + 2 * 256, + -3 * 256, + 4 * 256, + -5 * 256, + 6 * 256, + -7 * 256, + )); + let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + (0 - 1) * 256 * 256 + 0, + (2 - 3) * 256 * 256 + 1, + (4 - 5) * 256 * 256 + 2, + (6 - 7) * 256 * 256 + 3, + ); + + assert_eq!(d, transmute(vec_msum(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_msums_unsigned() { + let a: vector_unsigned_short = transmute(u16x8::new( + 0 * 256, + 1 * 256, + 2 * 256, + 3 * 256, + 4 * 256, + 5 * 256, + 6 * 256, + 7 * 256, + )); + let b: vector_unsigned_short = + transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); + let d = u32x4::new( + (0 + 1) * 256 * 256 + 0, + (2 + 3) * 256 * 256 + 1, + (4 + 5) * 256 * 256 + 2, + (6 + 7) * 256 * 256 + 3, + ); + + assert_eq!(d, transmute(vec_msums(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_msums_signed() { + let a: vector_signed_short = transmute(i16x8::new( + 0 * 256, + -1 * 256, + 2 * 256, + -3 * 256, + 4 * 256, + -5 * 256, + 6 * 256, + -7 * 256, + )); + let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); + let c: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + (0 - 1) * 256 * 256 + 0, + (2 - 3) * 256 * 256 + 1, + (4 - 5) * 256 * 256 + 2, + (6 - 7) * 256 * 256 + 3, + ); + + assert_eq!(d, transmute(vec_msums(a, b, c))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum2s() { + let a: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); + let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new(0, 0 + 1 + 1, 0, 2 + 3 + 3); + + assert_eq!(d, transmute(vec_sum2s(a, b))); + } + + #[simd_test(enable = "altivec")] + unsafe fn 
test_vec_sum4s_unsigned_char() { + let a: vector_unsigned_char = + transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let b: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); + let d = u32x4::new( + 0 + 1 + 2 + 3 + 0, + 4 + 5 + 6 + 7 + 1, + 0 + 1 + 2 + 3 + 2, + 4 + 5 + 6 + 7 + 3, + ); + + assert_eq!(d, transmute(vec_sum4s(a, b))); + } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum4s_signed_char() { + let a: vector_signed_char = + transmute(i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new( + 0 + 1 + 2 + 3 + 0, + 4 + 5 + 6 + 7 + 1, + 0 + 1 + 2 + 3 + 2, + 4 + 5 + 6 + 7 + 3, + ); + + assert_eq!(d, transmute(vec_sum4s(a, b))); + } + #[simd_test(enable = "altivec")] + unsafe fn test_vec_sum4s_signed_short() { + let a: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); + let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); + let d = i32x4::new(0 + 1 + 0, 2 + 3 + 1, 4 + 5 + 2, 6 + 7 + 3); + + assert_eq!(d, transmute(vec_sum4s(a, b))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mule_unsigned_char() { + let a: vector_unsigned_char = + transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let d = u16x8::new(0 * 0, 2 * 2, 4 * 4, 6 * 6, 0 * 0, 2 * 2, 4 * 4, 6 * 6); + + assert_eq!(d, transmute(vec_mule(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mule_signed_char() { + let a: vector_signed_char = transmute(i8x16::new( + 0, 1, -2, 3, -4, 5, -6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + )); + let d = i16x8::new(0 * 0, 2 * 2, 4 * 4, 6 * 6, 0 * 0, 2 * 2, 4 * 4, 6 * 6); + + assert_eq!(d, transmute(vec_mule(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mule_unsigned_short() { + let a: vector_unsigned_short = transmute(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); + let d = u32x4::new(0 * 0, 2 * 2, 4 * 4, 6 * 6); + + assert_eq!(d, transmute(vec_mule(a, 
a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mule_signed_short() { + let a: vector_signed_short = transmute(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7)); + let d = i32x4::new(0 * 0, 2 * 2, 4 * 4, 6 * 6); + + assert_eq!(d, transmute(vec_mule(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mulo_unsigned_char() { + let a: vector_unsigned_char = + transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + let d = u16x8::new(1 * 1, 3 * 3, 5 * 5, 7 * 7, 1 * 1, 3 * 3, 5 * 5, 7 * 7); + + assert_eq!(d, transmute(vec_mulo(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mulo_signed_char() { + let a: vector_signed_char = transmute(i8x16::new( + 0, 1, -2, 3, -4, 5, -6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + )); + let d = i16x8::new(1 * 1, 3 * 3, 5 * 5, 7 * 7, 1 * 1, 3 * 3, 5 * 5, 7 * 7); + + assert_eq!(d, transmute(vec_mulo(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mulo_unsigned_short() { + let a: vector_unsigned_short = transmute(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); + let d = u32x4::new(1 * 1, 3 * 3, 5 * 5, 7 * 7); + + assert_eq!(d, transmute(vec_mulo(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_mulo_signed_short() { + let a: vector_signed_short = transmute(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7)); + let d = i32x4::new(1 * 1, 3 * 3, 5 * 5, 7 * 7); + + assert_eq!(d, transmute(vec_mulo(a, a))); + } + + #[simd_test(enable = "altivec")] + unsafe fn vec_add_i32x4_i32x4() { + let x = i32x4::new(1, 2, 3, 4); + let y = i32x4::new(4, 3, 2, 1); + let x: vector_signed_int = transmute(x); + let y: vector_signed_int = transmute(y); + let z = vec_add(x, y); + assert_eq!(i32x4::splat(5), transmute(z)); + } + + #[simd_test(enable = "altivec")] + unsafe fn vec_ctf_u32() { + let v: vector_unsigned_int = transmute(u32x4::new(u32::MIN, u32::MAX, u32::MAX, 42)); + let v2 = vec_ctf::<1, _>(v); + let r2: vector_float = transmute(f32x4::new(0.0, 2147483600.0, 2147483600.0, 21.0)); + 
let v4 = vec_ctf::<2, _>(v); + let r4: vector_float = transmute(f32x4::new(0.0, 1073741800.0, 1073741800.0, 10.5)); + let v8 = vec_ctf::<3, _>(v); + let r8: vector_float = transmute(f32x4::new(0.0, 536870900.0, 536870900.0, 5.25)); + + let check = |a, b| { + let r = transmute(vec_cmple(vec_abs(vec_sub(a, b)), vec_splats(f32::EPSILON))); + let e = m32x4::new(true, true, true, true); + assert_eq!(e, r); + }; + + check(v2, r2); + check(v4, r4); + check(v8, r8); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_ctu() { + let v = u32x4::new(u32::MIN, u32::MAX, u32::MAX, 42); + let v2: u32x4 = transmute(vec_ctu::<1>(transmute(f32x4::new( + 0.0, + 2147483600.0, + 2147483600.0, + 21.0, + )))); + let v4: u32x4 = transmute(vec_ctu::<2>(transmute(f32x4::new( + 0.0, + 1073741800.0, + 1073741800.0, + 10.5, + )))); + let v8: u32x4 = transmute(vec_ctu::<3>(transmute(f32x4::new( + 0.0, + 536870900.0, + 536870900.0, + 5.25, + )))); + + assert_eq!(v2, v); + assert_eq!(v4, v); + assert_eq!(v8, v); + } + + #[simd_test(enable = "altivec")] + unsafe fn vec_ctf_i32() { + let v: vector_signed_int = transmute(i32x4::new(i32::MIN, i32::MAX, i32::MAX - 42, 42)); + let v2 = vec_ctf::<1, _>(v); + let r2: vector_float = + transmute(f32x4::new(-1073741800.0, 1073741800.0, 1073741800.0, 21.0)); + let v4 = vec_ctf::<2, _>(v); + let r4: vector_float = transmute(f32x4::new(-536870900.0, 536870900.0, 536870900.0, 10.5)); + let v8 = vec_ctf::<3, _>(v); + let r8: vector_float = transmute(f32x4::new(-268435460.0, 268435460.0, 268435460.0, 5.25)); + + let check = |a, b| { + let r = transmute(vec_cmple(vec_abs(vec_sub(a, b)), vec_splats(f32::EPSILON))); + println!("{:?} {:?}", a, b); + let e = m32x4::new(true, true, true, true); + assert_eq!(e, r); + }; + + check(v2, r2); + check(v4, r4); + check(v8, r8); + } + + #[simd_test(enable = "altivec")] + unsafe fn test_vec_cts() { + let v = i32x4::new(i32::MIN, i32::MAX, i32::MAX, 42); + let v2: i32x4 = transmute(vec_cts::<1>(transmute(f32x4::new( + 
-1073741800.0, + 1073741800.0, + 1073741800.0, + 21.0, + )))); + let v4: i32x4 = transmute(vec_cts::<2>(transmute(f32x4::new( + -536870900.0, + 536870900.0, + 536870900.0, + 10.5, + )))); + let v8: i32x4 = transmute(vec_cts::<3>(transmute(f32x4::new( + -268435460.0, + 268435460.0, + 268435460.0, + 5.25, + )))); + + assert_eq!(v2, v); + assert_eq!(v4, v); + assert_eq!(v8, v); + } + + test_vec_2! { test_vec_rl, vec_rl, u32x4, + [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678], + [4, 8, 12, 68], + [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] + } +} diff --git a/library/stdarch/crates/core_arch/src/powerpc/macros.rs b/library/stdarch/crates/core_arch/src/powerpc/macros.rs new file mode 100644 index 000000000000..af47494e8fb4 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/powerpc/macros.rs @@ -0,0 +1,315 @@ +macro_rules! test_impl { + ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + $call ($($v),*) + } + }; + ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident]) => { + test_impl! { $fun ($($v : $ty),*) -> $r [$call, $instr_altivec / $instr_vsx / $instr_vsx] } + }; + ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident / $instr_pwr9:ident]) => { + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr(all(test, not(target_feature="vsx"), not(target_feature = "power9-vector")), assert_instr($instr_altivec))] + #[cfg_attr(all(test, target_feature="vsx", not(target_feature = "power9-vector")), assert_instr($instr_vsx))] + #[cfg_attr(all(test, not(target_feature="vsx"), target_feature = "power9-vector"), assert_instr($instr_pwr9))] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + $call ($($v),*) + } + } +} + +#[allow(unknown_lints, unused_macro_rules)] +macro_rules! 
impl_vec_trait { + ([$Trait:ident $m:ident] $fun:ident ($a:ty)) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl $Trait for $a { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $m(self) -> Self { + $fun(transmute(self)) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $m(self) -> Self::Result { + $fun(transmute(self)) + } + } + }; + ([$Trait:ident $m:ident]+ $fun:ident ($a:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $m(self) -> Self::Result { + transmute($fun(transmute(self))) + } + } + }; + ([$Trait:ident $m:ident] 1 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $sf: ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m] $sf (vector_float) -> vector_float } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl $Trait<$b> for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $m(self, b: $b) -> Self::Result { + $fun(transmute(self), transmute(b)) + } + } + }; + ([$Trait:ident $m:ident]+ $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_powerpc", issue = 
"111145")] + impl $Trait<$b> for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn $m(self, b: $b) -> Self::Result { + transmute($fun(transmute(self), transmute(b))) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, ~$b:ty) -> $r:ty) => { + impl_vec_trait!{ [$Trait $m] $fun ($a, $a) -> $r } + impl_vec_trait!{ [$Trait $m] $fun ($a, $b) -> $r } + impl_vec_trait!{ [$Trait $m] $fun ($b, $a) -> $r } + }; + ([$Trait:ident $m:ident] ~($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, ~vector_bool_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, ~vector_bool_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, ~vector_bool_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, ~vector_bool_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, ~vector_bool_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, ~vector_bool_int) -> vector_signed_int } + }; + ([$Trait:ident $m:ident] ~($fn:ident)) => { + impl_vec_trait!{ [$Trait $m] ~($fn, $fn, $fn, $fn, $fn, $fn) } + }; + ([$Trait:ident $m:ident] 2 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> vector_signed_int } 
+ }; + ([$Trait:ident $m:ident] 2 ($fn:ident)) => { + impl_vec_trait!{ [$Trait $m] ($fn, $fn, $fn, $fn, $fn, $fn) } + }; + ([$Trait:ident $m:ident]+ 2b ($b:ident, $h:ident, $w:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_bool_char, vector_bool_char) -> vector_bool_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_bool_short, vector_bool_short) -> vector_bool_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_bool_int, vector_bool_int) -> vector_bool_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_signed_int) -> vector_signed_int } + }; + ([$Trait:ident $m:ident]+ 2b ($fn:ident)) => { + impl_vec_trait!{ [$Trait $m]+ 2b ($fn, $fn, $fn) } + }; +} + +macro_rules! s_t_l { + (i32x4) => { + vector_signed_int + }; + (i16x8) => { + vector_signed_short + }; + (i8x16) => { + vector_signed_char + }; + + (u32x4) => { + vector_unsigned_int + }; + (u16x8) => { + vector_unsigned_short + }; + (u8x16) => { + vector_unsigned_char + }; + + (f32x4) => { + vector_float + }; +} + +macro_rules! t_t_l { + (i32) => { + vector_signed_int + }; + (i16) => { + vector_signed_short + }; + (i8) => { + vector_signed_char + }; + + (u32) => { + vector_unsigned_int + }; + (u16) => { + vector_unsigned_short + }; + (u8) => { + vector_unsigned_char + }; + + (f32) => { + vector_float + }; +} + +macro_rules! 
t_t_s { + (i32) => { + i32x4 + }; + (i16) => { + i16x8 + }; + (i8) => { + i8x16 + }; + + (u32) => { + u32x4 + }; + (u16) => { + u16x8 + }; + (u8) => { + u8x16 + }; + + (f32) => { + f32x4 + }; +} + +macro_rules! t_u { + (vector_bool_char) => { + vector_unsigned_char + }; + (vector_bool_short) => { + vector_unsigned_short + }; + (vector_bool_int) => { + vector_unsigned_int + }; + (vector_unsigned_char) => { + vector_unsigned_char + }; + (vector_unsigned_short) => { + vector_unsigned_short + }; + (vector_unsigned_int) => { + vector_unsigned_int + }; + (vector_signed_char) => { + vector_unsigned_char + }; + (vector_signed_short) => { + vector_unsigned_short + }; + (vector_signed_int) => { + vector_unsigned_int + }; + (vector_float) => { + vector_unsigned_int + }; +} + +macro_rules! t_b { + (vector_bool_char) => { + vector_bool_char + }; + (vector_bool_short) => { + vector_bool_short + }; + (vector_bool_int) => { + vector_bool_int + }; + (vector_signed_char) => { + vector_bool_char + }; + (vector_signed_short) => { + vector_bool_short + }; + (vector_signed_int) => { + vector_bool_int + }; + (vector_unsigned_char) => { + vector_bool_char + }; + (vector_unsigned_short) => { + vector_bool_short + }; + (vector_unsigned_int) => { + vector_bool_int + }; + (vector_float) => { + vector_bool_int + }; +} + +macro_rules! impl_from { + ($s: ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl From<$s> for s_t_l!($s) { + fn from (v: $s) -> Self { + unsafe { + transmute(v) + } + } + } + }; + ($($s: ident),*) => { + $( + impl_from! { $s } + )* + }; +} + +macro_rules! 
impl_neg { + ($s: ident : $zero: expr) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl crate::ops::Neg for s_t_l!($s) { + type Output = s_t_l!($s); + fn neg(self) -> Self::Output { + unsafe { simd_neg(self) } + } + } + }; +} + +pub(crate) use impl_from; +pub(crate) use impl_neg; +pub(crate) use impl_vec_trait; +pub(crate) use s_t_l; +pub(crate) use t_b; +pub(crate) use t_t_l; +pub(crate) use t_t_s; +pub(crate) use t_u; +pub(crate) use test_impl; diff --git a/library/stdarch/crates/core_arch/src/powerpc/mod.rs b/library/stdarch/crates/core_arch/src/powerpc/mod.rs new file mode 100644 index 000000000000..53227215d946 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/powerpc/mod.rs @@ -0,0 +1,22 @@ +//! PowerPC intrinsics + +pub(crate) mod macros; + +mod altivec; +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub use self::altivec::*; + +mod vsx; +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub use self::vsx::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Generates the trap instruction `TRAP` +#[cfg_attr(test, assert_instr(trap))] +#[inline] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn trap() -> ! { + crate::intrinsics::abort() +} diff --git a/library/stdarch/crates/core_arch/src/powerpc/vsx.rs b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs new file mode 100644 index 000000000000..ca9fcaabe8b2 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs @@ -0,0 +1,240 @@ +//! PowerPC Vector Scalar eXtensions (VSX) intrinsics. +//! +//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA +//! NVlink)] and [POWER ISA v3.0B (for POWER9)]. +//! +//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u +//! 
[POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv + +#![allow(non_camel_case_types)] + +use crate::core_arch::powerpc::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +use crate::mem::transmute; + +types! { + #![unstable(feature = "stdarch_powerpc", issue = "111145")] + + // pub struct vector_Float16 = f16x8; + /// PowerPC-specific 128-bit wide vector of two packed `i64` + pub struct vector_signed_long(2 x i64); + /// PowerPC-specific 128-bit wide vector of two packed `u64` + pub struct vector_unsigned_long(2 x u64); + /// PowerPC-specific 128-bit wide vector mask of two `i64` + pub struct vector_bool_long(2 x i64); + /// PowerPC-specific 128-bit wide vector of two packed `f64` + pub struct vector_double(2 x f64); + // pub struct vector_signed_long_long = vector_signed_long; + // pub struct vector_unsigned_long_long = vector_unsigned_long; + // pub struct vector_bool_long_long = vector_bool_long; + // pub struct vector_signed___int128 = i128x1; + // pub struct vector_unsigned___int128 = i128x1; +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.ppc.altivec.vperm"] + fn vperm( + a: vector_signed_int, + b: vector_signed_int, + c: vector_unsigned_char, + ) -> vector_signed_int; +} + +mod sealed { + use super::*; + use crate::core_arch::simd::*; + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPermDI { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self; + } + + // xxpermdi has an big-endian bias and extended mnemonics + #[inline] + #[target_feature(enable = "vsx")] + #[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))] + #[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))] + unsafe fn xxpermdi(a: vector_signed_long, b: vector_signed_long, dm: u8) -> vector_signed_long { + let a: i64x2 = transmute(a); + let b: i64x2 = transmute(b); + let r: i64x2 = match 
dm & 0b11 { + 0 => simd_shuffle!(a, b, [0b00, 0b10]), + 1 => simd_shuffle!(a, b, [0b01, 0b10]), + 2 => simd_shuffle!(a, b, [0b00, 0b11]), + _ => simd_shuffle!(a, b, [0b01, 0b11]), + }; + transmute(r) + } + + macro_rules! vec_xxpermdi { + {$impl: ident} => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorPermDI for $impl { + #[inline] + #[target_feature(enable = "vsx")] + unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self { + transmute(xxpermdi(transmute(self), transmute(b), dm)) + } + } + } + } + + vec_xxpermdi! { vector_unsigned_long } + vec_xxpermdi! { vector_signed_long } + vec_xxpermdi! { vector_bool_long } + vec_xxpermdi! { vector_double } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMergeEo { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + unsafe fn vec_mergee(self, b: Self) -> Self; + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + unsafe fn vec_mergeo(self, b: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, target_endian = "little", target_feature = "power8-vector"), + assert_instr(vmrgow) + )] + #[cfg_attr( + all(test, target_endian = "big", target_feature = "power8-vector"), + assert_instr(vmrgew) + )] + unsafe fn mergee(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + let p = transmute(u8x16::new( + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, + 0x1A, 0x1B, + )); + vec_perm(a, b, p) + } + + #[inline] + #[target_feature(enable = "altivec")] + #[cfg_attr( + all(test, target_endian = "little", target_feature = "power8-vector"), + assert_instr(vmrgew) + )] + #[cfg_attr( + all(test, target_endian = "big", target_feature = "power8-vector"), + assert_instr(vmrgow) + )] + unsafe fn mergeo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + let p = transmute(u8x16::new( + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 
0x1C, 0x1D, + 0x1E, 0x1F, + )); + vec_perm(a, b, p) + } + + macro_rules! vec_mergeeo { + { $impl: ident, $even: ident, $odd: ident } => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorMergeEo for $impl { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mergee(self, b: Self) -> Self { + transmute(mergee(transmute(self), transmute(b))) + } + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_mergeo(self, b: Self) -> Self { + transmute(mergeo(transmute(self), transmute(b))) + } + } + } + } + + vec_mergeeo! { vector_signed_int, mergee, mergeo } + vec_mergeeo! { vector_unsigned_int, mergee, mergeo } + vec_mergeeo! { vector_bool_int, mergee, mergeo } + vec_mergeeo! { vector_float, mergee, mergeo } +} + +/// Vector permute. +#[inline] +#[target_feature(enable = "vsx")] +//#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xxpermdi(a: T, b: T) -> T +where + T: sealed::VectorPermDI, +{ + static_assert_uimm_bits!(DM, 2); + a.vec_xxpermdi(b, DM as u8) +} + +/// Vector Merge Even +/// +/// ## Purpose +/// Merges the even-numbered values from two vectors. +/// +/// ## Result value +/// The even-numbered elements of a are stored into the even-numbered elements of r. +/// The even-numbered elements of b are stored into the odd-numbered elements of r. +#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mergee(a: T, b: T) -> T +where + T: sealed::VectorMergeEo, +{ + a.vec_mergee(b) +} + +/// Vector Merge Odd +/// +/// ## Purpose +/// Merges the odd-numbered values from two vectors. +/// +/// ## Result value +/// The odd-numbered elements of a are stored into the even-numbered elements of r. +/// The odd-numbered elements of b are stored into the odd-numbered elements of r. 
+#[inline] +#[target_feature(enable = "altivec")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_mergeo(a: T, b: T) -> T +where + T: sealed::VectorMergeEo, +{ + a.vec_mergeo(b) +} + +#[cfg(test)] +mod tests { + #[cfg(target_arch = "powerpc")] + use crate::core_arch::arch::powerpc::*; + + #[cfg(target_arch = "powerpc64")] + use crate::core_arch::arch::powerpc64::*; + + use crate::core_arch::simd::*; + use crate::mem::transmute; + use stdarch_test::simd_test; + + macro_rules! test_vec_xxpermdi { + {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { + #[simd_test(enable = "vsx")] + unsafe fn $name() { + let a: $longtype = transmute($shorttype::new($($a),+, $($b),+)); + let b = transmute($shorttype::new($($c),+, $($d),+)); + + assert_eq!($shorttype::new($($a),+, $($c),+), transmute(vec_xxpermdi::<_, 0>(a, b))); + assert_eq!($shorttype::new($($b),+, $($c),+), transmute(vec_xxpermdi::<_, 1>(a, b))); + assert_eq!($shorttype::new($($a),+, $($d),+), transmute(vec_xxpermdi::<_, 2>(a, b))); + assert_eq!($shorttype::new($($b),+, $($d),+), transmute(vec_xxpermdi::<_, 3>(a, b))); + } + } + } + + test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]} + test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]} + test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]} + test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]} +} diff --git a/library/stdarch/crates/core_arch/src/powerpc64/mod.rs b/library/stdarch/crates/core_arch/src/powerpc64/mod.rs new file mode 100644 index 000000000000..e361c55a9071 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/powerpc64/mod.rs @@ -0,0 +1,14 @@ +//! PowerPC 64 +//! +//! The reference is the [64-Bit ELF V2 ABI Specification - Power +//! Architecture]. +//! +//! 
[64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf + +mod vsx; + +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub use crate::core_arch::powerpc::*; + +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub use self::vsx::*; diff --git a/library/stdarch/crates/core_arch/src/powerpc64/vsx.rs b/library/stdarch/crates/core_arch/src/powerpc64/vsx.rs new file mode 100644 index 000000000000..7b42be8653c5 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/powerpc64/vsx.rs @@ -0,0 +1,156 @@ +//! PowerPC Vector Scalar eXtensions (VSX) intrinsics. +//! +//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA +//! NVlink)] and [POWER ISA v3.0B (for POWER9)]. +//! +//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u +//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv + +#![allow(non_camel_case_types)] + +use crate::core_arch::powerpc::macros::*; +use crate::core_arch::powerpc::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +use crate::mem::transmute; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.ppc.vsx.lxvl"] + fn lxvl(a: *const u8, l: usize) -> vector_signed_int; + + #[link_name = "llvm.ppc.vsx.stxvl"] + fn stxvl(v: vector_signed_int, a: *mut u8, l: usize); +} + +mod sealed { + use super::*; + + #[inline] + #[target_feature(enable = "power9-vector")] + #[cfg_attr(test, assert_instr(lxvl))] + unsafe fn vec_lxvl(p: *const u8, l: usize) -> vector_signed_int { + lxvl(p, l << 56) + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXloads { + type Result; + unsafe fn vec_xl_len(self, l: usize) -> Self::Result; + } + + macro_rules! 
impl_vsx_loads { + ($ty:ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXloads for *const $ty { + type Result = t_t_l!($ty); + #[inline] + #[target_feature(enable = "power9-vector")] + unsafe fn vec_xl_len(self, l: usize) -> Self::Result { + transmute(vec_lxvl(self as *const u8, l)) + } + } + }; + } + + impl_vsx_loads! { i8 } + impl_vsx_loads! { u8 } + impl_vsx_loads! { i16 } + impl_vsx_loads! { u16 } + impl_vsx_loads! { i32 } + impl_vsx_loads! { u32 } + impl_vsx_loads! { f32 } + + #[inline] + #[target_feature(enable = "power9-vector")] + #[cfg_attr(test, assert_instr(stxvl))] + unsafe fn vec_stxvl(v: vector_signed_int, a: *mut u8, l: usize) { + stxvl(v, a, l << 56); + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorXstores { + type Out; + unsafe fn vec_xst_len(self, p: Self::Out, l: usize); + } + + macro_rules! impl_stores { + ($ty:ident) => { + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXstores for t_t_l!($ty) { + type Out = *mut $ty; + #[inline] + #[target_feature(enable = "power9-vector")] + unsafe fn vec_xst_len(self, a: Self::Out, l: usize) { + stxvl(transmute(self), a as *mut u8, l) + } + } + }; + } + + impl_stores! { i8 } + impl_stores! { u8 } + impl_stores! { i16 } + impl_stores! { u16 } + impl_stores! { i32 } + impl_stores! { u32 } + impl_stores! { f32 } +} + +/// Vector Load with Length +/// +/// ## Purpose +/// Loads a vector of a specified byte length. +/// +/// ## Result value +/// Loads the number of bytes specified by b from the address specified in a. +/// Initializes elements in order from the byte stream (as defined by the endianness of the +/// target). Any bytes of elements that cannot be initialized from the number of loaded bytes have +/// a zero value. +/// +/// Between 0 and 16 bytes, inclusive, will be loaded. The length is specified by the +/// least-significant byte of b, as min (b mod 256, 16). 
The behavior is undefined if the length +/// argument is outside of the range 0–255, or if it is not a multiple of the vector element size. +/// +/// ## Notes +/// vec_xl_len should not be used to load from cache-inhibited memory. +#[inline] +#[target_feature(enable = "power9-vector")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xl_len(p: T, len: usize) -> ::Result +where + T: sealed::VectorXloads, +{ + p.vec_xl_len(len) +} + +/// Vector Store with Length +/// +/// ## Purpose +/// +/// Stores a vector of a specified byte length. +/// +/// ## Operation +/// +/// Stores the number of bytes specified by c of the vector a to the address specified +/// in b. The bytes are obtained starting from the lowest-numbered byte of the lowest-numbered +/// element (as defined by the endianness of the target). All bytes of an element are accessed +/// before proceeding to the next higher element. +/// +/// Between 0 and 16 bytes, inclusive, will be stored. The length is specified by the +/// least-significant byte of c, as min (c mod 256, 16). The behavior is undefined if the length +/// argument is outside of the range 0–255, or if it is not a multiple of the vector element size. +/// +/// ## Notes +/// vec_xst_len should not be used to store to cache-inhibited memory. +#[inline] +#[target_feature(enable = "power9-vector")] +#[unstable(feature = "stdarch_powerpc", issue = "111145")] +pub unsafe fn vec_xst_len(v: T, a: ::Out, l: usize) +where + T: sealed::VectorXstores, +{ + v.vec_xst_len(a, l) +} diff --git a/library/stdarch/crates/core_arch/src/riscv32/mod.rs b/library/stdarch/crates/core_arch/src/riscv32/mod.rs new file mode 100644 index 000000000000..7ff871227b50 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv32/mod.rs @@ -0,0 +1,6 @@ +//! 
RISC-V RV32 specific intrinsics + +mod zk; + +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub use zk::*; diff --git a/library/stdarch/crates/core_arch/src/riscv32/zk.rs b/library/stdarch/crates/core_arch/src/riscv32/zk.rs new file mode 100644 index 000000000000..054bcfe955b7 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv32/zk.rs @@ -0,0 +1,331 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.aes32esi"] + fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.aes32esmi"] + fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.aes32dsi"] + fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.aes32dsmi"] + fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.zip.i32"] + fn _zip(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.unzip.i32"] + fn _unzip(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig0h"] + fn _sha512sig0h(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig0l"] + fn _sha512sig0l(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig1h"] + fn _sha512sig1h(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig1l"] + fn _sha512sig1l(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sum0r"] + fn _sha512sum0r(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sum1r"] + fn _sha512sum1r(rs1: i32, rs2: i32) -> i32; +} + +/// AES final round encryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// forward AES SBox operation, before XOR’ing the result with rs1. This instruction must +/// always be implemented such that its execution latency does not depend on the data being +/// operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.3 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +#[target_feature(enable = "zkne")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes32esi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + unsafe { _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32 } +} + +/// AES middle round encryption instruction for RV32 with. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with +/// rs1. This instruction must always be implemented such that its execution latency does not +/// depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.4 +/// +/// # Note +/// +/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +#[target_feature(enable = "zkne")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes32esmi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + unsafe { _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32 } +} + +/// AES final round decryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// inverse AES SBox operation, and XOR’s the result with rs1. 
This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.1 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +#[target_feature(enable = "zknd")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes32dsi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + unsafe { _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32 } +} + +/// AES middle round decryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with +/// rs1. This instruction must always be implemented such that its execution latency does not +/// depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.2 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +#[target_feature(enable = "zknd")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes32dsmi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + unsafe { _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32 } +} + +/// Place upper/lower halves of the source register into odd/even bits of the destination +/// respectivley. 
+/// +/// This instruction places bits in the low half of the source register into the even bit +/// positions of the destination, and bits in the high half of the source register into the odd +/// bit positions of the destination. It is the inverse of the unzip instruction. This +/// instruction is available only on RV32. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.49 +#[target_feature(enable = "zbkb")] +// See #1464 +// #[cfg_attr(test, assert_instr(zip))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn zip(rs: u32) -> u32 { + unsafe { _zip(rs as i32) as u32 } +} + +/// Place odd and even bits of the source word into upper/lower halves of the destination. +/// +/// This instruction places the even bits of the source register into the low half of the +/// destination, and the odd bits of the source into the high bits of the destination. It is +/// the inverse of the zip instruction. This instruction is available only on RV32. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.45 +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(unzip))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn unzip(rs: u32) -> u32 { + unsafe { _unzip(rs as i32) as u32 } +} + +/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash +/// function \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig0l instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. 
This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.31 +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig0h))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sig0h(rs1: u32, rs2: u32) -> u32 { + unsafe { _sha512sig0h(rs1 as i32, rs2 as i32) as u32 } +} + +/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function +/// \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig0h instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.32 +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig0l))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sig0l(rs1: u32, rs2: u32) -> u32 { + unsafe { _sha512sig0l(rs1 as i32, rs2 as i32) as u32 } +} + +/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash +/// function \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig1l instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. 
This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.33 +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig1h))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sig1h(rs1: u32, rs2: u32) -> u32 { + unsafe { _sha512sig1h(rs1 as i32, rs2 as i32) as u32 } +} + +/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function +/// \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig1h instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.34 +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig1l))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sig1l(rs1: u32, rs2: u32) -> u32 { + unsafe { _sha512sig1l(rs1 as i32, rs2 as i32) as u32 } +} + +/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section +/// 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the +/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and +/// output is represented by two 32-bit registers. 
This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.35 +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sum0r))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sum0r(rs1: u32, rs2: u32) -> u32 { + unsafe { _sha512sum0r(rs1 as i32, rs2 as i32) as u32 } +} + +/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section +/// 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the +/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and +/// output is represented by two 32-bit registers. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.36 +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sum1r))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sum1r(rs1: u32, rs2: u32) -> u32 { + unsafe { _sha512sum1r(rs1 as i32, rs2 as i32) as u32 } +} diff --git a/library/stdarch/crates/core_arch/src/riscv64/mod.rs b/library/stdarch/crates/core_arch/src/riscv64/mod.rs new file mode 100644 index 000000000000..0e860f6f2ad2 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv64/mod.rs @@ -0,0 +1,57 @@ +//! 
RISC-V RV64 specific intrinsics +use crate::arch::asm; + +mod zk; + +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub use zk::*; + +/// Loads virtual machine memory by unsigned word integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This operation is not available under RV32 base instruction set. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.WU` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_wu(src: *const u32) -> u32 { + let value: u32; + asm!(".insn i 0x73, 0x4, {}, {}, 0x681", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Loads virtual machine memory by double integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This operation is not available under RV32 base instruction set. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.D` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_d(src: *const i64) -> i64 { + let value: i64; + asm!(".insn i 0x73, 0x4, {}, {}, 0x6C0", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Stores virtual machine memory by double integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. 
+/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.D` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hsv_d(dst: *mut i64, src: i64) { + asm!(".insn r 0x73, 0x4, 0x37, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); +} diff --git a/library/stdarch/crates/core_arch/src/riscv64/zk.rs b/library/stdarch/crates/core_arch/src/riscv64/zk.rs new file mode 100644 index 000000000000..c6af750bbc57 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv64/zk.rs @@ -0,0 +1,265 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.aes64es"] + fn _aes64es(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64esm"] + fn _aes64esm(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64ds"] + fn _aes64ds(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64dsm"] + fn _aes64dsm(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64ks1i"] + fn _aes64ks1i(rs1: i64, rnum: i32) -> i64; + + #[link_name = "llvm.riscv.aes64ks2"] + fn _aes64ks2(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64im"] + fn _aes64im(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sig0"] + fn _sha512sig0(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sig1"] + fn _sha512sig1(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sum0"] + fn _sha512sum0(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sum1"] + fn _sha512sum1(rs1: i64) -> i64; +} + +/// AES final round encryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the ShiftRows and SubBytes steps. This instruction must +/// always be implemented such that its execution latency does not depend on the data being +/// operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.7 +#[target_feature(enable = "zkne")] +#[cfg_attr(test, assert_instr(aes64es))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64es(rs1: u64, rs2: u64) -> u64 { + unsafe { _aes64es(rs1 as i64, rs2 as i64) as u64 } +} + +/// AES middle round encryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the ShiftRows, SubBytes and MixColumns steps. This +/// instruction must always be implemented such that its execution latency does not depend on +/// the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.8 +#[target_feature(enable = "zkne")] +#[cfg_attr(test, assert_instr(aes64esm))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64esm(rs1: u64, rs2: u64) -> u64 { + unsafe { _aes64esm(rs1 as i64, rs2 as i64) as u64 } +} + +/// AES final round decryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the Inverse ShiftRows and SubBytes steps. This +/// instruction must always be implemented such that its execution latency does not depend on +/// the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.5 +#[target_feature(enable = "zknd")] +#[cfg_attr(test, assert_instr(aes64ds))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64ds(rs1: u64, rs2: u64) -> u64 { + unsafe { _aes64ds(rs1 as i64, rs2 as i64) as u64 } +} + +/// AES middle round decryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the Inverse ShiftRows, SubBytes and MixColumns steps. +/// This instruction must always be implemented such that its execution latency does not depend +/// on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.6 +#[target_feature(enable = "zknd")] +#[cfg_attr(test, assert_instr(aes64dsm))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64dsm(rs1: u64, rs2: u64) -> u64 { + unsafe { _aes64dsm(rs1 as i64, rs2 as i64) as u64 } +} + +/// This instruction implements part of the KeySchedule operation for the AES Block cipher +/// involving the SBox operation. +/// +/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the +/// AES block cipher Key Schedule. This instruction must always be implemented such that its +/// execution latency does not depend on the data being operated on. Note that rnum must be in +/// the range 0x0..0xA. The values 0xB..0xF are reserved. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.10 +/// +/// # Note +/// +/// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`. 
+#[target_feature(enable = "zkne", enable = "zknd")] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64ks1i(rs1: u64) -> u64 { + static_assert!(RNUM <= 10); + + unsafe { _aes64ks1i(rs1 as i64, RNUM as i32) as u64 } +} + +/// This instruction implements part of the KeySchedule operation for the AES Block cipher. +/// +/// This instruction implements the additional XOR’ing of key words as part of the AES block +/// cipher Key Schedule. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.11 +#[target_feature(enable = "zkne", enable = "zknd")] +#[cfg_attr(test, assert_instr(aes64ks2))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64ks2(rs1: u64, rs2: u64) -> u64 { + unsafe { _aes64ks2(rs1 as i64, rs2 as i64) as u64 } +} + +/// This instruction accelerates the inverse MixColumns step of the AES Block Cipher, and is used to aid creation of +/// the decryption KeySchedule. +/// +/// The instruction applies the inverse MixColumns transformation to two columns of the state array, packed +/// into a single 64-bit register. It is used to create the inverse cipher KeySchedule, according to the equivalent +/// inverse cipher construction in (Page 23, Section 5.3.5). This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.9 +#[target_feature(enable = "zkne", enable = "zknd")] +#[cfg_attr(test, assert_instr(aes64im))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn aes64im(rs1: u64) -> u64 { + unsafe { _aes64im(rs1 as i64) as u64 } +} + +/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sigma0 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.37 +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sig0(rs1: u64) -> u64 { + unsafe { _sha512sig0(rs1 as i64) as u64 } +} + +/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sigma1 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.38 +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig1))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sig1(rs1: u64) -> u64 { + unsafe { _sha512sig1(rs1 as i64) as u64 } +} + +/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sum0 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.39 +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sum0))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sum0(rs1: u64) -> u64 { + unsafe { _sha512sum0(rs1 as i64) as u64 } +} + +/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sum1 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.40 +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sum1))] +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sha512sum1(rs1: u64) -> u64 { + unsafe { _sha512sum1(rs1 as i64) as u64 } +} diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs new file mode 100644 index 000000000000..3ce24324de2e --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs @@ -0,0 +1,579 @@ +//! Shared RISC-V intrinsics +//! +//! ## Missing floating-point register instructions +//! +//! We are deliberately *not* providing instructions that could change the floating-point rounding +//! mode or exception behavior or read the accrued exceptions flags: `frcsr`, `fscsr`, `fsrm`, +//! `frflags`, `fsflags`. +//! +//! Rust makes no guarantees whatsoever about the contents of the accrued exceptions register: Rust +//! floating-point operations may or may not result in this register getting updated with exception +//! state, and the register can change between two invocations of this function even when no +//! floating-point operations appear in the source code (since floating-point operations appearing +//! earlier or later can be reordered). +//! +//! Modifying the rounding mode leads to **immediate Undefined Behavior**: Rust assumes that the +//! default rounding mode is always set and will optimize accordingly. This even applies when the +//! rounding mode is altered and later reset to its original value without any floating-point +//! operations appearing in the source code between those operations (since floating-point +//! operations appearing earlier or later can be reordered). +//! +//! If you need to perform some floating-point operations and check whether they raised an +//! 
exception, use a single inline assembly block for the entire sequence of operations. +//! +//! If you need to perform some floating-point operations under a differen rounding mode, use a +//! single inline assembly block and make sure to restore the original rounding mode before the end +//! of the block. +mod p; +mod zb; +mod zk; + +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub use p::*; +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub use zb::*; +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub use zk::*; + +use crate::arch::asm; + +/// Generates the `PAUSE` instruction +/// +/// The PAUSE instruction is a HINT that indicates the current hart's rate of instruction retirement +/// should be temporarily reduced or paused. The duration of its effect must be bounded and may be zero. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn pause() { + unsafe { asm!(".insn i 0x0F, 0, x0, x0, 0x010", options(nomem, nostack)) } +} + +/// Generates the `NOP` instruction +/// +/// The NOP instruction does not change any architecturally visible state, except for +/// advancing the `pc` and incrementing any applicable performance counters. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn nop() { + unsafe { asm!("nop", options(nomem, nostack)) } +} + +/// Generates the `WFI` instruction +/// +/// The WFI instruction provides a hint to the implementation that the current hart can be stalled +/// until an interrupt might need servicing. This instruction is a hint, +/// and a legal implementation is to simply implement WFI as a NOP. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn wfi() { + asm!("wfi", options(nomem, nostack)) +} + +/// Generates the `FENCE.I` instruction +/// +/// A FENCE.I instruction ensures that a subsequent instruction fetch on a RISC-V hart will see +/// any previous data stores already visible to the same RISC-V hart. +/// +/// FENCE.I does not ensure that other RISC-V harts' instruction fetches will observe the +/// local hart's stores in a multiprocessor system. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn fence_i() { + asm!("fence.i", options(nostack)) +} + +/// Supervisor memory management fence for given virtual address and address space +/// +/// The fence orders only reads and writes made to leaf page table entries corresponding to +/// the virtual address in parameter `vaddr`, for the address space identified by integer parameter +/// `asid`. Accesses to global mappings are not ordered. The fence also invalidates all +/// address-translation cache entries that contain leaf page table entries corresponding to the +/// virtual address in parameter `vaddr` and that match the address space identified by integer +/// parameter `asid`, except for entries containing global mappings. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sfence_vma(vaddr: usize, asid: usize) { + asm!("sfence.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) +} + +/// Supervisor memory management fence for given virtual address +/// +/// The fence orders only reads and writes made to leaf page table entries corresponding to +/// the virtual address in parameter `vaddr`, for all address spaces. +/// The fence also invalidates all address-translation cache entries that contain leaf page +/// table entries corresponding to the virtual address in parameter `vaddr`, for all address spaces. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sfence_vma_vaddr(vaddr: usize) { + asm!("sfence.vma {}, x0", in(reg) vaddr, options(nostack)) +} + +/// Supervisor memory management fence for given address space +/// +/// The fence orders all reads and writes made to any level of the page tables, +/// but only for the address space identified by integer parameter `asid`. +/// +/// Accesses to global mappings are not ordered. The fence also invalidates all +/// address-translation cache entries matching the address space identified by integer +/// parameter `asid`, except for entries containing global mappings. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sfence_vma_asid(asid: usize) { + asm!("sfence.vma x0, {}", in(reg) asid, options(nostack)) +} + +/// Supervisor memory management fence for all address spaces and virtual addresses +/// +/// The fence orders all reads and writes made to any level of the page +/// tables, for all address spaces. The fence also invalidates all address-translation cache entries, +/// for all address spaces. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sfence_vma_all() { + asm!("sfence.vma", options(nostack)) +} + +/// Invalidate supervisor translation cache for given virtual address and address space +/// +/// This instruction invalidates any address-translation cache entries that an +/// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sinval_vma(vaddr: usize, asid: usize) { + // asm!("sinval.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) + asm!(".insn r 0x73, 0, 0x0B, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) +} + +/// Invalidate supervisor translation cache for given virtual address +/// +/// This instruction invalidates any address-translation cache entries that an +/// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sinval_vma_vaddr(vaddr: usize) { + asm!(".insn r 0x73, 0, 0x0B, x0, {}, x0", in(reg) vaddr, options(nostack)) +} + +/// Invalidate supervisor translation cache for given address space +/// +/// This instruction invalidates any address-translation cache entries that an +/// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sinval_vma_asid(asid: usize) { + asm!(".insn r 0x73, 0, 0x0B, x0, x0, {}", in(reg) asid, options(nostack)) +} + +/// Invalidate supervisor translation cache for all address spaces and virtual addresses +/// +/// This instruction invalidates any address-translation cache entries that an +/// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sinval_vma_all() { + asm!(".insn r 0x73, 0, 0x0B, x0, x0, x0", options(nostack)) +} + +/// Generates the `SFENCE.W.INVAL` instruction +/// +/// This instruction guarantees that any previous stores already visible to the current RISC-V hart +/// are ordered before subsequent `SINVAL.VMA` instructions executed by the same hart. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sfence_w_inval() { + // asm!("sfence.w.inval", options(nostack)) + asm!(".insn i 0x73, 0, x0, x0, 0x180", options(nostack)) +} + +/// Generates the `SFENCE.INVAL.IR` instruction +/// +/// This instruction guarantees that any previous SINVAL.VMA instructions executed by the current hart +/// are ordered before subsequent implicit references by that hart to the memory-management data structures. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn sfence_inval_ir() { + // asm!("sfence.inval.ir", options(nostack)) + asm!(".insn i 0x73, 0, x0, x0, 0x181", options(nostack)) +} + +/// Loads virtual machine memory by signed byte integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.B` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_b(src: *const i8) -> i8 { + let value: i8; + asm!(".insn i 0x73, 0x4, {}, {}, 0x600", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Loads virtual machine memory by unsigned byte integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.BU` +/// instruction which is effectively a dereference to any memory address. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_bu(src: *const u8) -> u8 { + let value: u8; + asm!(".insn i 0x73, 0x4, {}, {}, 0x601", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Loads virtual machine memory by signed half integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.H` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_h(src: *const i16) -> i16 { + let value: i16; + asm!(".insn i 0x73, 0x4, {}, {}, 0x640", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Loads virtual machine memory by unsigned half integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.HU` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_hu(src: *const u16) -> u16 { + let value: u16; + asm!(".insn i 0x73, 0x4, {}, {}, 0x641", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Accesses virtual machine instruction by unsigned half integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// the memory being read must be executable in both stages of address translation, +/// but read permission is not required. 
+/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.HU` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlvx_hu(src: *const u16) -> u16 { + let insn: u16; + asm!(".insn i 0x73, 0x4, {}, {}, 0x643", out(reg) insn, in(reg) src, options(readonly, nostack)); + insn +} + +/// Loads virtual machine memory by signed word integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.W` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlv_w(src: *const i32) -> i32 { + let value: i32; + asm!(".insn i 0x73, 0x4, {}, {}, 0x680", out(reg) value, in(reg) src, options(readonly, nostack)); + value +} + +/// Accesses virtual machine instruction by unsigned word integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// the memory being read must be executable in both stages of address translation, +/// but read permission is not required. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.WU` +/// instruction which is effectively a dereference to any memory address. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hlvx_wu(src: *const u32) -> u32 { + let insn: u32; + asm!(".insn i 0x73, 0x4, {}, {}, 0x683", out(reg) insn, in(reg) src, options(readonly, nostack)); + insn +} + +/// Stores virtual machine memory by byte integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.B` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hsv_b(dst: *mut i8, src: i8) { + asm!(".insn r 0x73, 0x4, 0x31, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); +} + +/// Stores virtual machine memory by half integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. +/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.H` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hsv_h(dst: *mut i16, src: i16) { + asm!(".insn r 0x73, 0x4, 0x33, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); +} + +/// Stores virtual machine memory by word integer +/// +/// This instruction performs an explicit memory access as though `V=1`; +/// i.e., with the address translation and protection, and the endianness, that apply to memory +/// accesses in either VS-mode or VU-mode. 
+/// +/// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.W` +/// instruction which is effectively a dereference to any memory address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hsv_w(dst: *mut i32, src: i32) { + asm!(".insn r 0x73, 0x4, 0x35, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); +} + +/// Hypervisor memory management fence for given guest virtual address and guest address space +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all +/// implicit reads by that hart done for VS-stage address translation for instructions that: +/// - are subsequent to the `HFENCE.VVMA`, and +/// - execute when `hgatp.VMID` has the same setting as it did when `HFENCE.VVMA` executed. +/// +/// This fence specifies a single guest virtual address, and a single guest address-space identifier. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) { + // asm!("hfence.vvma {}, {}", in(reg) vaddr, in(reg) asid) + asm!(".insn r 0x73, 0, 0x11, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) +} + +/// Hypervisor memory management fence for given guest virtual address +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all +/// implicit reads by that hart done for VS-stage address translation for instructions that: +/// - are subsequent to the `HFENCE.VVMA`, and +/// - execute when `hgatp.VMID` has the same setting as it did when `HFENCE.VVMA` executed. +/// +/// This fence specifies a single guest virtual address. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_vvma_vaddr(vaddr: usize) { + asm!(".insn r 0x73, 0, 0x11, x0, {}, x0", in(reg) vaddr, options(nostack)) +} + +/// Hypervisor memory management fence for given guest address space +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all +/// implicit reads by that hart done for VS-stage address translation for instructions that: +/// - are subsequent to the `HFENCE.VVMA`, and +/// - execute when `hgatp.VMID` has the same setting as it did when `HFENCE.VVMA` executed. +/// +/// This fence specifies a single guest address-space identifier. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_vvma_asid(asid: usize) { + asm!(".insn r 0x73, 0, 0x11, x0, x0, {}", in(reg) asid, options(nostack)) +} + +/// Hypervisor memory management fence for all guest address spaces and guest virtual addresses +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all +/// implicit reads by that hart done for VS-stage address translation for instructions that: +/// - are subsequent to the `HFENCE.VVMA`, and +/// - execute when `hgatp.VMID` has the same setting as it did when `HFENCE.VVMA` executed. +/// +/// This fence applies to any guest address spaces and guest virtual addresses. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_vvma_all() { + asm!(".insn r 0x73, 0, 0x11, x0, x0, x0", options(nostack)) +} + +/// Hypervisor memory management fence for guest physical address and virtual machine +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all implicit reads +/// by that hart done for G-stage address translation for instructions that follow the HFENCE.GVMA. 
+/// +/// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine +/// by virtual machine identifier (VMID). +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) { + // asm!("hfence.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) + asm!(".insn r 0x73, 0, 0x31, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) +} + +/// Hypervisor memory management fence for guest physical address +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all implicit reads +/// by that hart done for G-stage address translation for instructions that follow the HFENCE.GVMA. +/// +/// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_gvma_gaddr(gaddr: usize) { + asm!(".insn r 0x73, 0, 0x31, x0, {}, x0", in(reg) gaddr, options(nostack)) +} + +/// Hypervisor memory management fence for given virtual machine +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all implicit reads +/// by that hart done for G-stage address translation for instructions that follow the HFENCE.GVMA. +/// +/// This fence specifies a single virtual machine by virtual machine identifier (VMID). +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_gvma_vmid(vmid: usize) { + asm!(".insn r 0x73, 0, 0x31, x0, x0, {}", in(reg) vmid, options(nostack)) +} + +/// Hypervisor memory management fence for all virtual machines and guest physical addresses +/// +/// Guarantees that any previous stores already visible to the current hart are ordered before all implicit reads +/// by that hart done for G-stage address translation for instructions that follow the HFENCE.GVMA. 
+/// +/// This fence specifies all guest physical addresses and all virtual machines. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hfence_gvma_all() { + asm!(".insn r 0x73, 0, 0x31, x0, x0, x0", options(nostack)) +} + +/// Invalidate hypervisor translation cache for given guest virtual address and guest address space +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.VVMA` instruction with the same values of `vaddr` and `asid` would invalidate. +/// +/// This fence specifies a single guest virtual address, and a single guest address-space identifier. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) { + // asm!("hinval.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) + asm!(".insn r 0x73, 0, 0x13, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) +} + +/// Invalidate hypervisor translation cache for given guest virtual address +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.VVMA` instruction with the same values of `vaddr` and `asid` would invalidate. +/// +/// This fence specifies a single guest virtual address. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_vvma_vaddr(vaddr: usize) { + asm!(".insn r 0x73, 0, 0x13, x0, {}, x0", in(reg) vaddr, options(nostack)) +} + +/// Invalidate hypervisor translation cache for given guest address space +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.VVMA` instruction with the same values of `vaddr` and `asid` would invalidate. +/// +/// This fence specifies a single guest address-space identifier. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_vvma_asid(asid: usize) { + asm!(".insn r 0x73, 0, 0x13, x0, x0, {}", in(reg) asid, options(nostack)) +} + +/// Invalidate hypervisor translation cache for all guest address spaces and guest virtual addresses +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.VVMA` instruction with the same values of `vaddr` and `asid` would invalidate. +/// +/// This fence applies to any guest address spaces and guest virtual addresses. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_vvma_all() { + asm!(".insn r 0x73, 0, 0x13, x0, x0, x0", options(nostack)) +} + +/// Invalidate hypervisor translation cache for guest physical address and virtual machine +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate. +/// +/// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine +/// by virtual machine identifier (VMID). +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) { + // asm!("hinval.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) + asm!(".insn r 0x73, 0, 0x33, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) +} + +/// Invalidate hypervisor translation cache for guest physical address +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate. +/// +/// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**. 
+#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_gvma_gaddr(gaddr: usize) { + asm!(".insn r 0x73, 0, 0x33, x0, {}, x0", in(reg) gaddr, options(nostack)) +} + +/// Invalidate hypervisor translation cache for given virtual machine +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate. +/// +/// This fence specifies a single virtual machine by virtual machine identifier (VMID). +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_gvma_vmid(vmid: usize) { + asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack)) +} + +/// Invalidate hypervisor translation cache for all virtual machines and guest physical addresses +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate. +/// +/// This fence specifies all guest physical addresses and all virtual machines. +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub unsafe fn hinval_gvma_all() { + asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack)) +} + +/// Reads the floating-point rounding mode register `frm` +/// +/// According to "F" Standard Extension for Single-Precision Floating-Point, Version 2.2, +/// the rounding mode field is defined as listed in the table below: +/// +/// | Rounding Mode | Mnemonic | Meaning | +/// |:-------------|:----------|:---------| +/// | 000 | RNE | Round to Nearest, ties to Even | +/// | 001 | RTZ | Round towards Zero | +/// | 010 | RDN | Round Down (towards −∞) | +/// | 011 | RUP | Round Up (towards +∞) | +/// | 100 | RMM | Round to Nearest, ties to Max Magnitude | +/// | 101 | | _Reserved for future use._ | +/// | 110 | | _Reserved for future use._ | +/// | 111 | DYN | In Rounding Mode register, _reserved_. 
| +#[inline] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn frrm() -> u32 { + let value: u32; + unsafe { asm!("frrm {}", out(reg) value, options(nomem, nostack)) }; + value +} diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/p.rs b/library/stdarch/crates/core_arch/src/riscv_shared/p.rs new file mode 100644 index 000000000000..c76a0ec4b8f2 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv_shared/p.rs @@ -0,0 +1,1283 @@ +//! RISC-V Packed SIMD intrinsics; shared part. +//! +//! RV64 only part is placed in riscv64 folder. +use crate::arch::asm; +#[cfg(test)] +use stdarch_test::assert_instr; + +// FIXME: Currently the P extension is still unratified, so there is no support +// for it in the upstream LLVM for now, and thus no LLVM built-in functions or +// serialization of instructions are provided. +// +// We add `assert_instr(unknown)` to each function so that we can at least make +// sure they compile. Since there is no serialization yet, we can only write +// "unknown" here, so that if LLVM upstream provides support for the P extension +// at some point in the future, we can know in time and then update our +// implementation. 
+ +/// Adds packed 16-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn add16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x20, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 16-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn radd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x00, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 16-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn uradd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x10, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kadd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x08, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ukadd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x18, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, 
nostack)) + } + value +} + +/// Subtracts packed 16-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x21, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 16-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn rsub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x01, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 16-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ursub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x11, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ksub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x09, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn uksub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x19, {}, {}, {}", 
lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross adds and subtracts packed 16-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn cras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x22, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of adds and subtracts packed 16-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn rcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x02, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of adds and subtracts packed 16-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn urcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x12, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross adds and subtracts packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross adds and subtracts packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn 
ukcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross subtracts and adds packed 16-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn crsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x23, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of subtracts and adds packed 16-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn rcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x03, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of subtracts and adds packed 16-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn urcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x13, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross subtracts and adds packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross subtracts and adds packed 16-bit unsigned numbers, saturating at the numeric bounds 
+#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ukcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight adds and subtracts packed 16-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn stas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of adds and subtracts packed 16-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn rstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x5A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of adds and subtracts packed 16-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn urstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x6A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight adds and subtracts packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x62, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, 
nomem, nostack)) + } + value +} + +/// Straight adds and subtracts packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ukstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x72, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight subtracts and adds packed 16-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn stsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of subtracts and adds packed 16-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn rstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x5B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of subtracts and adds packed 16-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn urstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x6B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight subtracts and adds packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kstsa16(a: usize, b: usize) -> usize { + 
let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x63, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight subtracts and adds packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ukstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x73, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 8-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn add8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x24, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 8-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn radd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x04, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 8-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn uradd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x14, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 8-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn 
kadd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 8-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ukadd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 8-bit signed numbers, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x25, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 8-bit signed numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn rsub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x05, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 8-bit unsigned numbers, dropping least bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ursub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x15, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 8-bit signed numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = 
"riscv_ext_intrinsics", issue = "114544")] +pub fn ksub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 8-bit unsigned numbers, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn uksub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 16-bit elements without rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sra16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x28, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 16-bit elements with rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sra16u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x30, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 16-bit elements without rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn srl16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x29, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 16-bit elements with rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = 
"riscv_ext_intrinsics", issue = "114544")] +pub fn srl16u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x31, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 16-bit elements, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sll16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 16-bit elements, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ksll16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x32, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 16-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kslra16(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 16-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kslra16u(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x33, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 8-bit elements without rounding up +#[inline] +#[cfg_attr(test, 
assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sra8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 8-bit elements with rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sra8u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x34, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 8-bit elements without rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn srl8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 8-bit elements with rounding up +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn srl8u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x35, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 8-bit elements, discarding overflow bits +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sll8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 8-bit elements, saturating at the numeric bounds +#[inline] +#[cfg_attr(test, 
assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ksll8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x36, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 8-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kslra8(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 8-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kslra8u(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x37, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare equality for packed 16-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn cmpeq16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x26, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed signed integers are less than the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn scmplt16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x06, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed signed integers are less than or equal to the others 
+#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn scmple16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed unsigned integers are less than the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ucmplt16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x16, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed unsigned integers are less than or equal to the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ucmple16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare equality for packed 8-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn cmpeq8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x27, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed signed integers are less than the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn scmplt8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x07, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed signed 
integers are less than or equal to the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn scmple8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed unsigned integers are less than the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ucmplt8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x17, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed unsigned integers are less than or equal to the others +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ucmple8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 16-bit packed signed integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn smin16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x40, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 16-bit packed unsigned integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn umin16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x48, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get 
maximum values from 16-bit packed signed integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn smax16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x41, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 16-bit packed unsigned integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn umax16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x49, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/* todo: sclip16, uclip16 */ + +/// Compute the absolute value of packed 16-bit signed integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kabs16(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAD1)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of redundant sign bits of the packed 16-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn clrs16(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAE8)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of leading zero bits of the packed 16-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn clz16(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAE9)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Swap the 16-bit halfwords within each 
32-bit word of a register +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn swap16(a: usize) -> usize { + let value: usize; + // this instruction is an alias for `pkbt rd, rs1, rs1`. + unsafe { + asm!(".insn r 0x77, 0x0, 0x0F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 8-bit packed signed integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn smin8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x44, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 8-bit packed unsigned integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn umin8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x4C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 8-bit packed signed integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn smax8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x45, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 8-bit packed unsigned integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn umax8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x4D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/* todo: sclip8, uclip8 */ + +/// 
Compute the absolute value of packed 8-bit signed integers +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kabs8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAD0)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of redundant sign bits of the packed 8-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn clrs8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAE0)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of leading zero bits of the packed 8-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn clz8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAE1)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Swap the 8-bit bytes within each 16-bit halfword of a register. 
+#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn swap8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAD8)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack first and zeroth into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sunpkd810(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAC8)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack second and zeroth into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sunpkd820(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAC9)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and zeroth into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sunpkd830(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xACA)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and first into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sunpkd831(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xACB)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and second into two 16-bit signed halfwords in each 32-bit chunk +#[inline] 
+#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn sunpkd832(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAD3)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack first and zeroth into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn zunpkd810(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xACC)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack second and zeroth into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn zunpkd820(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xACD)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and zeroth into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn zunpkd830(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xACE)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and first into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn zunpkd831(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xACF)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and second into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] 
+#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn zunpkd832(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAD7)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +// todo: pkbb16, pktt16 + +/// Pack two 16-bit data from bottom and top half from 32-bit chunks +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn pkbt16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x0F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Pack two 16-bit data from top and bottom half from 32-bit chunks +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn pktb16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x1F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of redundant sign bits of the packed 32-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn clrs32(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAF8)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of leading zero bits of the packed 32-bit elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn clz32(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, %lo(0xAF9)", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Calculate the sum of absolute difference of unsigned 8-bit data elements +#[inline] 
+#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn pbsad(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Calculate and accumulate the sum of absolute difference of unsigned 8-bit data elements +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn pbsada(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7F, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Multiply signed 8-bit elements and add 16-bit elements on results for packed 32-bit chunks +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn smaqa(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x64, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Multiply unsigned 8-bit elements and add 16-bit elements on results for packed 32-bit chunks +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn umaqa(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x66, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Multiply signed to unsigned 8-bit and add 16-bit elements on results for packed 32-bit chunks +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn smaqasu(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 
0x65, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds signed lower 16-bit content of two registers with Q15 saturation +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn kaddh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x02, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts signed lower 16-bit content of two registers with Q15 saturation +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ksubh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x03, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds signed lower 16-bit content of two registers with U16 saturation +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn ukaddh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x0A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts signed lower 16-bit content of two registers with U16 saturation +#[inline] +#[cfg_attr(test, assert_instr(unknown))] +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +pub fn uksubh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x0B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs b/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs new file mode 100644 index 000000000000..9472e3c8be9f --- /dev/null +++ 
b/library/stdarch/crates/core_arch/src/riscv_shared/zb.rs @@ -0,0 +1,134 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +#[cfg(target_arch = "riscv32")] +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.orc.b.i32"] + fn _orc_b_32(rs: i32) -> i32; + + #[link_name = "llvm.riscv.clmul.i32"] + fn _clmul_32(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.clmulh.i32"] + fn _clmulh_32(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.clmulr.i32"] + fn _clmulr_32(rs1: i32, rs2: i32) -> i32; +} + +#[cfg(target_arch = "riscv64")] +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.orc.b.i64"] + fn _orc_b_64(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.clmul.i64"] + fn _clmul_64(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.clmulh.i64"] + fn _clmulh_64(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.clmulr.i64"] + fn _clmulr_64(rs1: i64, rs2: i64) -> i64; +} + +/// Bitwise OR-Combine, byte granule +/// +/// Combines the bits within every byte through a reciprocal bitwise logical OR. This sets the bits of each byte in +/// the result rd to all zeros if no bit within the respective byte of rs is set, or to all ones if any bit within the +/// respective byte of rs is set. +/// +/// Source: RISC-V Bit-Manipulation ISA-extensions +/// +/// Version: v1.0.0 +/// +/// Section: 2.24 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zbb")] +#[cfg_attr(test, assert_instr(orc.b))] +#[inline] +pub fn orc_b(rs: usize) -> usize { + #[cfg(target_arch = "riscv32")] + unsafe { + _orc_b_32(rs as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + unsafe { + _orc_b_64(rs as i64) as usize + } +} + +/// Carry-less multiply (low-part) +/// +/// clmul produces the lower half of the 2·XLEN carry-less product. 
+/// +/// Source: RISC-V Bit-Manipulation ISA-extensions +/// +/// Version: v1.0.0 +/// +/// Section: 2.11 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zbc")] +#[cfg_attr(test, assert_instr(clmul))] +#[inline] +pub fn clmul(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + unsafe { + _clmul_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + unsafe { + _clmul_64(rs1 as i64, rs2 as i64) as usize + } +} + +/// Carry-less multiply (high-part) +/// +/// clmulh produces the upper half of the 2·XLEN carry-less product. +/// +/// Source: RISC-V Bit-Manipulation ISA-extensions +/// +/// Version: v1.0.0 +/// +/// Section: 2.12 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zbc")] +#[cfg_attr(test, assert_instr(clmulh))] +#[inline] +pub fn clmulh(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + unsafe { + _clmulh_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + unsafe { + _clmulh_64(rs1 as i64, rs2 as i64) as usize + } +} + +/// Carry-less multiply (reversed) +/// +/// clmulr produces bits 2·XLEN−2:XLEN-1 of the 2·XLEN carry-less product. 
+/// +/// Source: RISC-V Bit-Manipulation ISA-extensions +/// +/// Version: v1.0.0 +/// +/// Section: 2.13 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zbc")] +#[cfg_attr(test, assert_instr(clmulr))] +#[inline] +pub fn clmulr(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + unsafe { + _clmulr_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + unsafe { + _clmulr_64(rs1 as i64, rs2 as i64) as usize + } +} diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/zk.rs b/library/stdarch/crates/core_arch/src/riscv_shared/zk.rs new file mode 100644 index 000000000000..b1e633d17022 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv_shared/zk.rs @@ -0,0 +1,422 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.sm4ed"] + fn _sm4ed(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.sm4ks"] + fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.sm3p0"] + fn _sm3p0(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sm3p1"] + fn _sm3p1(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sig0"] + fn _sha256sig0(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sig1"] + fn _sha256sig1(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sum0"] + fn _sha256sum0(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sum1"] + fn _sha256sum1(rs1: i32) -> i32; +} + +#[cfg(target_arch = "riscv32")] +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.xperm8.i32"] + fn _xperm8_32(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.xperm4.i32"] + fn _xperm4_32(rs1: i32, rs2: i32) -> i32; +} + +#[cfg(target_arch = "riscv64")] +unsafe extern "unadjusted" { + #[link_name = "llvm.riscv.xperm8.i64"] + fn _xperm8_64(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.xperm4.i64"] + fn _xperm4_64(rs1: i64, rs2: i64) -> i64; +} + +/// Byte-wise lookup of indicies 
into a vector in registers. +/// +/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 +/// 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is +/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 +/// is out of bounds. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.47 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zbkx")] +#[cfg_attr(test, assert_instr(xperm8))] +#[inline] +pub fn xperm8(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + unsafe { + _xperm8_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + unsafe { + _xperm8_64(rs1 as i64, rs2 as i64) as usize + } +} + +/// Nibble-wise lookup of indicies into a vector. +/// +/// The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 +/// 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is +/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 +/// is out of bounds. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.48 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zbkx")] +#[cfg_attr(test, assert_instr(xperm4))] +#[inline] +pub fn xperm4(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + unsafe { + _xperm4_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + unsafe { + _xperm4_64(rs1 as i64, rs2 as i64) as usize + } +} + +/// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. 
For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.27 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sig0))] +#[inline] +pub fn sha256sig0(rs1: u32) -> u32 { + unsafe { _sha256sig0(rs1 as i32) as u32 } +} + +/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.28 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sig1))] +#[inline] +pub fn sha256sig1(rs1: u32) -> u32 { + unsafe { _sha256sig1(rs1 as i32) as u32 } +} + +/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). 
+/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.29 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sum0))] +#[inline] +pub fn sha256sum0(rs1: u32) -> u32 { + unsafe { _sha256sum0(rs1 as i32) as u32 } +} + +/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.30 +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sum1))] +#[inline] +pub fn sha256sum1(rs1: u32) -> u32 { + unsafe { _sha256sum1(rs1 as i32) as u32 } +} + +/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\]. +/// +/// Implements a T-tables in hardware style approach to accelerating the SM4 round function. A +/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are +/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction +/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to +/// XLEN bits. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.43 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Details +/// +/// Accelerates the round function `F` in the SM4 block cipher algorithm +/// +/// This instruction is included in extension `Zksed`. It's defined as: +/// +/// ```text +/// SM4ED(x, a, BS) = x ⊕ T(ai) +/// ... where +/// ai = a.bytes[BS] +/// T(ai) = L(τ(ai)) +/// bi = τ(ai) = SM4-S-Box(ai) +/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24) +/// SM4ED = (ci ≪ (BS * 8)) ⊕ x +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ` +/// and linear layer transform `L`. 
+/// +/// In the SM4 algorithm, the round function `F` is defined as: +/// +/// ```text +/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk) +/// ... where +/// T(A) = L(τ(A)) +/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3)) +/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24) +/// ``` +/// +/// It can be implemented by `sm4ed` instruction like: +/// +/// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; +/// let a = x1 ^ x2 ^ x3 ^ rk; +/// let c0 = sm4ed(x0, a, 0); +/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc. +/// let c2 = sm4ed(c1, a, 2); +/// let c3 = sm4ed(c2, a, 3); +/// return c3; // c3 represents c[0..=3] +/// # } +/// ``` +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zksed")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(sm4ed, BS = 0))] +#[inline] +pub fn sm4ed(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + unsafe { _sm4ed(rs1 as i32, rs2 as i32, BS as i32) as u32 } +} + +/// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\] with `bs=0`. +/// +/// Implements a T-tables in hardware style approach to accelerating the SM4 Key Schedule. A +/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are +/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction +/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to +/// XLEN bits. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.44 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Details +/// +/// Accelerates the round function `F` in the SM4 block cipher algorithm +/// +/// This instruction is included in extension `Zksed`. It's defined as: +/// +/// ```text +/// SM4ED(x, a, BS) = x ⊕ T(ai) +/// ... where +/// ai = a.bytes[BS] +/// T(ai) = L(τ(ai)) +/// bi = τ(ai) = SM4-S-Box(ai) +/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24) +/// SM4ED = (ci ≪ (BS * 8)) ⊕ x +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ` +/// and linear layer transform `L`. +/// +/// In the SM4 algorithm, the round function `F` is defined as: +/// +/// ```text +/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk) +/// ... where +/// T(A) = L(τ(A)) +/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3)) +/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24) +/// ``` +/// +/// It can be implemented by `sm4ed` instruction like: +/// +/// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; +/// let a = x1 ^ x2 ^ x3 ^ rk; +/// let c0 = sm4ed(x0, a, 0); +/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc. 
+/// let c2 = sm4ed(c1, a, 2); +/// let c3 = sm4ed(c2, a, 3); +/// return c3; // c3 represents c[0..=3] +/// # } +/// ``` +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zksed")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(sm4ks, BS = 0))] +#[inline] +pub fn sm4ks(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + unsafe { _sm4ks(rs1 as i32, rs2 as i32, BS as i32) as u32 } +} + +/// Implements the P0 transformation function as used in the SM3 hash function [4, 30]. +/// +/// This instruction is supported for the RV32 and RV64 base architectures. It implements the +/// P0 transform of the SM3 hash function [4, 30]. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.41 +/// +/// # Details +/// +/// `P0` transformation function as is used in the SM3 hash algorithm +/// +/// This function is included in `Zksh` extension. It's defined as: +/// +/// ```text +/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17) +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// +/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the +/// compression function `CF` uses the intermediate value `TT2` to calculate +/// the variable `E` in one iteration for subsequent processes. +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zksh")] +#[cfg_attr(test, assert_instr(sm3p0))] +#[inline] +pub fn sm3p0(rs1: u32) -> u32 { + unsafe { _sm3p0(rs1 as i32) as u32 } +} + +/// Implements the P1 transformation function as used in the SM3 hash function [4, 30]. +/// +/// This instruction is supported for the RV32 and RV64 base architectures. It implements the +/// P1 transform of the SM3 hash function [4, 30]. 
This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.42 +/// +/// # Details +/// +/// `P1` transformation function as is used in the SM3 hash algorithm +/// +/// This function is included in `Zksh` extension. It's defined as: +/// +/// ```text +/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23) +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// +/// In the SM3 algorithm, the `P1` transformation is used to expand message, +/// where expanded word `Wj` can be generated from the previous words. +/// The whole process can be described as the following pseudocode: +/// +/// ```text +/// FOR j=16 TO 67 +/// Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6 +/// ENDFOR +/// ``` +#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] +#[target_feature(enable = "zksh")] +#[cfg_attr(test, assert_instr(sm3p1))] +#[inline] +pub fn sm3p1(rs1: u32) -> u32 { + unsafe { _sm3p1(rs1 as i32) as u32 } +} diff --git a/library/stdarch/crates/core_arch/src/s390x/macros.rs b/library/stdarch/crates/core_arch/src/s390x/macros.rs new file mode 100644 index 000000000000..4f0f84ec912b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/s390x/macros.rs @@ -0,0 +1,473 @@ +#![allow(unused_macros)] // FIXME remove when more tests are added +#![allow(unused_imports)] // FIXME remove when more tests are added + +macro_rules! 
test_impl { + ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, _]) => { + #[inline] + #[target_feature(enable = "vector")] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + $call ($($v),*) + } + }; + ($fun:ident +($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + transmute($call ($($v),*)) + } + }; + ($fun:ident +($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $tf:literal $instr:ident]) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = $tf), assert_instr($instr))] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + transmute($call ($($v),*)) + } + }; + ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $tf:literal $instr:ident]) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = $tf), assert_instr($instr))] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + $call ($($v),*) + } + }; + ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $fun ($($v : $ty),*) -> $r { + $call ($($v),*) + } + }; +} + +#[allow(unknown_lints, unused_macro_rules)] +macro_rules! 
impl_vec_trait { + ([$Trait:ident $m:ident] $fun:ident ($a:ty)) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self) -> Self { + $fun(transmute(self)) + } + } + }; + ([$Trait:ident $m:ident]+ $fun:ident ($a:ty)) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self) -> Self { + transmute($fun(transmute(self))) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self) -> Self::Result { + $fun(transmute(self)) + } + } + }; + ([$Trait:ident $m:ident]+ $fun:ident ($a:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self) -> Self::Result { + transmute($fun(transmute(self))) + } + } + }; + ([$Trait:ident $m:ident] 1 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $sf: ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_long_long) -> vector_signed_long_long } + impl_vec_trait!{ [$Trait $m] $sf (vector_float) -> vector_float } + 
impl_vec_trait!{ [$Trait $m] $sf (vector_double) -> vector_double } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait<$b> for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b) -> Self::Result { + $fun(transmute(self), transmute(b)) + } + } + }; + ([$Trait:ident $m:ident]+ $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait<$b> for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b) -> Self::Result { + transmute($fun(transmute(self), transmute(b))) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, ~$b:ty) -> $r:ty) => { + impl_vec_trait!{ [$Trait $m] $fun ($a, $a) -> $r } + impl_vec_trait!{ [$Trait $m] $fun ($a, $b) -> $r } + impl_vec_trait!{ [$Trait $m] $fun ($b, $a) -> $r } + }; + ([$Trait:ident $m:ident] ~($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $ug:ident, $sg:ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, ~vector_bool_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, ~vector_bool_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, ~vector_bool_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, ~vector_bool_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, ~vector_bool_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, ~vector_bool_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m] $ug (vector_unsigned_long_long, ~vector_bool_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$Trait $m] $sg (vector_signed_long_long, ~vector_bool_long_long) -> vector_signed_long_long } + }; + ([$Trait:ident $m:ident] ~($fn:ident)) => { + impl_vec_trait!{ 
[$Trait $m] ~($fn, $fn, $fn, $fn, $fn, $fn, $fn, $fn) } + }; + ([$Trait:ident $m:ident] 2 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $ug:ident, $sg:ident)) => { + impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m] $ug (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$Trait $m] $sg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_long_long } + }; + ([$Trait:ident $m:ident] 2 ($fn:ident)) => { + impl_vec_trait!{ [$Trait $m] ($fn, $fn, $fn, $fn, $fn, $fn, $fn, $fn) } + }; + ([$Trait:ident $m:ident]+ 2b ($b:ident, $h:ident, $w:ident, $g:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_bool_char, vector_bool_char) -> vector_bool_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_bool_short, vector_bool_short) -> vector_bool_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_bool_int, vector_bool_int) -> vector_bool_int } + impl_vec_trait!{ [$Trait $m]+ $w 
(vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_signed_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $g (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$Trait $m]+ $g (vector_signed_long_long, vector_signed_long_long) -> vector_signed_long_long } + }; + ([$Trait:ident $m:ident]+ 2b ($fn:ident)) => { + impl_vec_trait!{ [$Trait $m]+ 2b ($fn, $fn, $fn, $fn) } + }; + ([$Trait:ident $m:ident]+ 2c ($b:ident, $h:ident, $w:ident, $g:ident, $s:ident, $d:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_bool_char, vector_bool_char) -> vector_bool_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_bool_short, vector_bool_short) -> vector_bool_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_bool_int, vector_bool_int) -> vector_bool_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_signed_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $g (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$Trait $m]+ $g (vector_signed_long_long, vector_signed_long_long) -> vector_signed_long_long } + impl_vec_trait!{ [$Trait $m]+ $s (vector_float, vector_float) -> vector_float } + impl_vec_trait!{ [$Trait $m]+ $d (vector_double, vector_double) -> vector_double } + }; + ([$Trait:ident $m:ident]+ 2c ($fn:ident)) => { + impl_vec_trait!{ [$Trait 
$m]+ 2c ($fn, $fn, $fn, $fn, $fn, $fn) } + }; +} + +macro_rules! s_t_l { + (i64x2) => { + vector_signed_long_long + }; + (i32x4) => { + vector_signed_int + }; + (i16x8) => { + vector_signed_short + }; + (i8x16) => { + vector_signed_char + }; + + (u64x2) => { + vector_unsigned_long_long + }; + (u32x4) => { + vector_unsigned_int + }; + (u16x8) => { + vector_unsigned_short + }; + (u8x16) => { + vector_unsigned_char + }; + + (f32x4) => { + vector_float + }; + (f64x2) => { + vector_double + }; +} + +macro_rules! l_t_t { + (vector_signed_long_long) => { + i64 + }; + (vector_signed_int) => { + i32 + }; + (vector_signed_short) => { + i16 + }; + (vector_signed_char) => { + i8 + }; + + (vector_unsigned_long_long ) => { + u64 + }; + (vector_unsigned_int ) => { + u32 + }; + (vector_unsigned_short ) => { + u16 + }; + (vector_unsigned_char ) => { + u8 + }; + + (vector_bool_long_long ) => { + u64 + }; + (vector_bool_int ) => { + u32 + }; + (vector_bool_short ) => { + u16 + }; + (vector_bool_char ) => { + u8 + }; + + (vector_float) => { + f32 + }; + (vector_double) => { + f64 + }; +} + +macro_rules! t_t_l { + (i64) => { + vector_signed_long_long + }; + (i32) => { + vector_signed_int + }; + (i16) => { + vector_signed_short + }; + (i8) => { + vector_signed_char + }; + + (u64) => { + vector_unsigned_long_long + }; + (u32) => { + vector_unsigned_int + }; + (u16) => { + vector_unsigned_short + }; + (u8) => { + vector_unsigned_char + }; + + (f32) => { + vector_float + }; + (f64) => { + vector_double + }; +} + +macro_rules! t_t_s { + (i64) => { + i64x2 + }; + (i32) => { + i32x4 + }; + (i16) => { + i16x8 + }; + (i8) => { + i8x16 + }; + + (u64) => { + u64x2 + }; + (u32) => { + u32x4 + }; + (u16) => { + u16x8 + }; + (u8) => { + u8x16 + }; + + (f32) => { + f32x4 + }; + (f64) => { + f64x2 + }; +} + +macro_rules! 
t_u { + (vector_bool_char) => { + vector_unsigned_char + }; + (vector_bool_short) => { + vector_unsigned_short + }; + (vector_bool_int) => { + vector_unsigned_int + }; + (vector_bool_long_long) => { + vector_unsigned_long_long + }; + (vector_unsigned_char) => { + vector_unsigned_char + }; + (vector_unsigned_short) => { + vector_unsigned_short + }; + (vector_unsigned_int) => { + vector_unsigned_int + }; + (vector_unsigned_long_long) => { + vector_unsigned_long_long + }; + (vector_signed_char) => { + vector_unsigned_char + }; + (vector_signed_short) => { + vector_unsigned_short + }; + (vector_signed_int) => { + vector_unsigned_int + }; + (vector_signed_long_long) => { + vector_unsigned_long_long + }; + (vector_float) => { + vector_unsigned_int + }; + (vector_double) => { + vector_unsigned_long_long + }; +} + +macro_rules! t_b { + (vector_bool_char) => { + vector_bool_char + }; + (vector_bool_short) => { + vector_bool_short + }; + (vector_bool_int) => { + vector_bool_int + }; + (vector_bool_long_long) => { + vector_bool_long_long + }; + (vector_signed_char) => { + vector_bool_char + }; + (vector_signed_short) => { + vector_bool_short + }; + (vector_signed_int) => { + vector_bool_int + }; + (vector_signed_long_long) => { + vector_bool_long_long + }; + (vector_unsigned_char) => { + vector_bool_char + }; + (vector_unsigned_short) => { + vector_bool_short + }; + (vector_unsigned_int) => { + vector_bool_int + }; + (vector_unsigned_long_long) => { + vector_bool_long_long + }; + (vector_float) => { + vector_bool_int + }; + (vector_double) => { + vector_bool_long_long + }; +} + +macro_rules! impl_from { + ($s: ident) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl From<$s> for s_t_l!($s) { + fn from (v: $s) -> Self { + unsafe { + transmute(v) + } + } + } + }; + ($($s: ident),*) => { + $( + impl_from! { $s } + )* + }; +} + +macro_rules! 
impl_neg { + ($s: ident : $zero: expr) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl crate::ops::Neg for s_t_l!($s) { + type Output = s_t_l!($s); + fn neg(self) -> Self::Output { + unsafe { simd_neg(self) } + } + } + }; +} + +pub(crate) use impl_from; +pub(crate) use impl_neg; +pub(crate) use impl_vec_trait; +pub(crate) use l_t_t; +pub(crate) use s_t_l; +pub(crate) use t_b; +pub(crate) use t_t_l; +pub(crate) use t_t_s; +pub(crate) use t_u; +pub(crate) use test_impl; diff --git a/library/stdarch/crates/core_arch/src/s390x/mod.rs b/library/stdarch/crates/core_arch/src/s390x/mod.rs new file mode 100644 index 000000000000..7d3b3f2d99aa --- /dev/null +++ b/library/stdarch/crates/core_arch/src/s390x/mod.rs @@ -0,0 +1,7 @@ +//! `s390x` intrinsics + +pub(crate) mod macros; + +mod vector; +#[unstable(feature = "stdarch_s390x", issue = "130869")] +pub use self::vector::*; diff --git a/library/stdarch/crates/core_arch/src/s390x/vector.rs b/library/stdarch/crates/core_arch/src/s390x/vector.rs new file mode 100644 index 000000000000..ae5c37ce0178 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs @@ -0,0 +1,7382 @@ +//! s390x vector intrinsics. +//! +//! For more info see the [Reference Summary] or the online [IBM docs]. +//! +//! [Reference Summary]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf +//! [IBM docs]: https://www.ibm.com/docs/en/zos/2.4.0?topic=support-vector-built-in-functions + +#![allow(non_camel_case_types)] + +use crate::{core_arch::simd::*, intrinsics::simd::*, mem::MaybeUninit, mem::transmute}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +use super::macros::*; + +types! 
{ + #![unstable(feature = "stdarch_s390x", issue = "135681")] + + /// s390x-specific 128-bit wide vector of sixteen packed `i8` + pub struct vector_signed_char(16 x i8); + /// s390x-specific 128-bit wide vector of sixteen packed `u8` + pub struct vector_unsigned_char(16 x u8); + /// s390x-specific 128-bit wide vector mask of sixteen packed elements + pub struct vector_bool_char(16 x i8); + + /// s390x-specific 128-bit wide vector of eight packed `i16` + pub struct vector_signed_short(8 x i16); + /// s390x-specific 128-bit wide vector of eight packed `u16` + pub struct vector_unsigned_short(8 x u16); + /// s390x-specific 128-bit wide vector mask of eight packed elements + pub struct vector_bool_short(8 x i16); + + /// s390x-specific 128-bit wide vector of four packed `i32` + pub struct vector_signed_int(4 x i32); + /// s390x-specific 128-bit wide vector of four packed `u32` + pub struct vector_unsigned_int(4 x u32); + /// s390x-specific 128-bit wide vector mask of four packed elements + pub struct vector_bool_int(4 x i32); + + /// s390x-specific 128-bit wide vector of two packed `i64` + pub struct vector_signed_long_long(2 x i64); + /// s390x-specific 128-bit wide vector of two packed `u64` + pub struct vector_unsigned_long_long(2 x u64); + /// s390x-specific 128-bit wide vector mask of two packed elements + pub struct vector_bool_long_long(2 x i64); + + /// s390x-specific 128-bit wide vector of four packed `f32` + pub struct vector_float(4 x f32); + /// s390x-specific 128-bit wide vector of two packed `f64` + pub struct vector_double(2 x f64); +} + +#[repr(packed)] +struct PackedTuple { + x: T, + y: U, +} + +#[allow(improper_ctypes)] +#[rustfmt::skip] +unsafe extern "unadjusted" { + #[link_name = "llvm.smax.v16i8"] fn vmxb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.smax.v8i16"] fn vmxh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.smax.v4i32"] fn vmxf(a: 
vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.smax.v2i64"] fn vmxg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.umax.v16i8"] fn vmxlb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.umax.v8i16"] fn vmxlh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.umax.v4i32"] fn vmxlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.umax.v2i64"] fn vmxlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.smin.v16i8"] fn vmnb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.smin.v8i16"] fn vmnh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.smin.v4i32"] fn vmnf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.smin.v2i64"] fn vmng(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.umin.v16i8"] fn vmnlb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.umin.v8i16"] fn vmnlh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.umin.v4i32"] fn vmnlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.umin.v2i64"] fn vmnlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float; + #[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double; + + #[link_name = "llvm.rint.v4f32"] fn rint_v4f32(a: vector_float) -> vector_float; + #[link_name = "llvm.rint.v2f64"] fn rint_v2f64(a: vector_double) -> 
vector_double; + + #[link_name = "llvm.roundeven.v4f32"] fn roundeven_v4f32(a: vector_float) -> vector_float; + #[link_name = "llvm.roundeven.v2f64"] fn roundeven_v2f64(a: vector_double) -> vector_double; + + #[link_name = "llvm.s390.vsra"] fn vsra(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vsrl"] fn vsrl(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vsl"] fn vsl(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + + #[link_name = "llvm.s390.vsrab"] fn vsrab(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + + #[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16; + + #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.fshl.v2i64"] fn fshlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char; + #[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> 
vector_signed_short; + #[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int; + #[link_name = "llvm.s390.verimg"] fn verimg(a: vector_signed_long_long, b: vector_signed_long_long, c: vector_signed_long_long, d: i32) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vperm"] fn vperm(a: vector_signed_char, b: vector_signed_char, c: vector_unsigned_char) -> vector_signed_char; + + #[link_name = "llvm.s390.vsumb"] fn vsumb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_int; + #[link_name = "llvm.s390.vsumh"] fn vsumh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vsumgh"] fn vsumgh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vsumgf"] fn vsumgf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vsumqf"] fn vsumqf(a: vector_unsigned_int, b: vector_unsigned_int) -> u128; + #[link_name = "llvm.s390.vsumqg"] fn vsumqg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128; + + #[link_name = "llvm.s390.vscbiq"] fn vscbiq(a: u128, b: u128) -> u128; + #[link_name = "llvm.s390.vsbiq"] fn vsbiq(a: u128, b: u128, c: u128) -> u128; + #[link_name = "llvm.s390.vsbcbiq"] fn vsbcbiq(a: u128, b: u128, c: u128) -> u128; + + #[link_name = "llvm.s390.vacq"] fn vacq(a: u128, b: u128, c: u128) -> u128; + + #[link_name = "llvm.s390.vscbib"] fn vscbib(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vscbih"] fn vscbih(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vscbif"] fn vscbif(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vscbig"] fn vscbig(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> 
vector_unsigned_long_long; + + #[link_name = "llvm.s390.vfaeb"] fn vfaeb(a: vector_signed_char, b: vector_signed_char, c: i32) -> vector_signed_char; + #[link_name = "llvm.s390.vfaeh"] fn vfaeh(a: vector_signed_short, b: vector_signed_short, c: i32) -> vector_signed_short; + #[link_name = "llvm.s390.vfaef"] fn vfaef(a: vector_signed_int, b: vector_signed_int, c: i32) -> vector_signed_int; + + #[link_name = "llvm.s390.vfaezb"] fn vfaezb(a: vector_signed_char, b: vector_signed_char, c: i32) -> vector_signed_char; + #[link_name = "llvm.s390.vfaezh"] fn vfaezh(a: vector_signed_short, b: vector_signed_short, c: i32) -> vector_signed_short; + #[link_name = "llvm.s390.vfaezf"] fn vfaezf(a: vector_signed_int, b: vector_signed_int, c: i32) -> vector_signed_int; + + #[link_name = "llvm.s390.vfaebs"] fn vfaebs(a: vector_signed_char, b: vector_signed_char, c: i32) -> PackedTuple; + #[link_name = "llvm.s390.vfaehs"] fn vfaehs(a: vector_signed_short, b: vector_signed_short, c: i32) -> PackedTuple; + #[link_name = "llvm.s390.vfaefs"] fn vfaefs(a: vector_signed_int, b: vector_signed_int, c: i32) -> PackedTuple; + + #[link_name = "llvm.s390.vfaezbs"] fn vfaezbs(a: vector_signed_char, b: vector_signed_char, c: i32) -> PackedTuple; + #[link_name = "llvm.s390.vfaezhs"] fn vfaezhs(a: vector_signed_short, b: vector_signed_short, c: i32) -> PackedTuple; + #[link_name = "llvm.s390.vfaezfs"] fn vfaezfs(a: vector_signed_int, b: vector_signed_int, c: i32) -> PackedTuple; + + #[link_name = "llvm.s390.vll"] fn vll(a: u32, b: *const u8) -> vector_signed_char; + #[link_name = "llvm.s390.vstl"] fn vstl(a: vector_signed_char, b: u32, c: *mut u8); + + #[link_name = "llvm.s390.vlrl"] fn vlrl(a: u32, b: *const u8) -> vector_unsigned_char; + #[link_name = "llvm.s390.vstrl"] fn vstrl(a: vector_unsigned_char, b: u32, c: *mut u8); + + #[link_name = "llvm.s390.lcbb"] fn lcbb(a: *const u8, b: u32) -> u32; + #[link_name = "llvm.s390.vlbb"] fn vlbb(a: *const u8, b: u32) -> MaybeUninit; + + #[link_name = 
"llvm.s390.vpksh"] fn vpksh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char; + #[link_name = "llvm.s390.vpksf"] fn vpksf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short; + #[link_name = "llvm.s390.vpksg"] fn vpksg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_int; + + #[link_name = "llvm.s390.vpklsh"] fn vpklsh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char; + #[link_name = "llvm.s390.vpklsf"] fn vpklsf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short; + #[link_name = "llvm.s390.vpklsg"] fn vpklsg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vpkshs"] fn vpkshs(a: vector_signed_short, b: vector_signed_short) -> PackedTuple; + #[link_name = "llvm.s390.vpksfs"] fn vpksfs(a: vector_signed_int, b: vector_signed_int) -> PackedTuple; + #[link_name = "llvm.s390.vpksgs"] fn vpksgs(a: vector_signed_long_long, b: vector_signed_long_long) -> PackedTuple; + + #[link_name = "llvm.s390.vpklshs"] fn vpklshs(a: vector_unsigned_short, b: vector_unsigned_short) -> PackedTuple; + #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple; + #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple; + + #[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long; + #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long; + + #[link_name = 
"llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vavgg"] fn vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vavglb"] fn vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vavglh"] fn vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vavglf"] fn vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vavglg"] fn vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; 
+ #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmlhb"] fn vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmlhh"] fn vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmlhf"] fn vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vmaeb"] fn vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmaeh"] fn vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vmaef"] fn vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmaleb"] fn vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmaleh"] fn vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmalef"] fn vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: 
vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmaob"] fn vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmaoh"] fn vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vmaof"] fn vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmalob"] fn vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmaloh"] fn vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmalof"] fn vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmahb"] fn vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmahh"] fn vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmahf"] fn vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmalhb"] fn vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmalhh"] fn vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmalhf"] fn vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vmalb"] fn vmalb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmalh"] fn vmalh(a: vector_signed_short, b: 
vector_signed_short, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmalf"] fn vmalf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmallb"] fn vmallb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmallh"] fn vmallh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmallf"] fn vmallf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vgfmg"] fn vgfmg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128; + + #[link_name = "llvm.s390.vgfmab"] fn vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vgfmah"] fn vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vgfmaf"] fn vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vgfmag"] fn vgfmag(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128) -> u128; + + #[link_name = "llvm.s390.vbperm"] fn vbperm(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vftcisb"] fn vftcisb(a: vector_float, b: u32) -> PackedTuple; + #[link_name = "llvm.s390.vftcidb"] fn vftcidb(a: vector_double, b: u32) -> 
PackedTuple; + + #[link_name = "llvm.s390.vtm"] fn vtm(a: i8x16, b: i8x16) -> i32; + + #[link_name = "llvm.s390.vstrsb"] fn vstrsb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple; + #[link_name = "llvm.s390.vstrsh"] fn vstrsh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple; + #[link_name = "llvm.s390.vstrsf"] fn vstrsf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple; + + #[link_name = "llvm.s390.vstrszb"] fn vstrszb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple; + #[link_name = "llvm.s390.vstrszh"] fn vstrszh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple; + #[link_name = "llvm.s390.vstrszf"] fn vstrszf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple; + + #[link_name = "llvm.s390.vistrb"] fn vistrb(a: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vistrh"] fn vistrh(a: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vistrf"] fn vistrf(a: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vistrbs"] fn vistrbs(a: vector_unsigned_char) -> PackedTuple; + #[link_name = "llvm.s390.vistrhs"] fn vistrhs(a: vector_unsigned_short) -> PackedTuple; + #[link_name = "llvm.s390.vistrfs"] fn vistrfs(a: vector_unsigned_int) -> PackedTuple; + + #[link_name = "llvm.s390.vmslg"] fn vmslg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128, d: u32) -> u128; + + #[link_name = "llvm.s390.vstrcb"] fn vstrcb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char; + #[link_name = "llvm.s390.vstrch"] fn vstrch(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short; + #[link_name = "llvm.s390.vstrcf"] fn vstrcf(a: vector_unsigned_int, b: 
vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int; + + #[link_name = "llvm.s390.vstrcbs"] fn vstrcbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple; + #[link_name = "llvm.s390.vstrchs"] fn vstrchs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple; + #[link_name = "llvm.s390.vstrcfs"] fn vstrcfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple; + + #[link_name = "llvm.s390.vstrczb"] fn vstrczb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char; + #[link_name = "llvm.s390.vstrczh"] fn vstrczh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short; + #[link_name = "llvm.s390.vstrczf"] fn vstrczf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int; + + #[link_name = "llvm.s390.vstrczbs"] fn vstrczbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple; + #[link_name = "llvm.s390.vstrczhs"] fn vstrczhs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple; + #[link_name = "llvm.s390.vstrczfs"] fn vstrczfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple; + + #[link_name = "llvm.s390.vfeeb"] fn vfeeb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeeh"] fn vfeeh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfeef"] fn vfeef(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.s390.vfeezb"] fn vfeezb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeezh"] fn vfeezh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfeezf"] fn vfeezf(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.s390.vfeebs"] fn vfeebs(a: i8x16, b: i8x16) -> PackedTuple; + #[link_name = "llvm.s390.vfeehs"] fn 
vfeehs(a: i16x8, b: i16x8) -> PackedTuple; + #[link_name = "llvm.s390.vfeefs"] fn vfeefs(a: i32x4, b: i32x4) -> PackedTuple; + + #[link_name = "llvm.s390.vfeezbs"] fn vfeezbs(a: i8x16, b: i8x16) -> PackedTuple; + #[link_name = "llvm.s390.vfeezhs"] fn vfeezhs(a: i16x8, b: i16x8) -> PackedTuple; + #[link_name = "llvm.s390.vfeezfs"] fn vfeezfs(a: i32x4, b: i32x4) -> PackedTuple; + + #[link_name = "llvm.s390.vfeneb"] fn vfeneb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeneh"] fn vfeneh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfenef"] fn vfenef(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.s390.vfenezb"] fn vfenezb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfenezh"] fn vfenezh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfenezf"] fn vfenezf(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.s390.vfenebs"] fn vfenebs(a: i8x16, b: i8x16) -> PackedTuple; + #[link_name = "llvm.s390.vfenehs"] fn vfenehs(a: i16x8, b: i16x8) -> PackedTuple; + #[link_name = "llvm.s390.vfenefs"] fn vfenefs(a: i32x4, b: i32x4) -> PackedTuple; + + #[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple; + #[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple; + #[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple; +} + +impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 } + +impl_neg! { i8x16 : 0 } +impl_neg! { i16x8 : 0 } +impl_neg! { i32x4 : 0 } +impl_neg! { i64x2 : 0 } +impl_neg! { f32x4 : 0f32 } +impl_neg! 
{ f64x2 : 0f64 } + +#[repr(simd)] +struct ShuffleMask([u32; N]); + +impl ShuffleMask { + const fn reverse() -> Self { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = (N - i - 1) as u32; + i += 1; + } + ShuffleMask(index) + } + + const fn merge_low() -> Self { + let mut mask = [0; N]; + let mut i = N / 2; + let mut index = 0; + while index < N { + mask[index] = i as u32; + mask[index + 1] = (i + N) as u32; + + i += 1; + index += 2; + } + ShuffleMask(mask) + } + + const fn merge_high() -> Self { + let mut mask = [0; N]; + let mut i = 0; + let mut index = 0; + while index < N { + mask[index] = i as u32; + mask[index + 1] = (i + N) as u32; + + i += 1; + index += 2; + } + ShuffleMask(mask) + } + + const fn pack() -> Self { + let mut mask = [0; N]; + let mut i = 1; + let mut index = 0; + while index < N { + mask[index] = i as u32; + + i += 2; + index += 1; + } + ShuffleMask(mask) + } + + const fn unpack_low() -> Self { + let mut mask = [0; N]; + let mut i = 0; + while i < N { + mask[i] = (N + i) as u32; + i += 1; + } + ShuffleMask(mask) + } + + const fn unpack_high() -> Self { + let mut mask = [0; N]; + let mut i = 0; + while i < N { + mask[i] = i as u32; + i += 1; + } + ShuffleMask(mask) + } +} + +const fn genmask() -> [u8; 16] { + let mut bits = MASK; + let mut elements = [0u8; 16]; + + let mut i = 0; + while i < 16 { + elements[i] = match bits & (1u16 << 15) { + 0 => 0, + _ => 0xFF, + }; + + bits <<= 1; + i += 1; + } + + elements +} + +const fn genmasks(bit_width: u32, a: u8, b: u8) -> u64 { + let bit_width = bit_width as u8; + let a = a % bit_width; + let mut b = b % bit_width; + if a > b { + b = bit_width - 1; + } + + // of course these indices start from the left + let a = (bit_width - 1) - a; + let b = (bit_width - 1) - b; + + ((1u64.wrapping_shl(a as u32 + 1)) - 1) & !((1u64.wrapping_shl(b as u32)) - 1) +} + +const fn validate_block_boundary(block_boundary: u16) -> u32 { + assert!( + block_boundary.is_power_of_two() && block_boundary >= 64 
&& block_boundary <= 4096, + "block boundary must be a constant power of 2 from 64 to 4096", + ); + + // so that 64 is encoded as 0, 128 as 1, ect. + block_boundary as u32 >> 7 +} + +enum FindImm { + Eq = 4, + Ne = 12, + EqIdx = 0, + NeIdx = 8, +} + +#[macro_use] +mod sealed { + use super::*; + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAdd { + type Result; + unsafe fn vec_add(self, other: Other) -> Self::Result; + } + + macro_rules! impl_add { + ($name:ident, $a:ty, $instr:ident) => { + impl_add!($name, $a, $a, $a, $instr); + }; + ($name:ident, $a:ty, $b:ty, $c:ty, $instr:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $name(a: $a, b: $b) -> $c { + transmute(simd_add(transmute(a), b)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorAdd<$b> for $a { + type Result = $c; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_add(self, other: $b) -> Self::Result { + $name(self, other) + } + } + }; + } + + #[rustfmt::skip] + mod impl_add { + use super::*; + + impl_add!(va_sc, vector_signed_char, vab); + impl_add!(va_uc, vector_unsigned_char, vab); + impl_add!(va_sh, vector_signed_short, vah); + impl_add!(va_uh, vector_unsigned_short, vah); + impl_add!(va_sf, vector_signed_int, vaf); + impl_add!(va_uf, vector_unsigned_int, vaf); + impl_add!(va_sg, vector_signed_long_long, vag); + impl_add!(va_ug, vector_unsigned_long_long, vag); + + impl_add!(va_sc_bc, vector_signed_char, vector_bool_char, vector_signed_char, vab); + impl_add!(va_uc_bc, vector_unsigned_char, vector_bool_char, vector_unsigned_char, vab); + impl_add!(va_sh_bh, vector_signed_short, vector_bool_short, vector_signed_short, vah); + impl_add!(va_uh_bh, vector_unsigned_short, vector_bool_short, vector_unsigned_short, vah); + impl_add!(va_sf_bf, vector_signed_int, vector_bool_int, vector_signed_int, vaf); + impl_add!(va_uf_bf, vector_unsigned_int, vector_bool_int, 
vector_unsigned_int, vaf); + impl_add!(va_sg_bg, vector_signed_long_long, vector_bool_long_long, vector_signed_long_long, vag); + impl_add!(va_ug_bg, vector_unsigned_long_long, vector_bool_long_long, vector_unsigned_long_long, vag); + + impl_add!(va_bc_sc, vector_bool_char, vector_signed_char, vector_signed_char, vab); + impl_add!(va_bc_uc, vector_bool_char, vector_unsigned_char, vector_unsigned_char, vab); + impl_add!(va_bh_sh, vector_bool_short, vector_signed_short, vector_signed_short, vah); + impl_add!(va_bh_uh, vector_bool_short, vector_unsigned_short, vector_unsigned_short, vah); + impl_add!(va_bf_sf, vector_bool_int, vector_signed_int, vector_signed_int, vaf); + impl_add!(va_bf_uf, vector_bool_int, vector_unsigned_int, vector_unsigned_int, vaf); + impl_add!(va_bg_sg, vector_bool_long_long, vector_signed_long_long, vector_signed_long_long, vag); + impl_add!(va_bg_ug, vector_bool_long_long, vector_unsigned_long_long, vector_unsigned_long_long, vag); + + impl_add!(va_double, vector_double, vfadb); + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vfasb))] + pub unsafe fn va_float(a: vector_float, b: vector_float) -> vector_float { + transmute(simd_add(a, b)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorAdd for vector_float { + type Result = Self; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_add(self, other: Self) -> Self::Result { + va_float(self, other) + } + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSub { + type Result; + unsafe fn vec_sub(self, other: Other) -> Self::Result; + } + + macro_rules! 
impl_sub { + ($name:ident, $a:ty, $instr:ident) => { + impl_sub!($name, $a, $a, $a, $instr); + }; + ($name:ident, $a:ty, $b:ty, $c:ty, $instr:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $name(a: $a, b: $b) -> $c { + transmute(simd_sub(transmute(a), b)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSub<$b> for $a { + type Result = $c; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sub(self, other: $b) -> Self::Result { + $name(self, other) + } + } + }; + } + + #[rustfmt::skip] + mod impl_sub { + use super::*; + + impl_sub!(vs_sc, vector_signed_char, vsb); + impl_sub!(vs_uc, vector_unsigned_char, vsb); + impl_sub!(vs_sh, vector_signed_short, vsh); + impl_sub!(vs_uh, vector_unsigned_short, vsh); + impl_sub!(vs_sf, vector_signed_int, vsf); + impl_sub!(vs_uf, vector_unsigned_int, vsf); + impl_sub!(vs_sg, vector_signed_long_long, vsg); + impl_sub!(vs_ug, vector_unsigned_long_long, vsg); + + impl_sub!(vs_sc_bc, vector_signed_char, vector_bool_char, vector_signed_char, vsb); + impl_sub!(vs_uc_bc, vector_unsigned_char, vector_bool_char, vector_unsigned_char, vsb); + impl_sub!(vs_sh_bh, vector_signed_short, vector_bool_short, vector_signed_short, vsh); + impl_sub!(vs_uh_bh, vector_unsigned_short, vector_bool_short, vector_unsigned_short, vsh); + impl_sub!(vs_sf_bf, vector_signed_int, vector_bool_int, vector_signed_int, vsf); + impl_sub!(vs_uf_bf, vector_unsigned_int, vector_bool_int, vector_unsigned_int, vsf); + impl_sub!(vs_sg_bg, vector_signed_long_long, vector_bool_long_long, vector_signed_long_long, vsg); + impl_sub!(vs_ug_bg, vector_unsigned_long_long, vector_bool_long_long, vector_unsigned_long_long, vsg); + + impl_sub!(vs_bc_sc, vector_bool_char, vector_signed_char, vector_signed_char, vsb); + impl_sub!(vs_bc_uc, vector_bool_char, vector_unsigned_char, vector_unsigned_char, vsb); + impl_sub!(vs_bh_sh, vector_bool_short, 
vector_signed_short, vector_signed_short, vsh); + impl_sub!(vs_bh_uh, vector_bool_short, vector_unsigned_short, vector_unsigned_short, vsh); + impl_sub!(vs_bf_sf, vector_bool_int, vector_signed_int, vector_signed_int, vsf); + impl_sub!(vs_bf_uf, vector_bool_int, vector_unsigned_int, vector_unsigned_int, vsf); + impl_sub!(vs_bg_sg, vector_bool_long_long, vector_signed_long_long, vector_signed_long_long, vsg); + impl_sub!(vs_bg_ug, vector_bool_long_long, vector_unsigned_long_long, vector_unsigned_long_long, vsg); + + impl_sub!(vs_double, vector_double, vfsdb); + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vfssb))] + pub unsafe fn vs_float(a: vector_float, b: vector_float) -> vector_float { + transmute(simd_sub(a, b)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSub for vector_float { + type Result = Self; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sub(self, other: Self) -> Self::Result { + vs_float(self, other) + } + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMul { + unsafe fn vec_mul(self, b: Self) -> Self; + } + + macro_rules! 
impl_mul { + ($name:ident, $a:ty, std_simd) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMul for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_mul(self, other: Self) -> Self { + transmute(simd_mul(transmute(self), other)) + } + } + }; + ($name:ident, $a:ty, $instr:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $name(a: $a, b: $a) -> $a { + transmute(simd_mul(transmute(a), b)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMul for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_mul(self, other: Self) -> Self { + $name(self, other) + } + } + }; + } + + #[rustfmt::skip] + mod impl_mul { + use super::*; + + impl_mul!(vml_sc, vector_signed_char, vmlb); + impl_mul!(vml_uc, vector_unsigned_char, vmlb); + impl_mul!(vml_sh, vector_signed_short, vmlhw); + impl_mul!(vml_uh, vector_unsigned_short, vmlhw); + impl_mul!(vml_sf, vector_signed_int, vmlf); + impl_mul!(vml_uf, vector_unsigned_int, vmlf); + impl_mul!(vml_sg, vector_signed_long_long, std_simd); + impl_mul!(vml_ug, vector_unsigned_long_long, std_simd); + + impl_mul!(vml_float, vector_float, std_simd); + impl_mul!(vml_double, vector_double, vfmdb); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMax { + type Result; + unsafe fn vec_max(self, b: Other) -> Self::Result; + } + + test_impl! { vec_vmxsb (a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [vmxb, vmxb] } + test_impl! { vec_vmxsh (a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [vmxh, vmxh] } + test_impl! { vec_vmxsf (a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [vmxf, vmxf] } + test_impl! { vec_vmxsg (a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [vmxg, vmxg] } + + test_impl! 
{ vec_vmxslb (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vmxlb, vmxlb] } + test_impl! { vec_vmxslh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vmxlh, vmxlh] } + test_impl! { vec_vmxslf (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vmxlf, vmxlf] } + test_impl! { vec_vmxslg (a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [vmxlg, vmxlg] } + + impl_vec_trait! { [VectorMax vec_max] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) } + + test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] } + test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] } + + impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float); + impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMin { + type Result; + unsafe fn vec_min(self, b: Other) -> Self::Result; + } + + test_impl! { vec_vmnsb (a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [vmnb, vmnb] } + test_impl! { vec_vmnsh (a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [vmnh, vmnh] } + test_impl! { vec_vmnsf (a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [vmnf, vmnf] } + test_impl! { vec_vmnsg (a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [vmng, vmng] } + + test_impl! { vec_vmnslb (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vmnlb, vmnlb] } + test_impl! { vec_vmnslh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vmnlh, vmnlh] } + test_impl! 
{ vec_vmnslf (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vmnlf, vmnlf] } + test_impl! { vec_vmnslg (a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [vmnlg, vmnlg] } + + impl_vec_trait! { [VectorMin vec_min] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) } + + test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb] } + test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb] } + + impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float); + impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAbs { + unsafe fn vec_abs(self) -> Self; + } + + macro_rules! impl_abs { + ($name:ident, $ty:ident) => { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) { + v.vec_max(-v) + } + + impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) } + }; + } + + impl_abs! { vec_abs_i8, i8x16 } + impl_abs! { vec_abs_i16, i16x8 } + impl_abs! { vec_abs_i32, i32x4 } + impl_abs! { vec_abs_i64, i64x2 } + + test_impl! { vec_abs_f32 (v: vector_float) -> vector_float [ simd_fabs, "vector-enhancements-1" vflpsb ] } + test_impl! { vec_abs_f64 (v: vector_double) -> vector_double [ simd_fabs, vflpdb ] } + + impl_vec_trait! { [VectorAbs vec_abs] vec_abs_f32 (vector_float) } + impl_vec_trait! 
{ [VectorAbs vec_abs] vec_abs_f64 (vector_double) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNabs { + unsafe fn vec_nabs(self) -> Self; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-1"), + assert_instr(vflnsb) + )] + unsafe fn vec_nabs_f32(a: vector_float) -> vector_float { + simd_neg(simd_fabs(a)) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vflndb))] + unsafe fn vec_nabs_f64(a: vector_double) -> vector_double { + simd_neg(simd_fabs(a)) + } + + impl_vec_trait! { [VectorNabs vec_nabs] vec_nabs_f32 (vector_float) } + impl_vec_trait! { [VectorNabs vec_nabs] vec_nabs_f64 (vector_double) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNmsub { + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmssb) + )] + unsafe fn vec_nmsub_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_neg(simd_fma(a, b, simd_neg(c))) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmsub for vector_float { + #[target_feature(enable = "vector")] + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self { + vec_nmsub_f32(self, b, c) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmsdb) + )] + unsafe fn vec_nmsub_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double { + simd_neg(simd_fma(a, b, simd_neg(c))) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmsub for vector_double { + #[target_feature(enable = "vector")] + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self { + vec_nmsub_f64(self, b, c) + } + } + + #[unstable(feature = "stdarch_s390x", issue = 
"135681")] + pub trait VectorNmadd { + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmasb) + )] + unsafe fn vec_nmadd_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_neg(simd_fma(a, b, c)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmadd for vector_float { + #[target_feature(enable = "vector")] + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self { + vec_nmadd_f32(self, b, c) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmadb) + )] + unsafe fn vec_nmadd_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double { + simd_neg(simd_fma(a, b, c)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmadd for vector_double { + #[target_feature(enable = "vector")] + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self { + vec_nmadd_f64(self, b, c) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSplat { + unsafe fn vec_splat(self) -> Self; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vrepb, IMM2 = 1))] + unsafe fn vrepb(a: vector_signed_char) -> vector_signed_char { + static_assert_uimm_bits!(IMM2, 4); + simd_shuffle(a, a, const { u32x16::from_array([IMM2; 16]) }) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vreph, IMM2 = 1))] + unsafe fn vreph(a: vector_signed_short) -> vector_signed_short { + static_assert_uimm_bits!(IMM2, 3); + simd_shuffle(a, a, const { u32x8::from_array([IMM2; 8]) }) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vrepf, IMM2 = 1))] + unsafe fn vrepf(a: vector_signed_int) -> vector_signed_int { + static_assert_uimm_bits!(IMM2, 2); 
+ simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) }) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vrepg, IMM2 = 1))] + unsafe fn vrepg(a: vector_signed_long_long) -> vector_signed_long_long { + static_assert_uimm_bits!(IMM2, 1); + simd_shuffle(a, a, const { u32x2::from_array([IMM2; 2]) }) + } + + macro_rules! impl_vec_splat { + ($ty:ty, $fun:ident) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSplat for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_splat(self) -> Self { + transmute($fun::(transmute(self))) + } + } + }; + } + + impl_vec_splat! { vector_signed_char, vrepb } + impl_vec_splat! { vector_unsigned_char, vrepb } + impl_vec_splat! { vector_bool_char, vrepb } + impl_vec_splat! { vector_signed_short, vreph } + impl_vec_splat! { vector_unsigned_short, vreph } + impl_vec_splat! { vector_bool_short, vreph } + impl_vec_splat! { vector_signed_int, vrepf } + impl_vec_splat! { vector_unsigned_int, vrepf } + impl_vec_splat! { vector_bool_int, vrepf } + impl_vec_splat! { vector_signed_long_long, vrepg } + impl_vec_splat! { vector_unsigned_long_long, vrepg } + impl_vec_splat! { vector_bool_long_long, vrepg } + + impl_vec_splat! { vector_float, vrepf } + impl_vec_splat! { vector_double, vrepg } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSplats { + unsafe fn vec_splats(self) -> Output; + } + + macro_rules! impl_vec_splats { + ($(($fn:ident ($ty:ty, $shortty:tt) $instr:ident)),*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $fn(v: $ty) -> s_t_l!($shortty) { + transmute($shortty::splat(v)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSplats for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_splats(self) -> s_t_l!($shortty) { + $fn (self) + } + } + )* + } + } + + impl_vec_splats! 
{ + (vec_splats_u8 (u8, u8x16) vrepb), + (vec_splats_i8 (i8, i8x16) vrepb), + (vec_splats_u16 (u16, u16x8) vreph), + (vec_splats_i16 (i16, i16x8) vreph), + (vec_splats_u32 (u32, u32x4) vrepf), + (vec_splats_i32 (i32, i32x4) vrepf), + (vec_splats_u64 (u64, u64x2) vlvgp), + (vec_splats_i64 (i64, i64x2) vlvgp), + (vec_splats_f32 (f32, f32x4) vrepf), + (vec_splats_f64 (f64, f64x2) vrepg) + } + + macro_rules! impl_bool_vec_splats { + ($(($ty:ty, $shortty:tt, $boolty:ty)),*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSplats<$boolty> for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_splats(self) -> $boolty { + transmute($shortty::splat(self)) + } + } + )* + } + } + + impl_bool_vec_splats! { + (u8, u8x16, vector_bool_char), + (i8, i8x16, vector_bool_char), + (u16, u16x8, vector_bool_short), + (i16, i16x8, vector_bool_short), + (u32, u32x4, vector_bool_int), + (i32, i32x4, vector_bool_int), + (u64, u64x2, vector_bool_long_long), + (i64, i64x2, vector_bool_long_long) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait CountBits { + type Result; + + unsafe fn vec_cntlz(self) -> Self::Result; + unsafe fn vec_cnttz(self) -> Self::Result; + unsafe fn vec_popcnt(self) -> Self::Result; + } + + macro_rules! 
impl_count_bits { + ($ty:tt) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl CountBits for $ty { + type Result = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cntlz(self) -> Self::Result { + transmute(simd_ctlz(self)) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cnttz(self) -> Self::Result { + transmute(simd_cttz(self)) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_popcnt(self) -> Self::Result { + transmute(simd_ctpop(self)) + } + } + }; + } + + impl_count_bits!(vector_signed_char); + impl_count_bits!(vector_unsigned_char); + impl_count_bits!(vector_signed_short); + impl_count_bits!(vector_unsigned_short); + impl_count_bits!(vector_signed_int); + impl_count_bits!(vector_unsigned_int); + impl_count_bits!(vector_signed_long_long); + impl_count_bits!(vector_unsigned_long_long); + + test_impl! { vec_clzb_signed +(a: vector_signed_char) -> vector_unsigned_char [simd_ctlz, vclzb] } + test_impl! { vec_clzh_signed +(a: vector_signed_short) -> vector_unsigned_short [simd_ctlz, vclzh] } + test_impl! { vec_clzf_signed +(a: vector_signed_int) -> vector_unsigned_int [simd_ctlz, vclzf] } + test_impl! { vec_clzg_signed +(a: vector_signed_long_long) -> vector_unsigned_long_long [simd_ctlz, vclzg] } + + test_impl! { vec_clzb_unsigned +(a: vector_unsigned_char) -> vector_unsigned_char [simd_ctlz, vclzb] } + test_impl! { vec_clzh_unsigned +(a: vector_unsigned_short) -> vector_unsigned_short [simd_ctlz, vclzh] } + test_impl! { vec_clzf_unsigned +(a: vector_unsigned_int) -> vector_unsigned_int [simd_ctlz, vclzf] } + test_impl! { vec_clzg_unsigned +(a: vector_unsigned_long_long) -> vector_unsigned_long_long [simd_ctlz, vclzg] } + + test_impl! { vec_ctzb_signed +(a: vector_signed_char) -> vector_unsigned_char [simd_cttz, vctzb] } + test_impl! { vec_ctzh_signed +(a: vector_signed_short) -> vector_unsigned_short [simd_cttz, vctzh] } + test_impl! 
{ vec_ctzf_signed +(a: vector_signed_int) -> vector_unsigned_int [simd_cttz, vctzf] } + test_impl! { vec_ctzg_signed +(a: vector_signed_long_long) -> vector_unsigned_long_long [simd_cttz, vctzg] } + + test_impl! { vec_ctzb_unsigned +(a: vector_unsigned_char) -> vector_unsigned_char [simd_cttz, vctzb] } + test_impl! { vec_ctzh_unsigned +(a: vector_unsigned_short) -> vector_unsigned_short [simd_cttz, vctzh] } + test_impl! { vec_ctzf_unsigned +(a: vector_unsigned_int) -> vector_unsigned_int [simd_cttz, vctzf] } + test_impl! { vec_ctzg_unsigned +(a: vector_unsigned_long_long) -> vector_unsigned_long_long [simd_cttz, vctzg] } + + test_impl! { vec_vpopctb_signed +(a: vector_signed_char) -> vector_signed_char [simd_ctpop, vpopctb] } + test_impl! { vec_vpopcth_signed +(a: vector_signed_short) -> vector_signed_short [simd_ctpop, "vector-enhancements-1" vpopcth] } + test_impl! { vec_vpopctf_signed +(a: vector_signed_int) -> vector_signed_int [simd_ctpop, "vector-enhancements-1" vpopctf] } + test_impl! { vec_vpopctg_signed +(a: vector_signed_long_long) -> vector_signed_long_long [simd_ctpop, "vector-enhancements-1" vpopctg] } + + test_impl! { vec_vpopctb_unsigned +(a: vector_unsigned_char) -> vector_unsigned_char [simd_ctpop, vpopctb] } + test_impl! { vec_vpopcth_unsigned +(a: vector_unsigned_short) -> vector_unsigned_short [simd_ctpop, "vector-enhancements-1" vpopcth] } + test_impl! { vec_vpopctf_unsigned +(a: vector_unsigned_int) -> vector_unsigned_int [simd_ctpop, "vector-enhancements-1" vpopctf] } + test_impl! { vec_vpopctg_unsigned +(a: vector_unsigned_long_long) -> vector_unsigned_long_long [simd_ctpop, "vector-enhancements-1" vpopctg] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAnd { + type Result; + unsafe fn vec_and(self, b: Other) -> Self::Result; + } + + impl_vec_trait! 
{ [VectorAnd vec_and] ~(simd_and) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorOr { + type Result; + unsafe fn vec_or(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorOr vec_or] ~(simd_or) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorXor { + type Result; + unsafe fn vec_xor(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorXor vec_xor] ~(simd_xor) } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vno))] + unsafe fn nor(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a: u8x16 = transmute(a); + let b: u8x16 = transmute(b); + transmute(simd_xor(simd_or(a, b), u8x16::splat(0xff))) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNor { + type Result; + unsafe fn vec_nor(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorNor vec_nor]+ 2c (nor) } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vnn))] + unsafe fn nand(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a: u8x16 = transmute(a); + let b: u8x16 = transmute(b); + transmute(simd_xor(simd_and(a, b), u8x16::splat(0xff))) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNand { + type Result; + unsafe fn vec_nand(self, b: Other) -> Self::Result; + } + + impl_vec_trait! 
{ [VectorNand vec_nand]+ 2c (nand) } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vnx))] + unsafe fn eqv(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a: u8x16 = transmute(a); + let b: u8x16 = transmute(b); + transmute(simd_xor(simd_xor(a, b), u8x16::splat(0xff))) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEqv { + type Result; + unsafe fn vec_eqv(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorEqv vec_eqv]+ 2c (eqv) } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vnc))] + unsafe fn andc(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a = transmute(a); + let b = transmute(b); + transmute(simd_and(simd_xor(u8x16::splat(0xff), b), a)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAndc { + type Result; + unsafe fn vec_andc(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAndc vec_andc]+ 2c (andc) } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(voc))] + unsafe fn orc(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char { + let a = transmute(a); + let b = transmute(b); + transmute(simd_or(simd_xor(u8x16::splat(0xff), b), a)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorOrc { + type Result; + unsafe fn vec_orc(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) } + + test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] } + test_impl! 
{ vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] } + + // FIXME(llvm) llvm trunk already lowers roundeven to vfidb, but rust does not use it yet + // use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works + test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] } + test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] } + + test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, "vector-enhancements-1" vfisb] } + test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRoundc { + unsafe fn vec_roundc(self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRound { + unsafe fn vec_round(self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRint { + unsafe fn vec_rint(self) -> Self; + } + + impl_vec_trait! { [VectorRoundc vec_roundc] vec_roundc_f32 (vector_float) } + impl_vec_trait! { [VectorRoundc vec_roundc] vec_roundc_f64 (vector_double) } + + impl_vec_trait! { [VectorRound vec_round] vec_round_f32 (vector_float) } + impl_vec_trait! { [VectorRound vec_round] vec_round_f64 (vector_double) } + + impl_vec_trait! { [VectorRint vec_rint] vec_rint_f32 (vector_float) } + impl_vec_trait! { [VectorRint vec_rint] vec_rint_f64 (vector_double) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorTrunc { + // same as vec_roundz + unsafe fn vec_trunc(self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCeil { + // same as vec_roundp + unsafe fn vec_ceil(self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFloor { + // same as vec_roundm + unsafe fn vec_floor(self) -> Self; + } + + impl_vec_trait! 
{ [VectorTrunc vec_trunc] simd_trunc (vector_float) } + impl_vec_trait! { [VectorTrunc vec_trunc] simd_trunc (vector_double) } + + impl_vec_trait! { [VectorCeil vec_ceil] simd_ceil (vector_float) } + impl_vec_trait! { [VectorCeil vec_ceil] simd_ceil (vector_double) } + + impl_vec_trait! { [VectorFloor vec_floor] simd_floor (vector_float) } + impl_vec_trait! { [VectorFloor vec_floor] simd_floor (vector_double) } + + macro_rules! impl_vec_shift { + ([$Trait:ident $m:ident] ($b:ident, $h:ident, $w:ident, $g:ident)) => { + impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_unsigned_short) -> vector_signed_short } + impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_unsigned_int) -> vector_signed_int } + impl_vec_trait!{ [$Trait $m]+ $g (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$Trait $m]+ $g (vector_signed_long_long, vector_unsigned_long_long) -> vector_signed_long_long } + }; + } + + macro_rules! impl_shift { + ($fun:ident $intr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($fun))] + unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + let a = transmute(a); + // use the remainder of b by the width of a's elements to prevent UB + let b = simd_rem(transmute(b), ::splat($ty::BITS as $ty)); + + transmute($intr(a, b)) + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSl { + type Result; + unsafe fn vec_sl(self, b: Other) -> Self::Result; + } + + impl_shift! 
{ veslvb simd_shl u8 } + impl_shift! { veslvh simd_shl u16 } + impl_shift! { veslvf simd_shl u32 } + impl_shift! { veslvg simd_shl u64 } + + impl_vec_shift! { [VectorSl vec_sl] (veslvb, veslvh, veslvf, veslvg) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSr { + type Result; + unsafe fn vec_sr(self, b: Other) -> Self::Result; + } + + impl_shift! { vesrlvb simd_shr u8 } + impl_shift! { vesrlvh simd_shr u16 } + impl_shift! { vesrlvf simd_shr u32 } + impl_shift! { vesrlvg simd_shr u64 } + + impl_vec_shift! { [VectorSr vec_sr] (vesrlvb, vesrlvh, vesrlvf, vesrlvg) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSra { + type Result; + unsafe fn vec_sra(self, b: Other) -> Self::Result; + } + + impl_shift! { vesravb simd_shr i8 } + impl_shift! { vesravh simd_shr i16 } + impl_shift! { vesravf simd_shr i32 } + impl_shift! { vesravg simd_shr i64 } + + impl_vec_shift! { [VectorSra vec_sra] (vesravb, vesravh, vesravf, vesravg) } + + macro_rules! 
impl_vec_shift_byte { + ([$trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_char, vector_signed_char) -> vector_unsigned_char } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_char, vector_signed_char) -> vector_signed_char } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_short, vector_signed_short) -> vector_unsigned_short } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_short, vector_signed_short) -> vector_signed_short } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_short, vector_unsigned_short) -> vector_signed_short } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_int, vector_signed_int) -> vector_unsigned_int } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_int, vector_signed_int) -> vector_signed_int } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_int, vector_unsigned_int) -> vector_signed_int } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_long_long, vector_signed_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_long_long } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_long_long, vector_signed_long_long) -> vector_signed_long_long } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_long_long, vector_unsigned_long_long) -> vector_signed_long_long } + impl_vec_trait!{ [$trait $m]+ $f (vector_float, vector_signed_int) -> vector_float } + impl_vec_trait!{ [$trait $m]+ $f (vector_float, vector_unsigned_int) -> vector_float } + impl_vec_trait!{ [$trait $m]+ $f 
(vector_double, vector_signed_long_long) -> vector_double } + impl_vec_trait!{ [$trait $m]+ $f (vector_double, vector_unsigned_long_long) -> vector_double } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSlb { + type Result; + unsafe fn vec_slb(self, b: Other) -> Self::Result; + } + + impl_vec_shift_byte! { [VectorSlb vec_slb] (vslb) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSrab { + type Result; + unsafe fn vec_srab(self, b: Other) -> Self::Result; + } + + impl_vec_shift_byte! { [VectorSrab vec_srab] (vsrab) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSrb { + type Result; + unsafe fn vec_srb(self, b: Other) -> Self::Result; + } + + impl_vec_shift_byte! { [VectorSrb vec_srb] (vsrlb) } + + macro_rules! impl_vec_shift_long { + ([$trait:ident $m:ident] ($f:ident)) => { + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_char, vector_unsigned_char) -> vector_signed_char } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_short, vector_unsigned_char) -> vector_unsigned_short } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_short, vector_unsigned_char) -> vector_signed_short } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_int, vector_unsigned_char) -> vector_unsigned_int } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_int, vector_unsigned_char) -> vector_signed_int } + impl_vec_trait!{ [$trait $m]+ $f (vector_unsigned_long_long, vector_unsigned_char) -> vector_unsigned_long_long } + impl_vec_trait!{ [$trait $m]+ $f (vector_signed_long_long, vector_unsigned_char) -> vector_signed_long_long } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSrl { + type Result; + unsafe fn vec_srl(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! 
{ [VectorSrl vec_srl] (vsrl) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSral { + type Result; + unsafe fn vec_sral(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSral vec_sral] (vsra) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSll { + type Result; + unsafe fn vec_sll(self, b: Other) -> Self::Result; + } + + impl_vec_shift_long! { [VectorSll vec_sll] (vsl) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRl { + type Result; + unsafe fn vec_rl(self, b: Other) -> Self::Result; + } + + macro_rules! impl_rot { + ($fun:ident $intr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($fun))] + unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { + transmute($intr(transmute(a), transmute(a), transmute(b))) + } + }; + } + + impl_rot! { verllvb fshlb u8 } + impl_rot! { verllvh fshlh u16 } + impl_rot! { verllvf fshlf u32 } + impl_rot! { verllvg fshlg u64 } + + impl_vec_shift! { [VectorRl vec_rl] (verllvb, verllvh, verllvf, verllvg) } + + macro_rules! test_rot_imm { + ($fun:ident $instr:ident $intr:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + unsafe fn $fun(a: t_t_l!($ty), bits: core::ffi::c_ulong) -> t_t_l!($ty) { + // mod by the number of bits in a's element type to prevent UB + let bits = (bits % $ty::BITS as core::ffi::c_ulong) as $ty; + let a = transmute(a); + let b = ::splat(bits); + + transmute($intr(a, a, transmute(b))) + } + }; + } + + test_rot_imm! { verllvb_imm verllb fshlb u8 } + test_rot_imm! { verllvh_imm verllh fshlh u16 } + test_rot_imm! { verllvf_imm verllf fshlf u32 } + test_rot_imm! { verllvg_imm verllg fshlg u64 } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRli { + unsafe fn vec_rli(self, bits: core::ffi::c_ulong) -> Self; + } + + macro_rules! 
impl_rot_imm { + ($($ty:ident, $intr:ident),*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorRli for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_rli(self, bits: core::ffi::c_ulong) -> Self { + transmute($intr(transmute(self), bits)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorRli for t_u!($ty) { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_rli(self, bits: core::ffi::c_ulong) -> Self { + $intr(self, bits) + } + } + )* + } + } + + impl_rot_imm! { + vector_signed_char, verllvb_imm, + vector_signed_short, verllvh_imm, + vector_signed_int, verllvf_imm, + vector_signed_long_long, verllvg_imm + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRlMask { + unsafe fn vec_rl_mask(self, other: Other) -> Self; + } + + macro_rules! impl_rl_mask { + ($($ty:ident, $intr:ident, $fun:ident),*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($intr, IMM8 = 6))] + unsafe fn $fun(a: $ty, b: t_u!($ty)) -> $ty { + // mod by the number of bits in a's element type to prevent UB + $intr(a, a, transmute(b), const { (IMM8 % ::BITS as u8) as i32 }) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorRlMask for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_rl_mask(self, other: t_u!($ty)) -> Self { + $fun::(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorRlMask for t_u!($ty) { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_rl_mask(self, other: t_u!($ty)) -> Self { + transmute($fun::(transmute(self), transmute(other))) + } + } + )* + } + } + + impl_rl_mask! 
{ + vector_signed_char, verimb, test_verimb, + vector_signed_short, verimh, test_verimh, + vector_signed_int, verimf, test_verimf, + vector_signed_long_long, verimg, test_verimg + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorReve { + unsafe fn vec_reve(self) -> Self; + } + + #[repr(simd)] + struct ReverseMask([u32; N]); + + impl ReverseMask { + const fn new() -> Self { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = (N - i - 1) as u32; + i += 1; + } + ReverseMask(index) + } + } + + macro_rules! impl_reve { + ($($ty:ident, $fun:ident, $instr:ident),*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + unsafe fn $fun(a: $ty) -> $ty { + const N: usize = core::mem::size_of::<$ty>() / core::mem::size_of::(); + simd_shuffle(a, a, const { ShuffleMask::::reverse() }) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorReve for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_reve(self) -> Self { + $fun(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorReve for t_u!($ty) { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_reve(self) -> Self { + transmute($fun(transmute(self))) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorReve for t_b!($ty) { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_reve(self) -> Self { + transmute($fun(transmute(self))) + } + } + )* + } + } + + impl_reve! 
{ + vector_signed_char, reveb, vperm, + vector_signed_short, reveh, vperm, + vector_signed_int, revef, vperm, + vector_signed_long_long, reveg, vpdi + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorReve for vector_float { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_reve(self) -> Self { + transmute(transmute::<_, vector_signed_int>(self).vec_reve()) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorReve for vector_double { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_reve(self) -> Self { + transmute(transmute::<_, vector_signed_long_long>(self).vec_reve()) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorRevb { + unsafe fn vec_revb(self) -> Self; + } + + test_impl! { bswapb (a: vector_signed_char) -> vector_signed_char [simd_bswap, _] } + test_impl! { bswaph (a: vector_signed_short) -> vector_signed_short [simd_bswap, vperm] } + test_impl! { bswapf (a: vector_signed_int) -> vector_signed_int [simd_bswap, vperm] } + test_impl! { bswapg (a: vector_signed_long_long) -> vector_signed_long_long [simd_bswap, vperm] } + + impl_vec_trait! { [VectorRevb vec_revb]+ bswapb (vector_unsigned_char) } + impl_vec_trait! { [VectorRevb vec_revb]+ bswapb (vector_signed_char) } + impl_vec_trait! { [VectorRevb vec_revb]+ bswaph (vector_unsigned_short) } + impl_vec_trait! { [VectorRevb vec_revb]+ bswaph (vector_signed_short) } + impl_vec_trait! { [VectorRevb vec_revb]+ bswapf (vector_unsigned_int) } + impl_vec_trait! { [VectorRevb vec_revb]+ bswapf (vector_signed_int) } + impl_vec_trait! { [VectorRevb vec_revb]+ bswapg (vector_unsigned_long_long) } + impl_vec_trait! 
{ [VectorRevb vec_revb]+ bswapg (vector_signed_long_long) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorRevb for vector_float { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_revb(self) -> Self { + transmute(transmute::<_, vector_signed_int>(self).vec_revb()) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorRevb for vector_double { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_revb(self) -> Self { + transmute(transmute::<_, vector_signed_long_long>(self).vec_revb()) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMergel { + unsafe fn vec_mergel(self, other: Self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMergeh { + unsafe fn vec_mergeh(self, other: Self) -> Self; + } + + macro_rules! impl_merge { + ($($ty:ident, $mergel:ident, $mergeh:ident),*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($mergel))] + unsafe fn $mergel(a: $ty, b: $ty) -> $ty { + const N: usize = core::mem::size_of::<$ty>() / core::mem::size_of::(); + simd_shuffle(a, b, const { ShuffleMask::::merge_low() }) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMergel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_mergel(self, other: Self) -> Self { + $mergel(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMergel for t_u!($ty) { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_mergel(self, other: Self) -> Self { + transmute($mergel(transmute(self), transmute(other))) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($mergeh))] + unsafe fn $mergeh(a: $ty, b: $ty) -> $ty { + const N: usize = core::mem::size_of::<$ty>() / core::mem::size_of::(); + simd_shuffle(a, b, const { ShuffleMask::::merge_high() }) + } 
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMergeh for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_mergeh(self, other: Self) -> Self { + $mergeh(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMergeh for t_u!($ty) { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_mergeh(self, other: Self) -> Self { + transmute($mergeh(transmute(self), transmute(other))) + } + } + )* + } + } + + impl_merge! { + vector_signed_char, vmrlb, vmrhb, + vector_signed_short, vmrlh, vmrhh, + vector_signed_int, vmrlf, vmrhf, + vector_signed_long_long, vmrlg, vmrhg + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPerm { + unsafe fn vec_perm(self, other: Self, c: vector_unsigned_char) -> Self; + } + + macro_rules! impl_merge { + ($($ty:ident),*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorPerm for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_perm(self, other: Self, c: vector_unsigned_char) -> Self { + transmute(vperm(transmute(self), transmute(other), c)) + } + } + )* + } + } + + impl_merge! 
{ + vector_signed_char, + vector_signed_short, + vector_signed_int, + vector_signed_long_long, + vector_unsigned_char, + vector_unsigned_short, + vector_unsigned_int, + vector_unsigned_long_long, + vector_bool_char, + vector_bool_short, + vector_bool_int, + vector_bool_long_long, + vector_float, + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSumU128 { + unsafe fn vec_sum_u128(self, other: Self) -> vector_unsigned_char; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsumqf))] + pub unsafe fn vec_vsumqf(a: vector_unsigned_int, b: vector_unsigned_int) -> u128 { + transmute(vsumqf(a, b)) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsumqg))] + pub unsafe fn vec_vsumqg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128 { + transmute(vsumqg(a, b)) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSumU128 for vector_unsigned_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sum_u128(self, other: Self) -> vector_unsigned_char { + transmute(vec_vsumqf(self, other)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSumU128 for vector_unsigned_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sum_u128(self, other: Self) -> vector_unsigned_char { + transmute(vec_vsumqg(self, other)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSum2 { + unsafe fn vec_sum2(self, other: Self) -> vector_unsigned_long_long; + } + + test_impl! { vec_vsumgh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_long_long [vsumgh, vsumgh] } + test_impl! 
{ vec_vsumgf (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [vsumgf, vsumgf] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSum2 for vector_unsigned_short { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sum2(self, other: Self) -> vector_unsigned_long_long { + vec_vsumgh(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSum2 for vector_unsigned_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sum2(self, other: Self) -> vector_unsigned_long_long { + vec_vsumgf(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSum4 { + unsafe fn vec_sum4(self, other: Self) -> vector_unsigned_int; + } + + test_impl! { vec_vsumb (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_int [vsumb, vsumb] } + test_impl! { vec_vsumh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int [vsumh, vsumh] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSum4 for vector_unsigned_char { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sum4(self, other: Self) -> vector_unsigned_int { + vec_vsumb(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSum4 for vector_unsigned_short { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sum4(self, other: Self) -> vector_unsigned_int { + vec_vsumh(self, other) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSubc { + type Result; + unsafe fn vec_subc(self, b: Other) -> Self::Result; + } + + test_impl! { vec_vscbib (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vscbib, vscbib] } + test_impl! { vec_vscbih (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vscbih, vscbih] } + test_impl! 
{ vec_vscbif (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vscbif, vscbif] } + test_impl! { vec_vscbig (a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [vscbig, vscbig] } + + impl_vec_trait! {[VectorSubc vec_subc] vec_vscbib (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char } + impl_vec_trait! {[VectorSubc vec_subc] vec_vscbih (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short } + impl_vec_trait! {[VectorSubc vec_subc] vec_vscbif (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int } + impl_vec_trait! {[VectorSubc vec_subc] vec_vscbig (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSqrt { + unsafe fn vec_sqrt(self) -> Self; + } + + test_impl! { vec_sqrt_f32 (v: vector_float) -> vector_float [ simd_fsqrt, "vector-enhancements-1" vfsqsb ] } + test_impl! { vec_sqrt_f64 (v: vector_double) -> vector_double [ simd_fsqrt, vfsqdb ] } + + impl_vec_trait! { [VectorSqrt vec_sqrt] vec_sqrt_f32 (vector_float) } + impl_vec_trait! { [VectorSqrt vec_sqrt] vec_sqrt_f64 (vector_double) } + + macro_rules! vfae_wrapper { + ($($name:ident $ty:ident)*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($name, IMM = 0))] + unsafe fn $name( + a: $ty, + b: $ty, + ) -> $ty { + super::$name(a, b, IMM) + } + )* + } + } + + vfae_wrapper! { + vfaeb vector_signed_char + vfaeh vector_signed_short + vfaef vector_signed_int + + vfaezb vector_signed_char + vfaezh vector_signed_short + vfaezf vector_signed_int + } + + macro_rules! impl_vfae { + ([idx_cc $Trait:ident $m:ident] $imm:ident $b:ident $h:ident $f:ident) => { + impl_vfae! 
{ [idx_cc $Trait $m] $imm + $b vector_signed_char vector_signed_char + $b vector_unsigned_char vector_unsigned_char + $b vector_bool_char vector_unsigned_char + + $h vector_signed_short vector_signed_short + $h vector_unsigned_short vector_unsigned_short + $h vector_bool_short vector_unsigned_short + + $f vector_signed_int vector_signed_int + $f vector_unsigned_int vector_unsigned_int + $f vector_bool_int vector_unsigned_int + } + }; + ([idx_cc $Trait:ident $m:ident] $imm:ident $($fun:ident $ty:ident $r:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $ty { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: Self) -> (Self::Result, i32) { + let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b)); + (transmute(x), y) + } + } + )* + }; + ([cc $Trait:ident $m:ident] $imm:ident $b:ident $h:ident $f:ident) => { + impl_vfae! { [cc $Trait $m] $imm + $b vector_signed_char + $b vector_unsigned_char + $b vector_bool_char + + $h vector_signed_short + $h vector_unsigned_short + $h vector_bool_short + + $f vector_signed_int + $f vector_unsigned_int + $f vector_bool_int + } + }; + ([cc $Trait:ident $m:ident] $imm:ident $($fun:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $ty { + type Result = t_b!($ty); + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: Self) -> (Self::Result, i32) { + let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b)); + (transmute(x), y) + } + } + )* + }; + ([idx $Trait:ident $m:ident] $imm:ident $b:ident $h:ident $f:ident) => { + impl_vfae! 
{ [idx $Trait $m] $imm + $b vector_signed_char vector_signed_char + $b vector_unsigned_char vector_unsigned_char + $b vector_bool_char vector_unsigned_char + + $h vector_signed_short vector_signed_short + $h vector_unsigned_short vector_unsigned_short + $h vector_bool_short vector_unsigned_short + + $f vector_signed_int vector_signed_int + $f vector_unsigned_int vector_unsigned_int + $f vector_bool_int vector_unsigned_int + } + }; + ([idx $Trait:ident $m:ident] $imm:ident $($fun:ident $ty:ident $r:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $ty { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: Self) -> Self::Result { + transmute($fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b))) + } + } + )* + }; + ([$Trait:ident $m:ident] $imm:ident $b:ident $h:ident $f:ident) => { + impl_vfae! { [$Trait $m] $imm + $b vector_signed_char + $b vector_unsigned_char + $b vector_bool_char + + $h vector_signed_short + $h vector_unsigned_short + $h vector_bool_short + + $f vector_signed_int + $f vector_unsigned_int + $f vector_bool_int + } + }; + ([$Trait:ident $m:ident] $imm:ident $($fun:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $ty { + type Result = t_b!($ty); + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: Self) -> Self::Result { + transmute($fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b))) + } + } + )* + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyEq { + type Result; + unsafe fn vec_find_any_eq(self, other: Other) -> Self::Result; + } + + impl_vfae! { [VectorFindAnyEq vec_find_any_eq] Eq vfaeb vfaeh vfaef } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyNe { + type Result; + unsafe fn vec_find_any_ne(self, other: Other) -> Self::Result; + } + + impl_vfae! 
{ [VectorFindAnyNe vec_find_any_ne] Ne vfaeb vfaeh vfaef } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyEqOrZeroIdx { + type Result; + unsafe fn vec_find_any_eq_or_0_idx(self, other: Other) -> Self::Result; + } + + impl_vfae! { [idx VectorFindAnyEqOrZeroIdx vec_find_any_eq_or_0_idx] EqIdx + vfaezb vector_signed_char vector_signed_char + vfaezb vector_unsigned_char vector_unsigned_char + vfaezb vector_bool_char vector_unsigned_char + + vfaezh vector_signed_short vector_signed_short + vfaezh vector_unsigned_short vector_unsigned_short + vfaezh vector_bool_short vector_unsigned_short + + vfaezf vector_signed_int vector_signed_int + vfaezf vector_unsigned_int vector_unsigned_int + vfaezf vector_bool_int vector_unsigned_int + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyNeOrZeroIdx { + type Result; + unsafe fn vec_find_any_ne_or_0_idx(self, other: Other) -> Self::Result; + } + + impl_vfae! { [idx VectorFindAnyNeOrZeroIdx vec_find_any_ne_or_0_idx] NeIdx + vfaezb vector_signed_char vector_signed_char + vfaezb vector_unsigned_char vector_unsigned_char + vfaezb vector_bool_char vector_unsigned_char + + vfaezh vector_signed_short vector_signed_short + vfaezh vector_unsigned_short vector_unsigned_short + vfaezh vector_bool_short vector_unsigned_short + + vfaezf vector_signed_int vector_signed_int + vfaezf vector_unsigned_int vector_unsigned_int + vfaezf vector_bool_int vector_unsigned_int + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyEqIdx { + type Result; + unsafe fn vec_find_any_eq_idx(self, other: Other) -> Self::Result; + } + + impl_vfae! { [idx VectorFindAnyEqIdx vec_find_any_eq_idx] EqIdx vfaeb vfaeh vfaef } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyNeIdx { + type Result; + unsafe fn vec_find_any_ne_idx(self, other: Other) -> Self::Result; + } + + impl_vfae! 
{ [idx VectorFindAnyNeIdx vec_find_any_ne_idx] NeIdx vfaeb vfaeh vfaef } + + macro_rules! vfaes_wrapper { + ($($name:ident $ty:ident)*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($name, IMM = 0))] + unsafe fn $name( + a: $ty, + b: $ty, + ) -> PackedTuple<$ty, i32> { + super::$name(a, b, IMM) + } + )* + } + } + + vfaes_wrapper! { + vfaebs vector_signed_char + vfaehs vector_signed_short + vfaefs vector_signed_int + + vfaezbs vector_signed_char + vfaezhs vector_signed_short + vfaezfs vector_signed_int + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyEqCC { + type Result; + unsafe fn vec_find_any_eq_cc(self, other: Other) -> (Self::Result, i32); + } + + impl_vfae! { [cc VectorFindAnyEqCC vec_find_any_eq_cc] Eq vfaebs vfaehs vfaefs } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyNeCC { + type Result; + unsafe fn vec_find_any_ne_cc(self, other: Other) -> (Self::Result, i32); + } + + impl_vfae! { [cc VectorFindAnyNeCC vec_find_any_ne_cc] Ne vfaebs vfaehs vfaefs } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyEqIdxCC { + type Result; + unsafe fn vec_find_any_eq_idx_cc(self, other: Other) -> (Self::Result, i32); + } + + impl_vfae! { [idx_cc VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc] EqIdx vfaebs vfaehs vfaefs } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyNeIdxCC { + type Result; + unsafe fn vec_find_any_ne_idx_cc(self, other: Other) -> (Self::Result, i32); + } + + impl_vfae! { [idx_cc VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc] NeIdx vfaebs vfaehs vfaefs } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyEqOrZeroIdxCC { + type Result; + unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other) -> (Self::Result, i32); + } + + impl_vfae! 
{ [idx_cc VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc] EqIdx vfaezbs vfaezhs vfaezfs } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFindAnyNeOrZeroIdxCC { + type Result; + unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other) -> (Self::Result, i32); + } + + impl_vfae! { [idx_cc VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc] NeIdx vfaezbs vfaezhs vfaezfs } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vl))] + unsafe fn test_vector_load(offset: isize, ptr: *const i32) -> vector_signed_int { + ptr.byte_offset(offset) + .cast::() + .read_unaligned() + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vst))] + unsafe fn test_vector_store(vector: vector_signed_int, offset: isize, ptr: *mut i32) { + ptr.byte_offset(offset) + .cast::() + .write_unaligned(vector) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorLoad: Sized { + type ElementType; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_xl(offset: isize, ptr: *const Self::ElementType) -> Self { + ptr.byte_offset(offset).cast::().read_unaligned() + } + + unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self; + + unsafe fn vec_load_bndry( + ptr: *const Self::ElementType, + ) -> MaybeUninit; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorStore: Sized { + type ElementType; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_xst(self, offset: isize, ptr: *mut Self::ElementType) { + ptr.byte_offset(offset).cast::().write_unaligned(self) + } + + unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32); + } + + macro_rules! 
impl_load_store { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoad for t_t_l!($ty) { + type ElementType = $ty; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self { + transmute(vll( byte_count, ptr.cast(),)) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_bndry(ptr: *const Self::ElementType) -> MaybeUninit { + transmute(vlbb(ptr.cast(), const { validate_block_boundary(BLOCK_BOUNDARY) })) + } + + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorStore for t_t_l!($ty) { + type ElementType = $ty; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32) { + vstl(transmute(self), byte_count, ptr.cast()) + } + } + )* + } + } + + impl_load_store! { i8 u8 i16 u16 i32 u32 i64 u64 f32 f64 } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vll))] + unsafe fn test_vec_load_len(ptr: *const i32, byte_count: u32) -> vector_signed_int { + vector_signed_int::vec_load_len(ptr, byte_count) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr("vlbb"))] + unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit { + vector_signed_int::vec_load_bndry::<512>(ptr) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vst))] + unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) { + vector.vec_store_len(ptr, byte_count) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorLoadPair: Sized { + type ElementType; + + unsafe fn vec_load_pair(a: Self::ElementType, b: Self::ElementType) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoadPair for vector_signed_long_long { + type ElementType = i64; + + #[inline] + 
#[target_feature(enable = "vector")] + unsafe fn vec_load_pair(a: i64, b: i64) -> Self { + vector_signed_long_long([a, b]) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoadPair for vector_unsigned_long_long { + type ElementType = u64; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_pair(a: u64, b: u64) -> Self { + vector_unsigned_long_long([a, b]) + } + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn pack(a: T, b: T) -> T { + simd_shuffle(a, b, const { ShuffleMask::::pack() }) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkh))] + unsafe fn vpkh(a: i16x8, b: i16x8) -> i8x16 { + let a: i8x16 = transmute(a); + let b: i8x16 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<16>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkf))] + unsafe fn vpkf(a: i32x4, b: i32x4) -> i16x8 { + let a: i16x8 = transmute(a); + let b: i16x8 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<8>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkg))] + unsafe fn vpkg(a: i64x2, b: i64x2) -> i32x4 { + let a: i32x4 = transmute(a); + let b: i32x4 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<4>::pack() }) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPack { + type Result; + unsafe fn vec_pack(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_bool_short, vector_bool_short) -> vector_bool_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! 
{ [VectorPack vec_pack]+ vpkf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_bool_int, vector_bool_int) -> vector_bool_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_bool_long_long, vector_bool_long_long) -> vector_bool_int } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacks { + type Result; + unsafe fn vec_packs(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPacks vec_packs] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacks vec_packs] vpksg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! 
{ [VectorPacks vec_packs] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacksu { + type Result; + unsafe fn vec_packsu(self, b: Other) -> Self::Result; + } + + unsafe fn simd_smax(a: T, b: T) -> T { + simd_select::(simd_gt::(a, b), a, b) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsh))] + unsafe fn vpacksuh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char { + vpklsh( + simd_smax(a, vector_signed_short([0; 8])), + simd_smax(b, vector_signed_short([0; 8])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsf))] + unsafe fn vpacksuf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short { + vpklsf( + simd_smax(a, vector_signed_int([0; 4])), + simd_smax(b, vector_signed_int([0; 4])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsg))] + unsafe fn vpacksug( + a: vector_signed_long_long, + b: vector_signed_long_long, + ) -> vector_unsigned_int { + vpklsg( + simd_smax(a, vector_signed_long_long([0; 2])), + simd_smax(b, vector_signed_long_long([0; 2])), + ) + } + + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuh (vector_signed_short, vector_signed_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuf (vector_signed_int, vector_signed_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksug (vector_signed_long_long, vector_signed_long_long) -> vector_unsigned_int } + impl_vec_trait! 
{ [VectorPacksu vec_packsu] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + + macro_rules! impl_vector_packs_cc { + ($($intr:ident $ty:ident $outty:ident)*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($intr))] + unsafe fn $intr( + a: $ty, + b: $ty, + ) -> ($outty, i32) { + let PackedTuple { x, y } = super::$intr(a, b); + (x, y) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorPacksCC for $ty { + type Result = $outty; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32) { + $intr(self, b) + } + } + )* + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacksCC { + type Result; + unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32); + } + + impl_vector_packs_cc! { + vpkshs vector_signed_short vector_signed_char + vpklshs vector_unsigned_short vector_unsigned_char + vpksfs vector_signed_int vector_signed_short + vpklsfs vector_unsigned_int vector_unsigned_short + vpksgs vector_signed_long_long vector_signed_int + vpklsgs vector_unsigned_long_long vector_unsigned_int + } + + macro_rules! impl_vector_packsu_cc { + ($($intr:ident $ty:ident $outty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorPacksuCC for $ty { + type Result = $outty; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32) { + $intr(self, b) + } + } + )* + } + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacksuCC { + type Result; + unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32); + } + + impl_vector_packsu_cc! 
{ + vpklshs vector_unsigned_short vector_unsigned_char + vpklsfs vector_unsigned_int vector_unsigned_short + vpklsgs vector_unsigned_long_long vector_unsigned_int + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorMadd { + unsafe fn vec_madd(self, b: Self, c: Self) -> Self; + unsafe fn vec_msub(self, b: Self, c: Self) -> Self; + } + + test_impl! { vfmasb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fma, "vector-enhancements-1" vfmasb] } + test_impl! { vfmadb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fma, vfmadb] } + + #[inline] + unsafe fn simd_fms(a: T, b: T, c: T) -> T { + simd_fma(a, b, simd_neg(c)) + } + + test_impl! { vfmssb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fms, "vector-enhancements-1" vfmssb] } + test_impl! { vfmsdb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fms, vfmsdb] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMadd for vector_float { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_madd(self, b: Self, c: Self) -> Self { + vfmasb(self, b, c) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_msub(self, b: Self, c: Self) -> Self { + vfmssb(self, b, c) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMadd for vector_double { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_madd(self, b: Self, c: Self) -> Self { + vfmadb(self, b, c) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_msub(self, b: Self, c: Self) -> Self { + vfmsdb(self, b, c) + } + } + + macro_rules! 
impl_vec_unpack { + ($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + unsafe fn $instr(a: $src) -> $dst { + simd_as(simd_shuffle::<_, _, $shuffled>( + a, + a, + const { ShuffleMask::<$width>::$mask() }, + )) + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnpackh { + type Result; + unsafe fn vec_unpackh(self) -> Self::Result; + } + + impl_vec_unpack!(unpack_high vuphb vector_signed_char i8x8 vector_signed_short 8); + impl_vec_unpack!(unpack_high vuphh vector_signed_short i16x4 vector_signed_int 4); + impl_vec_unpack!(unpack_high vuphf vector_signed_int i32x2 vector_signed_long_long 2); + + impl_vec_unpack!(unpack_high vuplhb vector_unsigned_char u8x8 vector_unsigned_short 8); + impl_vec_unpack!(unpack_high vuplhh vector_unsigned_short u16x4 vector_unsigned_int 4); + impl_vec_unpack!(unpack_high vuplhf vector_unsigned_int u32x2 vector_unsigned_long_long 2); + + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphb (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphh (vector_signed_short) -> vector_signed_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphf (vector_signed_int) -> vector_signed_long_long} + + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhb (vector_unsigned_char) -> vector_unsigned_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhh (vector_unsigned_short) -> vector_unsigned_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhf (vector_unsigned_int) -> vector_unsigned_long_long} + + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhb (vector_bool_char) -> vector_bool_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhh (vector_bool_short) -> vector_bool_int} + impl_vec_trait! 
{[VectorUnpackh vec_unpackh]+ vuplhf (vector_bool_int) -> vector_bool_long_long} + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnpackl { + type Result; + unsafe fn vec_unpackl(self) -> Self::Result; + } + + // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like + // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576. + + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplf (vector_signed_int) -> vector_signed_long_long} + + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllb (vector_unsigned_char) -> vector_unsigned_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllh (vector_unsigned_short) -> vector_unsigned_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllf (vector_unsigned_int) -> vector_unsigned_long_long} + + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllb (vector_bool_char) -> vector_bool_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllh (vector_bool_short) -> vector_bool_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllf (vector_bool_int) -> vector_bool_long_long} + + test_impl! { vec_vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vavgb, vavgb ] } + test_impl! { vec_vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vavgh, vavgh ] } + test_impl! { vec_vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vavgf, vavgf ] } + test_impl! { vec_vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [ vavgg, vavgg ] } + + test_impl! { vec_vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vavglb, vavglb ] } + test_impl! 
{ vec_vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vavglh, vavglh ] } + test_impl! { vec_vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vavglf, vavglf ] } + test_impl! { vec_vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [ vavglg, vavglg ] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAvg { + type Result; + unsafe fn vec_avg(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAvg vec_avg] 2 (vec_vavglb, vec_vavgb, vec_vavglh, vec_vavgh, vec_vavglf, vec_vavgf, vec_vavglg, vec_vavgg) } + + macro_rules! impl_mul { + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait<$r> for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b) -> $r { + $fun(transmute(self), transmute(b)) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty, $c:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b, c: $c) -> $r { + $fun(self, b, c) + } + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMule { + unsafe fn vec_mule(self, b: Self) -> Result; + } + + // FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705 + // #[target_feature(enable = "vector")] + // #[cfg_attr(test, assert_instr(vmleh))] + // unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int { + // let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( + // a, + // a, + // const { ShuffleMask([0, 2, 4, 6]) }, + // )); + // + // let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( + // b, + // b, + // const { ShuffleMask([0, 2, 4, 6]) }, + // 
)); + // + // simd_mul(even_a, even_b) + // } + + test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] } + test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] } + test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] } + + test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] } + test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] } + test_impl! { vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] } + + impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + + impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMulo { + unsafe fn vec_mulo(self, b: Self) -> Result; + } + + test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] } + test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] } + test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] } + + test_impl! 
{ vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] } + test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] } + test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] } + + impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + + impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMulh { + unsafe fn vec_mulh(self, b: Self) -> Result; + } + + test_impl! { vec_vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vmhb, vmhb ] } + test_impl! { vec_vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vmhh, vmhh ] } + test_impl! { vec_vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vmhf, vmhf ] } + + test_impl! { vec_vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vmlhb, vmlhb ] } + test_impl! { vec_vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vmlhh, vmlhh ] } + test_impl! 
{ vec_vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vmlhf, vmlhf ] } + + impl_mul!([VectorMulh vec_mulh] vec_vmhb (vector_signed_char, vector_signed_char) -> vector_signed_char); + impl_mul!([VectorMulh vec_mulh] vec_vmhh (vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMulh vec_mulh] vec_vmhf (vector_signed_int, vector_signed_int) -> vector_signed_int); + + impl_mul!([VectorMulh vec_mulh] vec_vmlhb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char); + impl_mul!([VectorMulh vec_mulh] vec_vmlhh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMulh vec_mulh] vec_vmlhf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMeadd { + type Result; + unsafe fn vec_meadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaeb, vmaeb ] } + test_impl! { vec_vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaeh, vmaeh ] } + test_impl! { vec_vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaef, vmaef ] } + + test_impl! { vec_vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmaleb, vmaleb ] } + test_impl! { vec_vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaleh, vmaleh ] } + test_impl! 
{ vec_vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalef, vmalef ] } + + impl_mul!([VectorMeadd vec_meadd] vec_vmaeb (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short ); + impl_mul!([VectorMeadd vec_meadd] vec_vmaeh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int); + impl_mul!([VectorMeadd vec_meadd] vec_vmaef (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long ); + + impl_mul!([VectorMeadd vec_meadd] vec_vmaleb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short ); + impl_mul!([VectorMeadd vec_meadd] vec_vmaleh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int); + impl_mul!([VectorMeadd vec_meadd] vec_vmalef (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMoadd { + type Result; + unsafe fn vec_moadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaob, vmaob ] } + test_impl! { vec_vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaoh, vmaoh ] } + test_impl! { vec_vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaof, vmaof ] } + + test_impl! { vec_vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmalob, vmalob ] } + test_impl! { vec_vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaloh, vmaloh ] } + test_impl! 
{ vec_vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalof, vmalof ] } + + impl_mul!([VectorMoadd vec_moadd] vec_vmaob (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short ); + impl_mul!([VectorMoadd vec_moadd] vec_vmaoh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int); + impl_mul!([VectorMoadd vec_moadd] vec_vmaof (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long ); + + impl_mul!([VectorMoadd vec_moadd] vec_vmalob (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short ); + impl_mul!([VectorMoadd vec_moadd] vec_vmaloh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int); + impl_mul!([VectorMoadd vec_moadd] vec_vmalof (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMhadd { + type Result; + unsafe fn vec_mhadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [ vmahb, vmahb ] } + test_impl! { vec_vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[ vmahh, vmahh ] } + test_impl! { vec_vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [ vmahf, vmahf ] } + + test_impl! { vec_vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [ vmalhb, vmalhb ] } + test_impl! { vec_vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[ vmalhh, vmalhh ] } + test_impl! 
{ vec_vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [ vmalhf, vmalhf ] } + + impl_mul!([VectorMhadd vec_mhadd] vec_vmahb (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char ); + impl_mul!([VectorMhadd vec_mhadd] vec_vmahh (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMhadd vec_mhadd] vec_vmahf (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int ); + + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char ); + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMladd { + type Result; + unsafe fn vec_mladd(self, b: Self, c: Self::Result) -> Self::Result; + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn simd_mladd(a: T, b: T, c: T) -> T { + simd_add(simd_mul(a, b), c) + } + + test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] } + test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] } + test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] } + + test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] } + test_impl! 
{ vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] } + test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] } + + impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char ); + impl_mul!([VectorMladd vec_mladd] vec_vmal_ih (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMladd vec_mladd] vec_vmal_if (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int ); + + impl_mul!([VectorMladd vec_mladd] vec_vmal_ub (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char ); + impl_mul!([VectorMladd vec_mladd] vec_vmal_uh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMladd vec_mladd] vec_vmal_uf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGfmsum { + unsafe fn vec_gfmsum(self, b: Self) -> Result; + } + + test_impl! { vec_vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vgfmb, vgfmb ] } + test_impl! { vec_vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vgfmh, vgfmh] } + test_impl! 
{ vec_vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vgfmf, vgfmf ] } + + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGfmsumAccum { + type Result; + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vgfmab, vgfmab ] } + test_impl! { vec_vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vgfmah, vgfmah] } + test_impl! { vec_vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vgfmaf, vgfmaf ] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_char { + type Result = vector_unsigned_short; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmab(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_short { + type Result = vector_unsigned_int; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmah(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_int { + type Result = vector_unsigned_long_long; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: 
Self::Result) -> Self::Result { + vec_vgfmaf(self, b, c) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vgef, D = 3))] + unsafe fn vgef( + a: vector_unsigned_int, + b: vector_unsigned_int, + c: *const u32, + ) -> vector_unsigned_int { + static_assert_uimm_bits!(D, 2); + let offset: u32 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + let value = ptr.read(); + simd_insert(a, D, value) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vgeg, D = 1))] + unsafe fn vgeg( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: *const u64, + ) -> vector_unsigned_long_long { + static_assert_uimm_bits!(D, 1); + let offset: u64 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + let value = ptr.read(); + simd_insert(a, D, value) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGatherElement { + type Element; + type Offset; + unsafe fn vec_gather_element( + self, + b: Self::Offset, + c: *const Self::Element, + ) -> Self; + } + + macro_rules! impl_vec_gather_element { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGatherElement for $ty { + type Element = l_t_t!($ty); + type Offset = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gather_element(self, b: Self::Offset, c: *const Self::Element) -> Self { + transmute($instr::(transmute(self), b, c.cast())) + } + } + )* + } + } + + impl_vec_gather_element! 
{ + vgef vector_signed_int + vgef vector_bool_int + vgef vector_unsigned_int + + vgeg vector_signed_long_long + vgeg vector_bool_long_long + vgeg vector_unsigned_long_long + + vgef vector_float + vgeg vector_double + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vscef, D = 3))] + unsafe fn vscef(a: vector_unsigned_int, b: vector_unsigned_int, c: *mut u32) { + static_assert_uimm_bits!(D, 2); + let value = simd_extract(a, D); + let offset: u32 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + ptr.write(value); + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsceg, D = 1))] + unsafe fn vsceg( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: *mut u64, + ) { + static_assert_uimm_bits!(D, 1); + let value = simd_extract(a, D); + let offset: u64 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + ptr.write(value); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorScatterElement { + type Element; + type Offset; + unsafe fn vec_scatter_element(self, b: Self::Offset, c: *mut Self::Element); + } + + macro_rules! impl_vec_scatter_element { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorScatterElement for $ty { + type Element = l_t_t!($ty); + type Offset = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_scatter_element(self, b: Self::Offset, c: *mut Self::Element) { + $instr::(transmute(self), b, c.cast()) + } + } + )* + } + } + + impl_vec_scatter_element! 
{ + vscef vector_signed_int + vscef vector_bool_int + vscef vector_unsigned_int + + vsceg vector_signed_long_long + vsceg vector_bool_long_long + vsceg vector_unsigned_long_long + + vscef vector_float + vsceg vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSel: Sized { + unsafe fn vec_sel(self, b: Self, c: Mask) -> Self; + } + + macro_rules! impl_vec_sel { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sel(self, b: Self, c: t_u!($ty)) -> Self { + let b = simd_and(transmute(b), c); + let a = simd_and(transmute(self), simd_xor(c, transmute(vector_signed_char([!0; 16])))); + transmute(simd_or(a, b)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sel(self, b: Self, c: t_b!($ty)) -> Self { + // defer to the implementation with an unsigned mask + self.vec_sel(b, transmute::<_, t_u!($ty)>(c)) + } + } + )* + } + } + + impl_vec_sel! 
{ + vector_signed_char + vector_signed_short + vector_signed_int + vector_signed_long_long + + vector_unsigned_char + vector_unsigned_short + vector_unsigned_int + vector_unsigned_long_long + + vector_bool_char + vector_bool_short + vector_bool_int + vector_bool_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFpTestDataClass { + type Result; + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFpTestDataClass for vector_float { + type Result = vector_bool_int; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32) { + let PackedTuple { x, y } = vftcisb(self, CLASS); + (x, y) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFpTestDataClass for vector_double { + type Result = vector_bool_long_long; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32) { + let PackedTuple { x, y } = vftcidb(self, CLASS); + (x, y) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCompare { + unsafe fn vec_all_lt(self, other: Self) -> i32; + unsafe fn vec_all_le(self, other: Self) -> i32; + unsafe fn vec_all_gt(self, other: Self) -> i32; + unsafe fn vec_all_ge(self, other: Self) -> i32; + } + + // NOTE: this implementation is currently non-optimal, but it does work for floats even with + // only `vector` enabled. + // + // - https://github.com/llvm/llvm-project/issues/129434 + // - https://github.com/llvm/llvm-project/issues/130424 + macro_rules! 
impl_vec_compare { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCompare for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_lt(self, other: Self) -> i32 { + simd_reduce_all(simd_lt::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_le(self, other: Self) -> i32 { + simd_reduce_all(simd_le::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_gt(self, other: Self) -> i32 { + simd_reduce_all(simd_gt::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_ge(self, other: Self) -> i32 { + simd_reduce_all(simd_ge::<_, t_b!($ty)>(self, other)) as i32 + } + } + )* + } + } + + impl_vec_compare! { + vector_signed_char + vector_unsigned_char + + vector_signed_short + vector_unsigned_short + + vector_signed_int + vector_unsigned_int + vector_float + + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorTestMask { + type Mask; + unsafe fn vec_test_mask(self, other: Self::Mask) -> i32; + } + + macro_rules! impl_vec_test_mask { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorTestMask for $ty { + type Mask = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_test_mask(self, other: Self::Mask) -> i32 { + vtm(transmute(self), transmute(other)) + } + } + )* + } + } + + impl_vec_test_mask! 
{ + vector_signed_char + vector_signed_short + vector_signed_int + vector_signed_long_long + + vector_unsigned_char + vector_unsigned_short + vector_unsigned_int + vector_unsigned_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSearchString { + unsafe fn vec_search_string_cc( + self, + b: Self, + c: vector_unsigned_char, + ) -> (vector_unsigned_char, i32); + + unsafe fn vec_search_string_until_zero_cc( + self, + b: Self, + c: vector_unsigned_char, + ) -> (vector_unsigned_char, i32); + } + + macro_rules! impl_vec_search_string{ + ($($intr_s:ident $intr_sz:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSearchString for $ty { + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) { + let PackedTuple { x,y } = $intr_s(transmute(self), transmute(b), c); + (x, y) + } + + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) { + let PackedTuple { x,y } = $intr_sz(transmute(self), transmute(b), c); + (x, y) + } + } + + )* + } + } + + impl_vec_search_string! 
{ + vstrsb vstrszb vector_signed_char + vstrsb vstrszb vector_bool_char + vstrsb vstrszb vector_unsigned_char + + vstrsh vstrszh vector_signed_short + vstrsh vstrszh vector_bool_short + vstrsh vstrszh vector_unsigned_short + + vstrsf vstrszf vector_signed_int + vstrsf vstrszf vector_bool_int + vstrsf vstrszf vector_unsigned_int + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vcdgb))] + pub unsafe fn vcdgb(a: vector_signed_long_long) -> vector_double { + simd_as(a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vcdlgb))] + pub unsafe fn vcdlgb(a: vector_unsigned_long_long) -> vector_double { + simd_as(a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorDouble { + unsafe fn vec_double(self) -> vector_double; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorDouble for vector_signed_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_double(self) -> vector_double { + vcdgb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorDouble for vector_unsigned_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_double(self) -> vector_double { + vcdlgb(self) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vcefb) + )] + pub unsafe fn vcefb(a: vector_signed_int) -> vector_float { + simd_as(a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vcelfb) + )] + pub unsafe fn vcelfb(a: vector_unsigned_int) -> vector_float { + simd_as(a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFloat { + unsafe fn vec_float(self) -> vector_float; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFloat for 
vector_signed_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_float(self) -> vector_float { + vcefb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFloat for vector_unsigned_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_float(self) -> vector_float { + vcelfb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorExtendSigned64 { + unsafe fn vec_extend_s64(self) -> vector_signed_long_long; + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegb))] + pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i8x2>( + a, + a, + const { u32x2::from_array([7, 15]) }, + )) + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegh))] + pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i16x2>( + a, + a, + const { u32x2::from_array([3, 7]) }, + )) + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegf))] + pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i32x2>( + a, + a, + const { u32x2::from_array([1, 3]) }, + )) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_char { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegb(self) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_short { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> 
vector_signed_long_long { + vsegh(self) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegf(self) + } + } + + // NOTE: VectorSigned and VectorUnsigned make strong safety assumptions around floats. + // This is what C provides, but even IBM does not clearly document these constraints. + // + // https://doc.rust-lang.org/std/intrinsics/simd/fn.simd_cast.html + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSigned { + type Result; + unsafe fn vec_signed(self) -> Self::Result; + } + + test_impl! { vcgsb (a: vector_float) -> vector_signed_int [simd_cast, "vector-enhancements-2" vcgsb] } + test_impl! { vcgdb (a: vector_double) -> vector_signed_long_long [simd_cast, vcgdb] } + + impl_vec_trait! { [VectorSigned vec_signed] vcgsb (vector_float) -> vector_signed_int } + impl_vec_trait! { [VectorSigned vec_signed] vcgdb (vector_double) -> vector_signed_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnsigned { + type Result; + unsafe fn vec_unsigned(self) -> Self::Result; + } + + test_impl! { vclgsb (a: vector_float) -> vector_unsigned_int [simd_cast, "vector-enhancements-2" vclgsb] } + test_impl! { vclgdb (a: vector_double) -> vector_unsigned_long_long [simd_cast, vclgdb] } + + impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgsb (vector_float) -> vector_unsigned_int } + impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgdb (vector_double) -> vector_unsigned_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCopyUntilZero { + unsafe fn vec_cp_until_zero(self) -> Self; + } + + test_impl! { vec_vistrb (a: vector_unsigned_char) -> vector_unsigned_char [vistrb, vistrb] } + test_impl! { vec_vistrh (a: vector_unsigned_short) -> vector_unsigned_short [vistrh, vistrh] } + test_impl! 
{ vec_vistrf (a: vector_unsigned_int) -> vector_unsigned_int [vistrf, vistrf] } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_signed_char) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_bool_char) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_unsigned_char) } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_signed_short) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_bool_short) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_unsigned_short) } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_signed_int) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_bool_int) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_unsigned_int) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCopyUntilZeroCC: Sized { + unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32); + } + + test_impl! { vec_vistrbs (a: vector_unsigned_char) -> PackedTuple [vistrbs, vistrbs] } + test_impl! { vec_vistrhs (a: vector_unsigned_short) -> PackedTuple [vistrhs, vistrhs] } + test_impl! { vec_vistrfs (a: vector_unsigned_int) -> PackedTuple [vistrfs, vistrfs] } + + macro_rules! impl_vec_copy_until_zero_cc { + ($($intr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCopyUntilZeroCC for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32) { + let PackedTuple { x,y } = $intr(transmute(self)); + (transmute(x), y) + } + } + + )* + } + } + + impl_vec_copy_until_zero_cc! 
{ + vec_vistrbs vector_signed_char + vec_vistrbs vector_bool_char + vec_vistrbs vector_unsigned_char + + vec_vistrhs vector_signed_short + vec_vistrhs vector_bool_short + vec_vistrhs vector_unsigned_short + + vec_vistrfs vector_signed_int + vec_vistrfs vector_bool_int + vec_vistrfs vector_unsigned_int + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSrdb { + unsafe fn vec_srdb(self, b: Self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSld { + unsafe fn vec_sld(self, b: Self) -> Self; + + unsafe fn vec_sldw(self, b: Self) -> Self; + + unsafe fn vec_sldb(self, b: Self) -> Self; + } + + // FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955 + // ideally we could implement this in terms of llvm.fshl.i128 + // #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128; + // transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } )) + + macro_rules! impl_vec_sld { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sld(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 4); + transmute(vsldb(transmute(self), transmute(b), C)) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sldw(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 2); + transmute(vsldb(transmute(self), transmute(b), const { 4 * C })) + } + + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_sldb(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 3); + transmute(vsld(transmute(self), transmute(b), C)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSrdb for $ty { + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_srdb(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 3); + transmute(vsrd(transmute(self), transmute(b), C)) + } + 
} + )* + } + } + + impl_vec_sld! { + vector_signed_char + vector_bool_char + vector_unsigned_char + + vector_signed_short + vector_bool_short + vector_unsigned_short + + vector_signed_int + vector_bool_int + vector_unsigned_int + + vector_signed_long_long + vector_bool_long_long + vector_unsigned_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCompareRange: Sized { + type Result; + + unsafe fn vstrc(self, b: Self, c: Self) -> Self::Result; + unsafe fn vstrcz(self, b: Self, c: Self) -> Self::Result; + unsafe fn vstrcs(self, b: Self, c: Self) -> (Self::Result, i32); + unsafe fn vstrczs(self, b: Self, c: Self) -> (Self::Result, i32); + } + + const fn validate_compare_range_imm(imm: u32) { + if !matches!(imm, 0 | 4 | 8 | 12) { + panic!("IMM needs to be one of 0, 4, 8, 12"); + } + } + + macro_rules! impl_compare_range { + ($($ty:ident $vstrc:ident $vstrcs:ident $vstrcz:ident $vstrczs:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCompareRange for $ty { + type Result = t_b!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrc(self, b: Self, c: Self) -> Self::Result { + const { validate_compare_range_imm }; + $vstrc(self, b, c, IMM) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrcz(self, b: Self, c: Self) -> Self::Result { + const { validate_compare_range_imm }; + $vstrcz(self, b, c, IMM) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrcs(self, b: Self, c: Self) -> (Self::Result, i32) { + const { validate_compare_range_imm }; + let PackedTuple { x, y } = $vstrcs(self, b, c, IMM); + (x,y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrczs(self, b: Self, c: Self) -> (Self::Result, i32) { + const { validate_compare_range_imm }; + let PackedTuple { x, y } = $vstrczs(self, b, c, IMM); + (x,y) + } + } + )* + } + } + + impl_compare_range! 
{ + vector_unsigned_char vstrcb vstrcbs vstrczb vstrczbs + vector_unsigned_short vstrch vstrchs vstrczh vstrczhs + vector_unsigned_int vstrcf vstrcfs vstrczf vstrczfs + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorComparePredicate: Sized { + type Result; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpgt(self, other: Self) -> Self::Result { + simd_gt(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpge(self, other: Self) -> Self::Result { + simd_ge(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmplt(self, other: Self) -> Self::Result { + simd_lt(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmple(self, other: Self) -> Self::Result { + simd_le(self, other) + } + } + + macro_rules! impl_compare_predicate { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorComparePredicate for $ty { + type Result = t_b!($ty); + } + )* + } + } + + impl_compare_predicate! { + vector_signed_char + vector_unsigned_char + + vector_signed_short + vector_unsigned_short + + vector_signed_int + vector_unsigned_int + vector_float + + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEquality: Sized { + type Result; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq(self, other: Self) -> Self::Result { + simd_eq(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne(self, other: Self) -> Self::Result { + simd_ne(self, other) + } + } + + macro_rules! impl_compare_equality { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorEquality for $ty { + type Result = t_b!($ty); + } + )* + } + } + + impl_compare_equality! 
{ + vector_bool_char + vector_signed_char + vector_unsigned_char + + vector_bool_short + vector_signed_short + vector_unsigned_short + + vector_bool_int + vector_signed_int + vector_unsigned_int + vector_float + + vector_bool_long_long + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEqualityIdx: Sized { + type Result; + + unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result; + unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result; + + unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32); + unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32); + + unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result; + unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result; + + unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32); + unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32); + } + + macro_rules! 
impl_compare_equality_idx { + ($($ty:ident $ret:ident + $cmpeq:ident $cmpne:ident + $cmpeq_or_0:ident $cmpne_or_0:ident + $cmpeq_cc:ident $cmpne_cc:ident + $cmpeq_or_0_cc:ident $cmpne_or_0_cc:ident + )*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorEqualityIdx for $ty { + type Result = $ret; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result { + transmute($cmpeq(transmute(self), transmute(other))) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result { + transmute($cmpne(transmute(self), transmute(other))) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result { + transmute($cmpeq_or_0(transmute(self), transmute(other))) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result { + transmute($cmpne_or_0(transmute(self), transmute(other))) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32) { + let PackedTuple { x, y } = $cmpeq_cc(transmute(self), transmute(other)); + (transmute(x), y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32) { + let PackedTuple { x, y } = $cmpne_cc(transmute(self), transmute(other)); + (transmute(x),y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) { + let PackedTuple { x, y } = $cmpeq_or_0_cc(transmute(self), transmute(other)); + (transmute(x), y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) { + let PackedTuple { x, y } = $cmpne_or_0_cc(transmute(self), transmute(other)); + (transmute(x),y) + } + } + )* + } + } + + 
impl_compare_equality_idx! { + vector_signed_char vector_signed_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs + vector_bool_char vector_unsigned_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs + vector_unsigned_char vector_unsigned_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs + vector_signed_short vector_signed_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs + vector_bool_short vector_unsigned_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs + vector_unsigned_short vector_unsigned_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs + vector_signed_int vector_signed_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs + vector_bool_int vector_unsigned_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs + vector_unsigned_int vector_unsigned_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorExtract { + type ElementType; + + unsafe fn vec_extract(a: Self, b: i32) -> Self::ElementType; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlgvb))] + unsafe fn vlgvb(a: vector_unsigned_char, b: i32) -> u8 { + simd_extract_dyn(a, b as u32 % 16) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlgvh))] + unsafe fn vlgvh(a: vector_unsigned_short, b: i32) -> u16 { + simd_extract_dyn(a, b as u32 % 8) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlgvf))] + unsafe fn vlgvf(a: vector_unsigned_int, b: i32) -> u32 { + simd_extract_dyn(a, b as u32 % 4) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlgvg))] + unsafe fn vlgvg(a: vector_unsigned_long_long, b: i32) -> u64 { + simd_extract_dyn(a, b as u32 % 2) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorInsert { + type 
ElementType; + + unsafe fn vec_insert(a: Self::ElementType, b: Self, c: i32) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPromote: Sized { + type ElementType; + + unsafe fn vec_promote(a: Self::ElementType, b: i32) -> MaybeUninit; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlvgb))] + unsafe fn vlvgb(a: u8, b: vector_unsigned_char, c: i32) -> vector_unsigned_char { + simd_insert_dyn(b, c as u32 % 16, a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlvgh))] + unsafe fn vlvgh(a: u16, b: vector_unsigned_short, c: i32) -> vector_unsigned_short { + simd_insert_dyn(b, c as u32 % 8, a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlvgf))] + unsafe fn vlvgf(a: u32, b: vector_unsigned_int, c: i32) -> vector_unsigned_int { + simd_insert_dyn(b, c as u32 % 4, a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlvgg))] + unsafe fn vlvgg(a: u64, b: vector_unsigned_long_long, c: i32) -> vector_unsigned_long_long { + simd_insert_dyn(b, c as u32 % 2, a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorInsertAndZero { + type ElementType; + + unsafe fn vec_insert_and_zero(a: *const Self::ElementType) -> Self; + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vllezb))] + unsafe fn vllezb(x: *const u8) -> vector_unsigned_char { + vector_unsigned_char([0, 0, 0, 0, 0, 0, 0, *x, 0, 0, 0, 0, 0, 0, 0, 0]) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vllezh))] + unsafe fn vllezh(x: *const u16) -> vector_unsigned_short { + vector_unsigned_short([0, 0, 0, *x, 0, 0, 0, 0]) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vllezf))] + unsafe fn vllezf(x: *const u32) -> vector_unsigned_int { + vector_unsigned_int([0, *x, 0, 
0]) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vllezg))] + unsafe fn vllezg(x: *const u64) -> vector_unsigned_long_long { + vector_unsigned_long_long([*x, 0]) + } + + macro_rules! impl_extract_insert { + ($($ty:ident $extract_intr:ident $insert_intr:ident $insert_and_zero_intr:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtract for $ty { + type ElementType = l_t_t!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extract(a: Self, b: i32) -> Self::ElementType { + transmute($extract_intr(transmute(a), b)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorInsert for $ty { + type ElementType = l_t_t!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_insert(a: Self::ElementType, b: Self, c: i32) -> Self { + transmute($insert_intr(transmute(a), transmute(b), c)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorInsertAndZero for $ty { + type ElementType = l_t_t!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_insert_and_zero(a: *const Self::ElementType) -> Self { + transmute($insert_and_zero_intr(a.cast())) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorPromote for $ty { + type ElementType = l_t_t!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_promote(a: Self::ElementType, c: i32) -> MaybeUninit { + // Rust does not currently support `MaybeUninit` element types to simd + // vectors. In C/LLVM that is allowed (using poison values). So rust will + // use an extra instruction to zero the memory. + let b = MaybeUninit::<$ty>::zeroed(); + MaybeUninit::new(transmute($insert_intr(transmute(a), transmute(b), c))) + } + } + )* + } + + } + + impl_extract_insert! 
{ + vector_signed_char vlgvb vlvgb vllezb + vector_unsigned_char vlgvb vlvgb vllezb + vector_signed_short vlgvh vlvgh vllezh + vector_unsigned_short vlgvh vlvgh vllezh + vector_signed_int vlgvf vlvgf vllezf + vector_unsigned_int vlgvf vlvgf vllezf + vector_signed_long_long vlgvg vlvgg vllezg + vector_unsigned_long_long vlgvg vlvgg vllezg + vector_float vlgvf vlvgf vllezf + vector_double vlgvg vlvgg vllezg + } +} + +/// Load Count to Block Boundary +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(lcbb, BLOCK_BOUNDARY = 512))] +unsafe fn __lcbb(ptr: *const u8) -> u32 { + lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) }) +} + +/// Vector Add +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_add, U>(a: T, b: U) -> T::Result { + a.vec_add(b) +} + +/// Vector Subtract +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sub, U>(a: T, b: U) -> T::Result { + a.vec_sub(b) +} + +/// Vector Multiply +/// +/// ## Purpose +/// Compute the products of corresponding elements of two vectors. +/// +/// ## Result value +/// Each element of r receives the product of the corresponding elements of a and b. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mul(a: T, b: T) -> T { + a.vec_mul(b) +} + +/// Vector Count Leading Zeros +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cntlz(a: T) -> T::Result { + a.vec_cntlz() +} + +/// Vector Count Trailing Zeros +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cnttz(a: T) -> T::Result { + a.vec_cnttz() +} + +/// Vector Population Count +/// +/// Computes the population count (number of set bits) in each element of the input. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_popcnt(a: T) -> T::Result { + a.vec_popcnt() +} + +/// Vector Maximum +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_max, U>(a: T, b: U) -> T::Result { + a.vec_max(b) +} + +/// Vector Minimum +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_min, U>(a: T, b: U) -> T::Result { + a.vec_min(b) +} + +/// Vector Absolute +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_abs(a: T) -> T { + a.vec_abs() +} + +/// Vector Negative Absolute +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_nabs(a: T) -> T { + a.vec_nabs() +} + +/// Vector Negative Multiply Add +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_nmadd(a: T, b: T, c: T) -> T { + a.vec_nmadd(b, c) +} + +/// Vector Negative Multiply Subtract +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", 
issue = "135681")] +pub unsafe fn vec_nmsub(a: T, b: T, c: T) -> T { + a.vec_nmsub(b, c) +} + +/// Vector Square Root +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sqrt(a: T) -> T { + a.vec_sqrt() +} + +/// Vector Splat +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_splat(a: T) -> T { + a.vec_splat::() +} + +/// Vector Splats +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_splats, U>(a: T) -> U { + a.vec_splats() +} + +/// Vector AND +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_and, U>(a: T, b: U) -> T::Result { + a.vec_and(b) +} + +/// Vector OR +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_or, U>(a: T, b: U) -> T::Result { + a.vec_or(b) +} + +/// Vector XOR +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_xor, U>(a: T, b: U) -> T::Result { + a.vec_xor(b) +} + +/// Vector NOR +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_nor, U>(a: T, b: U) -> T::Result { + a.vec_nor(b) +} + +/// Vector NAND +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_nand, U>(a: T, b: U) -> T::Result { + a.vec_nand(b) +} + +/// Vector XNOR +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_eqv, U>(a: T, b: U) -> T::Result { + a.vec_eqv(b) +} + +/// Vector ANDC +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_andc, U>(a: T, 
b: U) -> T::Result { + a.vec_andc(b) +} + +/// Vector OR with Complement +/// +/// ## Purpose +/// Performs a bitwise OR of the first vector with the bitwise-complemented second vector. +/// +/// ## Result value +/// r is the bitwise OR of a and the bitwise complement of b. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_orc, U>(a: T, b: U) -> T::Result { + a.vec_orc(b) +} + +/// Vector Floor +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_floor(a: T) -> T { + a.vec_floor() +} + +/// Vector Ceil +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_ceil(a: T) -> T { + a.vec_ceil() +} + +/// Vector Truncate +/// +/// Returns a vector containing the truncated values of the corresponding elements of the given vector. +/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_trunc(a: T) -> T { + a.vec_trunc() +} + +/// Vector Round +/// +/// Returns a vector containing the rounded values to the nearest representable floating-point integer, +/// using IEEE round-to-nearest rounding, of the corresponding elements of the given vector +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_round(a: T) -> T { + a.vec_round() +} + +/// Vector Round to Current +/// +/// Returns a vector by using the current rounding mode to round every +/// floating-point element in the given vector to integer. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_roundc(a: T) -> T { + a.vec_roundc() +} + +/// Vector Round toward Negative Infinity +/// +/// Returns a vector containing the largest representable floating-point integral values less +/// than or equal to the values of the corresponding elements of the given vector. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_roundm(a: T) -> T { + // the IBM docs note + // + // > vec_roundm provides the same functionality as vec_floor, except that vec_roundz would not trigger the IEEE-inexact exception. + // + // but in practice `vec_floor` also does not trigger that exception, so both are equivalent + a.vec_floor() +} + +/// Vector Round toward Positive Infinity +/// +/// Returns a vector containing the smallest representable floating-point integral values greater +/// than or equal to the values of the corresponding elements of the given vector. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_roundp(a: T) -> T { + // the IBM docs note + // + // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundz would not trigger the IEEE-inexact exception. + // + // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent + a.vec_ceil() +} + +/// Vector Round toward Zero +/// +/// Returns a vector containing the truncated values of the corresponding elements of the given vector. +/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_roundz(a: T) -> T { + // the IBM docs note + // + // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception. + // + // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent + a.vec_trunc() +} + +/// Vector Round to Integer +/// +/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_rint(a: T) -> T { + a.vec_rint() +} + +/// Vector Average +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_avg, U>(a: T, b: U) -> T::Result { + a.vec_avg(b) +} + +/// Vector Shift Left +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sl, U>(a: T, b: U) -> T::Result { + a.vec_sl(b) +} + +/// Vector Shift Right +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sr, U>(a: T, b: U) -> T::Result { + a.vec_sr(b) +} + +/// Vector Shift Right Algebraic +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sra, U>(a: T, b: U) -> T::Result { + a.vec_sra(b) +} + +/// Vector Shift Left by Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_slb, U>(a: T, b: U) -> T::Result { + a.vec_slb(b) +} + +/// Vector Shift Right by Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_srb, U>(a: T, b: U) -> T::Result { + a.vec_srb(b) +} + 
+/// Vector Shift Right Algebraic by Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_srab, U>(a: T, b: U) -> T::Result { + a.vec_srab(b) +} + +/// Vector Element Rotate Left +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_rl, U>(a: T, b: U) -> T::Result { + a.vec_rl(b) +} + +/// Vector Shift Left +/// +/// Performs a left shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding +/// element of a left by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sll(a: T, b: vector_unsigned_char) -> T +where + T: sealed::VectorSll, +{ + a.vec_sll(b) +} + +/// Vector Shift Right +/// +/// Performs a right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding +/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_srl(a: T, b: vector_unsigned_char) -> T +where + T: sealed::VectorSrl, +{ + a.vec_srl(b) +} + +/// Vector Shift Right Arithmetic +/// +/// Performs an algebraic right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding +/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by copies of +/// the most significant bit of the element of a. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sral(a: T, b: vector_unsigned_char) -> T +where + T: sealed::VectorSral, +{ + a.vec_sral(b) +} + +/// Vector Element Rotate Left Immediate +/// +/// Rotates each element of a vector left by a given number of bits. Each element of the result is obtained by rotating the corresponding element +/// of a left by the number of bits specified by b, modulo the number of bits in the element. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_rli(a: T, bits: core::ffi::c_ulong) -> T { + a.vec_rli(bits) +} + +/// Vector Reverse Elements +/// +/// Returns a vector with the elements of the input vector in reversed order. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_reve(a: T) -> T { + a.vec_reve() +} + +/// Vector Byte Reverse +/// +/// Returns a vector where each vector element contains the corresponding byte-reversed vector element of the input vector. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_revb(a: T) -> T { + a.vec_revb() +} + +/// Vector Merge High +/// +/// Merges the most significant ("high") halves of two vectors. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mergeh(a: T, b: T) -> T { + a.vec_mergeh(b) +} + +/// Vector Merge Low +/// +/// Merges the least significant ("low") halves of two vectors. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mergel(a: T, b: T) -> T { + a.vec_mergel(b) +} + +/// Vector Pack +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_pack, U>(a: T, b: U) -> T::Result { + a.vec_pack(b) +} + +/// Vector Pack Saturated +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_packs, U>(a: T, b: U) -> T::Result { + a.vec_packs(b) +} + +/// Vector Pack Saturated Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_packs_cc(a: T, b: T) -> (T::Result, i32) { + a.vec_packs_cc(b) +} + +/// Vector Pack Saturated Unsigned +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_packsu, U>(a: T, b: U) -> T::Result { + a.vec_packsu(b) +} + +/// Vector Pack Saturated Unsigned Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_packsu_cc(a: T, b: T) -> (T::Result, i32) { + a.vec_packsu_cc(b) +} + +/// Vector Unpack High +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_unpackh(a: T) -> ::Result { + a.vec_unpackh() +} + +/// Vector Unpack Low +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_unpackl(a: T) -> ::Result { + a.vec_unpackl() +} + +/// Vector Generate Byte Mask +/// +/// Generates byte masks for elements in the return vector. For each bit in a, if the bit is one, all bit positions +/// in the corresponding byte element of d are set to ones. Otherwise, if the bit is zero, the corresponding byte element is set to zero. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vgbm, MASK = 0x00FF))] +pub unsafe fn vec_genmask() -> vector_unsigned_char { + vector_unsigned_char(const { genmask::() }) +} + +/// Vector Generate Mask (Byte) +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepib, L = 3, H = 5))] +pub unsafe fn vec_genmasks_8() -> vector_unsigned_char { + vector_unsigned_char(const { [genmasks(u8::BITS, L, H) as u8; 16] }) +} + +/// Vector Generate Mask (Halfword) +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepih, L = 3, H = 5))] +pub unsafe fn vec_genmasks_16() -> vector_unsigned_short { + vector_unsigned_short(const { [genmasks(u16::BITS, L, H) as u16; 8] }) +} + +/// Vector Generate Mask (Word) +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vgmf, L = 3, H = 5))] +pub unsafe fn vec_genmasks_32() -> vector_unsigned_int { + vector_unsigned_int(const { [genmasks(u32::BITS, L, H) as u32; 4] }) +} + +/// Vector Generate Mask (Doubleword) +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vgmg, L = 3, H = 5))] +pub unsafe fn vec_genmasks_64() -> vector_unsigned_long_long { + vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] }) +} + +/// Vector Permute +/// +/// Returns a vector that contains some elements of two vectors, in the order specified by a third vector. +/// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b. +/// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T { + a.vec_perm(b, c) +} + +/// Vector Sum Across Quadword +/// +/// Returns a vector containing the results of performing a sum across all the elements in each of the quadword of vector a, +/// and the rightmost word or doubleword element of the b. The result is an unsigned 128-bit integer. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sum_u128(a: T, b: T) -> vector_unsigned_char { + a.vec_sum_u128(b) +} + +/// Vector Sum Across Doubleword +/// +/// Returns a vector containing the results of performing a sum across all the elements in each of the doubleword of vector a, +/// and the rightmost sub-element of the corresponding doubleword of b. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sum2(a: T, b: T) -> vector_unsigned_long_long { + a.vec_sum2(b) +} + +/// Vector Sum Across Word +/// +/// Returns a vector containing the results of performing a sum across all the elements in each of the word of vector a, +/// and the rightmost sub-element of the corresponding word of b. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sum4(a: T, b: T) -> vector_unsigned_int { + a.vec_sum4(b) +} + +/// Vector Addition unsigned 128-bits +/// +/// Adds unsigned quadword values. +/// +/// This function operates on the vectors as 128-bit unsigned integers. It returns low 128 bits of a + b. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vaq))] +pub unsafe fn vec_add_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_char { + let a: u128 = transmute(a); + let b: u128 = transmute(b); + transmute(a.wrapping_add(b)) +} + +/// Vector Subtract unsigned 128-bits +/// +/// Subtracts unsigned quadword values. +/// +/// This function operates on the vectors as 128-bit unsigned integers. It returns low 128 bits of a - b. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vsq))] +pub unsafe fn vec_sub_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_char { + let a: u128 = transmute(a); + let b: u128 = transmute(b); + + transmute(a.wrapping_sub(b)) +} + +/// Vector Subtract Carryout +/// +/// Returns a vector containing the borrow produced by subtracting each of corresponding elements of b from a. +/// +/// On each resulting element, the value is 0 if a borrow occurred, or 1 if no borrow occurred. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_subc, U>(a: T, b: U) -> T::Result { + a.vec_subc(b) +} + +/// Vector Subtract Carryout unsigned 128-bits +/// +/// Gets the carry bit of the 128-bit subtraction of two quadword values. +/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the borrow produced by subtracting b from a, as unsigned 128-bits integers. +/// If no borrow occurred, the bit 127 of d is 1; otherwise it is set to 0. All other bits of d are 0. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vscbiq))] +pub unsafe fn vec_subc_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_char { + // FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608 + // let a: u128 = transmute(a); + // let b: u128 = transmute(b); + // transmute(!a.overflowing_sub(b).1 as u128) + transmute(vscbiq(transmute(a), transmute(b))) +} + +/// Vector Add Compute Carryout unsigned 128-bits +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vaccq))] +pub unsafe fn vec_addc_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_char { + let a: u128 = transmute(a); + let b: u128 = transmute(b); + transmute(a.overflowing_add(b).1 as u128) +} + +/// Vector Add With Carry unsigned 128-bits +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vacq))] +pub unsafe fn vec_adde_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_char, +) -> vector_unsigned_char { + let a: u128 = transmute(a); + let b: u128 = transmute(b); + let c: u128 = transmute(c); + // FIXME(llvm) sadly this does not work + // let (d, _carry) = a.carrying_add(b, c & 1 != 0); + // transmute(d) + transmute(vacq(a, b, c)) +} + +/// Vector Add With Carry Compute Carry unsigned 128-bits +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vacccq))] +pub unsafe fn vec_addec_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_char, +) -> vector_unsigned_char { + let a: u128 = transmute(a); + let b: u128 = transmute(b); + let c: u128 = transmute(c); + let (_d, carry) = a.carrying_add(b, c & 1 != 0); + 
transmute(carry as u128) +} + +/// Vector Subtract with Carryout +/// +/// Subtracts unsigned quadword values with carry bit from a previous operation. +/// +/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the result of subtracting of b from a, +/// and the carryout bit from a previous operation. +/// +/// Note: Only the borrow indication bit (127-bit) of c is used, and the other bits are ignored. +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vsbiq))] +pub unsafe fn vec_sube_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_char, +) -> vector_unsigned_char { + transmute(vsbiq(transmute(a), transmute(b), transmute(c))) +} + +/// Vector Subtract with Carryout, Carryout +/// +/// Gets the carry bit of the 128-bit subtraction of two quadword values with carry bit from the previous operation. +/// +/// It returns a vector containing the carryout produced from the result of subtracting of b from a, +/// and the carryout bit from a previous operation. If no borrow occurred, the 127-bit of d is 1, otherwise 0. +/// All other bits of d are 0. +/// +/// Note: Only the borrow indication bit (127-bit) of c is used, and the other bits are ignored. 
+#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vsbcbiq))] +pub unsafe fn vec_subec_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, + c: vector_unsigned_char, +) -> vector_unsigned_char { + transmute(vsbcbiq(transmute(a), transmute(b), transmute(c))) +} + +/// Vector Splat Signed Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepib, IMM = 42))] +pub unsafe fn vec_splat_s8() -> vector_signed_char { + vector_signed_char([IMM; 16]) +} + +/// Vector Splat Signed Halfword +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepih, IMM = 42))] +pub unsafe fn vec_splat_s16() -> vector_signed_short { + vector_signed_short([IMM as i16; 8]) +} + +/// Vector Splat Signed Word +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepif, IMM = 42))] +pub unsafe fn vec_splat_s32() -> vector_signed_int { + vector_signed_int([IMM as i32; 4]) +} + +/// Vector Splat Signed Doubleword +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepig, IMM = 42))] +pub unsafe fn vec_splat_s64() -> vector_signed_long_long { + vector_signed_long_long([IMM as i64; 2]) +} + +/// Vector Splat Unsigned Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepib, IMM = 42))] +pub unsafe fn vec_splat_u8() -> vector_unsigned_char { + vector_unsigned_char([IMM; 16]) +} + +/// Vector Splat Unsigned Halfword +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepih, IMM = 42))] +pub 
unsafe fn vec_splat_u16() -> vector_unsigned_short { + vector_unsigned_short([IMM as u16; 8]) +} + +/// Vector Splat Unsigned Word +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepif, IMM = 42))] +pub unsafe fn vec_splat_u32() -> vector_unsigned_int { + vector_unsigned_int([IMM as u32; 4]) +} + +/// Vector Splat Unsigned Doubleword +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vrepig, IMM = 42))] +pub unsafe fn vec_splat_u64() -> vector_unsigned_long_long { + vector_unsigned_long_long([IMM as u64; 2]) +} + +macro_rules! vec_find_any { + ($($Trait:ident $fun:ident $doc:literal)*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[unstable(feature = "stdarch_s390x", issue = "135681")] + #[doc = $doc] + pub unsafe fn $fun, U>(a: T, b: U) -> T::Result { + a.$fun(b) + } + )* + } +} + +vec_find_any! { + VectorFindAnyEq vec_find_any_eq "Vector Find Any Element Equal with Condition Code" + VectorFindAnyNe vec_find_any_ne "Vector Find Any Element Not Equal with Condition Code" + VectorFindAnyEqIdx vec_find_any_eq_idx "Vector Find Any Element Equal Index with Condition Code" + VectorFindAnyNeIdx vec_find_any_ne_idx "Vector Find Any Element Not Equal Index with Condition Code" + VectorFindAnyEqOrZeroIdx vec_find_any_eq_or_0_idx "Vector Find Any Element Equal or Zero Index with Condition Code" + VectorFindAnyNeOrZeroIdx vec_find_any_ne_or_0_idx "Vector Find Any Element Not Equal or Zero Index with Condition Code" +} + +macro_rules! vec_find_any_cc { + ($($Trait:ident $fun:ident $doc:literal)*) => { + $( + #[inline] + #[target_feature(enable = "vector")] + #[unstable(feature = "stdarch_s390x", issue = "135681")] + #[doc = $doc] + pub unsafe fn $fun, U>(a: T, b: U) -> (T::Result, i32) { + a.$fun(b) + } + )* + } +} + +vec_find_any_cc! 
{ + VectorFindAnyEqCC vec_find_any_eq_cc "Vector Find Any Element Equal with Condition Code" + VectorFindAnyNeCC vec_find_any_ne_cc "Vector Find Any Element Not Equal with Condition Code" + VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc "Vector Find Any Element Equal Index with Condition Code" + VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc "Vector Find Any Element Not Equal Index with Condition Code" + VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc "Vector Find Any Element Equal or Zero Index with Condition Code" + VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc "Vector Find Any Element Not Equal or Zero Index with Condition Code" +} + +/// Vector Load +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_xl(offset: isize, ptr: *const T::ElementType) -> T { + T::vec_xl(offset, ptr) +} + +/// Vector Load Pair +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_load_pair(a: T::ElementType, b: T::ElementType) -> T { + T::vec_load_pair(a, b) +} + +/// Vector Load to Block Boundary +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_load_bndry( + ptr: *const T::ElementType, +) -> MaybeUninit { + T::vec_load_bndry::(ptr) +} + +/// Vector Store +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_xst(vector: T, offset: isize, ptr: *mut T::ElementType) { + vector.vec_xst(offset, ptr) +} + +/// Vector Load with Length +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_load_len( + ptr: *const T::ElementType, + byte_count: u32, +) -> T { + T::vec_load_len(ptr, byte_count) +} + +/// Vector Store with Length +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = 
"135681")] +pub unsafe fn vec_store_len( + vector: T, + ptr: *mut T::ElementType, + byte_count: u32, +) { + vector.vec_store_len(ptr, byte_count) +} + +/// Vector Load Rightmost with Length +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vlrlr))] +pub unsafe fn vec_load_len_r(ptr: *const u8, byte_count: u32) -> vector_unsigned_char { + vlrl(byte_count, ptr) +} + +/// Vector Store Rightmost with Length +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vstrlr))] +pub unsafe fn vec_store_len_r(vector: vector_unsigned_char, ptr: *mut u8, byte_count: u32) { + vstrl(vector, byte_count, ptr) +} + +/// Vector Multiply Add +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_madd(a: T, b: T, c: T) -> T { + a.vec_madd(b, c) +} + +/// Vector Multiply Add +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_msub(a: T, b: T, c: T) -> T { + a.vec_msub(b, c) +} + +/// Vector Multiply and Add Even +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_meadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_meadd(b, c) +} + +/// Vector Multiply and Add Odd +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_moadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_moadd(b, c) +} + +/// Vector Multiply and Add High +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mhadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_mhadd(b, c) +} + +/// Vector 
Multiply and Add Low +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mladd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_mladd(b, c) +} + +/// Vector Checksum +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vcksm))] +pub unsafe fn vec_checksum(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int { + vcksm(a, b) +} + +/// Vector Multiply Even +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mule, U>(a: T, b: T) -> U { + a.vec_mule(b) +} + +/// Vector Multiply Odd +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mulo, U>(a: T, b: T) -> U { + a.vec_mulo(b) +} + +/// Vector Multiply High +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mulh, U>(a: T, b: T) -> U { + a.vec_mulh(b) +} + +/// Vector Galois Field Multiply Sum +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_gfmsum, U>(a: T, b: T) -> U { + a.vec_gfmsum(b) +} + +/// Vector Galois Field Multiply Sum +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_gfmsum_accum( + a: T, + b: T, + c: T::Result, +) -> T::Result { + a.vec_gfmsum_accum(b, c) +} + +/// Vector Galois Field Multiply Sum 128-bits +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vgfmg))] +pub unsafe fn vec_gfmsum_128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, +) -> vector_unsigned_char { + transmute(vgfmg(a, b)) +} + +/// Vector Galois Field Multiply Sum and 
Accumulate 128-bits +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vgfmag))] +pub unsafe fn vec_gfmsum_accum_128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: vector_unsigned_char, +) -> vector_unsigned_char { + transmute(vgfmag(a, b, transmute(c))) +} + +/// Vector Bit Permute +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vbperm))] +pub unsafe fn vec_bperm_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_long_long { + vbperm(a, b) +} + +/// Vector Gather Element +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_gather_element( + a: T, + b: T::Offset, + c: *const T::Element, +) -> T { + a.vec_gather_element::(b, c) +} + +/// Vector Select +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sel, U>(a: T, b: T, c: U) -> T { + a.vec_sel(b, c) +} + +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO_P: u32 = 1 << 11; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO_N: u32 = 1 << 10; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO: u32 = __VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL_P: u32 = 1 << 9; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL_N: u32 = 1 << 8; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL: u32 = __VEC_CLASS_FP_NORMAL_P | __VEC_CLASS_FP_NORMAL_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL_P: u32 = 1 << 7; 
+#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL_N: u32 = 1 << 6; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL: u32 = __VEC_CLASS_FP_SUBNORMAL_P | __VEC_CLASS_FP_SUBNORMAL_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY_P: u32 = 1 << 5; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY_N: u32 = 1 << 4; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY: u32 = __VEC_CLASS_FP_INFINITY_P | __VEC_CLASS_FP_INFINITY_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN_P: u32 = 1 << 3; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN_N: u32 = 1 << 2; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN: u32 = __VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN_P: u32 = 1 << 1; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN_N: u32 = 1 << 0; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN: u32 = __VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NAN: u32 = __VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NOT_NORMAL: u32 = + __VEC_CLASS_FP_NAN | __VEC_CLASS_FP_SUBNORMAL | __VEC_CLASS_FP_ZERO | __VEC_CLASS_FP_INFINITY; + +/// Vector Floating-Point Test Data Class +/// +/// You can use the `__VEC_CLASS_FP_*` constants as the argument for this operand +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_fp_test_data_class( + a: T, + c: *mut i32, +) -> 
T::Result { + let (x, y) = a.vec_fp_test_data_class::(); + c.write(y); + x +} + +/// All Elements Not a Number +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_nan(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 0) +} + +/// All Elements Numeric +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_numeric(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 3) +} + +/// Any Elements Not a Number +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nan(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 3) +} + +/// Any Elements Numeric +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_numeric(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 0) +} + +/// Vector Test under Mask +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_test_mask(a: T, b: T::Mask) -> i32 { + // I can't find much information about this, but this might just be a check for whether the + // bitwise and of a and b is non-zero? 
+ a.vec_test_mask(b) +} + +/// Vector Search String +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_search_string_cc( + a: T, + b: T, + c: vector_unsigned_char, +) -> (vector_unsigned_char, i32) { + a.vec_search_string_cc(b, c) +} + +/// Vector Search String Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_search_string_until_zero_cc( + a: T, + b: T, + c: vector_unsigned_char, +) -> (vector_unsigned_char, i32) { + a.vec_search_string_until_zero_cc(b, c) +} + +/// Vector Convert from float (even elements) to double +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +// FIXME: this emits `vflls` where `vldeb` is expected +// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vldeb))] +pub unsafe fn vec_doublee(a: vector_float) -> vector_double { + let even = simd_shuffle::<_, _, f32x2>(a, a, const { u32x2::from_array([0, 2]) }); + simd_as(even) +} + +/// Vector Convert from double to float (even elements) +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +// FIXME: the C version uses a shuffle mask with poison; we can't do that +// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vledb))] +pub unsafe fn vec_floate(a: vector_double) -> vector_float { + let truncated: f32x2 = simd_as(a); + simd_shuffle( + truncated, + truncated, + const { u32x4::from_array([0, 0, 1, 1]) }, + ) +} + +/// Vector Convert from int to float +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_float(a: impl sealed::VectorFloat) -> vector_float { + a.vec_float() +} + +/// Vector Convert from long long to double +#[inline] +#[target_feature(enable = "vector")] 
+#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_double(a: impl sealed::VectorDouble) -> vector_double { + a.vec_double() +} + +/// Vector Sign Extend to Doubleword +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_extend_s64(a: impl sealed::VectorExtendSigned64) -> vector_signed_long_long { + a.vec_extend_s64() +} + +/// Vector Convert floating point to signed +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_signed(a: T) -> T::Result { + a.vec_signed() +} + +/// Vector Convert floating point to unsigned +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_unsigned(a: T) -> T::Result { + a.vec_unsigned() +} + +/// Vector Copy Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cp_until_zero(a: T) -> T { + a.vec_cp_until_zero() +} + +/// Vector Copy Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cp_until_zero_cc(a: T) -> (T, i32) { + a.vec_cp_until_zero_cc() +} + +/// Vector Multiply Sum Logical +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr( + all(test, target_feature = "vector-enhancements-1"), + assert_instr(vmslg, D = 4) +)] +pub unsafe fn vec_msum_u128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: vector_unsigned_char, +) -> vector_unsigned_char { + const { + if !matches!(D, 0 | 4 | 8 | 12) { + panic!("D needs to be one of 0, 4, 8, 12"); + } + }; + transmute(vmslg(a, b, transmute(c), D)) +} + +/// Vector Shift Left Double by Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = 
"135681")] +pub unsafe fn vec_sld(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 4); + a.vec_sld::(b) +} + +/// Vector Shift Left Double by Word +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sldw(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 2); + a.vec_sldw::(b) +} + +/// Vector Shift Left Double by Bit +#[inline] +#[target_feature(enable = "vector-enhancements-2")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sldb(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 3); + a.vec_sldb::(b) +} + +/// Vector Shift Right Double by Bit +#[inline] +#[target_feature(enable = "vector-enhancements-2")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_srdb(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 3); + a.vec_srdb::(b) +} + +/// Vector Compare Ranges +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::Eq as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::Ne as u32 }>(b, c) +} + +/// Vector Compare Ranges Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_idx(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::EqIdx as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_idx(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::NeIdx as u32 }>(b, c) +} + +/// Vector Compare Ranges with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = 
"stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::Eq as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Not in Ranges with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::Ne as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Ranges Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::EqIdx as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Not in Ranges Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::NeIdx as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Ranges or Zero Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_or_0_idx(a: T, b: T, c: T) -> T::Result { + a.vstrcz::<{ FindImm::EqIdx as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges or Zero Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_or_0_idx(a: T, b: T, c: T) -> T::Result { + a.vstrcz::<{ FindImm::NeIdx as u32 }>(b, c) +} + +/// Vector Compare Ranges or Zero Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_or_0_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> 
T::Result { + let (x, y) = a.vstrczs::<{ FindImm::EqIdx as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Not in Ranges or Zero Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_or_0_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrczs::<{ FindImm::NeIdx as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpeq(a: T, b: T) -> T::Result { + a.vec_cmpeq(b) +} + +/// Vector Compare Not Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpne(a: T, b: T) -> T::Result { + a.vec_cmpne(b) +} + +/// Vector Compare Greater Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpgt(a: T, b: T) -> T::Result { + a.vec_cmpgt(b) +} + +/// Vector Compare Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpge(a: T, b: T) -> T::Result { + a.vec_cmpge(b) +} + +/// Vector Compare Less +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmplt(a: T, b: T) -> T::Result { + a.vec_cmplt(b) +} + +/// Vector Compare Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmple(a: T, b: T) -> T::Result { + a.vec_cmple(b) +} + +/// Vector Compare Equal Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpeq_idx(a: T, b: T) -> T::Result { + a.vec_cmpeq_idx(b) +} +/// Vector Compare Not Equal Index +#[inline] 
+#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpne_idx(a: T, b: T) -> T::Result { + a.vec_cmpne_idx(b) +} +/// Vector Compare Equal Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpeq_idx_cc(a: T, b: T) -> (T::Result, i32) { + a.vec_cmpeq_idx_cc(b) +} +/// Vector Compare Not Equal Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpne_idx_cc(a: T, b: T) -> (T::Result, i32) { + a.vec_cmpne_idx_cc(b) +} +/// Vector Compare Equal or Zero Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpeq_or_0_idx(a: T, b: T) -> T::Result { + a.vec_cmpeq_or_0_idx(b) +} +/// Vector Compare Not Equal or Zero Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpne_or_0_idx(a: T, b: T) -> T::Result { + a.vec_cmpne_or_0_idx(b) +} +/// Vector Compare Equal or Zero Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpeq_or_0_idx_cc(a: T, b: T) -> (T::Result, i32) { + a.vec_cmpeq_or_0_idx_cc(b) +} +/// Vector Compare Not Equal or Zero Index with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpne_or_0_idx_cc(a: T, b: T) -> (T::Result, i32) { + a.vec_cmpne_or_0_idx_cc(b) +} + +/// All Elements Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_eq(a: T, b: T) -> i32 { + simd_reduce_all(vec_cmpeq(a, b)) as i32 as i32 +} + +/// All Elements Not Equal +#[inline] 
+#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_ne(a: T, b: T) -> i32 { + simd_reduce_all(vec_cmpne(a, b)) as i32 +} + +/// Any Element Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_eq(a: T, b: T) -> i32 { + simd_reduce_any(vec_cmpeq(a, b)) as i32 +} + +/// Any Element Not Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_ne(a: T, b: T) -> i32 { + simd_reduce_any(vec_cmpne(a, b)) as i32 +} + +/// All Elements Less Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_lt(a: T, b: T) -> i32 { + a.vec_all_lt(b) +} + +/// All Elements Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_le(a: T, b: T) -> i32 { + a.vec_all_le(b) +} + +/// All Elements Greater Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_gt(a: T, b: T) -> i32 { + a.vec_all_gt(b) +} + +/// All Elements Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_ge(a: T, b: T) -> i32 { + a.vec_all_ge(b) +} + +/// All Elements Not Less Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_nlt(a: T, b: T) -> i32 { + vec_all_ge(a, b) +} + +/// All Elements Not Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_nle(a: T, b: T) -> i32 { + vec_all_gt(a, b) +} + +/// All Elements Not Greater Than +#[inline] +#[target_feature(enable = 
"vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_ngt(a: T, b: T) -> i32 { + vec_all_le(a, b) +} + +/// All Elements Not Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_all_nge(a: T, b: T) -> i32 { + vec_all_lt(a, b) +} + +/// Any Elements Less Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_lt(a: T, b: T) -> i32 { + !vec_all_ge(a, b) +} + +/// Any Elements Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_le(a: T, b: T) -> i32 { + !vec_all_gt(a, b) +} + +/// Any Elements Greater Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_gt(a: T, b: T) -> i32 { + !vec_all_le(a, b) +} + +/// Any Elements Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_ge(a: T, b: T) -> i32 { + !vec_all_lt(a, b) +} + +/// Any Elements Not Less Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nlt(a: T, b: T) -> i32 { + vec_any_ge(a, b) +} + +/// Any Elements Not Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nle(a: T, b: T) -> i32 { + vec_any_gt(a, b) +} + +/// Any Elements Not Greater Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_ngt(a: T, b: T) -> i32 { + vec_any_le(a, b) +} + +/// Any Elements Not Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue 
= "135681")] +pub unsafe fn vec_any_nge(a: T, b: T) -> i32 { + vec_any_lt(a, b) +} + +/// Vector Extract +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_extract(a: T, b: i32) -> T::ElementType { + T::vec_extract(a, b) +} + +/// Vector Insert +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_insert(a: T::ElementType, b: T, c: i32) -> T { + T::vec_insert(a, b, c) +} + +/// Vector Insert and Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_insert_and_zero(a: *const T::ElementType) -> T { + T::vec_insert_and_zero(a) +} + +/// Vector Promote +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_promote(a: T::ElementType, b: i32) -> MaybeUninit { + T::vec_promote(a, b) +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::mem::transmute; + + use crate::core_arch::simd::*; + use stdarch_test::simd_test; + + #[test] + fn reverse_mask() { + assert_eq!(ShuffleMask::<4>::reverse().0, [3, 2, 1, 0]); + } + + #[test] + fn mergel_mask() { + assert_eq!(ShuffleMask::<4>::merge_low().0, [2, 6, 3, 7]); + } + + #[test] + fn mergeh_mask() { + assert_eq!(ShuffleMask::<4>::merge_high().0, [0, 4, 1, 5]); + } + + #[test] + fn pack_mask() { + assert_eq!(ShuffleMask::<4>::pack().0, [1, 3, 5, 7]); + } + + #[test] + fn test_vec_mask() { + assert_eq!( + genmask::<0x00FF>(), + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF + ] + ); + } + + #[test] + fn test_genmasks() { + assert_eq!(genmasks(u8::BITS, 3, 5), 28); + assert_eq!(genmasks(u8::BITS, 3, 7), 31); + + // If a or b is greater than 8, the operation is performed as if the value gets modulo by 8. 
+ assert_eq!(genmasks(u8::BITS, 3 + 8, 7 + 8), 31); + // If a is greater than b, the operation is perform as if b equals 7. + assert_eq!(genmasks(u8::BITS, 5, 4), genmasks(u8::BITS, 5, 7)); + + assert_eq!( + genmasks(u16::BITS, 4, 12) as u16, + u16::from_be_bytes([15, -8i8 as u8]) + ); + assert_eq!( + genmasks(u32::BITS, 4, 29) as u32, + u32::from_be_bytes([15, 0xFF, 0xFF, -4i8 as u8]) + ); + } + + macro_rules! test_vec_1 { + { $name: ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => { + #[simd_test(enable = "vector")] + unsafe fn $name() { + let a: vector_float = transmute(f32x4::new($($a),+)); + + let d: vector_float = transmute(f32x4::new($($d),+)); + let r = transmute(vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON))); + let e = m32x4::new(true, true, true, true); + assert_eq!(e, r); + } + }; + { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($d:expr),+] } => { + test_vec_1! { $name, $fn, $ty -> $ty, [$($a),+], [$($d),+] } + }; + { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($d:expr),+] } => { + #[simd_test(enable = "vector")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + + let d = $ty_out::new($($d),+); + let r : $ty_out = transmute($fn(a)); + assert_eq!(d, r); + } + } + } + + macro_rules! test_vec_2 { + { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! { $name, $fn, $ty -> $ty, [$($a),+], [$($b),+], [$($d),+] } + }; + { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! 
{ $name, $fn, $ty, $ty -> $ty, [$($a),+], [$($b),+], [$($d),+] } + }; + { $name: ident, $fn:ident, $ty1: ident, $ty2: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + #[simd_test(enable = "vector")] + unsafe fn $name() { + let a: s_t_l!($ty1) = transmute($ty1::new($($a),+)); + let b: s_t_l!($ty2) = transmute($ty2::new($($b),+)); + + let d = $ty_out::new($($d),+); + let r : $ty_out = transmute($fn(a, b)); + assert_eq!(d, r); + } + }; + { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], $d:expr } => { + #[simd_test(enable = "vector")] + unsafe fn $name() { + let a: s_t_l!($ty) = transmute($ty::new($($a),+)); + let b: s_t_l!($ty) = transmute($ty::new($($b),+)); + + let r : $ty_out = transmute($fn(a, b)); + assert_eq!($d, r); + } + } + } + + #[simd_test(enable = "vector")] + unsafe fn vec_add_i32x4_i32x4() { + let x = i32x4::new(1, 2, 3, 4); + let y = i32x4::new(4, 3, 2, 1); + let x: vector_signed_int = transmute(x); + let y: vector_signed_int = transmute(y); + let z = vec_add(x, y); + assert_eq!(i32x4::splat(5), transmute(z)); + } + + macro_rules! test_vec_sub { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! {$name, vec_sub, $ty, [$($a),+], [$($b),+], [$($d),+] } + } + } + + test_vec_sub! { test_vec_sub_f32x4, f32x4, + [-1.0, 0.0, 1.0, 2.0], + [2.0, 1.0, -1.0, -2.0], + [-3.0, -1.0, 2.0, 4.0] } + + test_vec_sub! { test_vec_sub_f64x2, f64x2, + [-1.0, 0.0], + [2.0, 1.0], + [-3.0, -1.0] } + + test_vec_sub! { test_vec_sub_i64x2, i64x2, + [-1, 0], + [2, 1], + [-3, -1] } + + test_vec_sub! { test_vec_sub_u64x2, u64x2, + [0, 1], + [1, 0], + [u64::MAX, 1] } + + test_vec_sub! { test_vec_sub_i32x4, i32x4, + [-1, 0, 1, 2], + [2, 1, -1, -2], + [-3, -1, 2, 4] } + + test_vec_sub! { test_vec_sub_u32x4, u32x4, + [0, 0, 1, 2], + [2, 1, 0, 0], + [4294967294, 4294967295, 1, 2] } + + test_vec_sub! 
{ test_vec_sub_i16x8, i16x8, + [-1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2], + [-3, -1, 2, 4, -3, -1, 2, 4] } + + test_vec_sub! { test_vec_sub_u16x8, u16x8, + [0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0], + [65534, 65535, 1, 2, 65534, 65535, 1, 2] } + + test_vec_sub! { test_vec_sub_i8x16, i8x16, + [-1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2], + [-3, -1, 2, 4, -3, -1, 2, 4, -3, -1, 2, 4, -3, -1, 2, 4] } + + test_vec_sub! { test_vec_sub_u8x16, u8x16, + [0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2], + [2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0], + [254, 255, 1, 2, 254, 255, 1, 2, 254, 255, 1, 2, 254, 255, 1, 2] } + + macro_rules! test_vec_mul { + { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { + test_vec_2! {$name, vec_mul, $ty, [$($a),+], [$($b),+], [$($d),+] } + } + } + + test_vec_mul! { test_vec_mul_f32x4, f32x4, + [-1.0, 0.0, 1.0, 2.0], + [2.0, 1.0, -1.0, -2.0], + [-2.0, 0.0, -1.0, -4.0] } + + test_vec_mul! { test_vec_mul_f64x2, f64x2, + [-1.0, 0.0], + [2.0, 1.0], + [-2.0, 0.0] } + + test_vec_mul! { test_vec_mul_i64x2, i64x2, + [i64::MAX, -4], + [2, 3], + [i64::MAX.wrapping_mul(2), -12] } + + test_vec_mul! { test_vec_mul_u64x2, u64x2, + [u64::MAX, 4], + [2, 3], + [u64::MAX.wrapping_mul(2), 12] } + + test_vec_mul! { test_vec_mul_i32x4, i32x4, + [-1, 0, 1, 2], + [2, 1, -1, -2], + [-2, 0, -1, -4] } + + test_vec_mul! { test_vec_mul_u32x4, u32x4, + [0, u32::MAX - 1, 1, 2], + [5, 6, 7, 8], + [0, 4294967284, 7, 16] } + + test_vec_mul! { test_vec_mul_i16x8, i16x8, + [-1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2], + [-2, 0, -1, -4, -2, 0, -1, -4] } + + test_vec_mul! { test_vec_mul_u16x8, u16x8, + [0, u16::MAX - 1, 1, 2, 3, 4, 5, 6], + [5, 6, 7, 8, 9, 8, 7, 6], + [0, 65524, 7, 16, 27, 32, 35, 36] } + + test_vec_mul! 
{ test_vec_mul_i8x16, i8x16, + [-1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2, -1, 0, 1, 2], + [2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2, 2, 1, -1, -2], + [-2, 0, -1, -4, -2, 0, -1, -4, -2, 0, -1, -4, -2, 0, -1, -4] } + + test_vec_mul! { test_vec_mul_u8x16, u8x16, + [0, u8::MAX - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4], + [5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 0, u8::MAX, 1, 2, 3, 4], + [0, 244, 7, 16, 27, 32, 35, 36, 35, 32, 0, 248, 7, 12, 15, 16] } + + macro_rules! test_vec_abs { + { $name: ident, $ty: ident, $a: expr, $d: expr } => { + #[simd_test(enable = "vector")] + unsafe fn $name() { + let a: s_t_l!($ty) = vec_splats($a); + let a: s_t_l!($ty) = vec_abs(a); + let d = $ty::splat($d); + assert_eq!(d, transmute(a)); + } + } + } + + test_vec_abs! { test_vec_abs_i8, i8x16, -42i8, 42i8 } + test_vec_abs! { test_vec_abs_i16, i16x8, -42i16, 42i16 } + test_vec_abs! { test_vec_abs_i32, i32x4, -42i32, 42i32 } + test_vec_abs! { test_vec_abs_i64, i64x2, -42i64, 42i64 } + test_vec_abs! { test_vec_abs_f32, f32x4, -42f32, 42f32 } + test_vec_abs! { test_vec_abs_f64, f64x2, -42f64, 42f64 } + + test_vec_1! { test_vec_nabs, vec_nabs, f32x4, + [core::f32::consts::PI, 1.0, 0.0, -1.0], + [-core::f32::consts::PI, -1.0, 0.0, -1.0] } + + test_vec_2! { test_vec_andc, vec_andc, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b10000000], + [0b11001100, 0b00001100, 0b11000000, 0b01001100] } + + test_vec_2! { test_vec_and, vec_and, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [0b00000000, 0b11000000, 0b00001100, 0b00000000] } + + test_vec_2! { test_vec_nand, vec_nand, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [!0b00000000, !0b11000000, !0b00001100, !0b00000000] } + + test_vec_2! 
{ test_vec_orc, vec_orc, u32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [0b11001100 | !0b00110011, 0b11001100 | !0b11110011, 0b11001100 | !0b00001100, 0b11001100 | !0b00000000] } + + test_vec_2! { test_vec_or, vec_or, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [0b11111111, 0b11111111, 0b11001100, 0b11001100] } + + test_vec_2! { test_vec_nor, vec_nor, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [!0b11111111, !0b11111111, !0b11001100, !0b11001100] } + + test_vec_2! { test_vec_xor, vec_xor, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [0b11111111, 0b00111111, 0b11000000, 0b11001100] } + + test_vec_2! { test_vec_eqv, vec_eqv, i32x4, + [0b11001100, 0b11001100, 0b11001100, 0b11001100], + [0b00110011, 0b11110011, 0b00001100, 0b00000000], + [!0b11111111, !0b00111111, !0b11000000, !0b11001100] } + + test_vec_1! { test_vec_floor_f32, vec_floor, f32x4, + [1.1, 1.9, -0.5, -0.9], + [1.0, 1.0, -1.0, -1.0] + } + + test_vec_1! { test_vec_floor_f64_1, vec_floor, f64x2, + [1.1, 1.9], + [1.0, 1.0] + } + test_vec_1! { test_vec_floor_f64_2, vec_floor, f64x2, + [-0.5, -0.9], + [-1.0, -1.0] + } + + test_vec_1! { test_vec_ceil_f32, vec_ceil, f32x4, + [0.1, 0.5, 0.6, 0.9], + [1.0, 1.0, 1.0, 1.0] + } + test_vec_1! { test_vec_ceil_f64_1, vec_ceil, f64x2, + [0.1, 0.5], + [1.0, 1.0] + } + test_vec_1! { test_vec_ceil_f64_2, vec_ceil, f64x2, + [0.6, 0.9], + [1.0, 1.0] + } + + test_vec_1! { test_vec_round_f32, vec_round, f32x4, + [0.1, 0.5, 0.6, 0.9], + [0.0, 0.0, 1.0, 1.0] + } + + test_vec_1! { test_vec_round_f32_even_odd, vec_round, f32x4, + [0.5, 1.5, 2.5, 3.5], + [0.0, 2.0, 2.0, 4.0] + } + + test_vec_1! { test_vec_round_f64_1, vec_round, f64x2, + [0.1, 0.5], + [0.0, 0.0] + } + test_vec_1! 
{ test_vec_round_f64_2, vec_round, f64x2, + [0.6, 0.9], + [1.0, 1.0] + } + + test_vec_1! { test_vec_roundc_f32, vec_roundc, f32x4, + [0.1, 0.5, 0.6, 0.9], + [0.0, 0.0, 1.0, 1.0] + } + + test_vec_1! { test_vec_roundc_f32_even_odd, vec_roundc, f32x4, + [0.5, 1.5, 2.5, 3.5], + [0.0, 2.0, 2.0, 4.0] + } + + test_vec_1! { test_vec_roundc_f64_1, vec_roundc, f64x2, + [0.1, 0.5], + [0.0, 0.0] + } + test_vec_1! { test_vec_roundc_f64_2, vec_roundc, f64x2, + [0.6, 0.9], + [1.0, 1.0] + } + + test_vec_1! { test_vec_rint_f32, vec_rint, f32x4, + [0.1, 0.5, 0.6, 0.9], + [0.0, 0.0, 1.0, 1.0] + } + + test_vec_1! { test_vec_rint_f32_even_odd, vec_rint, f32x4, + [0.5, 1.5, 2.5, 3.5], + [0.0, 2.0, 2.0, 4.0] + } + + test_vec_1! { test_vec_rint_f64_1, vec_rint, f64x2, + [0.1, 0.5], + [0.0, 0.0] + } + test_vec_1! { test_vec_rint_f64_2, vec_rint, f64x2, + [0.6, 0.9], + [1.0, 1.0] + } + + test_vec_2! { test_vec_sll, vec_sll, i32x4, u8x16 -> i32x4, + [1, 1, 1, 1], + [0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 8], + [1 << 2, 1 << 3, 1 << 4, 1] } + + test_vec_2! { test_vec_srl, vec_srl, i32x4, u8x16 -> i32x4, + [0b1000, 0b1000, 0b1000, 0b1000], + [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16], + [4, 2, 1, 8] } + + test_vec_2! { test_vec_sral_pos, vec_sral, u32x4, u8x16 -> i32x4, + [0b1000, 0b1000, 0b1000, 0b1000], + [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16], + [4, 2, 1, 8] } + + test_vec_2! { test_vec_sral_neg, vec_sral, i32x4, u8x16 -> i32x4, + [-8, -8, -8, -8], + [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16], + [-4, -2, -1, -8] } + + test_vec_1! { test_vec_reve_f32, vec_reve, f32x4, + [0.1, 0.5, 0.6, 0.9], + [0.9, 0.6, 0.5, 0.1] + } + + test_vec_1! { test_vec_revb_u32, vec_revb, u32x4, + [0xAABBCCDD, 0xEEFF0011, 0x22334455, 0x66778899], + [0xDDCCBBAA, 0x1100FFEE, 0x55443322, 0x99887766] + } + + test_vec_2! 
{ test_vec_mergeh_u32, vec_mergeh, u32x4, + [0xAAAAAAAA, 0xBBBBBBBB, 0xCCCCCCCC, 0xDDDDDDDD], + [0x00000000, 0x11111111, 0x22222222, 0x33333333], + [0xAAAAAAAA, 0x00000000, 0xBBBBBBBB, 0x11111111] + } + + test_vec_2! { test_vec_mergel_u32, vec_mergel, u32x4, + [0xAAAAAAAA, 0xBBBBBBBB, 0xCCCCCCCC, 0xDDDDDDDD], + [0x00000000, 0x11111111, 0x22222222, 0x33333333], + [0xCCCCCCCC, 0x22222222, 0xDDDDDDDD, 0x33333333] + } + + macro_rules! test_vec_perm { + {$name:ident, + $shorttype:ident, $longtype:ident, + [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { + #[simd_test(enable = "vector")] + unsafe fn $name() { + let a: $longtype = transmute($shorttype::new($($a),+)); + let b: $longtype = transmute($shorttype::new($($b),+)); + let c: vector_unsigned_char = transmute(u8x16::new($($c),+)); + let d = $shorttype::new($($d),+); + + let r: $shorttype = transmute(vec_perm(a, b, c)); + assert_eq!(d, r); + } + } + } + + test_vec_perm! {test_vec_perm_u8x16, + u8x16, vector_unsigned_char, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} + test_vec_perm! {test_vec_perm_i8x16, + i8x16, vector_signed_char, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} + + test_vec_perm! 
{test_vec_perm_m8x16, + m8x16, vector_bool_char, + [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], + [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]} + test_vec_perm! {test_vec_perm_u16x8, + u16x8, vector_unsigned_short, + [0, 1, 2, 3, 4, 5, 6, 7], + [10, 11, 12, 13, 14, 15, 16, 17], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 10, 1, 11, 2, 12, 3, 13]} + test_vec_perm! {test_vec_perm_i16x8, + i16x8, vector_signed_short, + [0, 1, 2, 3, 4, 5, 6, 7], + [10, 11, 12, 13, 14, 15, 16, 17], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [0, 10, 1, 11, 2, 12, 3, 13]} + test_vec_perm! {test_vec_perm_m16x8, + m16x8, vector_bool_short, + [false, false, false, false, false, false, false, false], + [true, true, true, true, true, true, true, true], + [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], + [false, true, false, true, false, true, false, true]} + + test_vec_perm! {test_vec_perm_u32x4, + u32x4, vector_unsigned_int, + [0, 1, 2, 3], + [10, 11, 12, 13], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [0, 10, 1, 11]} + test_vec_perm! {test_vec_perm_i32x4, + i32x4, vector_signed_int, + [0, 1, 2, 3], + [10, 11, 12, 13], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [0, 10, 1, 11]} + test_vec_perm! 
{test_vec_perm_m32x4, + m32x4, vector_bool_int, + [false, false, false, false], + [true, true, true, true], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [false, true, false, true]} + test_vec_perm! {test_vec_perm_f32x4, + f32x4, vector_float, + [0.0, 1.0, 2.0, 3.0], + [1.0, 1.1, 1.2, 1.3], + [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], + [0.0, 1.0, 1.0, 1.1]} + + test_vec_1! { test_vec_sqrt, vec_sqrt, f32x4, + [core::f32::consts::PI, 1.0, 25.0, 2.0], + [core::f32::consts::PI.sqrt(), 1.0, 5.0, core::f32::consts::SQRT_2] } + + test_vec_2! { test_vec_find_any_eq, vec_find_any_eq, i32x4, i32x4 -> u32x4, + [1, -2, 3, -4], + [-5, 3, -7, 8], + [0, 0, 0xFFFFFFFF, 0] + } + + test_vec_2! { test_vec_find_any_ne, vec_find_any_ne, i32x4, i32x4 -> u32x4, + [1, -2, 3, -4], + [-5, 3, -7, 8], + [0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF] + } + + test_vec_2! { test_vec_find_any_eq_idx_1, vec_find_any_eq_idx, i32x4, i32x4 -> u32x4, + [1, 2, 3, 4], + [5, 3, 7, 8], + [0, 8, 0, 0] + } + test_vec_2! { test_vec_find_any_eq_idx_2, vec_find_any_eq_idx, i32x4, i32x4 -> u32x4, + [1, 2, 3, 4], + [5, 6, 7, 8], + [0, 16, 0, 0] + } + + test_vec_2! { test_vec_find_any_ne_idx_1, vec_find_any_ne_idx, i32x4, i32x4 -> u32x4, + [1, 2, 3, 4], + [1, 5, 3, 4], + [0, 4, 0, 0] + } + test_vec_2! { test_vec_find_any_ne_idx_2, vec_find_any_ne_idx, i32x4, i32x4 -> u32x4, + [1, 2, 3, 4], + [1, 2, 3, 4], + [0, 16, 0, 0] + } + + test_vec_2! { test_vec_find_any_eq_or_0_idx_1, vec_find_any_eq_or_0_idx, i32x4, i32x4 -> u32x4, + [1, 2, 0, 4], + [5, 6, 7, 8], + [0, 8, 0, 0] + } + test_vec_2! 
{ test_vec_find_any_ne_or_0_idx_1, vec_find_any_ne_or_0_idx, i32x4, i32x4 -> u32x4, + [1, 2, 0, 4], + [1, 2, 3, 4], + [0, 8, 0, 0] + } + + #[simd_test(enable = "vector")] + fn test_vec_find_any_eq_cc() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 3, 7, 8]); + + let (d, c) = unsafe { vec_find_any_eq_cc(a, b) }; + assert_eq!(c, 1); + assert_eq!(d.as_array(), &[0, 0, -1, 0]); + + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let (d, c) = unsafe { vec_find_any_eq_cc(a, b) }; + assert_eq!(c, 3); + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_find_any_ne_cc() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 3, 7, 8]); + + let (d, c) = unsafe { vec_find_any_ne_cc(a, b) }; + assert_eq!(c, 1); + assert_eq!(d.as_array(), &[-1, -1, 0, -1]); + + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([1, 2, 3, 4]); + let (d, c) = unsafe { vec_find_any_ne_cc(a, b) }; + assert_eq!(c, 3); + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_find_any_eq_idx_cc() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 3, 7, 8]); + + let (d, c) = unsafe { vec_find_any_eq_idx_cc(a, b) }; + assert_eq!(c, 1); + assert_eq!(d.as_array(), &[0, 8, 0, 0]); + + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let (d, c) = unsafe { vec_find_any_eq_idx_cc(a, b) }; + assert_eq!(c, 3); + assert_eq!(d.as_array(), &[0, 16, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_find_any_ne_idx_cc() { + let a = vector_unsigned_int([5, 2, 3, 4]); + let b = vector_unsigned_int([5, 3, 7, 8]); + + let (d, c) = unsafe { vec_find_any_ne_idx_cc(a, b) }; + assert_eq!(c, 1); + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([1, 2, 3, 4]); + let (d, 
c) = unsafe { vec_find_any_ne_idx_cc(a, b) }; + assert_eq!(c, 3); + assert_eq!(d.as_array(), &[0, 16, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_find_any_eq_or_0_idx_cc() { + // if no element of a matches any element of b with an equal value, and there is at least one element from a with a value of 0 + let a = vector_unsigned_int([0, 1, 2, 3]); + let b = vector_unsigned_int([4, 5, 6, 7]); + let (d, c) = unsafe { vec_find_any_eq_or_0_idx_cc(a, b) }; + assert_eq!(c, 0); + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + + // if at least one element of a matches any element of b with an equal value, and no elements of a with a value of 0 + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 2, 3, 4]); + let (d, c) = unsafe { vec_find_any_eq_or_0_idx_cc(a, b) }; + assert_eq!(c, 1); + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + + // if at least one element of a matches any element of b with an equal value, and there is at least one element from a has a value of 0 + let a = vector_unsigned_int([1, 2, 3, 0]); + let b = vector_unsigned_int([1, 2, 3, 4]); + let (d, c) = unsafe { vec_find_any_eq_or_0_idx_cc(a, b) }; + assert_eq!(c, 2); + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + + // if no element of a matches any element of b with an equal value, and there is no element from a with a value of 0. + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let (d, c) = unsafe { vec_find_any_eq_or_0_idx_cc(a, b) }; + assert_eq!(c, 3); + assert_eq!(d.as_array(), &[0, 16, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_find_any_ne_or_0_idx_cc() { + // if no element of a matches any element of b with a not equal value, and there is at least one element from a with a value of 0. 
+ let a = vector_unsigned_int([0, 1, 2, 3]); + let b = vector_unsigned_int([4, 1, 2, 3]); + let (d, c) = unsafe { vec_find_any_ne_or_0_idx_cc(a, b) }; + assert_eq!(c, 0); + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + + // if at least one element of a matches any element of b with a not equal value, and no elements of a with a value of 0. + let a = vector_unsigned_int([4, 2, 3, 4]); + let b = vector_unsigned_int([4, 5, 6, 7]); + let (d, c) = unsafe { vec_find_any_ne_or_0_idx_cc(a, b) }; + assert_eq!(c, 1); + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + + // if at least one element of a matches any element of b with a not equal value, and there is at least one element from a has a value of 0. + let a = vector_unsigned_int([1, 0, 1, 1]); + let b = vector_unsigned_int([4, 5, 6, 7]); + let (d, c) = unsafe { vec_find_any_ne_or_0_idx_cc(a, b) }; + assert_eq!(c, 2); + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + + // if no element of a matches any element of b with a not equal value, and there is no element from a with a value of 0. 
+ let a = vector_unsigned_int([4, 4, 4, 4]); + let b = vector_unsigned_int([4, 5, 6, 7]); + let (d, c) = unsafe { vec_find_any_ne_or_0_idx_cc(a, b) }; + assert_eq!(c, 3); + assert_eq!(d.as_array(), &[0, 16, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vector_load() { + let expected = [0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]; + + let source: [u32; 8] = [ + 0xAAAA_AAAA, + 0xBBBB_BBBB, + 0xCCCC_CCCC, + 0xDDDD_DDDD, + 0, + 0, + 0, + 0, + ]; + assert_eq!( + unsafe { vec_xl::(0, source.as_ptr()) }.as_array(), + &expected + ); + + // offset is in bytes + let source: [u32; 8] = [ + 0x0000_AAAA, + 0xAAAA_BBBB, + 0xBBBB_CCCC, + 0xCCCC_DDDD, + 0xDDDD_0000, + 0, + 0, + 0, + ]; + assert_eq!( + unsafe { vec_xl::(2, source.as_ptr()) }.as_array(), + &expected + ); + } + + #[simd_test(enable = "vector")] + fn test_vector_store() { + let vec = vector_unsigned_int([0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]); + + let mut dest = [0u32; 8]; + unsafe { vec_xst(vec, 0, dest.as_mut_ptr()) }; + assert_eq!( + dest, + [ + 0xAAAA_AAAA, + 0xBBBB_BBBB, + 0xCCCC_CCCC, + 0xDDDD_DDDD, + 0, + 0, + 0, + 0 + ] + ); + + // offset is in bytes + let mut dest = [0u32; 8]; + unsafe { vec_xst(vec, 2, dest.as_mut_ptr()) }; + assert_eq!( + dest, + [ + 0x0000_AAAA, + 0xAAAA_BBBB, + 0xBBBB_CCCC, + 0xCCCC_DDDD, + 0xDDDD_0000, + 0, + 0, + 0, + ] + ); + } + + #[simd_test(enable = "vector")] + fn test_vector_lcbb() { + #[repr(align(64))] + struct Align64(T); + + static ARRAY: Align64<[u8; 128]> = Align64([0; 128]); + + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[64..].as_ptr()) }, 16); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[63..].as_ptr()) }, 1); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[56..].as_ptr()) }, 8); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[48..].as_ptr()) }, 16); + } + + test_vec_2! 
{ test_vec_pack, vec_pack, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, -1, 0, 48, -48, -1, 0, 57, -57, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packs, vec_packs, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, 127, -128, 127, -128, 127, -128, 127, -128, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packsu_signed, vec_packsu, i16x8, i16x8 -> u8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, 0, 42, 255, 0, 255, 0, 255, 0, 255, 0, 0, 1, 0, 42] + } + + test_vec_2! { test_vec_packsu_unsigned, vec_packsu, u16x8, u16x8 -> u8x16, + [65535, 32768, 1234, 5678, 16, 8, 4, 2], + [30000, 25000, 20000, 15000, 31, 63, 127, 255], + [255, 255, 255, 255, 16, 8, 4, 2, 255, 255, 255, 255, 31, 63, 127, 255] + } + + test_vec_2! { test_vec_rl, vec_rl, u32x4, + [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678], + [4, 8, 12, 68], + [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] + } + + test_vec_1! { test_vec_unpackh_i, vec_unpackh, i16x8 -> i32x4, + [0x1234, -2, 0x0F0F, -32768, 0, 0, 0, 0], + [0x1234, -2, 0x0F0F, -32768] + } + + test_vec_1! { test_vec_unpackh_u, vec_unpackh, u16x8 -> u32x4, + [0x1234, 0xFFFF, 0x0F0F, 0x8000, 0, 0, 0, 0], + [0x1234, 0xFFFF, 0x0F0F, 0x8000] + } + + test_vec_1! { test_vec_unpackl_i, vec_unpackl, i16x8 -> i32x4, + [0, 0, 0, 0, 0x1234, -2, 0x0F0F, -32768], + [0x1234, -2, 0x0F0F, -32768] + } + + test_vec_1! { test_vec_unpackl_u, vec_unpackl, u16x8 -> u32x4, + [0, 0, 0, 0, 0x1234, 0xFFFF, 0x0F0F, 0x8000], + [0x1234, 0xFFFF, 0x0F0F, 0x8000] + } + + test_vec_2! { test_vec_avg, vec_avg, u32x4, + [2, 1, u32::MAX, 0], + [4, 2, 2, 0], + [3, (1u32 + 2).div_ceil(2), (u32::MAX as u64 + 2u64).div_ceil(2) as u32, 0] + } + + test_vec_2! 
{ test_vec_checksum, vec_checksum, u32x4, + [1, 2, 3, u32::MAX], + [5, 6, 7, 8], + [0, 12, 0, 0] + } + + test_vec_2! { test_vec_add_u128, vec_add_u128, u8x16, + [0x01, 0x05, 0x0F, 0x1A, 0x2F, 0x3F, 0x50, 0x65, + 0x7A, 0x8F, 0x9A, 0xAD, 0xB0, 0xC3, 0xD5, 0xE8], + [0xF0, 0xEF, 0xC3, 0xB1, 0x92, 0x71, 0x5A, 0x43, + 0x3B, 0x29, 0x13, 0x04, 0xD7, 0xA1, 0x8C, 0x76], + [0xF1, 0xF4, 0xD2, 0xCB, 0xC1, 0xB0, 0xAA, 0xA8, 0xB5, 0xB8, 0xAD, 0xB2, 0x88, 0x65, 0x62, 0x5E] + } + + #[simd_test(enable = "vector")] + fn test_vec_addc_u128() { + unsafe { + let a = u128::MAX; + let b = 1u128; + + let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b))); + assert!(a.checked_add(b).is_none()); + assert_eq!(d, 1); + + let a = 1u128; + let b = 1u128; + + let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b))); + assert!(a.checked_add(b).is_some()); + assert_eq!(d, 0); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_subc_u128() { + unsafe { + let a = 0u128; + let b = 1u128; + + let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b))); + assert!(a.checked_sub(b).is_none()); + assert_eq!(d, 0); + + let a = 1u128; + let b = 1u128; + + let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b))); + assert!(a.checked_sub(b).is_some()); + assert_eq!(d, 1); + } + } + + test_vec_2! { test_vec_mule_u, vec_mule, u16x8, u16x8 -> u32x4, + [0xFFFF, 0, 2, 0, 2, 0, 1, 0], + [0xFFFF, 0, 4, 0, 0xFFFF, 0, 2, 0], + [0xFFFE_0001, 8, 0x0001_FFFE, 2] + } + + test_vec_2! { test_vec_mule_i, vec_mule, i16x8, i16x8 -> i32x4, + [i16::MIN, 0, -2, 0, 2, 0, 1, 0], + [i16::MIN, 0, 4, 0, i16::MAX, 0, 2, 0], + [0x4000_0000, -8, 0xFFFE, 2] + } + + test_vec_2! { test_vec_mulo_u, vec_mulo, u16x8, u16x8 -> u32x4, + [0, 0xFFFF, 0, 2, 0, 2, 0, 1], + [0, 0xFFFF, 0, 4, 0, 0xFFFF, 0, 2], + [0xFFFE_0001, 8, 0x0001_FFFE, 2] + } + + test_vec_2! 
{ test_vec_mulo_i, vec_mulo, i16x8, i16x8 -> i32x4, + [0, i16::MIN, 0, -2, 0, 2, 0, 1], + [0, i16::MIN, 0, 4, 0, i16::MAX, 0, 2], + [0x4000_0000, -8, 0xFFFE, 2] + } + + test_vec_2! { test_vec_mulh_u, vec_mulh, u32x4, u32x4 -> u32x4, + [u32::MAX, 2, 2, 1], + [u32::MAX, 4, u32::MAX, 2], + [u32::MAX - 1, 0, 1, 0] + } + + test_vec_2! { test_vec_mulh_i, vec_mulh, i32x4, i32x4 -> i32x4, + [i32::MIN, -2, 2, 1], + [i32::MIN, 4, i32::MAX, 2], + [0x4000_0000, -1, 0, 0] + } + + test_vec_2! { test_vec_gfmsum_1, vec_gfmsum, u16x8, u16x8 -> u32x4, + [0x1234, 0x5678, 0x9ABC, 0xDEF0, 0x1357, 0x2468, 0xACE0, 0xBDF0], + [0xFFFF, 0x0001, 0x8000, 0x7FFF, 0xAAAA, 0x5555, 0x1234, 0x5678], + [0xE13A794, 0x68764A50, 0x94AA3E, 0x2C93F300] + } + + test_vec_2! { test_vec_gfmsum_2, vec_gfmsum, u16x8, u16x8 -> u32x4, + [0x0000, 0xFFFF, 0xAAAA, 0x5555, 0x1234, 0x5678, 0x9ABC, 0xDEF0], + [0xFFFF, 0x0000, 0x5555, 0xAAAA, 0x0001, 0x8000, 0x7FFF, 0x1357], + [0, 0, 0x2B3C1234, 0x3781D244] + } + + #[simd_test(enable = "vector")] + fn test_vec_gfmsum_128() { + let a = vector_unsigned_long_long([1, 2]); + let b = vector_unsigned_long_long([3, 4]); + + let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) }; + assert_eq!(d, 11); + + let a = vector_unsigned_long_long([0x0101010101010101, 0x0202020202020202]); + let b = vector_unsigned_long_long([0x0404040404040404, 0x0505050505050505]); + + let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) }; + assert_eq!(d, 0xE000E000E000E000E000E000E000E); + } + + #[simd_test(enable = "vector-enhancements-1")] + fn test_vec_bperm_u128() { + let a = vector_unsigned_char([65, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]); + let b = vector_unsigned_char([ + 0, 0, 0, 0, 1, 1, 1, 1, 128, 128, 128, 128, 255, 255, 255, 255, + ]); + let d = unsafe { vec_bperm_u128(a, b) }; + assert_eq!(d.as_array(), &[0xF00, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_sel() { + let a = vector_signed_int([1, 2, 3, 4]); + let b = vector_signed_int([5, 6, 7, 8]); + + let e = 
vector_unsigned_int([9, 10, 11, 12]); + let f = vector_unsigned_int([9, 9, 11, 11]); + + let c: vector_bool_int = unsafe { simd_eq(e, f) }; + assert_eq!(c.as_array(), &[!0, 0, !0, 0]); + let d: vector_signed_int = unsafe { vec_sel(a, b, c) }; + assert_eq!(d.as_array(), &[5, 2, 7, 4]); + } + + #[simd_test(enable = "vector")] + fn test_vec_gather_element() { + let a1: [u32; 10] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + let a2: [u32; 10] = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]; + + let v1 = vector_unsigned_int([1, 2, 3, 4]); + let v2 = vector_unsigned_int([1, 2, 3, 4]); + + let sizeof_int = core::mem::size_of::() as u32; + let v3 = vector_unsigned_int([ + 5 * sizeof_int, + 8 * sizeof_int, + 9 * sizeof_int, + 6 * sizeof_int, + ]); + + unsafe { + let d1 = vec_gather_element::<_, 0>(v1, v3, a1.as_ptr()); + assert_eq!(d1.as_array(), &[15, 2, 3, 4]); + let d2 = vec_gather_element::<_, 0>(v2, v3, a2.as_ptr()); + assert_eq!(d2.as_array(), &[25, 2, 3, 4]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_fp_test_data_class() { + let mut cc = 42; + + let v1 = vector_double([0.0, f64::NAN]); + let v2 = vector_double([f64::INFINITY, 1.0]); + let v3 = vector_double([1.0, 2.0]); + + unsafe { + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_ZERO>(v1, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[!0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NAN>(v1, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[0, !0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY>(v2, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[!0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY_N>(v2, &mut cc); + assert_eq!(cc, 3); + assert_eq!(d.as_array(), &[0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v2, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[0, !0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v3, &mut cc); + assert_eq!(cc, 0); + 
assert_eq!(d.as_array(), &[!0, !0]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_fp_any_all_nan_numeric() { + unsafe { + assert_eq!( + vec_all_nan(vector_double([f64::NAN, f64::NAN])), + i32::from(true) + ); + assert_eq!( + vec_all_nan(vector_double([f64::NAN, 1.0])), + i32::from(false) + ); + assert_eq!(vec_all_nan(vector_double([0.0, 1.0])), i32::from(false)); + + assert_eq!( + vec_any_nan(vector_double([f64::NAN, f64::NAN])), + i32::from(true) + ); + assert_eq!(vec_any_nan(vector_double([f64::NAN, 1.0])), i32::from(true)); + assert_eq!(vec_any_nan(vector_double([0.0, 1.0])), i32::from(false)); + + assert_eq!( + vec_all_numeric(vector_double([f64::NAN, f64::NAN])), + i32::from(false) + ); + assert_eq!( + vec_all_numeric(vector_double([f64::NAN, 1.0])), + i32::from(false) + ); + assert_eq!(vec_all_numeric(vector_double([0.0, 1.0])), i32::from(true)); + + assert_eq!( + vec_any_numeric(vector_double([f64::NAN, f64::NAN])), + i32::from(false) + ); + assert_eq!( + vec_any_numeric(vector_double([f64::NAN, 1.0])), + i32::from(true) + ); + assert_eq!(vec_any_numeric(vector_double([0.0, 1.0])), i32::from(true)); + + // "numeric" means "not NaN". 
infinities are numeric + assert_eq!( + vec_all_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])), + i32::from(true) + ); + assert_eq!( + vec_any_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])), + i32::from(true) + ); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_test_mask() { + unsafe { + let v = vector_unsigned_long_long([0xFF00FF00FF00FF00; 2]); + let m = vector_unsigned_long_long([0x0000FF000000FF00; 2]); + assert_eq!(vec_test_mask(v, m), 3); + + let v = vector_unsigned_long_long([u64::MAX; 2]); + let m = vector_unsigned_long_long([0; 2]); + assert_eq!(vec_test_mask(v, m), 0); + + let v = vector_unsigned_long_long([0; 2]); + let m = vector_unsigned_long_long([u64::MAX; 2]); + assert_eq!(vec_test_mask(v, m), 0); + + let v = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]); + let m = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]); + assert_eq!(vec_test_mask(v, m), 3); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_search_string_cc() { + unsafe { + let b = vector_unsigned_char(*b"ABCD------------"); + let c = vector_unsigned_char([4; 16]); + + let haystack = vector_unsigned_char(*b"__ABCD__________"); + let (result, d) = vec_search_string_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 2); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"___ABCD_________"); + let (result, d) = vec_search_string_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 3); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"________________"); + let (result, d) = vec_search_string_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0_________"); + let (result, d) = vec_search_string_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0__ABCD___"); + let (result, d) = vec_search_string_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 
9); + assert_eq!(d, 2); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_search_string_until_zero_cc() { + unsafe { + let b = vector_unsigned_char(*b"ABCD\0\0\0\0\0\0\0\0\0\0\0\0"); + let c = vector_unsigned_char([16; 16]); + + let haystack = vector_unsigned_char(*b"__ABCD__________"); + let (result, d) = vec_search_string_until_zero_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 2); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"___ABCD_________"); + let (result, d) = vec_search_string_until_zero_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 3); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"________________"); + let (result, d) = vec_search_string_until_zero_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0_________"); + let (result, d) = vec_search_string_until_zero_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 1); + + let haystack = vector_unsigned_char(*b"______\0__ABCD___"); + let (result, d) = vec_search_string_until_zero_cc(haystack, b, c); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 1); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_doublee() { + unsafe { + let v = vector_float([1.0, 2.0, 3.0, 4.0]); + assert_eq!(vec_doublee(v).as_array(), &[1.0, 3.0]); + + let v = vector_float([f32::NAN, 2.0, f32::INFINITY, 4.0]); + let d = vec_doublee(v); + assert!(d.as_array()[0].is_nan()); + assert_eq!(d.as_array()[1], f64::INFINITY); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_floate() { + // NOTE: indices 1 and 3 can have an arbitrary value. With the C version + // these are poison values, our version initializes the memory but its + // value still should not be relied upon by application code. 
+ unsafe { + let v = vector_double([1.0, 2.0]); + let d = vec_floate(v); + assert_eq!(d.as_array()[0], 1.0); + assert_eq!(d.as_array()[2], 2.0); + + let v = vector_double([f64::NAN, f64::INFINITY]); + let d = vec_floate(v); + assert!(d.as_array()[0].is_nan()); + assert_eq!(d.as_array()[2], f32::INFINITY); + + let v = vector_double([f64::MIN, f64::MAX]); + let d = vec_floate(v); + assert_eq!(d.as_array()[0], f64::MIN as f32); + assert_eq!(d.as_array()[2], f64::MAX as f32); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_extend_s64() { + unsafe { + let v = vector_signed_char([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + assert_eq!(vec_extend_s64(v).as_array(), &[7, 15]); + + let v = vector_signed_short([0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(vec_extend_s64(v).as_array(), &[3, 7]); + + let v = vector_signed_int([0, 1, 2, 3]); + assert_eq!(vec_extend_s64(v).as_array(), &[1, 3]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_signed() { + unsafe { + let v = vector_float([1.0, 2.5, -2.5, -0.0]); + assert_eq!(vec_signed(v).as_array(), &[1, 2, -2, 0]); + + let v = vector_double([2.5, -2.5]); + assert_eq!(vec_signed(v).as_array(), &[2, -2]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_unsigned() { + // NOTE: converting a negative floating point value is UB! 
+ unsafe { + let v = vector_float([1.0, 2.5, 3.5, 0.0]); + assert_eq!(vec_unsigned(v).as_array(), &[1, 2, 3, 0]); + + let v = vector_double([2.5, 3.5]); + assert_eq!(vec_unsigned(v).as_array(), &[2, 3]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_cp_until_zero() { + unsafe { + let v = vector_signed_int([1, 2, 3, 4]); + let d = vec_cp_until_zero(v); + assert_eq!(d.as_array(), &[1, 2, 3, 4]); + + let v = vector_signed_int([1, 2, 0, 4]); + let d = vec_cp_until_zero(v); + assert_eq!(d.as_array(), &[1, 2, 0, 0]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_cp_until_zero_cc() { + unsafe { + let v = vector_signed_int([1, 2, 3, 4]); + let (d, cc) = vec_cp_until_zero_cc(v); + assert_eq!(d.as_array(), &[1, 2, 3, 4]); + assert_eq!(cc, 3); + + let v = vector_signed_int([1, 2, 0, 4]); + let (d, cc) = vec_cp_until_zero_cc(v); + assert_eq!(d.as_array(), &[1, 2, 0, 0]); + assert_eq!(cc, 0); + } + } + + #[simd_test(enable = "vector-enhancements-1")] + fn test_vec_msum_u128() { + let a = vector_unsigned_long_long([1, 2]); + let b = vector_unsigned_long_long([3, 4]); + + unsafe { + let c: vector_unsigned_char = transmute(100u128); + + let d: u128 = transmute(vec_msum_u128::<0>(a, b, c)); + assert_eq!(d, (1 * 3) + (2 * 4) + 100); + + let d: u128 = transmute(vec_msum_u128::<4>(a, b, c)); + assert_eq!(d, (1 * 3) + (2 * 4) * 2 + 100); + + let d: u128 = transmute(vec_msum_u128::<8>(a, b, c)); + assert_eq!(d, (1 * 3) * 2 + (2 * 4) + 100); + + let d: u128 = transmute(vec_msum_u128::<12>(a, b, c)); + assert_eq!(d, (1 * 3) * 2 + (2 * 4) * 2 + 100); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_sld() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sld::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_sldw() { + let a = 
vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sldw::<_, 1>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_sldb() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sldb::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAB]); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_srdb() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_srdb::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xABBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + } + } + + const GT: u32 = 0x20000000; + const LT: u32 = 0x40000000; + const EQ: u32 = 0x80000000; + + #[simd_test(enable = "vector")] + fn test_vec_cmprg() { + let a = vector_unsigned_int([11, 22, 33, 44]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, !0, 0]); + + let c = vector_unsigned_int([GT, LT, 0, 0]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, 0, 0]); + + let a = vector_unsigned_int([11, 22, 33, 30]); + let b = vector_unsigned_int([10, 20, 30, 30]); + + let c = vector_unsigned_int([GT, LT, EQ, EQ]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, 0, !0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg() { + let a = vector_unsigned_int([11, 22, 33, 44]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg(a, b, 
c) }; + assert_eq!(d.as_array(), &[0, !0, 0, !0]); + + let c = vector_unsigned_int([GT, LT, 0, 0]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, !0, !0]); + + let a = vector_unsigned_int([11, 22, 33, 30]); + let b = vector_unsigned_int([10, 20, 30, 30]); + + let c = vector_unsigned_int([GT, LT, EQ, EQ]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, !0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmprg_idx() { + let a = vector_unsigned_int([1, 11, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg_idx() { + let a = vector_unsigned_int([1, 11, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmprg_or_0_idx() { + let a = vector_unsigned_int([1, 0, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg_or_0_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg_or_0_idx() { + let a = vector_unsigned_int([11, 33, 0, 22]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg_or_0_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 8, 0, 0]); + } + + test_vec_2! { test_vec_cmpgt, vec_cmpgt, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 3.14], + [2.0, f32::NAN, 5.0, 2.0], + [0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmpge, vec_cmpge, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 3.14], + [1.0, f32::NAN, 5.0, 2.0], + [!0, 0, 0, !0] + } + + test_vec_2! 
{ test_vec_cmplt, vec_cmplt, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [2.0, f32::NAN, 5.0, 2.0], + [!0, 0, 0, 0] + } + + test_vec_2! { test_vec_cmple, vec_cmple, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [!0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmpeq, vec_cmpeq, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [!0, 0, 0, 0] + } + + test_vec_2! { test_vec_cmpne, vec_cmpne, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [0, !0, !0, !0] + } + + #[simd_test(enable = "vector")] + fn test_vec_meadd() { + let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]); + let b = vector_unsigned_short([5, 0, 6, 0, 7, 0, 8, 0]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_meadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_short([1, 0, 2, 0, 3, 0, 4, 0]); + let b = vector_signed_short([5, 0, 6, 0, 7, 0, 8, 0]); + let c = vector_signed_int([2, -2, 2, -2]); + + let d = unsafe { vec_meadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 10, 23, 30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_moadd() { + let a = vector_unsigned_short([0, 1, 0, 2, 0, 3, 0, 4]); + let b = vector_unsigned_short([0, 5, 0, 6, 0, 7, 0, 8]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_moadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_short([0, 1, 0, 2, 0, 3, 0, 4]); + let b = vector_signed_short([0, 5, 0, 6, 0, 7, 0, 8]); + let c = vector_signed_int([2, -2, 2, -2]); + + let d = unsafe { vec_moadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 10, 23, 30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_mhadd() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let c = vector_unsigned_int([u32::MAX; 4]); + + let d = unsafe { vec_mhadd(a, b, c) }; + assert_eq!(d.as_array(), &[1, 1, 
1, 1]); + + let a = vector_signed_int([-1, -2, -3, -4]); + let b = vector_signed_int([5, 6, 7, 8]); + let c = vector_signed_int([i32::MIN; 4]); + + let d = unsafe { vec_mhadd(a, b, c) }; + assert_eq!(d.as_array(), &[-1, -1, -1, -1]); + } + + #[simd_test(enable = "vector")] + fn test_vec_mladd() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_mladd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_int([-1, -2, -3, -4]); + let b = vector_signed_int([5, 6, 7, 8]); + let c = vector_signed_int([2, 2, 2, 2]); + + let d = unsafe { vec_mladd(a, b, c) }; + assert_eq!(d.as_array(), &[-3, -10, -19, -30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_extract() { + let v = vector_unsigned_int([1, 2, 3, 4]); + + assert_eq!(unsafe { vec_extract(v, 1) }, 2); + assert_eq!(unsafe { vec_extract(v, 4 + 2) }, 3); + } + + #[simd_test(enable = "vector")] + fn test_vec_insert() { + let mut v = vector_unsigned_int([1, 2, 3, 4]); + + v = unsafe { vec_insert(42, v, 1) }; + assert_eq!(v.as_array(), &[1, 42, 3, 4]); + + v = unsafe { vec_insert(64, v, 6) }; + assert_eq!(v.as_array(), &[1, 42, 64, 4]); + } + + #[simd_test(enable = "vector")] + fn test_vec_promote() { + let v: vector_unsigned_int = unsafe { vec_promote(42, 1).assume_init() }; + assert_eq!(v.as_array(), &[0, 42, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_insert_and_zero() { + let v = unsafe { vec_insert_and_zero::(&42u32) }; + assert_eq!(v.as_array(), vector_unsigned_int([0, 42, 0, 0]).as_array()); + } +} diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs new file mode 100644 index 000000000000..25834943f009 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -0,0 +1,1021 @@ +//! Internal `#[repr(simd)]` types + +#![allow(non_camel_case_types)] + +macro_rules! 
simd_ty { + ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => { + #[repr(simd)] + #[derive(Copy, Clone)] + pub(crate) struct $id([$elem_type; $len]); + + #[allow(clippy::use_self)] + impl $id { + /// A value of this type where all elements are zeroed out. + pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() }; + + #[inline(always)] + pub(crate) const fn new($($param_name: $elem_type),*) -> Self { + $id([$($param_name),*]) + } + #[inline(always)] + pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self { + $id(elements) + } + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) fn splat(value: $elem_type) -> Self { + #[derive(Copy, Clone)] + #[repr(simd)] + struct JustOne([$elem_type; 1]); + let one = JustOne([value]); + // SAFETY: 0 is always in-bounds because we're shuffling + // a simd type with exactly one element. + unsafe { simd_shuffle!(one, one, [0; $len]) } + } + + /// Extract the element at position `index`. + /// `index` is not a constant so this is not efficient! + /// Use for testing only. + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) fn extract(&self, index: usize) -> $elem_type { + self.as_array()[index] + } + + #[inline] + pub(crate) fn as_array(&self) -> &[$elem_type; $len] { + let simd_ptr: *const Self = self; + let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); + // SAFETY: We can always read the prefix of a simd type as an array. + // There might be more padding afterwards for some widths, but + // that's not a problem for reading less than that. + unsafe { &*array_ptr } + } + } + + impl core::cmp::PartialEq for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_array() == other.as_array() + } + } + + impl core::fmt::Debug for $id { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, stringify!($id), self.as_array()) + } + } + } +} + +macro_rules! 
simd_m_ty { + ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => { + #[repr(simd)] + #[derive(Copy, Clone)] + pub(crate) struct $id([$elem_type; $len]); + + #[allow(clippy::use_self)] + impl $id { + #[inline(always)] + const fn bool_to_internal(x: bool) -> $elem_type { + [0 as $elem_type, !(0 as $elem_type)][x as usize] + } + + #[inline(always)] + pub(crate) const fn new($($param_name: bool),*) -> Self { + $id([$(Self::bool_to_internal($param_name)),*]) + } + + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) fn splat(value: bool) -> Self { + #[derive(Copy, Clone)] + #[repr(simd)] + struct JustOne([$elem_type; 1]); + let one = JustOne([Self::bool_to_internal(value)]); + // SAFETY: 0 is always in-bounds because we're shuffling + // a simd type with exactly one element. + unsafe { simd_shuffle!(one, one, [0; $len]) } + } + + #[inline] + pub(crate) fn as_array(&self) -> &[$elem_type; $len] { + let simd_ptr: *const Self = self; + let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); + // SAFETY: We can always read the prefix of a simd type as an array. + // There might be more padding afterwards for some widths, but + // that's not a problem for reading less than that. 
+ unsafe { &*array_ptr } + } + } + + impl core::cmp::PartialEq for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_array() == other.as_array() + } + } + + impl core::fmt::Debug for $id { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, stringify!($id), self.as_array()) + } + } + } +} + +// 16-bit wide types: + +simd_ty!(u8x2[u8;2]: x0, x1); +simd_ty!(i8x2[i8;2]: x0, x1); + +// 32-bit wide types: + +simd_ty!(u8x4[u8;4]: x0, x1, x2, x3); +simd_ty!(u16x2[u16;2]: x0, x1); + +simd_ty!(i8x4[i8;4]: x0, x1, x2, x3); +simd_ty!(i16x2[i16;2]: x0, x1); + +// 64-bit wide types: + +simd_ty!( + u8x8[u8;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(u16x4[u16;4]: x0, x1, x2, x3); +simd_ty!(u32x2[u32;2]: x0, x1); +simd_ty!(u64x1[u64;1]: x1); + +simd_ty!( + i8x8[i8;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(i16x4[i16;4]: x0, x1, x2, x3); +simd_ty!(i32x2[i32;2]: x0, x1); +simd_ty!(i64x1[i64;1]: x1); + +simd_ty!(f32x2[f32;2]: x0, x1); +simd_ty!(f64x1[f64;1]: x1); + +// 128-bit wide types: + +simd_ty!( + u8x16[u8;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u16x8[u16;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(u32x4[u32;4]: x0, x1, x2, x3); +simd_ty!(u64x2[u64;2]: x0, x1); + +simd_ty!( + i8x16[i8;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i16x8[i16;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(i32x4[i32;4]: x0, x1, x2, x3); +simd_ty!(i64x2[i64;2]: x0, x1); + +simd_ty!(f16x4[f16;4]: x0, x1, x2, x3); + +simd_ty!( + f16x8[f16;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(f32x4[f32;4]: x0, x1, x2, x3); +simd_ty!(f64x2[f64;2]: x0, x1); + +simd_m_ty!( + m8x16[i8;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + 
x15 +); +simd_m_ty!( + m16x8[i16;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3); +simd_m_ty!(m64x2[i64;2]: x0, x1); + +// 256-bit wide types: + +simd_ty!( + u8x32[u8;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + u16x16[u16;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u32x8[u32;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(u64x4[u64;4]: x0, x1, x2, x3); + +simd_ty!( + i8x32[i8;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + i16x16[i16;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i32x8[i32;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(i64x4[i64;4]: x0, x1, x2, x3); + +simd_ty!( + f16x16[f16;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + f32x8[f32;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); +simd_ty!(f64x4[f64;4]: x0, x1, x2, x3); + +simd_m_ty!( + m8x32[i8;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_m_ty!( + m16x16[i16;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_m_ty!( + m32x8[i32;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +// 512-bit wide types: + +simd_ty!( + i8x64[i8;64]: + x0, + x1, + x2, + x3, + x4, + x5, 
+ x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31, + x32, + x33, + x34, + x35, + x36, + x37, + x38, + x39, + x40, + x41, + x42, + x43, + x44, + x45, + x46, + x47, + x48, + x49, + x50, + x51, + x52, + x53, + x54, + x55, + x56, + x57, + x58, + x59, + x60, + x61, + x62, + x63 +); + +simd_ty!( + u8x64[u8;64]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31, + x32, + x33, + x34, + x35, + x36, + x37, + x38, + x39, + x40, + x41, + x42, + x43, + x44, + x45, + x46, + x47, + x48, + x49, + x50, + x51, + x52, + x53, + x54, + x55, + x56, + x57, + x58, + x59, + x60, + x61, + x62, + x63 +); + +simd_ty!( + i16x32[i16;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); + +simd_ty!( + u16x32[u16;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); + +simd_ty!( + i32x16[i32;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + u32x16[u32;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + f16x32[f16;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + f32x16[f32;16]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + 
x12, + x13, + x14, + x15 +); + +simd_ty!( + i64x8[i64;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +simd_ty!( + u64x8[u64;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +simd_ty!( + f64x8[f64;8]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +// 1024-bit wide types: +simd_ty!( + u16x64[u16;64]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31, + x32, + x33, + x34, + x35, + x36, + x37, + x38, + x39, + x40, + x41, + x42, + x43, + x44, + x45, + x46, + x47, + x48, + x49, + x50, + x51, + x52, + x53, + x54, + x55, + x56, + x57, + x58, + x59, + x60, + x61, + x62, + x63 +); +simd_ty!( + i32x32[i32;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + u32x32[u32;32]: + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); + +/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they +/// were before moving to array-based simd. 
+#[inline] +pub(crate) fn debug_simd_finish( + formatter: &mut crate::fmt::Formatter<'_>, + type_name: &str, + array: &[T; N], +) -> crate::fmt::Result { + crate::fmt::Formatter::debug_tuple_fields_finish( + formatter, + type_name, + &crate::array::from_fn::<&dyn crate::fmt::Debug, N, _>(|i| &array[i]), + ) +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/atomic.rs b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs new file mode 100644 index 000000000000..fdc8cfbfdb41 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs @@ -0,0 +1,96 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "unadjusted" { + #[link_name = "llvm.wasm.memory.atomic.wait32"] + fn llvm_atomic_wait_i32(ptr: *mut i32, exp: i32, timeout: i64) -> i32; + #[link_name = "llvm.wasm.memory.atomic.wait64"] + fn llvm_atomic_wait_i64(ptr: *mut i64, exp: i64, timeout: i64) -> i32; + #[link_name = "llvm.wasm.memory.atomic.notify"] + fn llvm_atomic_notify(ptr: *mut i32, cnt: i32) -> i32; +} + +/// Corresponding intrinsic to wasm's [`memory.atomic.wait32` instruction][instr] +/// +/// This function, when called, will block the current thread if the memory +/// pointed to by `ptr` is equal to `expression` (performing this action +/// atomically). +/// +/// The argument `timeout_ns` is a maximum number of nanoseconds the calling +/// thread will be blocked for, if it blocks. If the timeout is negative then +/// the calling thread will be blocked forever. +/// +/// The calling thread can only be woken up with a call to the `wake` intrinsic +/// once it has been blocked. Changing the memory behind `ptr` will not wake +/// the thread once it's blocked. +/// +/// # Return value +/// +/// * 0 - indicates that the thread blocked and then was woken up +/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread +/// didn't block +/// * 2 - the thread blocked, but the timeout expired. 
+/// +/// [instr]: https://webassembly.github.io/threads/core/syntax/instructions.html#syntax-instr-atomic-memory +#[inline] +#[cfg_attr(test, assert_instr(memory.atomic.wait32))] +#[target_feature(enable = "atomics")] +#[doc(alias("memory.atomic.wait32"))] +#[unstable(feature = "stdarch_wasm_atomic_wait", issue = "77839")] +pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 { + llvm_atomic_wait_i32(ptr, expression, timeout_ns) +} + +/// Corresponding intrinsic to wasm's [`memory.atomic.wait64` instruction][instr] +/// +/// This function, when called, will block the current thread if the memory +/// pointed to by `ptr` is equal to `expression` (performing this action +/// atomically). +/// +/// The argument `timeout_ns` is a maximum number of nanoseconds the calling +/// thread will be blocked for, if it blocks. If the timeout is negative then +/// the calling thread will be blocked forever. +/// +/// The calling thread can only be woken up with a call to the `wake` intrinsic +/// once it has been blocked. Changing the memory behind `ptr` will not wake +/// the thread once it's blocked. +/// +/// # Return value +/// +/// * 0 - indicates that the thread blocked and then was woken up +/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread +/// didn't block +/// * 2 - the thread blocked, but the timeout expired. 
+/// +/// [instr]: https://webassembly.github.io/threads/core/syntax/instructions.html#syntax-instr-atomic-memory +#[inline] +#[cfg_attr(test, assert_instr(memory.atomic.wait64))] +#[target_feature(enable = "atomics")] +#[doc(alias("memory.atomic.wait64"))] +#[unstable(feature = "stdarch_wasm_atomic_wait", issue = "77839")] +pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 { + llvm_atomic_wait_i64(ptr, expression, timeout_ns) +} + +/// Corresponding intrinsic to wasm's [`memory.atomic.notify` instruction][instr] +/// +/// This function will notify a number of threads blocked on the address +/// indicated by `ptr`. Threads previously blocked with the `i32_atomic_wait` +/// and `i64_atomic_wait` functions above will be woken up. +/// +/// The `waiters` argument indicates how many waiters should be woken up (a +/// maximum). If the value is zero no waiters are woken up. +/// +/// # Return value +/// +/// Returns the number of waiters which were actually notified. 
+/// +/// [instr]: https://webassembly.github.io/threads/core/syntax/instructions.html#syntax-instr-atomic-memory +#[inline] +#[cfg_attr(test, assert_instr(memory.atomic.notify))] +#[target_feature(enable = "atomics")] +#[doc(alias("memory.atomic.notify"))] +#[unstable(feature = "stdarch_wasm_atomic_wait", issue = "77839")] +pub unsafe fn memory_atomic_notify(ptr: *mut i32, waiters: u32) -> u32 { + llvm_atomic_notify(ptr, waiters as i32) as u32 +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/memory.rs b/library/stdarch/crates/core_arch/src/wasm32/memory.rs new file mode 100644 index 000000000000..90e9075e5136 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/memory.rs @@ -0,0 +1,58 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "unadjusted" { + #[link_name = "llvm.wasm.memory.grow"] + fn llvm_memory_grow(mem: u32, pages: usize) -> usize; + #[link_name = "llvm.wasm.memory.size"] + fn llvm_memory_size(mem: u32) -> usize; +} + +/// Corresponding intrinsic to wasm's [`memory.size` instruction][instr] +/// +/// This function, when called, will return the current memory size in units of +/// pages. The current WebAssembly page size is 65536 bytes (64 KB). +/// +/// The argument `MEM` is the numerical index of which memory to return the +/// size of. Note that currently the WebAssembly specification only supports one +/// memory, so it is required that zero is passed in. The argument is present to +/// be forward-compatible with future WebAssembly revisions. If a nonzero +/// argument is passed to this function it will currently unconditionally abort. 
+/// +/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-size +#[inline] +#[cfg_attr(test, assert_instr("memory.size", MEM = 0))] +#[rustc_legacy_const_generics(0)] +#[stable(feature = "simd_wasm32", since = "1.33.0")] +#[doc(alias("memory.size"))] +pub fn memory_size() -> usize { + static_assert!(MEM == 0); + unsafe { llvm_memory_size(MEM) } +} + +/// Corresponding intrinsic to wasm's [`memory.grow` instruction][instr] +/// +/// This function, when called, will attempt to grow the default linear memory +/// by the specified `delta` of pages. The current WebAssembly page size is +/// 65536 bytes (64 KB). If memory is successfully grown then the previous size +/// of memory, in pages, is returned. If memory cannot be grown then +/// `usize::MAX` is returned. +/// +/// The argument `MEM` is the numerical index of which memory to return the +/// size of. Note that currently the WebAssembly specification only supports one +/// memory, so it is required that zero is passed in. The argument is present to +/// be forward-compatible with future WebAssembly revisions. If a nonzero +/// argument is passed to this function it will currently unconditionally abort. +/// +/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-grow +#[inline] +#[cfg_attr(test, assert_instr("memory.grow", MEM = 0))] +#[rustc_legacy_const_generics(0)] +#[stable(feature = "simd_wasm32", since = "1.33.0")] +#[doc(alias("memory.grow"))] +pub fn memory_grow(delta: usize) -> usize { + unsafe { + static_assert!(MEM == 0); + llvm_memory_grow(MEM, delta) + } +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/mod.rs b/library/stdarch/crates/core_arch/src/wasm32/mod.rs new file mode 100644 index 000000000000..2c4361f1639f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/mod.rs @@ -0,0 +1,197 @@ +//! 
WASM32 intrinsics + +#[cfg(test)] +use stdarch_test::assert_instr; + +mod atomic; +#[unstable(feature = "stdarch_wasm_atomic_wait", issue = "77839")] +pub use self::atomic::*; + +mod simd128; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use self::simd128::*; + +mod relaxed_simd; +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use self::relaxed_simd::*; + +mod memory; +#[stable(feature = "simd_wasm32", since = "1.33.0")] +pub use self::memory::*; + +/// Generates the [`unreachable`] instruction, which causes an unconditional [trap]. +/// +/// This function is safe to call and immediately aborts the execution. +/// +/// [`unreachable`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-control +/// [trap]: https://webassembly.github.io/spec/core/intro/overview.html#trap +#[cfg_attr(test, assert_instr(unreachable))] +#[inline] +#[stable(feature = "unreachable_wasm32", since = "1.37.0")] +pub fn unreachable() -> ! { + crate::intrinsics::abort() +} + +/// Generates the [`f32.ceil`] instruction, returning the smallest integer greater than or equal to `a`. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f32::ceil()`]. +/// +/// [`std::f32::ceil()`]: https://doc.rust-lang.org/std/primitive.f32.html#method.ceil +/// [`f32.ceil`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f32.ceil))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f32_ceil(a: f32) -> f32 { + unsafe { crate::intrinsics::ceilf32(a) } +} + +/// Generates the [`f32.floor`] instruction, returning the largest integer less than or equal to `a`. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f32::floor()`]. 
+/// +/// [`std::f32::floor()`]: https://doc.rust-lang.org/std/primitive.f32.html#method.floor +/// [`f32.floor`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f32.floor))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f32_floor(a: f32) -> f32 { + unsafe { crate::intrinsics::floorf32(a) } +} + +/// Generates the [`f32.trunc`] instruction, roundinging to the nearest integer towards zero. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f32::trunc()`]. +/// +/// [`std::f32::trunc()`]: https://doc.rust-lang.org/std/primitive.f32.html#method.trunc +/// [`f32.trunc`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f32.trunc))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f32_trunc(a: f32) -> f32 { + unsafe { crate::intrinsics::truncf32(a) } +} + +/// Generates the [`f32.nearest`] instruction, roundinging to the nearest integer. Rounds half-way +/// cases to the number with an even least significant digit. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f32::round_ties_even()`]. 
+/// +/// [`std::f32::round_ties_even()`]: https://doc.rust-lang.org/std/primitive.f32.html#method.round_ties_even +/// [`f32.nearest`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f32.nearest))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f32_nearest(a: f32) -> f32 { + crate::intrinsics::round_ties_even_f32(a) +} + +/// Generates the [`f32.sqrt`] instruction, returning the square root of the number `a`. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f32::sqrt()`]. +/// +/// [`std::f32::sqrt()`]: https://doc.rust-lang.org/std/primitive.f32.html#method.sqrt +/// [`f32.sqrt`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f32.sqrt))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f32_sqrt(a: f32) -> f32 { + unsafe { crate::intrinsics::sqrtf32(a) } +} + +/// Generates the [`f64.ceil`] instruction, returning the smallest integer greater than or equal to `a`. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f64::ceil()`]. +/// +/// [`std::f64::ceil()`]: https://doc.rust-lang.org/std/primitive.f64.html#method.ceil +/// [`f64.ceil`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f64.ceil))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f64_ceil(a: f64) -> f64 { + unsafe { crate::intrinsics::ceilf64(a) } +} + +/// Generates the [`f64.floor`] instruction, returning the largest integer less than or equal to `a`. 
+/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f64::floor()`]. +/// +/// [`std::f64::floor()`]: https://doc.rust-lang.org/std/primitive.f64.html#method.floor +/// [`f64.floor`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f64.floor))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f64_floor(a: f64) -> f64 { + unsafe { crate::intrinsics::floorf64(a) } +} + +/// Generates the [`f64.trunc`] instruction, roundinging to the nearest integer towards zero. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f64::trunc()`]. +/// +/// [`std::f64::trunc()`]: https://doc.rust-lang.org/std/primitive.f64.html#method.trunc +/// [`f64.trunc`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f64.trunc))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f64_trunc(a: f64) -> f64 { + unsafe { crate::intrinsics::truncf64(a) } +} + +/// Generates the [`f64.nearest`] instruction, roundinging to the nearest integer. Rounds half-way +/// cases to the number with an even least significant digit. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f64::round_ties_even()`]. 
+/// +/// [`std::f64::round_ties_even()`]: https://doc.rust-lang.org/std/primitive.f64.html#method.round_ties_even +/// [`f64.nearest`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f64.nearest))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f64_nearest(a: f64) -> f64 { + crate::intrinsics::round_ties_even_f64(a) +} + +/// Generates the [`f64.sqrt`] instruction, returning the square root of the number `a`. +/// +/// This method is useful when targeting `no_std` and is equivalent to [`std::f64::sqrt()`]. +/// +/// [`std::f64::sqrt()`]: https://doc.rust-lang.org/std/primitive.f64.html#method.sqrt +/// [`f64.sqrt`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-numeric +#[cfg_attr(test, assert_instr(f64.sqrt))] +#[inline] +#[must_use = "method returns a new number and does not mutate the original value"] +#[unstable(feature = "wasm_numeric_instr", issue = "133908")] +pub fn f64_sqrt(a: f64) -> f64 { + unsafe { crate::intrinsics::sqrtf64(a) } +} + +unsafe extern "C-unwind" { + #[link_name = "llvm.wasm.throw"] + fn wasm_throw(tag: i32, ptr: *mut u8) -> !; +} + +/// Generates the [`throw`] instruction from the [exception-handling proposal] for WASM. +/// +/// This function is unlikely to be stabilized until codegen backends have better support. +/// +/// [`throw`]: https://webassembly.github.io/exception-handling/core/syntax/instructions.html#syntax-instr-control +/// [exception-handling proposal]: https://github.com/WebAssembly/exception-handling +// FIXME: wasmtime does not currently support exception-handling, so cannot execute +// a wasm module with the throw instruction in it. once it does, we can +// reenable this attribute. 
+// #[cfg_attr(test, assert_instr(throw, TAG = 0, ptr = core::ptr::null_mut()))] +#[inline] +#[unstable(feature = "wasm_exception_handling_intrinsics", issue = "122465")] +pub unsafe fn throw(ptr: *mut u8) -> ! { + static_assert!(TAG == 0); // LLVM only supports tag 0 == C++ right now. + wasm_throw(TAG, ptr) +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/relaxed_simd.rs b/library/stdarch/crates/core_arch/src/wasm32/relaxed_simd.rs new file mode 100644 index 000000000000..a9b7e9c04d11 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/relaxed_simd.rs @@ -0,0 +1,509 @@ +use super::v128; +use crate::core_arch::simd; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.wasm.relaxed.swizzle"] + fn llvm_relaxed_swizzle(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16; + #[link_name = "llvm.wasm.relaxed.trunc.signed"] + fn llvm_relaxed_trunc_signed(a: simd::f32x4) -> simd::i32x4; + #[link_name = "llvm.wasm.relaxed.trunc.unsigned"] + fn llvm_relaxed_trunc_unsigned(a: simd::f32x4) -> simd::i32x4; + #[link_name = "llvm.wasm.relaxed.trunc.signed.zero"] + fn llvm_relaxed_trunc_signed_zero(a: simd::f64x2) -> simd::i32x4; + #[link_name = "llvm.wasm.relaxed.trunc.unsigned.zero"] + fn llvm_relaxed_trunc_unsigned_zero(a: simd::f64x2) -> simd::i32x4; + + #[link_name = "llvm.wasm.relaxed.madd.v4f32"] + fn llvm_f32x4_fma(a: simd::f32x4, b: simd::f32x4, c: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.wasm.relaxed.nmadd.v4f32"] + fn llvm_f32x4_fms(a: simd::f32x4, b: simd::f32x4, c: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.wasm.relaxed.madd.v2f64"] + fn llvm_f64x2_fma(a: simd::f64x2, b: simd::f64x2, c: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.wasm.relaxed.nmadd.v2f64"] + fn llvm_f64x2_fms(a: simd::f64x2, b: simd::f64x2, c: simd::f64x2) -> simd::f64x2; + + #[link_name = "llvm.wasm.relaxed.laneselect.v16i8"] + fn llvm_i8x16_laneselect(a: simd::i8x16, b: 
simd::i8x16, c: simd::i8x16) -> simd::i8x16; + #[link_name = "llvm.wasm.relaxed.laneselect.v8i16"] + fn llvm_i16x8_laneselect(a: simd::i16x8, b: simd::i16x8, c: simd::i16x8) -> simd::i16x8; + #[link_name = "llvm.wasm.relaxed.laneselect.v4i32"] + fn llvm_i32x4_laneselect(a: simd::i32x4, b: simd::i32x4, c: simd::i32x4) -> simd::i32x4; + #[link_name = "llvm.wasm.relaxed.laneselect.v2i64"] + fn llvm_i64x2_laneselect(a: simd::i64x2, b: simd::i64x2, c: simd::i64x2) -> simd::i64x2; + + #[link_name = "llvm.wasm.relaxed.min.v4f32"] + fn llvm_f32x4_relaxed_min(a: simd::f32x4, b: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.wasm.relaxed.min.v2f64"] + fn llvm_f64x2_relaxed_min(a: simd::f64x2, b: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.wasm.relaxed.max.v4f32"] + fn llvm_f32x4_relaxed_max(a: simd::f32x4, b: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.wasm.relaxed.max.v2f64"] + fn llvm_f64x2_relaxed_max(a: simd::f64x2, b: simd::f64x2) -> simd::f64x2; + + #[link_name = "llvm.wasm.relaxed.q15mulr.signed"] + fn llvm_relaxed_q15mulr_signed(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8; + #[link_name = "llvm.wasm.relaxed.dot.i8x16.i7x16.signed"] + fn llvm_i16x8_relaxed_dot_i8x16_i7x16_s(a: simd::i8x16, b: simd::i8x16) -> simd::i16x8; + #[link_name = "llvm.wasm.relaxed.dot.i8x16.i7x16.add.signed"] + fn llvm_i32x4_relaxed_dot_i8x16_i7x16_add_s( + a: simd::i8x16, + b: simd::i8x16, + c: simd::i32x4, + ) -> simd::i32x4; +} + +/// A relaxed version of `i8x16_swizzle(a, s)` which selects lanes from `a` +/// using indices in `s`. +/// +/// Indices in the range `[0,15]` will select the `i`-th element of `a`. +/// If the high bit of any element of `s` is set (meaning 128 or greater) then +/// the corresponding output lane is guaranteed to be zero. Otherwise if the +/// element of `s` is within the range `[16,128)` then the output lane is either +/// 0 or `a[s[i] % 16]` depending on the implementation. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.relaxed_swizzle))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i8x16.relaxed_swizzle"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i8x16_relaxed_swizzle(a: v128, s: v128) -> v128 { + unsafe { llvm_relaxed_swizzle(a.as_i8x16(), s.as_i8x16()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i8x16_relaxed_swizzle as u8x16_relaxed_swizzle; + +/// A relaxed version of `i32x4_trunc_sat_f32x4(a)` converts the `f32` lanes +/// of `a` to signed 32-bit integers. +/// +/// Values which don't fit in 32-bit integers or are NaN may have the same +/// result as `i32x4_trunc_sat_f32x4` or may return `i32::MIN`. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f32x4_s))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i32x4.relaxed_trunc_f32x4_s"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i32x4_relaxed_trunc_f32x4(a: v128) -> v128 { + unsafe { llvm_relaxed_trunc_signed(a.as_f32x4()).v128() } +} + +/// A relaxed version of `u32x4_trunc_sat_f32x4(a)` converts the `f32` lanes +/// of `a` to unsigned 32-bit integers. +/// +/// Values which don't fit in 32-bit unsigned integers or are NaN may have the +/// same result as `u32x4_trunc_sat_f32x4` or may return `u32::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f32x4_u))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i32x4.relaxed_trunc_f32x4_u"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn u32x4_relaxed_trunc_f32x4(a: v128) -> v128 { + unsafe { llvm_relaxed_trunc_unsigned(a.as_f32x4()).v128() } +} + +/// A relaxed version of `i32x4_trunc_sat_f64x2_zero(a)` converts the `f64` +/// lanes of `a` to signed 32-bit integers and the upper two lanes are zero. 
+/// +/// Values which don't fit in 32-bit integers or are NaN may have the same +/// result as `i32x4_trunc_sat_f32x4` or may return `i32::MIN`. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f64x2_s_zero))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i32x4.relaxed_trunc_f64x2_s_zero"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i32x4_relaxed_trunc_f64x2_zero(a: v128) -> v128 { + unsafe { llvm_relaxed_trunc_signed_zero(a.as_f64x2()).v128() } +} + +/// A relaxed version of `u32x4_trunc_sat_f64x2_zero(a)` converts the `f64` +/// lanes of `a` to unsigned 32-bit integers and the upper two lanes are zero. +/// +/// Values which don't fit in 32-bit unsigned integers or are NaN may have the +/// same result as `u32x4_trunc_sat_f32x4` or may return `u32::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f64x2_u_zero))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i32x4.relaxed_trunc_f64x2_u_zero"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn u32x4_relaxed_trunc_f64x2_zero(a: v128) -> v128 { + unsafe { llvm_relaxed_trunc_unsigned_zero(a.as_f64x2()).v128() } +} + +/// Computes `a * b + c` with either one rounding or two roundings. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.relaxed_madd))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f32x4.relaxed_madd"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f32x4_relaxed_madd(a: v128, b: v128, c: v128) -> v128 { + unsafe { llvm_f32x4_fma(a.as_f32x4(), b.as_f32x4(), c.as_f32x4()).v128() } +} + +/// Computes `-a * b + c` with either one rounding or two roundings. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.relaxed_nmadd))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f32x4.relaxed_nmadd"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f32x4_relaxed_nmadd(a: v128, b: v128, c: v128) -> v128 { + unsafe { llvm_f32x4_fms(a.as_f32x4(), b.as_f32x4(), c.as_f32x4()).v128() } +} + +/// Computes `a * b + c` with either one rounding or two roundings. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.relaxed_madd))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f64x2.relaxed_madd"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f64x2_relaxed_madd(a: v128, b: v128, c: v128) -> v128 { + unsafe { llvm_f64x2_fma(a.as_f64x2(), b.as_f64x2(), c.as_f64x2()).v128() } +} + +/// Computes `-a * b + c` with either one rounding or two roundings. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.relaxed_nmadd))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f64x2.relaxed_nmadd"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f64x2_relaxed_nmadd(a: v128, b: v128, c: v128) -> v128 { + unsafe { llvm_f64x2_fms(a.as_f64x2(), b.as_f64x2(), c.as_f64x2()).v128() } +} + +/// A relaxed version of `v128_bitselect` where this either behaves the same as +/// `v128_bitselect` or the high bit of each lane `m` is inspected and the +/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is +/// chosen if it's zero. +/// +/// If the `m` mask's lanes are either all-one or all-zero then this instruction +/// is the same as `v128_bitselect`. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.relaxed_laneselect))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i8x16.relaxed_laneselect"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i8x16_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 { + unsafe { llvm_i8x16_laneselect(a.as_i8x16(), b.as_i8x16(), m.as_i8x16()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i8x16_relaxed_laneselect as u8x16_relaxed_laneselect; + +/// A relaxed version of `v128_bitselect` where this either behaves the same as +/// `v128_bitselect` or the high bit of each lane `m` is inspected and the +/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is +/// chosen if it's zero. +/// +/// If the `m` mask's lanes are either all-one or all-zero then this instruction +/// is the same as `v128_bitselect`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.relaxed_laneselect))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i16x8.relaxed_laneselect"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i16x8_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 { + unsafe { llvm_i16x8_laneselect(a.as_i16x8(), b.as_i16x8(), m.as_i16x8()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i16x8_relaxed_laneselect as u16x8_relaxed_laneselect; + +/// A relaxed version of `v128_bitselect` where this either behaves the same as +/// `v128_bitselect` or the high bit of each lane `m` is inspected and the +/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is +/// chosen if it's zero. +/// +/// If the `m` mask's lanes are either all-one or all-zero then this instruction +/// is the same as `v128_bitselect`. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.relaxed_laneselect))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i32x4.relaxed_laneselect"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i32x4_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 { + unsafe { llvm_i32x4_laneselect(a.as_i32x4(), b.as_i32x4(), m.as_i32x4()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i32x4_relaxed_laneselect as u32x4_relaxed_laneselect; + +/// A relaxed version of `v128_bitselect` where this either behaves the same as +/// `v128_bitselect` or the high bit of each lane `m` is inspected and the +/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is +/// chosen if it's zero. +/// +/// If the `m` mask's lanes are either all-one or all-zero then this instruction +/// is the same as `v128_bitselect`. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.relaxed_laneselect))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i64x2.relaxed_laneselect"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i64x2_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 { + unsafe { llvm_i64x2_laneselect(a.as_i64x2(), b.as_i64x2(), m.as_i64x2()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i64x2_relaxed_laneselect as u64x2_relaxed_laneselect; + +/// A relaxed version of `f32x4_min` which is either `f32x4_min` or +/// `f32x4_pmin`. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.relaxed_min))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f32x4.relaxed_min"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f32x4_relaxed_min(a: v128, b: v128) -> v128 { + unsafe { llvm_f32x4_relaxed_min(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// A relaxed version of `f32x4_max` which is either `f32x4_max` or +/// `f32x4_pmax`. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.relaxed_max))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f32x4.relaxed_max"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f32x4_relaxed_max(a: v128, b: v128) -> v128 { + unsafe { llvm_f32x4_relaxed_max(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// A relaxed version of `f64x2_min` which is either `f64x2_min` or +/// `f64x2_pmin`. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.relaxed_min))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f64x2.relaxed_min"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f64x2_relaxed_min(a: v128, b: v128) -> v128 { + unsafe { llvm_f64x2_relaxed_min(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// A relaxed version of `f64x2_max` which is either `f64x2_max` or +/// `f64x2_pmax`. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.relaxed_max))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("f64x2.relaxed_max"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn f64x2_relaxed_max(a: v128, b: v128) -> v128 { + unsafe { llvm_f64x2_relaxed_max(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// A relaxed version of `i16x8_relaxed_q15mulr` where if both lanes are +/// `i16::MIN` then the result is either `i16::MIN` or `i16::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.relaxed_q15mulr_s))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i16x8.relaxed_q15mulr_s"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i16x8_relaxed_q15mulr(a: v128, b: v128) -> v128 { + unsafe { llvm_relaxed_q15mulr_signed(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i16x8_relaxed_q15mulr as u16x8_relaxed_q15mulr; + +/// A relaxed dot-product instruction. 
+/// +/// This instruction will perform pairwise products of the 8-bit values in `a` +/// and `b` and then accumulate adjacent pairs into 16-bit results producing a +/// final `i16x8` vector. The bytes of `a` are always interpreted as signed and +/// the bytes in `b` may be interpreted as signed or unsigned. If the top bit in +/// `b` isn't set then the value is the same regardless of whether it's signed +/// or unsigned. +/// +/// The accumulation into 16-bit values may be saturated on some platforms, and +/// on other platforms it may wrap-around on overflow. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.relaxed_dot_i8x16_i7x16_s))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i16x8.relaxed_dot_i8x16_i7x16_s"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i16x8_relaxed_dot_i8x16_i7x16(a: v128, b: v128) -> v128 { + unsafe { llvm_i16x8_relaxed_dot_i8x16_i7x16_s(a.as_i8x16(), b.as_i8x16()).v128() } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i16x8_relaxed_dot_i8x16_i7x16 as u16x8_relaxed_dot_i8x16_i7x16; + +/// Similar to [`i16x8_relaxed_dot_i8x16_i7x16`] except that the intermediate +/// `i16x8` result is fed into `i32x4_extadd_pairwise_i16x8` followed by +/// `i32x4_add` to add the value `c` to the result. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.relaxed_dot_i8x16_i7x16_add_s))] +#[target_feature(enable = "relaxed-simd")] +#[doc(alias("i32x4.relaxed_dot_i8x16_i7x16_add_s"))] +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub fn i32x4_relaxed_dot_i8x16_i7x16_add(a: v128, b: v128, c: v128) -> v128 { + unsafe { + llvm_i32x4_relaxed_dot_i8x16_i7x16_add_s(a.as_i8x16(), b.as_i8x16(), c.as_i32x4()).v128() + } +} + +#[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")] +pub use i32x4_relaxed_dot_i8x16_i7x16_add as u32x4_relaxed_dot_i8x16_i7x16_add; + +#[cfg(test)] +mod tests { + use super::super::simd128::*; + use super::*; + use core::ops::{Add, Div, Mul, Neg, Sub}; + + use std::fmt::Debug; + use std::mem::transmute; + use std::num::Wrapping; + use std::prelude::v1::*; + + fn compare_bytes(a: v128, b: &[v128]) { + let a: [u8; 16] = unsafe { transmute(a) }; + if b.iter().any(|b| { + let b: [u8; 16] = unsafe { transmute(*b) }; + a == b + }) { + return; + } + eprintln!("input vector {a:?}"); + eprintln!("did not match any output:"); + for b in b { + eprintln!(" {b:?}"); + } + } + + #[test] + fn test_relaxed_swizzle() { + compare_bytes( + i8x16_relaxed_swizzle( + i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + i8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1), + ), + &[i8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1)], + ); + compare_bytes( + i8x16_relaxed_swizzle( + i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + u8x16(0x80, 0xff, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + ), + &[ + i8x16(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + ], + ); + compare_bytes( + u8x16_relaxed_swizzle( + u8x16( + 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + ), + u8x16(0x80, 0xff, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + ), + &[ + u8x16( + 128, 128, 128, 129, 128, 128, 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, + ), + u8x16( + 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + ), + ], + ); + } + + #[test] + fn test_relaxed_trunc() { + compare_bytes( + i32x4_relaxed_trunc_f32x4(f32x4(1.0, 2.0, -1., -4.)), + &[i32x4(1, 2, -1, -4)], + ); + compare_bytes( + i32x4_relaxed_trunc_f32x4(f32x4(f32::NEG_INFINITY, f32::NAN, -0.0, f32::INFINITY)), + &[ + i32x4(i32::MIN, 0, 0, i32::MAX), + i32x4(i32::MIN, i32::MIN, 0, i32::MIN), + ], + ); + compare_bytes( + i32x4_relaxed_trunc_f64x2_zero(f64x2(1.0, -3.0)), + &[i32x4(1, -3, 0, 0)], + ); + compare_bytes( + i32x4_relaxed_trunc_f64x2_zero(f64x2(f64::INFINITY, f64::NAN)), + &[i32x4(i32::MAX, 0, 0, 0), i32x4(i32::MIN, i32::MIN, 0, 0)], + ); + + compare_bytes( + u32x4_relaxed_trunc_f32x4(f32x4(1.0, 2.0, 5., 100.)), + &[i32x4(1, 2, 5, 100)], + ); + compare_bytes( + u32x4_relaxed_trunc_f32x4(f32x4(f32::NEG_INFINITY, f32::NAN, -0.0, f32::INFINITY)), + &[ + u32x4(u32::MAX, 0, 0, u32::MAX), + u32x4(u32::MAX, u32::MAX, 0, u32::MAX), + ], + ); + compare_bytes( + u32x4_relaxed_trunc_f64x2_zero(f64x2(1.0, 3.0)), + &[u32x4(1, 3, 0, 0)], + ); + compare_bytes( + u32x4_relaxed_trunc_f64x2_zero(f64x2(f64::INFINITY, f64::NAN)), + &[i32x4(i32::MAX, 0, 0, 0), i32x4(i32::MIN, i32::MIN, 0, 0)], + ); + } + + #[test] + fn test_madd() { + let floats = [ + f32::NAN, + f32::NEG_INFINITY, + f32::INFINITY, + 1.0, + 2.0, + -1.0, + 0.0, + 100.3, + 7.8, + 9.4, + ]; + for &a in floats.iter() { + for &b in floats.iter() { + for &c in floats.iter() { + let f1 = a * b + c; + let f2 = a.mul_add(b, c); + compare_bytes( + f32x4_relaxed_madd(f32x4(a, a, a, a), f32x4(b, b, b, b), f32x4(c, c, c, c)), + &[f32x4(f1, f1, f1, f1), f32x4(f2, f2, f2, f2)], + ); + + let f1 = -a * b + c; + let f2 = (-a).mul_add(b, c); + compare_bytes( + f32x4_relaxed_nmadd( + f32x4(a, a, a, a), + f32x4(b, b, b, b), + f32x4(c, c, c, c), + ), + &[f32x4(f1, f1, f1, f1), f32x4(f2, f2, f2, f2)], + ); + + let a = f64::from(a); + let b = f64::from(b); + let c = 
f64::from(c); + let f1 = a * b + c; + let f2 = a.mul_add(b, c); + compare_bytes( + f64x2_relaxed_madd(f64x2(a, a), f64x2(b, b), f64x2(c, c)), + &[f64x2(f1, f1), f64x2(f2, f2)], + ); + let f1 = -a * b + c; + let f2 = (-a).mul_add(b, c); + compare_bytes( + f64x2_relaxed_nmadd(f64x2(a, a), f64x2(b, b), f64x2(c, c)), + &[f64x2(f1, f1), f64x2(f2, f2)], + ); + } + } + } + } +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs new file mode 100644 index 000000000000..fc0d7723fa01 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -0,0 +1,6100 @@ +//! This module implements the [WebAssembly `SIMD128` ISA]. +//! +//! [WebAssembly `SIMD128` ISA]: +//! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md + +#![allow(non_camel_case_types)] +#![allow(unused_imports)] + +use crate::{core_arch::simd, intrinsics::simd::*, marker::Sized, mem, ptr}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +types! { + #![stable(feature = "wasm_simd", since = "1.54.0")] + + /// WASM-specific 128-bit wide SIMD vector type. + /// + /// This type corresponds to the `v128` type in the [WebAssembly SIMD + /// proposal](https://github.com/webassembly/simd). This type is 128-bits + /// large and the meaning of all the bits is defined within the context of + /// how this value is used. + /// + /// This same type is used simultaneously for all 128-bit-wide SIMD types, + /// for example: + /// + /// * sixteen 8-bit integers (both `i8` and `u8`) + /// * eight 16-bit integers (both `i16` and `u16`) + /// * four 32-bit integers (both `i32` and `u32`) + /// * two 64-bit integers (both `i64` and `u64`) + /// * four 32-bit floats (`f32`) + /// * two 64-bit floats (`f64`) + /// + /// The `v128` type in Rust is intended to be quite analogous to the `v128` + /// type in WebAssembly. Operations on `v128` can only be performed with the + /// functions in this module. 
+ // N.B., internals here are arbitrary. + pub struct v128(4 x i32); +} + +macro_rules! conversions { + ($(($name:ident = $ty:ty))*) => { + impl v128 { + $( + #[inline(always)] + pub(crate) fn $name(self) -> $ty { + unsafe { mem::transmute(self) } + } + )* + } + $( + impl $ty { + #[inline(always)] + pub(crate) const fn v128(self) -> v128 { + unsafe { mem::transmute(self) } + } + } + )* + } +} + +conversions! { + (as_u8x16 = simd::u8x16) + (as_u16x8 = simd::u16x8) + (as_u32x4 = simd::u32x4) + (as_u64x2 = simd::u64x2) + (as_i8x16 = simd::i8x16) + (as_i16x8 = simd::i16x8) + (as_i32x4 = simd::i32x4) + (as_i64x2 = simd::i64x2) + (as_f32x4 = simd::f32x4) + (as_f64x2 = simd::f64x2) +} + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.wasm.swizzle"] + fn llvm_swizzle(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16; + + #[link_name = "llvm.wasm.bitselect.v16i8"] + fn llvm_bitselect(a: simd::i8x16, b: simd::i8x16, c: simd::i8x16) -> simd::i8x16; + #[link_name = "llvm.wasm.anytrue.v16i8"] + fn llvm_any_true_i8x16(x: simd::i8x16) -> i32; + + #[link_name = "llvm.wasm.alltrue.v16i8"] + fn llvm_i8x16_all_true(x: simd::i8x16) -> i32; + #[link_name = "llvm.wasm.bitmask.v16i8"] + fn llvm_bitmask_i8x16(a: simd::i8x16) -> i32; + #[link_name = "llvm.wasm.narrow.signed.v16i8.v8i16"] + fn llvm_narrow_i8x16_s(a: simd::i16x8, b: simd::i16x8) -> simd::i8x16; + #[link_name = "llvm.wasm.narrow.unsigned.v16i8.v8i16"] + fn llvm_narrow_i8x16_u(a: simd::i16x8, b: simd::i16x8) -> simd::i8x16; + #[link_name = "llvm.wasm.avgr.unsigned.v16i8"] + fn llvm_avgr_u_i8x16(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16; + + #[link_name = "llvm.wasm.extadd.pairwise.signed.v8i16"] + fn llvm_i16x8_extadd_pairwise_i8x16_s(x: simd::i8x16) -> simd::i16x8; + #[link_name = "llvm.wasm.extadd.pairwise.unsigned.v8i16"] + fn llvm_i16x8_extadd_pairwise_i8x16_u(x: simd::i8x16) -> simd::i16x8; + #[link_name = "llvm.wasm.q15mulr.sat.signed"] + fn llvm_q15mulr(a: simd::i16x8, b: simd::i16x8) -> 
simd::i16x8; + #[link_name = "llvm.wasm.alltrue.v8i16"] + fn llvm_i16x8_all_true(x: simd::i16x8) -> i32; + #[link_name = "llvm.wasm.bitmask.v8i16"] + fn llvm_bitmask_i16x8(a: simd::i16x8) -> i32; + #[link_name = "llvm.wasm.narrow.signed.v8i16.v4i32"] + fn llvm_narrow_i16x8_s(a: simd::i32x4, b: simd::i32x4) -> simd::i16x8; + #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v4i32"] + fn llvm_narrow_i16x8_u(a: simd::i32x4, b: simd::i32x4) -> simd::i16x8; + #[link_name = "llvm.wasm.avgr.unsigned.v8i16"] + fn llvm_avgr_u_i16x8(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8; + + #[link_name = "llvm.wasm.extadd.pairwise.signed.v4i32"] + fn llvm_i32x4_extadd_pairwise_i16x8_s(x: simd::i16x8) -> simd::i32x4; + #[link_name = "llvm.wasm.extadd.pairwise.unsigned.v4i32"] + fn llvm_i32x4_extadd_pairwise_i16x8_u(x: simd::i16x8) -> simd::i32x4; + #[link_name = "llvm.wasm.alltrue.v4i32"] + fn llvm_i32x4_all_true(x: simd::i32x4) -> i32; + #[link_name = "llvm.wasm.bitmask.v4i32"] + fn llvm_bitmask_i32x4(a: simd::i32x4) -> i32; + #[link_name = "llvm.wasm.dot"] + fn llvm_i32x4_dot_i16x8_s(a: simd::i16x8, b: simd::i16x8) -> simd::i32x4; + + #[link_name = "llvm.wasm.alltrue.v2i64"] + fn llvm_i64x2_all_true(x: simd::i64x2) -> i32; + #[link_name = "llvm.wasm.bitmask.v2i64"] + fn llvm_bitmask_i64x2(a: simd::i64x2) -> i32; + + #[link_name = "llvm.nearbyint.v4f32"] + fn llvm_f32x4_nearest(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.minimum.v4f32"] + fn llvm_f32x4_min(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.maximum.v4f32"] + fn llvm_f32x4_max(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4; + + #[link_name = "llvm.nearbyint.v2f64"] + fn llvm_f64x2_nearest(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.minimum.v2f64"] + fn llvm_f64x2_min(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.maximum.v2f64"] + fn llvm_f64x2_max(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2; +} + +#[repr(packed)] +#[derive(Copy)] +struct Unaligned(T); + 
+impl Clone for Unaligned { + fn clone(&self) -> Unaligned { + *self + } +} + +/// Loads a `v128` vector from the given heap address. +/// +/// This intrinsic will emit a load with an alignment of 1. While this is +/// provided for completeness it is not strictly necessary, you can also load +/// the pointer directly: +/// +/// ```rust,ignore +/// let a: &v128 = ...; +/// let value = unsafe { v128_load(a) }; +/// // .. is the same as .. +/// let value = *a; +/// ``` +/// +/// The alignment of the load can be configured by doing a manual load without +/// this intrinsic. +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 16 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load(m: *const v128) -> v128 { + (*(m as *const Unaligned)).0 +} + +/// Load eight 8-bit integers and sign extend each one to a 16-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.load8x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i16x8_load_extend_i8x8(m: *const i8) -> v128 { + let m = *(m as *const Unaligned); + simd_cast::<_, simd::i16x8>(m.0).v128() +} + +/// Load eight 8-bit integers and zero extend each one to a 16-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load8x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i16x8_load_extend_u8x8(m: *const u8) -> v128 { + let m = *(m as *const Unaligned); + simd_cast::<_, simd::u16x8>(m.0).v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_load_extend_u8x8 as u16x8_load_extend_u8x8; + +/// Load four 16-bit integers and sign extend each one to a 32-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.load16x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i32x4_load_extend_i16x4(m: *const i16) -> v128 { + let m = *(m as *const Unaligned); + simd_cast::<_, simd::i32x4>(m.0).v128() +} + +/// Load four 16-bit integers and zero extend each one to a 32-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i32x4_load_extend_u16x4(m: *const u16) -> v128 { + let m = *(m as *const Unaligned); + simd_cast::<_, simd::u32x4>(m.0).v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_load_extend_u16x4 as u32x4_load_extend_u16x4; + +/// Load two 32-bit integers and sign extend each one to a 64-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.load32x2_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32x2_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i64x2_load_extend_i32x2(m: *const i32) -> v128 { + let m = *(m as *const Unaligned); + simd_cast::<_, simd::i64x2>(m.0).v128() +} + +/// Load two 32-bit integers and zero extend each one to a 64-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32x2_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32x2_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i64x2_load_extend_u32x2(m: *const u32) -> v128 { + let m = *(m as *const Unaligned); + simd_cast::<_, simd::u64x2>(m.0).v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_load_extend_u32x2 as u64x2_load_extend_u32x2; + +/// Load a single element and splat to all lanes of a v128 vector. +/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u8x16_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 1 byte from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load8_splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8_splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load8_splat(m: *const u8) -> v128 { + u8x16_splat(*m) +} + +/// Load a single element and splat to all lanes of a v128 vector. 
+/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u16x8_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 2 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16_splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16_splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load16_splat(m: *const u16) -> v128 { + u16x8_splat(ptr::read_unaligned(m)) +} + +/// Load a single element and splat to all lanes of a v128 vector. +/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u32x4_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 4 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32_splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32_splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load32_splat(m: *const u32) -> v128 { + u32x4_splat(ptr::read_unaligned(m)) +} + +/// Load a single element and splat to all lanes of a v128 vector. +/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u64x2_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). 
+/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load64_splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load64_splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load64_splat(m: *const u64) -> v128 { + u64x2_splat(ptr::read_unaligned(m)) +} + +/// Load a 32-bit element into the low bits of the vector and sets all other +/// bits to zero. +/// +/// This intrinsic is provided for completeness and is equivalent to `u32x4(*m, +/// 0, 0, 0)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 4 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load32_zero(m: *const u32) -> v128 { + u32x4(ptr::read_unaligned(m), 0, 0, 0) +} + +/// Load a 64-bit element into the low bits of the vector and sets all other +/// bits to zero. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u64x2_replace_lane::<0>(u64x2(0, 0), *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.load64_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load64_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load64_zero(m: *const u64) -> v128 { + u64x2_replace_lane::<0>(u64x2(0, 0), ptr::read_unaligned(m)) +} + +/// Stores a `v128` vector to the given heap address. +/// +/// This intrinsic will emit a store with an alignment of 1. While this is +/// provided for completeness it is not strictly necessary, you can also store +/// the pointer directly: +/// +/// ```rust,ignore +/// let a: &mut v128 = ...; +/// unsafe { v128_store(a, value) }; +/// // .. is the same as .. +/// *a = value; +/// ``` +/// +/// The alignment of the store can be configured by doing a manual store without +/// this intrinsic. +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 16 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store(m: *mut v128, a: v128) { + *(m as *mut Unaligned) = Unaligned(a); +} + +/// Loads an 8-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u8x16_replace_lane::(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 1 byte from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.load8_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load8_lane(v: v128, m: *const u8) -> v128 { + u8x16_replace_lane::(v, *m) +} + +/// Loads a 16-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u16x8_replace_lane::(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 2 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load16_lane(v: v128, m: *const u16) -> v128 { + u16x8_replace_lane::(v, ptr::read_unaligned(m)) +} + +/// Loads a 32-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u32x4_replace_lane::(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 4 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load32_lane(v: v128, m: *const u32) -> v128 { + u32x4_replace_lane::(v, ptr::read_unaligned(m)) +} + +/// Loads a 64-bit value from `m` and sets lane `L` of `v` to that value. 
+/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u64x2_replace_lane::(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load64_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load64_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load64_lane(v: v128, m: *const u64) -> v128 { + u64x2_replace_lane::(v, ptr::read_unaligned(m)) +} + +/// Stores the 8-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u8x16_extract_lane::(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 1 byte to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store8_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store8_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store8_lane(v: v128, m: *mut u8) { + *m = u8x16_extract_lane::(v); +} + +/// Stores the 16-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u16x8_extract_lane::(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 2 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.store16_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store16_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store16_lane(v: v128, m: *mut u16) { + ptr::write_unaligned(m, u16x8_extract_lane::(v)) +} + +/// Stores the 32-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u32x4_extract_lane::(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 4 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store32_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store32_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store32_lane(v: v128, m: *mut u32) { + ptr::write_unaligned(m, u32x4_extract_lane::(v)) +} + +/// Stores the 64-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u64x2_extract_lane::(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 8 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store64_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store64_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store64_lane(v: v128, m: *mut u64) { + ptr::write_unaligned(m, u64x2_extract_lane::(v)) +} + +/// Materializes a SIMD value from the provided operands. 
+/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[cfg_attr( + test, + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + a8 = 8, + a9 = 9, + a10 = 10, + a11 = 11, + a12 = 12, + a13 = 13, + a14 = 14, + a15 = 15, + ) +)] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn i8x16( + a0: i8, + a1: i8, + a2: i8, + a3: i8, + a4: i8, + a5: i8, + a6: i8, + a7: i8, + a8: i8, + a9: i8, + a10: i8, + a11: i8, + a12: i8, + a13: i8, + a14: i8, + a15: i8, +) -> v128 { + simd::i8x16::new( + a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, + ) + .v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn u8x16( + a0: u8, + a1: u8, + a2: u8, + a3: u8, + a4: u8, + a5: u8, + a6: u8, + a7: u8, + a8: u8, + a9: u8, + a10: u8, + a11: u8, + a12: u8, + a13: u8, + a14: u8, + a15: u8, +) -> v128 { + simd::u8x16::new( + a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, + ) + .v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. 
+#[inline] +#[cfg_attr( + test, + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + ) +)] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn i16x8(a0: i16, a1: i16, a2: i16, a3: i16, a4: i16, a5: i16, a6: i16, a7: i16) -> v128 { + simd::i16x8::new(a0, a1, a2, a3, a4, a5, a6, a7).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn u16x8(a0: u16, a1: u16, a2: u16, a3: u16, a4: u16, a5: u16, a6: u16, a7: u16) -> v128 { + simd::u16x8::new(a0, a1, a2, a3, a4, a5, a6, a7).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3))] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn i32x4(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 { + simd::i32x4::new(a0, a1, a2, a3).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. 
+#[inline] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn u32x4(a0: u32, a1: u32, a2: u32, a3: u32) -> v128 { + simd::u32x4::new(a0, a1, a2, a3).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[cfg_attr(test, assert_instr(v128.const, a0 = 1, a1 = 2))] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn i64x2(a0: i64, a1: i64) -> v128 { + simd::i64x2::new(a0, a1).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +#[target_feature(enable = "simd128")] +pub const fn u64x2(a0: u64, a1: u64) -> v128 { + simd::u64x2::new(a0, a1).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0, a2 = 2.0, a3 = 3.0))] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")] +#[target_feature(enable = "simd128")] +pub const fn f32x4(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 { + simd::f32x4::new(a0, a1, a2, a3).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0))] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")] +#[target_feature(enable = "simd128")] +pub const fn f64x2(a0: f64, a1: f64) -> v128 { + simd::f64x2::new(a0, a1).v128() +} + +/// Returns a new vector with lanes selected from the lanes of the two input +/// vectors `$a` and `$b` specified in the 16 immediate operands. +/// +/// The `$a` and `$b` expressions must have type `v128`, and this function +/// generates a wasm instruction that is encoded with 16 bytes providing the +/// indices of the elements to return. The indices `i` in range [0, 15] select +/// the `i`-th element of `a`. The indices in range [16, 31] select the `i - +/// 16`-th element of `b`. +/// +/// Note that this is a macro due to the codegen requirements of all of the +/// index expressions `$i*` must be constant. A compiler error will be +/// generated if any of the expressions are not constant. +/// +/// All indexes `$i*` must have the type `u32`. 
+#[inline] +#[cfg_attr(test, + assert_instr( + i8x16.shuffle, + I0 = 0, + I1 = 2, + I2 = 4, + I3 = 6, + I4 = 8, + I5 = 10, + I6 = 12, + I7 = 14, + I8 = 16, + I9 = 18, + I10 = 20, + I11 = 22, + I12 = 24, + I13 = 26, + I14 = 28, + I15 = 30, + ) +)] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shuffle"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_shuffle< + const I0: usize, + const I1: usize, + const I2: usize, + const I3: usize, + const I4: usize, + const I5: usize, + const I6: usize, + const I7: usize, + const I8: usize, + const I9: usize, + const I10: usize, + const I11: usize, + const I12: usize, + const I13: usize, + const I14: usize, + const I15: usize, +>( + a: v128, + b: v128, +) -> v128 { + static_assert!(I0 < 32); + static_assert!(I1 < 32); + static_assert!(I2 < 32); + static_assert!(I3 < 32); + static_assert!(I4 < 32); + static_assert!(I5 < 32); + static_assert!(I6 < 32); + static_assert!(I7 < 32); + static_assert!(I8 < 32); + static_assert!(I9 < 32); + static_assert!(I10 < 32); + static_assert!(I11 < 32); + static_assert!(I12 < 32); + static_assert!(I13 < 32); + static_assert!(I14 < 32); + static_assert!(I15 < 32); + let shuf: simd::u8x16 = unsafe { + simd_shuffle!( + a.as_u8x16(), + b.as_u8x16(), + [ + I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, + I7 as u32, I8 as u32, I9 as u32, I10 as u32, I11 as u32, I12 as u32, I13 as u32, + I14 as u32, I15 as u32, + ], + ) + }; + shuf.v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_shuffle as u8x16_shuffle; + +/// Same as [`i8x16_shuffle`], except operates as if the inputs were eight +/// 16-bit integers, only taking 8 indices to shuffle. +/// +/// Indices in the range [0, 7] select from `a` while [8, 15] select from `b`. +/// Note that this will generate the `i8x16.shuffle` instruction, since there +/// is no native `i16x8.shuffle` instruction (there is no need for one since +/// `i8x16.shuffle` suffices). 
+#[inline] +#[cfg_attr(test, + assert_instr( + i8x16.shuffle, + I0 = 0, + I1 = 2, + I2 = 4, + I3 = 6, + I4 = 8, + I5 = 10, + I6 = 12, + I7 = 14, + ) +)] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shuffle"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_shuffle< + const I0: usize, + const I1: usize, + const I2: usize, + const I3: usize, + const I4: usize, + const I5: usize, + const I6: usize, + const I7: usize, +>( + a: v128, + b: v128, +) -> v128 { + static_assert!(I0 < 16); + static_assert!(I1 < 16); + static_assert!(I2 < 16); + static_assert!(I3 < 16); + static_assert!(I4 < 16); + static_assert!(I5 < 16); + static_assert!(I6 < 16); + static_assert!(I7 < 16); + let shuf: simd::u16x8 = unsafe { + simd_shuffle!( + a.as_u16x8(), + b.as_u16x8(), + [ + I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, + I7 as u32, + ], + ) + }; + shuf.v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_shuffle as u16x8_shuffle; + +/// Same as [`i8x16_shuffle`], except operates as if the inputs were four +/// 32-bit integers, only taking 4 indices to shuffle. +/// +/// Indices in the range [0, 3] select from `a` while [4, 7] select from `b`. +/// Note that this will generate the `i8x16.shuffle` instruction, since there +/// is no native `i32x4.shuffle` instruction (there is no need for one since +/// `i8x16.shuffle` suffices). 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.shuffle, I0 = 0, I1 = 2, I2 = 4, I3 = 6))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shuffle"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_shuffle( + a: v128, + b: v128, +) -> v128 { + static_assert!(I0 < 8); + static_assert!(I1 < 8); + static_assert!(I2 < 8); + static_assert!(I3 < 8); + let shuf: simd::u32x4 = unsafe { + simd_shuffle!( + a.as_u32x4(), + b.as_u32x4(), + [I0 as u32, I1 as u32, I2 as u32, I3 as u32], + ) + }; + shuf.v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_shuffle as u32x4_shuffle; + +/// Same as [`i8x16_shuffle`], except operates as if the inputs were two +/// 64-bit integers, only taking 2 indices to shuffle. +/// +/// Indices in the range [0, 1] select from `a` while [2, 3] select from `b`. +/// Note that this will generate the `i8x16.shuffle` instruction, since there +/// is no native `i64x2.shuffle` instruction (there is no need for one since +/// `i8x16.shuffle` suffices). +#[inline] +#[cfg_attr(test, assert_instr(i8x16.shuffle, I0 = 0, I1 = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shuffle"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_shuffle(a: v128, b: v128) -> v128 { + static_assert!(I0 < 4); + static_assert!(I1 < 4); + let shuf: simd::u64x2 = + unsafe { simd_shuffle!(a.as_u64x2(), b.as_u64x2(), [I0 as u32, I1 as u32]) }; + shuf.v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_shuffle as u64x2_shuffle; + +/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.extract_lane_s, N = 3))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.extract_lane_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_extract_lane(a: v128) -> i8 { + static_assert!(N < 16); + unsafe { simd_extract!(a.as_i8x16(), N as u32) } +} + +/// Extracts a lane from a 128-bit vector interpreted as 16 packed u8 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.extract_lane_u, N = 3))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.extract_lane_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_extract_lane(a: v128) -> u8 { + static_assert!(N < 16); + unsafe { simd_extract!(a.as_u8x16(), N as u32) } +} + +/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.replace_lane, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_replace_lane(a: v128, val: i8) -> v128 { + static_assert!(N < 16); + unsafe { simd_insert!(a.as_i8x16(), N as u32, val).v128() } +} + +/// Replaces a lane from a 128-bit vector interpreted as 16 packed u8 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.replace_lane, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_replace_lane(a: v128, val: u8) -> v128 { + static_assert!(N < 16); + unsafe { simd_insert!(a.as_u8x16(), N as u32, val).v128() } +} + +/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extract_lane_s, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extract_lane_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extract_lane(a: v128) -> i16 { + static_assert!(N < 8); + unsafe { simd_extract!(a.as_i16x8(), N as u32) } +} + +/// Extracts a lane from a 128-bit vector interpreted as 8 packed u16 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extract_lane_u, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extract_lane_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_extract_lane(a: v128) -> u16 { + static_assert!(N < 8); + unsafe { simd_extract!(a.as_u16x8(), N as u32) } +} + +/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.replace_lane, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_replace_lane(a: v128, val: i16) -> v128 { + static_assert!(N < 8); + unsafe { simd_insert!(a.as_i16x8(), N as u32, val).v128() } +} + +/// Replaces a lane from a 128-bit vector interpreted as 8 packed u16 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.replace_lane, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_replace_lane(a: v128, val: u16) -> v128 { + static_assert!(N < 8); + unsafe { simd_insert!(a.as_u16x8(), N as u32, val).v128() } +} + +/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extract_lane, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extract_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extract_lane(a: v128) -> i32 { + static_assert!(N < 4); + unsafe { simd_extract!(a.as_i32x4(), N as u32) } +} + +/// Extracts a lane from a 128-bit vector interpreted as 4 packed u32 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
+#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extract_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_extract_lane(a: v128) -> u32 { + i32x4_extract_lane::(a) as u32 +} + +/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.replace_lane, N = 2))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_replace_lane(a: v128, val: i32) -> v128 { + static_assert!(N < 4); + unsafe { simd_insert!(a.as_i32x4(), N as u32, val).v128() } +} + +/// Replaces a lane from a 128-bit vector interpreted as 4 packed u32 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_replace_lane(a: v128, val: u32) -> v128 { + i32x4_replace_lane::(a, val as i32) +} + +/// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extract_lane, N = 1))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extract_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extract_lane(a: v128) -> i64 { + static_assert!(N < 2); + unsafe { simd_extract!(a.as_i64x2(), N as u32) } +} + +/// Extracts a lane from a 128-bit vector interpreted as 2 packed u64 numbers. 
+/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extract_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u64x2_extract_lane(a: v128) -> u64 { + i64x2_extract_lane::(a) as u64 +} + +/// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.replace_lane, N = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_replace_lane(a: v128, val: i64) -> v128 { + static_assert!(N < 2); + unsafe { simd_insert!(a.as_i64x2(), N as u32, val).v128() } +} + +/// Replaces a lane from a 128-bit vector interpreted as 2 packed u64 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u64x2_replace_lane(a: v128, val: u64) -> v128 { + i64x2_replace_lane::(a, val as i64) +} + +/// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.extract_lane, N = 1))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.extract_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_extract_lane(a: v128) -> f32 { + static_assert!(N < 4); + unsafe { simd_extract!(a.as_f32x4(), N as u32) } +} + +/// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.replace_lane, N = 1))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_replace_lane(a: v128, val: f32) -> v128 { + static_assert!(N < 4); + unsafe { simd_insert!(a.as_f32x4(), N as u32, val).v128() } +} + +/// Extracts a lane from a 128-bit vector interpreted as 2 packed f64 numbers. +/// +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.extract_lane, N = 1))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.extract_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_extract_lane(a: v128) -> f64 { + static_assert!(N < 2); + unsafe { simd_extract!(a.as_f64x2(), N as u32) } +} + +/// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. +/// +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.replace_lane, N = 1))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.replace_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_replace_lane(a: v128, val: f64) -> v128 { + static_assert!(N < 2); + unsafe { simd_insert!(a.as_f64x2(), N as u32, val).v128() } +} + +/// Returns a new vector with lanes selected from the lanes of the first input +/// vector `a` specified in the second input vector `s`. +/// +/// The indices `i` in range [0, 15] select the `i`-th element of `a`. For +/// indices outside of the range the resulting lane is 0. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.swizzle))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.swizzle"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_swizzle(a: v128, s: v128) -> v128 { + unsafe { llvm_swizzle(a.as_i8x16(), s.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_swizzle as u8x16_swizzle; + +/// Creates a vector with identical lanes. +/// +/// Constructs a vector with `x` replicated to all 16 lanes. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_splat(a: i8) -> v128 { + simd::i8x16::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Constructs a vector with `x` replicated to all 16 lanes. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_splat(a: u8) -> v128 { + simd::u8x16::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Construct a vector with `x` replicated to all 8 lanes. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_splat(a: i16) -> v128 { + simd::i16x8::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Construct a vector with `x` replicated to all 8 lanes. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_splat(a: u16) -> v128 { + simd::u16x8::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Constructs a vector with `x` replicated to all 4 lanes. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_splat(a: i32) -> v128 { + simd::i32x4::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Constructs a vector with `x` replicated to all 4 lanes. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_splat(a: u32) -> v128 { + i32x4_splat(a as i32) +} + +/// Creates a vector with identical lanes. +/// +/// Construct a vector with `x` replicated to all 2 lanes. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_splat(a: i64) -> v128 { + simd::i64x2::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Construct a vector with `x` replicated to all 2 lanes. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("u64x2.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u64x2_splat(a: u64) -> v128 { + i64x2_splat(a as i64) +} + +/// Creates a vector with identical lanes. 
+/// +/// Constructs a vector with `x` replicated to all 4 lanes. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_splat(a: f32) -> v128 { + simd::f32x4::splat(a).v128() +} + +/// Creates a vector with identical lanes. +/// +/// Constructs a vector with `x` replicated to all 2 lanes. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_splat(a: f64) -> v128 { + simd::f64x2::splat(a).v128() +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_eq as u8x16_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_ne as u8x16_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.lt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.lt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.gt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.gt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.gt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.gt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.le_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.le_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.le_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.le_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.ge_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.ge_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.ge_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.ge_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_eq as u16x8_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_ne as u16x8_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. 
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.lt_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.lt_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_lt(a: v128, b: v128) -> v128 {
+    unsafe { simd_lt::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.gt_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.gt_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_gt(a: v128, b: v128) -> v128 {
+    unsafe { simd_gt::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
+/// unsigned integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.gt_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.gt_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_gt(a: v128, b: v128) -> v128 {
+    unsafe { simd_gt::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.le_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.le_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
+/// unsigned integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.le_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.le_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.ge_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.ge_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_ge(a: v128, b: v128) -> v128 {
+    unsafe { simd_ge::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit
+/// unsigned integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.ge_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.ge_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_eq as u32x4_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_ne as u32x4_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.lt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.lt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.gt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.gt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. 
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.gt_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.gt_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_gt(a: v128, b: v128) -> v128 {
+    unsafe { simd_gt::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.le_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.le_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// unsigned integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.le_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.le_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.ge_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.ge_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_ge(a: v128, b: v128) -> v128 {
+    unsafe { simd_ge::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// unsigned integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.ge_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.ge_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_ge(a: v128, b: v128) -> v128 {
+    unsafe { simd_ge::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// integers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were equal, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.eq))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.eq"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_eq(a: v128, b: v128) -> v128 {
+    unsafe { simd_eq::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// integers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were not equal, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.ne))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.ne"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_ne(a: v128, b: v128) -> v128 {
+    unsafe { simd_ne::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_eq as u64x2_eq;
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_ne as u64x2_ne;
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.lt_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.lt_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_lt(a: v128, b: v128) -> v128 {
+    unsafe { simd_lt::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.gt_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.gt_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_gt(a: v128, b: v128) -> v128 {
+    unsafe { simd_gt::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.le_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.le_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// signed integers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.ge_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.ge_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_ge(a: v128, b: v128) -> v128 {
+    unsafe { simd_ge::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were equal, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.eq))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.eq"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_eq(a: v128, b: v128) -> v128 {
+    unsafe { simd_eq::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were not equal, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.ne))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.ne"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_ne(a: v128, b: v128) -> v128 {
+    unsafe { simd_ne::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.lt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.lt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_lt(a: v128, b: v128) -> v128 {
+    unsafe { simd_lt::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.gt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.gt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_gt(a: v128, b: v128) -> v128 {
+    unsafe { simd_gt::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.le))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.le"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.ge))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.ge"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_ge(a: v128, b: v128) -> v128 {
+    unsafe { simd_ge::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were equal, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.eq))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.eq"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_eq(a: v128, b: v128) -> v128 {
+    unsafe { simd_eq::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were not equal, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.ne))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.ne"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_ne(a: v128, b: v128) -> v128 {
+    unsafe { simd_ne::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.lt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.lt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_lt(a: v128, b: v128) -> v128 {
+    unsafe { simd_lt::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.gt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.gt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_gt(a: v128, b: v128) -> v128 {
+    unsafe { simd_gt::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is less than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.le))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.le"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_le(a: v128, b: v128) -> v128 {
+    unsafe { simd_le::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit
+/// floating point numbers.
+///
+/// Returns a new vector where each lane is all ones if the lane-wise left
+/// element is greater than or equal to the right element, or all zeros otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.ge))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.ge"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_ge(a: v128, b: v128) -> v128 {
+    unsafe { simd_ge::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Flips each bit of the 128-bit input vector.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.not))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.not"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_not(a: v128) -> v128 {
+    unsafe { simd_xor(a.as_i64x2(), simd::i64x2::new(!0, !0)).v128() }
+}
+
+/// Performs a bitwise and of the two input 128-bit vectors, returning the
+/// resulting vector.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.and))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.and"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_and(a: v128, b: v128) -> v128 {
+    unsafe { simd_and(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Bitwise AND of bits of `a` and the logical inverse of bits of `b`.
+/// +/// This operation is equivalent to `v128.and(a, v128.not(b))` +#[inline] +#[cfg_attr(test, assert_instr(v128.andnot))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.andnot"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn v128_andnot(a: v128, b: v128) -> v128 { + unsafe { + simd_and( + a.as_i64x2(), + simd_xor(b.as_i64x2(), simd::i64x2::new(-1, -1)), + ) + .v128() + } +} + +/// Performs a bitwise or of the two input 128-bit vectors, returning the +/// resulting vector. +#[inline] +#[cfg_attr(test, assert_instr(v128.or))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.or"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn v128_or(a: v128, b: v128) -> v128 { + unsafe { simd_or(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Performs a bitwise xor of the two input 128-bit vectors, returning the +/// resulting vector. +#[inline] +#[cfg_attr(test, assert_instr(v128.xor))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.xor"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn v128_xor(a: v128, b: v128) -> v128 { + unsafe { simd_xor(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Use the bitmask in `c` to select bits from `v1` when 1 and `v2` when 0. +#[inline] +#[cfg_attr(test, assert_instr(v128.bitselect))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.bitselect"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 { + unsafe { llvm_bitselect(v1.as_i8x16(), v2.as_i8x16(), c.as_i8x16()).v128() } +} + +/// Returns `true` if any bit in `a` is set, or `false` otherwise. +#[inline] +#[cfg_attr(test, assert_instr(v128.any_true))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.any_true"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn v128_any_true(a: v128) -> bool { + unsafe { llvm_any_true_i8x16(a.as_i8x16()) != 0 } +} + +/// Lane-wise wrapping absolute value. 
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.abs))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_abs(a: v128) -> v128 {
+    unsafe {
+        let a = a.as_i8x16();
+        let zero = simd::i8x16::ZERO;
+        simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
+    }
+}
+
+/// Negates a 128-bit vectors interpreted as sixteen 8-bit signed integers
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_neg(a: v128) -> v128 {
+    unsafe { simd_mul(a.as_i8x16(), simd::i8x16::splat(-1)).v128() }
+}
+
+/// Count the number of bits set to one within each lane.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.popcnt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.popcnt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_popcnt(v: v128) -> v128 {
+    unsafe { simd_ctpop(v.as_i8x16()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_popcnt as u8x16_popcnt;
+
+/// Returns true if all lanes are non-zero, false otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.all_true))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.all_true"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_all_true(a: v128) -> bool {
+    unsafe { llvm_i8x16_all_true(a.as_i8x16()) != 0 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_all_true as u8x16_all_true;
+
+/// Extracts the high bit for each lane in `a` and produce a scalar mask with
+/// all bits concatenated.
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.bitmask))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.bitmask"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_bitmask(a: v128) -> u16 { + unsafe { llvm_bitmask_i8x16(a.as_i8x16()) as u16 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_bitmask as u8x16_bitmask; + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x7f or 0x80 is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.narrow_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_narrow_i16x8(a: v128, b: v128) -> v128 { + unsafe { llvm_narrow_i8x16_s(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x00 or 0xff is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.narrow_i16x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 { + unsafe { llvm_narrow_i8x16_u(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Shifts each lane to the left by the specified number of bits. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.shl))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shl"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_shl(a: v128, amt: u32) -> v128 { + // SAFETY: the safety of this intrinsic relies on the fact that the + // shift amount for each lane is less than the number of bits in the input + // lane. 
In this case the input has 8-bit lanes but the shift amount above + // is `u32`, so a mask is required to discard all the upper bits of `amt` to + // ensure that the safety condition is met. + // + // Note that this is distinct from the behavior of the native WebAssembly + // instruction here where WebAssembly defines this instruction as performing + // a mask as well. This is nonetheless required since this must have defined + // semantics in LLVM, not just WebAssembly. + // + // Finally note that this mask operation is not actually emitted into the + // final binary itself. LLVM understands that the wasm operation implicitly + // masks, so it knows this mask operation is redundant. + // + // Basically the extra mask here is required as a bridge from the documented + // semantics through LLVM back out to WebAssembly. Both ends have the + // documented semantics, and the mask is required by LLVM in the middle. + unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_shl as u8x16_shl; + +/// Shifts each lane to the right by the specified number of bits, sign +/// extending. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.shr_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shr_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() } +} + +/// Shifts each lane to the right by the specified number of bits, shifting in +/// zeros. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.shr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat((amt & 0x7) as u8)).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_i8x16(), b.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_add as u8x16_add; + +/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit signed +/// integers, saturating on overflow to `i8::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.add_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.add_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_add_sat(a: v128, b: v128) -> v128 { + unsafe { simd_saturating_add(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit unsigned +/// integers, saturating on overflow to `u8::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.add_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.add_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_add_sat(a: v128, b: v128) -> v128 { + unsafe { simd_saturating_add(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit integers. 
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.sub))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.sub"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_sub(a: v128, b: v128) -> v128 {
+    unsafe { simd_sub(a.as_i8x16(), b.as_i8x16()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_sub as u8x16_sub;
+
+/// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit
+/// signed integers, saturating on overflow to `i8::MIN`.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.sub_sat_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.sub_sat_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_sub_sat(a: v128, b: v128) -> v128 {
+    unsafe { simd_saturating_sub(a.as_i8x16(), b.as_i8x16()).v128() }
+}
+
+/// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit
+/// unsigned integers, saturating on overflow to 0.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.sub_sat_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.sub_sat_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_sub_sat(a: v128, b: v128) -> v128 {
+    unsafe { simd_saturating_sub(a.as_u8x16(), b.as_u8x16()).v128() }
+}
+
+/// Compares lane-wise signed integers, and returns the minimum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.min_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.min_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_min(a: v128, b: v128) -> v128 {
+    let a = a.as_i8x16();
+    let b = b.as_i8x16();
+    unsafe { simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise unsigned integers, and returns the minimum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.min_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.min_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_min(a: v128, b: v128) -> v128 {
+    let a = a.as_u8x16();
+    let b = b.as_u8x16();
+    unsafe { simd_select::<simd::m8x16, simd::u8x16>(simd_lt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise signed integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.max_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.max_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_max(a: v128, b: v128) -> v128 {
+    let a = a.as_i8x16();
+    let b = b.as_i8x16();
+    unsafe { simd_select::<simd::m8x16, simd::i8x16>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise unsigned integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.max_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.max_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_max(a: v128, b: v128) -> v128 {
+    let a = a.as_u8x16();
+    let b = b.as_u8x16();
+    unsafe { simd_select::<simd::m8x16, simd::u8x16>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Lane-wise rounding average.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.avgr_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.avgr_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_avgr(a: v128, b: v128) -> v128 {
+    unsafe { llvm_avgr_u_i8x16(a.as_i8x16(), b.as_i8x16()).v128() }
+}
+
+/// Integer extended pairwise addition producing extended results
+/// (twice wider results than the inputs).
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extadd_pairwise_i8x16_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extadd_pairwise_i8x16(a: v128) -> v128 { + unsafe { llvm_i16x8_extadd_pairwise_i8x16_s(a.as_i8x16()).v128() } +} + +/// Integer extended pairwise addition producing extended results +/// (twice wider results than the inputs). +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extadd_pairwise_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extadd_pairwise_u8x16(a: v128) -> v128 { + unsafe { llvm_i16x8_extadd_pairwise_i8x16_u(a.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16; + +/// Lane-wise wrapping absolute value. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.abs))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.abs"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_abs(a: v128) -> v128 { + let a = a.as_i16x8(); + let zero = simd::i16x8::ZERO; + unsafe { + simd_select::(simd_lt(a, zero), simd_sub(zero, a), a).v128() + } +} + +/// Negates a 128-bit vectors interpreted as eight 16-bit signed integers +#[inline] +#[cfg_attr(test, assert_instr(i16x8.neg))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.neg"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_neg(a: v128) -> v128 { + unsafe { simd_mul(a.as_i16x8(), simd::i16x8::splat(-1)).v128() } +} + +/// Lane-wise saturating rounding multiplication in Q15 format. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.q15mulr_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.q15mulr_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_q15mulr_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_q15mulr(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Returns true if all lanes are non-zero, false otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.all_true))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.all_true"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_all_true(a: v128) -> bool { + unsafe { llvm_i16x8_all_true(a.as_i16x8()) != 0 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_all_true as u16x8_all_true; + +/// Extracts the high bit for each lane in `a` and produce a scalar mask with +/// all bits concatenated. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.bitmask))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.bitmask"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_bitmask(a: v128) -> u8 { + unsafe { llvm_bitmask_i16x8(a.as_i16x8()) as u8 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_bitmask as u16x8_bitmask; + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x7fff or 0x8000 is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.narrow_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_narrow_i32x4(a: v128, b: v128) -> v128 { + unsafe { llvm_narrow_i16x8_s(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x0000 or 0xffff is used and the input lanes are always +/// interpreted as signed integers. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.narrow_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_narrow_i32x4(a: v128, b: v128) -> v128 { + unsafe { llvm_narrow_i16x8_u(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extend_low_i8x16_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extend_low_i8x16_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extend_low_i8x16(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_i8x16(), + a.as_i8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )) + .v128() + } +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extend_high_i8x16_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extend_high_i8x16_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extend_high_i8x16(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_i8x16(), + a.as_i8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )) + .v128() + } +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extend_low_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extend_low_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extend_low_u8x16(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_u8x16(), + a.as_u8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extend_low_u8x16 as u16x8_extend_low_u8x16; + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.extend_high_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extend_high_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extend_high_u8x16(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_u8x16(), + a.as_u8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16; + +/// Shifts each lane to the left by the specified number of bits. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.shl))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.shl"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_shl(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_shl as u16x8_shl; + +/// Shifts each lane to the right by the specified number of bits, sign +/// extending. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.shr_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.shr_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() } +} + +/// Shifts each lane to the right by the specified number of bits, shifting in +/// zeros. 
+/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.shr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.shr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat((amt & 0xf) as u16)).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed eight 16-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_add as u16x8_add; + +/// Adds two 128-bit vectors as if they were two packed eight 16-bit signed +/// integers, saturating on overflow to `i16::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.add_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.add_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_add_sat(a: v128, b: v128) -> v128 { + unsafe { simd_saturating_add(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed eight 16-bit unsigned +/// integers, saturating on overflow to `u16::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.add_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.add_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_add_sat(a: v128, b: v128) -> v128 { + unsafe { simd_saturating_add(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed eight 16-bit integers. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_sub as u16x8_sub; + +/// Subtracts two 128-bit vectors as if they were two packed eight 16-bit +/// signed integers, saturating on overflow to `i16::MIN`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.sub_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.sub_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_sub_sat(a: v128, b: v128) -> v128 { + unsafe { simd_saturating_sub(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed eight 16-bit +/// unsigned integers, saturating on overflow to 0. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.sub_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.sub_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_sub_sat(a: v128, b: v128) -> v128 { + unsafe { simd_saturating_sub(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Multiplies two 128-bit vectors as if they were two packed eight 16-bit +/// signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.mul))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.mul"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_mul(a: v128, b: v128) -> v128 { + unsafe { simd_mul(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_mul as u16x8_mul; + +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.min_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.min_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_min(a: v128, b: v128) -> v128 { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + unsafe { simd_select::(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.min_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.min_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_min(a: v128, b: v128) -> v128 { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + unsafe { simd_select::(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise signed integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.max_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.max_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_max(a: v128, b: v128) -> v128 { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + unsafe { simd_select::(simd_gt(a, b), a, b).v128() } +} + +/// Compares lane-wise unsigned integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.max_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.max_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_max(a: v128, b: v128) -> v128 { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + unsafe { simd_select::(simd_gt(a, b), a, b).v128() } +} + +/// Lane-wise rounding average. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.avgr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.avgr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_avgr(a: v128, b: v128) -> v128 { + unsafe { llvm_avgr_u_i16x8(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i16x8_mul(i16x8_extend_low_i8x16(a), i16x8_extend_low_i8x16(b))` +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extmul_low_i8x16_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extmul_low_i8x16_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extmul_low_i8x16(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_i8x16(), + a.as_i8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_i8x16(), + b.as_i8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )); + simd_mul(lhs, rhs).v128() + } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i16x8_mul(i16x8_extend_high_i8x16(a), i16x8_extend_high_i8x16(b))` +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extmul_high_i8x16_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extmul_high_i8x16_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extmul_high_i8x16(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_i8x16(), + a.as_i8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_i8x16(), + b.as_i8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )); + simd_mul(lhs, rhs).v128() + } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. 
+/// +/// Equivalent of `i16x8_mul(i16x8_extend_low_u8x16(a), i16x8_extend_low_u8x16(b))` +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extmul_low_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extmul_low_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extmul_low_u8x16(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_u8x16(), + a.as_u8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_u8x16(), + b.as_u8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )); + simd_mul(lhs, rhs).v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extmul_low_u8x16 as u16x8_extmul_low_u8x16; + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i16x8_mul(i16x8_extend_high_u8x16(a), i16x8_extend_high_u8x16(b))` +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extmul_high_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extmul_high_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extmul_high_u8x16(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_u8x16(), + a.as_u8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_u8x16(), + b.as_u8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )); + simd_mul(lhs, rhs).v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extmul_high_u8x16 as u16x8_extmul_high_u8x16; + +/// Integer extended pairwise addition producing extended results +/// (twice wider results than the inputs). 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extadd_pairwise_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extadd_pairwise_i16x8(a: v128) -> v128 { + unsafe { llvm_i32x4_extadd_pairwise_i16x8_s(a.as_i16x8()).v128() } +} + +/// Integer extended pairwise addition producing extended results +/// (twice wider results than the inputs). +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_u))] +#[doc(alias("i32x4.extadd_pairwise_i16x8_u"))] +#[target_feature(enable = "simd128")] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extadd_pairwise_u16x8(a: v128) -> v128 { + unsafe { llvm_i32x4_extadd_pairwise_i16x8_u(a.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8; + +/// Lane-wise wrapping absolute value. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.abs))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.abs"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_abs(a: v128) -> v128 { + let a = a.as_i32x4(); + let zero = simd::i32x4::ZERO; + unsafe { + simd_select::(simd_lt(a, zero), simd_sub(zero, a), a).v128() + } +} + +/// Negates a 128-bit vectors interpreted as four 32-bit signed integers +#[inline] +#[cfg_attr(test, assert_instr(i32x4.neg))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.neg"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_neg(a: v128) -> v128 { + unsafe { simd_mul(a.as_i32x4(), simd::i32x4::splat(-1)).v128() } +} + +/// Returns true if all lanes are non-zero, false otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.all_true))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.all_true"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_all_true(a: v128) -> bool { + unsafe { llvm_i32x4_all_true(a.as_i32x4()) != 0 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_all_true as u32x4_all_true; + +/// Extracts the high bit for each lane in `a` and produce a scalar mask with +/// all bits concatenated. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.bitmask))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.bitmask"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_bitmask(a: v128) -> u8 { + unsafe { llvm_bitmask_i32x4(a.as_i32x4()) as u8 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_bitmask as u32x4_bitmask; + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extend_low_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extend_low_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extend_low_i16x8(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_i16x8(), + a.as_i16x8(), + [0, 1, 2, 3] + )) + .v128() + } +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extend_high_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extend_high_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extend_high_i16x8(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_i16x8(), + a.as_i16x8(), + [4, 5, 6, 7] + )) + .v128() + } +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, zero extended. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.extend_low_i16x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extend_low_i16x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extend_low_u16x8(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_u16x8(), + a.as_u16x8(), + [0, 1, 2, 3] + )) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_extend_low_u16x8 as u32x4_extend_low_u16x8; + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extend_high_i16x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extend_high_i16x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extend_high_u16x8(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_u16x8(), + a.as_u16x8(), + [4, 5, 6, 7] + )) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8; + +/// Shifts each lane to the left by the specified number of bits. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.shl))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.shl"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_shl(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_shl as u32x4_shl; + +/// Shifts each lane to the right by the specified number of bits, sign +/// extending. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.shr_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.shr_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() } +} + +/// Shifts each lane to the right by the specified number of bits, shifting in +/// zeros. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.shr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.shr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt & 0x1f)).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed four 32-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_i32x4(), b.as_i32x4()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_add as u32x4_add; + +/// Subtracts two 128-bit vectors as if they were two packed four 32-bit integers. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_i32x4(), b.as_i32x4()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_sub as u32x4_sub; + +/// Multiplies two 128-bit vectors as if they were two packed four 32-bit +/// signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.mul))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.mul"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_mul(a: v128, b: v128) -> v128 { + unsafe { simd_mul(a.as_i32x4(), b.as_i32x4()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_mul as u32x4_mul; + +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.min_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.min_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_min(a: v128, b: v128) -> v128 { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + unsafe { simd_select::(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.min_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.min_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_min(a: v128, b: v128) -> v128 { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + unsafe { simd_select::(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise signed integers, and returns the maximum of +/// each pair. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.max_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.max_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_max(a: v128, b: v128) -> v128 { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + unsafe { simd_select::(simd_gt(a, b), a, b).v128() } +} + +/// Compares lane-wise unsigned integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.max_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.max_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_max(a: v128, b: v128) -> v128 { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + unsafe { simd_select::(simd_gt(a, b), a, b).v128() } +} + +/// Lane-wise multiply signed 16-bit integers in the two input vectors and add +/// adjacent pairs of the full 32-bit results. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.dot_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.dot_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_dot_i16x8(a: v128, b: v128) -> v128 { + unsafe { llvm_i32x4_dot_i16x8_s(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i32x4_mul(i32x4_extend_low_i16x8_s(a), i32x4_extend_low_i16x8_s(b))` +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extmul_low_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extmul_low_i16x8(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_i16x8(), + a.as_i16x8(), + [0, 1, 2, 3] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_i16x8(), + b.as_i16x8(), + [0, 1, 2, 3] + )); + simd_mul(lhs, rhs).v128() + } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. 
+/// +/// Equivalent of `i32x4_mul(i32x4_extend_high_i16x8_s(a), i32x4_extend_high_i16x8_s(b))` +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extmul_high_i16x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extmul_high_i16x8(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_i16x8(), + a.as_i16x8(), + [4, 5, 6, 7] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_i16x8(), + b.as_i16x8(), + [4, 5, 6, 7] + )); + simd_mul(lhs, rhs).v128() + } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i32x4_mul(i32x4_extend_low_u16x8(a), i32x4_extend_low_u16x8(b))` +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extmul_low_i16x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extmul_low_u16x8(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_u16x8(), + a.as_u16x8(), + [0, 1, 2, 3] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_u16x8(), + b.as_u16x8(), + [0, 1, 2, 3] + )); + simd_mul(lhs, rhs).v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_extmul_low_u16x8 as u32x4_extmul_low_u16x8; + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. 
+/// +/// Equivalent of `i32x4_mul(i32x4_extend_high_u16x8(a), i32x4_extend_high_u16x8(b))` +#[inline] +#[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.extmul_high_i16x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_extmul_high_u16x8(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_u16x8(), + a.as_u16x8(), + [4, 5, 6, 7] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_u16x8(), + b.as_u16x8(), + [4, 5, 6, 7] + )); + simd_mul(lhs, rhs).v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8; + +/// Lane-wise wrapping absolute value. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.abs))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.abs"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_abs(a: v128) -> v128 { + let a = a.as_i64x2(); + let zero = simd::i64x2::ZERO; + unsafe { + simd_select::(simd_lt(a, zero), simd_sub(zero, a), a).v128() + } +} + +/// Negates a 128-bit vectors interpreted as two 64-bit signed integers +#[inline] +#[cfg_attr(test, assert_instr(i64x2.neg))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.neg"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_neg(a: v128) -> v128 { + unsafe { simd_mul(a.as_i64x2(), simd::i64x2::splat(-1)).v128() } +} + +/// Returns true if all lanes are non-zero, false otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.all_true))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.all_true"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_all_true(a: v128) -> bool { + unsafe { llvm_i64x2_all_true(a.as_i64x2()) != 0 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_all_true as u64x2_all_true; + +/// Extracts the high bit for each lane in `a` and produce a scalar mask with +/// all bits concatenated. 
+#[inline] +#[cfg_attr(test, assert_instr(i64x2.bitmask))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.bitmask"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_bitmask(a: v128) -> u8 { + unsafe { llvm_bitmask_i64x2(a.as_i64x2()) as u8 } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_bitmask as u64x2_bitmask; + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_low_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_low_i32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_i32x4(), a.as_i32x4(), [0, 1])) + .v128() + } +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_high_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_high_i32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_i32x4(), a.as_i32x4(), [2, 3])) + .v128() + } +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_low_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_low_u32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_u32x4(), a.as_u32x4(), [0, 1])) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_extend_low_u32x4 as u64x2_extend_low_u32x4; + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. 
+#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_high_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_high_u32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_u32x4(), a.as_u32x4(), [2, 3])) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4; + +/// Shifts each lane to the left by the specified number of bits. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.shl))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.shl"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_shl(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_shl as u64x2_shl; + +/// Shifts each lane to the right by the specified number of bits, sign +/// extending. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.shr_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.shr_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() } +} + +/// Shifts each lane to the right by the specified number of bits, shifting in +/// zeros. 
+/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.shr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.shr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u64x2_shr(a: v128, amt: u32) -> v128 { + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat((amt & 0x3f) as u64)).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed two 64-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_i64x2(), b.as_i64x2()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_add as u64x2_add; + +/// Subtracts two 128-bit vectors as if they were two packed two 64-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_i64x2(), b.as_i64x2()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_sub as u64x2_sub; + +/// Multiplies two 128-bit vectors as if they were two packed two 64-bit integers. 
+#[inline] +#[cfg_attr(test, assert_instr(i64x2.mul))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.mul"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_mul(a: v128, b: v128) -> v128 { + unsafe { simd_mul(a.as_i64x2(), b.as_i64x2()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_mul as u64x2_mul; + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i64x2_mul(i64x2_extend_low_i32x4_s(a), i64x2_extend_low_i32x4_s(b))` +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extmul_low_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extmul_low_i32x4(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_i32x4(), + a.as_i32x4(), + [0, 1] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_i32x4(), + b.as_i32x4(), + [0, 1] + )); + simd_mul(lhs, rhs).v128() + } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i64x2_mul(i64x2_extend_high_i32x4_s(a), i64x2_extend_high_i32x4_s(b))` +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extmul_high_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extmul_high_i32x4(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_i32x4(), + a.as_i32x4(), + [2, 3] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_i32x4(), + b.as_i32x4(), + [2, 3] + )); + simd_mul(lhs, rhs).v128() + } +} + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. 
+/// +/// Equivalent of `i64x2_mul(i64x2_extend_low_i32x4_u(a), i64x2_extend_low_i32x4_u(b))` +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extmul_low_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extmul_low_u32x4(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_u32x4(), + a.as_u32x4(), + [0, 1] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_u32x4(), + b.as_u32x4(), + [0, 1] + )); + simd_mul(lhs, rhs).v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_extmul_low_u32x4 as u64x2_extmul_low_u32x4; + +/// Lane-wise integer extended multiplication producing twice wider result than +/// the inputs. +/// +/// Equivalent of `i64x2_mul(i64x2_extend_high_i32x4_u(a), i64x2_extend_high_i32x4_u(b))` +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extmul_high_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extmul_high_u32x4(a: v128, b: v128) -> v128 { + unsafe { + let lhs = simd_cast::(simd_shuffle!( + a.as_u32x4(), + a.as_u32x4(), + [2, 3] + )); + let rhs = simd_cast::(simd_shuffle!( + b.as_u32x4(), + b.as_u32x4(), + [2, 3] + )); + simd_mul(lhs, rhs).v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_extmul_high_u32x4 as u64x2_extmul_high_u32x4; + +/// Lane-wise rounding to the nearest integral value not smaller than the input. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.ceil))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.ceil"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_ceil(a: v128) -> v128 { + unsafe { simd_ceil(a.as_f32x4()).v128() } +} + +/// Lane-wise rounding to the nearest integral value not greater than the input. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.floor))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.floor"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_floor(a: v128) -> v128 { + unsafe { simd_floor(a.as_f32x4()).v128() } +} + +/// Lane-wise rounding to the nearest integral value with the magnitude not +/// larger than the input. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.trunc))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.trunc"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_trunc(a: v128) -> v128 { + unsafe { simd_trunc(a.as_f32x4()).v128() } +} + +/// Lane-wise rounding to the nearest integral value; if two values are equally +/// near, rounds to the even one. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.nearest))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.nearest"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_nearest(a: v128) -> v128 { + unsafe { llvm_f32x4_nearest(a.as_f32x4()).v128() } +} + +/// Calculates the absolute value of each lane of a 128-bit vector interpreted +/// as four 32-bit floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.abs))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.abs"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_abs(a: v128) -> v128 { + unsafe { simd_fabs(a.as_f32x4()).v128() } +} + +/// Negates each lane of a 128-bit vector interpreted as four 32-bit floating +/// point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.neg))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.neg"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_neg(a: v128) -> v128 { + unsafe { simd_neg(a.as_f32x4()).v128() } +} + +/// Calculates the square root of each lane of a 128-bit vector interpreted as +/// four 32-bit floating point numbers. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.sqrt))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.sqrt"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_sqrt(a: v128) -> v128 { + unsafe { simd_fsqrt(a.as_f32x4()).v128() } +} + +/// Lane-wise addition of two 128-bit vectors interpreted as four 32-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Lane-wise subtraction of two 128-bit vectors interpreted as four 32-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Lane-wise multiplication of two 128-bit vectors interpreted as four 32-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.mul))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.mul"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_mul(a: v128, b: v128) -> v128 { + unsafe { simd_mul(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Lane-wise division of two 128-bit vectors interpreted as four 32-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.div))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.div"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_div(a: v128, b: v128) -> v128 { + unsafe { simd_div(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Calculates the lane-wise minimum of two 128-bit vectors interpreted +/// as four 32-bit floating point numbers. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.min))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.min"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_min(a: v128, b: v128) -> v128 { + unsafe { llvm_f32x4_min(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Calculates the lane-wise minimum of two 128-bit vectors interpreted +/// as four 32-bit floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.max))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.max"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_max(a: v128, b: v128) -> v128 { + unsafe { llvm_f32x4_max(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Lane-wise minimum value, defined as `b < a ? b : a` +#[inline] +#[cfg_attr(test, assert_instr(f32x4.pmin))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.pmin"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_pmin(a: v128, b: v128) -> v128 { + unsafe { + simd_select::( + simd_lt(b.as_f32x4(), a.as_f32x4()), + b.as_f32x4(), + a.as_f32x4(), + ) + .v128() + } +} + +/// Lane-wise maximum value, defined as `a < b ? b : a` +#[inline] +#[cfg_attr(test, assert_instr(f32x4.pmax))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.pmax"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_pmax(a: v128, b: v128) -> v128 { + unsafe { + simd_select::( + simd_lt(a.as_f32x4(), b.as_f32x4()), + b.as_f32x4(), + a.as_f32x4(), + ) + .v128() + } +} + +/// Lane-wise rounding to the nearest integral value not smaller than the input. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.ceil))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.ceil"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_ceil(a: v128) -> v128 { + unsafe { simd_ceil(a.as_f64x2()).v128() } +} + +/// Lane-wise rounding to the nearest integral value not greater than the input. 
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.floor))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.floor"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_floor(a: v128) -> v128 { + unsafe { simd_floor(a.as_f64x2()).v128() } +} + +/// Lane-wise rounding to the nearest integral value with the magnitude not +/// larger than the input. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.trunc))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.trunc"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_trunc(a: v128) -> v128 { + unsafe { simd_trunc(a.as_f64x2()).v128() } +} + +/// Lane-wise rounding to the nearest integral value; if two values are equally +/// near, rounds to the even one. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.nearest))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.nearest"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_nearest(a: v128) -> v128 { + unsafe { llvm_f64x2_nearest(a.as_f64x2()).v128() } +} + +/// Calculates the absolute value of each lane of a 128-bit vector interpreted +/// as two 64-bit floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.abs))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.abs"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_abs(a: v128) -> v128 { + unsafe { simd_fabs(a.as_f64x2()).v128() } +} + +/// Negates each lane of a 128-bit vector interpreted as two 64-bit floating +/// point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.neg))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.neg"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_neg(a: v128) -> v128 { + unsafe { simd_neg(a.as_f64x2()).v128() } +} + +/// Calculates the square root of each lane of a 128-bit vector interpreted as +/// two 64-bit floating point numbers. 
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.sqrt))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.sqrt"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_sqrt(a: v128) -> v128 { + unsafe { simd_fsqrt(a.as_f64x2()).v128() } +} + +/// Lane-wise add of two 128-bit vectors interpreted as two 64-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Lane-wise subtract of two 128-bit vectors interpreted as two 64-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Lane-wise multiply of two 128-bit vectors interpreted as two 64-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.mul))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.mul"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_mul(a: v128, b: v128) -> v128 { + unsafe { simd_mul(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Lane-wise divide of two 128-bit vectors interpreted as two 64-bit +/// floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.div))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.div"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_div(a: v128, b: v128) -> v128 { + unsafe { simd_div(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Calculates the lane-wise minimum of two 128-bit vectors interpreted +/// as two 64-bit floating point numbers. 
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.min))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.min"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_min(a: v128, b: v128) -> v128 { + unsafe { llvm_f64x2_min(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Calculates the lane-wise maximum of two 128-bit vectors interpreted +/// as two 64-bit floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.max))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.max"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_max(a: v128, b: v128) -> v128 { + unsafe { llvm_f64x2_max(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Lane-wise minimum value, defined as `b < a ? b : a` +#[inline] +#[cfg_attr(test, assert_instr(f64x2.pmin))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.pmin"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_pmin(a: v128, b: v128) -> v128 { + unsafe { + simd_select::( + simd_lt(b.as_f64x2(), a.as_f64x2()), + b.as_f64x2(), + a.as_f64x2(), + ) + .v128() + } +} + +/// Lane-wise maximum value, defined as `a < b ? b : a` +#[inline] +#[cfg_attr(test, assert_instr(f64x2.pmax))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.pmax"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_pmax(a: v128, b: v128) -> v128 { + unsafe { + simd_select::( + simd_lt(a.as_f64x2(), b.as_f64x2()), + b.as_f64x2(), + a.as_f64x2(), + ) + .v128() + } +} + +/// Converts a 128-bit vector interpreted as four 32-bit floating point numbers +/// into a 128-bit vector of four 32-bit signed integers. +/// +/// NaN is converted to 0 and if it's out of bounds it becomes the nearest +/// representable intger. 
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.trunc_sat_f32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_trunc_sat_f32x4(a: v128) -> v128 { + unsafe { simd_as::(a.as_f32x4()).v128() } +} + +/// Converts a 128-bit vector interpreted as four 32-bit floating point numbers +/// into a 128-bit vector of four 32-bit unsigned integers. +/// +/// NaN is converted to 0 and if it's out of bounds it becomes the nearest +/// representable intger. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.trunc_sat_f32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_trunc_sat_f32x4(a: v128) -> v128 { + unsafe { simd_as::(a.as_f32x4()).v128() } +} + +/// Converts a 128-bit vector interpreted as four 32-bit signed integers into a +/// 128-bit vector of four 32-bit floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.convert_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.convert_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_convert_i32x4(a: v128) -> v128 { + unsafe { simd_cast::<_, simd::f32x4>(a.as_i32x4()).v128() } +} + +/// Converts a 128-bit vector interpreted as four 32-bit unsigned integers into a +/// 128-bit vector of four 32-bit floating point numbers. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.convert_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.convert_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_convert_u32x4(a: v128) -> v128 { + unsafe { simd_cast::<_, simd::f32x4>(a.as_u32x4()).v128() } +} + +/// Saturating conversion of the two double-precision floating point lanes to +/// two lower integer lanes using the IEEE `convertToIntegerTowardZero` +/// function. +/// +/// The two higher lanes of the result are initialized to zero. 
If any input +/// lane is a NaN, the resulting lane is 0. If the rounded integer value of a +/// lane is outside the range of the destination type, the result is saturated +/// to the nearest representable integer value. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.trunc_sat_f64x2_s_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { + let ret: simd::i32x4 = unsafe { + simd_shuffle!( + simd_as::(a.as_f64x2()), + simd::i32x2::ZERO, + [0, 1, 2, 3], + ) + }; + ret.v128() +} + +/// Saturating conversion of the two double-precision floating point lanes to +/// two lower integer lanes using the IEEE `convertToIntegerTowardZero` +/// function. +/// +/// The two higher lanes of the result are initialized to zero. If any input +/// lane is a NaN, the resulting lane is 0. If the rounded integer value of a +/// lane is outside the range of the destination type, the result is saturated +/// to the nearest representable integer value. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.trunc_sat_f64x2_u_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { + let ret: simd::u32x4 = unsafe { + simd_shuffle!( + simd_as::(a.as_f64x2()), + simd::u32x2::ZERO, + [0, 1, 2, 3], + ) + }; + ret.v128() +} + +/// Lane-wise conversion from integer to floating point. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.convert_low_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_convert_low_i32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_i32x4(), a.as_i32x4(), [0, 1],)) + .v128() + } +} + +/// Lane-wise conversion from integer to floating point. 
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.convert_low_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_convert_low_u32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_u32x4(), a.as_u32x4(), [0, 1],)) + .v128() + } +} + +/// Conversion of the two double-precision floating point lanes to two lower +/// single-precision lanes of the result. The two higher lanes of the result are +/// initialized to zero. If the conversion result is not representable as a +/// single-precision floating point number, it is rounded to the nearest-even +/// representable number. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.demote_f64x2_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.demote_f64x2_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!( + a.as_f64x2(), + simd::f64x2::ZERO, + [0, 1, 2, 3] + )) + .v128() + } +} + +/// Conversion of the two lower single-precision floating point lanes to the two +/// double-precision lanes of the result. 
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.promote_low_f32x4))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.promote_low_f32x4"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_promote_low_f32x4(a: v128) -> v128 { + unsafe { + simd_cast::(simd_shuffle!(a.as_f32x4(), a.as_f32x4(), [0, 1])) + .v128() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::ops::{Add, Div, Mul, Neg, Sub}; + + use std::fmt::Debug; + use std::mem::transmute; + use std::num::Wrapping; + use std::prelude::v1::*; + + const _C1: v128 = i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const _C2: v128 = u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const _C3: v128 = i16x8(0, 1, 2, 3, 4, 5, 6, 7); + const _C4: v128 = u16x8(0, 1, 2, 3, 4, 5, 6, 7); + const _C5: v128 = i32x4(0, 1, 2, 3); + const _C6: v128 = u32x4(0, 1, 2, 3); + const _C7: v128 = i64x2(0, 1); + const _C8: v128 = u64x2(0, 1); + const _C9: v128 = f32x4(0.0, 1.0, 2.0, 3.0); + const _C10: v128 = f64x2(0.0, 1.0); + + fn compare_bytes(a: v128, b: v128) { + let a: [u8; 16] = unsafe { transmute(a) }; + let b: [u8; 16] = unsafe { transmute(b) }; + assert_eq!(a, b); + } + + #[test] + fn test_load() { + unsafe { + let arr: [i32; 4] = [0, 1, 2, 3]; + let vec = v128_load(arr.as_ptr() as *const v128); + compare_bytes(vec, i32x4(0, 1, 2, 3)); + } + } + + #[test] + fn test_load_extend() { + unsafe { + let arr: [i8; 8] = [-3, -2, -1, 0, 1, 2, 3, 4]; + let vec = i16x8_load_extend_i8x8(arr.as_ptr()); + compare_bytes(vec, i16x8(-3, -2, -1, 0, 1, 2, 3, 4)); + let vec = i16x8_load_extend_u8x8(arr.as_ptr() as *const u8); + compare_bytes(vec, i16x8(253, 254, 255, 0, 1, 2, 3, 4)); + + let arr: [i16; 4] = [-1, 0, 1, 2]; + let vec = i32x4_load_extend_i16x4(arr.as_ptr()); + compare_bytes(vec, i32x4(-1, 0, 1, 2)); + let vec = i32x4_load_extend_u16x4(arr.as_ptr() as *const u16); + compare_bytes(vec, i32x4(65535, 0, 1, 2)); + + let arr: [i32; 2] = [-1, 1]; + let vec = 
i64x2_load_extend_i32x2(arr.as_ptr()); + compare_bytes(vec, i64x2(-1, 1)); + let vec = i64x2_load_extend_u32x2(arr.as_ptr() as *const u32); + compare_bytes(vec, i64x2(u32::max_value().into(), 1)); + } + } + + #[test] + fn test_load_splat() { + unsafe { + compare_bytes(v128_load8_splat(&8), i8x16_splat(8)); + compare_bytes(v128_load16_splat(&9), i16x8_splat(9)); + compare_bytes(v128_load32_splat(&10), i32x4_splat(10)); + compare_bytes(v128_load64_splat(&11), i64x2_splat(11)); + } + } + + #[test] + fn test_load_zero() { + unsafe { + compare_bytes(v128_load32_zero(&10), i32x4(10, 0, 0, 0)); + compare_bytes(v128_load64_zero(&11), i64x2(11, 0)); + } + } + + #[test] + fn test_store() { + unsafe { + let mut spot = i8x16_splat(0); + v128_store(&mut spot, i8x16_splat(1)); + compare_bytes(spot, i8x16_splat(1)); + } + } + + #[test] + fn test_load_lane() { + unsafe { + let zero = i8x16_splat(0); + compare_bytes( + v128_load8_lane::<2>(zero, &1), + i8x16_replace_lane::<2>(zero, 1), + ); + + compare_bytes( + v128_load16_lane::<2>(zero, &1), + i16x8_replace_lane::<2>(zero, 1), + ); + + compare_bytes( + v128_load32_lane::<2>(zero, &1), + i32x4_replace_lane::<2>(zero, 1), + ); + + compare_bytes( + v128_load64_lane::<1>(zero, &1), + i64x2_replace_lane::<1>(zero, 1), + ); + } + } + + #[test] + fn test_store_lane() { + unsafe { + let mut spot = 0; + let zero = i8x16_splat(0); + v128_store8_lane::<5>(i8x16_replace_lane::<5>(zero, 7), &mut spot); + assert_eq!(spot, 7); + + let mut spot = 0; + v128_store16_lane::<5>(i16x8_replace_lane::<5>(zero, 7), &mut spot); + assert_eq!(spot, 7); + + let mut spot = 0; + v128_store32_lane::<3>(i32x4_replace_lane::<3>(zero, 7), &mut spot); + assert_eq!(spot, 7); + + let mut spot = 0; + v128_store64_lane::<0>(i64x2_replace_lane::<0>(zero, 7), &mut spot); + assert_eq!(spot, 7); + } + } + + #[test] + fn test_i8x16() { + const A: v128 = super::i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + compare_bytes(A, A); + + const _: v128 = i16x8(0, 
1, 2, 3, 4, 5, 6, 7); + const _: v128 = i32x4(0, 1, 2, 3); + const _: v128 = i64x2(0, 1); + const _: v128 = f32x4(0., 1., 2., 3.); + const _: v128 = f64x2(0., 1.); + + let bytes: [i16; 8] = unsafe { mem::transmute(i16x8(-1, -2, -3, -4, -5, -6, -7, -8)) }; + assert_eq!(bytes, [-1, -2, -3, -4, -5, -6, -7, -8]); + let bytes: [i8; 16] = unsafe { + mem::transmute(i8x16( + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, + )) + }; + assert_eq!( + bytes, + [ + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16 + ] + ); + } + + #[test] + fn test_shuffle() { + let vec_a = i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let vec_b = i8x16( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + + let vec_r = i8x16_shuffle::<0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30>( + vec_a, vec_b, + ); + let vec_e = i8x16(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); + compare_bytes(vec_r, vec_e); + + let vec_a = i16x8(0, 1, 2, 3, 4, 5, 6, 7); + let vec_b = i16x8(8, 9, 10, 11, 12, 13, 14, 15); + let vec_r = i16x8_shuffle::<0, 8, 2, 10, 4, 12, 6, 14>(vec_a, vec_b); + let vec_e = i16x8(0, 8, 2, 10, 4, 12, 6, 14); + compare_bytes(vec_r, vec_e); + + let vec_a = i32x4(0, 1, 2, 3); + let vec_b = i32x4(4, 5, 6, 7); + let vec_r = i32x4_shuffle::<0, 4, 2, 6>(vec_a, vec_b); + let vec_e = i32x4(0, 4, 2, 6); + compare_bytes(vec_r, vec_e); + + let vec_a = i64x2(0, 1); + let vec_b = i64x2(2, 3); + let vec_r = i64x2_shuffle::<0, 2>(vec_a, vec_b); + let vec_e = i64x2(0, 2); + compare_bytes(vec_r, vec_e); + } + + // tests extract and replace lanes + macro_rules! 
test_extract { + ( + name: $test_id:ident, + extract: $extract:ident, + replace: $replace:ident, + elem: $elem:ty, + count: $count:expr, + indices: [$($idx:expr),*], + ) => { + #[test] + fn $test_id() { + unsafe { + let arr: [$elem; $count] = [123 as $elem; $count]; + let vec: v128 = transmute(arr); + $( + assert_eq!($extract::<$idx>(vec), 123 as $elem); + )* + + // create a vector from array and check that the indices contain + // the same values as in the array: + let arr: [$elem; $count] = [$($idx as $elem),*]; + let vec: v128 = transmute(arr); + $( + assert_eq!($extract::<$idx>(vec), $idx as $elem); + + let tmp = $replace::<$idx>(vec, 124 as $elem); + assert_eq!($extract::<$idx>(tmp), 124 as $elem); + )* + } + } + } + } + + test_extract! { + name: test_i8x16_extract_replace, + extract: i8x16_extract_lane, + replace: i8x16_replace_lane, + elem: i8, + count: 16, + indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + } + test_extract! { + name: test_i16x8_extract_replace, + extract: i16x8_extract_lane, + replace: i16x8_replace_lane, + elem: i16, + count: 8, + indices: [0, 1, 2, 3, 4, 5, 6, 7], + } + test_extract! { + name: test_i32x4_extract_replace, + extract: i32x4_extract_lane, + replace: i32x4_replace_lane, + elem: i32, + count: 4, + indices: [0, 1, 2, 3], + } + test_extract! { + name: test_i64x2_extract_replace, + extract: i64x2_extract_lane, + replace: i64x2_replace_lane, + elem: i64, + count: 2, + indices: [0, 1], + } + test_extract! { + name: test_f32x4_extract_replace, + extract: f32x4_extract_lane, + replace: f32x4_replace_lane, + elem: f32, + count: 4, + indices: [0, 1, 2, 3], + } + test_extract! 
{ + name: test_f64x2_extract_replace, + extract: f64x2_extract_lane, + replace: f64x2_replace_lane, + elem: f64, + count: 2, + indices: [0, 1], + } + + #[test] + #[rustfmt::skip] + fn test_swizzle() { + compare_bytes( + i8x16_swizzle( + i32x4(1, 2, 3, 4), + i8x16( + 32, 31, 30, 29, + 0, 1, 2, 3, + 12, 13, 14, 15, + 0, 4, 8, 12), + ), + i32x4(0, 1, 4, 0x04030201), + ); + } + + macro_rules! test_splat { + ($test_id:ident: $val:expr => $($vals:expr),*) => { + #[test] + fn $test_id() { + let a = super::$test_id($val); + let b = u8x16($($vals as u8),*); + compare_bytes(a, b); + } + } + } + + mod splats { + use super::*; + test_splat!(i8x16_splat: 42 => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42); + test_splat!(i16x8_splat: 42 => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0); + test_splat!(i32x4_splat: 42 => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0); + test_splat!(i64x2_splat: 42 => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0); + test_splat!(f32x4_splat: 42. => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66); + test_splat!(f64x2_splat: 42. 
=> 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); + } + + #[test] + fn test_bitmasks() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + assert_eq!(i8x16_bitmask(zero), 0); + assert_eq!(i8x16_bitmask(ones), 0xffff); + assert_eq!(i8x16_bitmask(i8x16_splat(i8::MAX)), 0); + assert_eq!(i8x16_bitmask(i8x16_splat(i8::MIN)), 0xffff); + assert_eq!(i8x16_bitmask(i8x16_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i16x8_bitmask(zero), 0); + assert_eq!(i16x8_bitmask(ones), 0xff); + assert_eq!(i16x8_bitmask(i16x8_splat(i16::MAX)), 0); + assert_eq!(i16x8_bitmask(i16x8_splat(i16::MIN)), 0xff); + assert_eq!(i16x8_bitmask(i16x8_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i32x4_bitmask(zero), 0); + assert_eq!(i32x4_bitmask(ones), 0b1111); + assert_eq!(i32x4_bitmask(i32x4_splat(i32::MAX)), 0); + assert_eq!(i32x4_bitmask(i32x4_splat(i32::MIN)), 0b1111); + assert_eq!(i32x4_bitmask(i32x4_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i64x2_bitmask(zero), 0); + assert_eq!(i64x2_bitmask(ones), 0b11); + assert_eq!(i64x2_bitmask(i64x2_splat(i64::MAX)), 0); + assert_eq!(i64x2_bitmask(i64x2_splat(i64::MIN)), 0b11); + assert_eq!(i64x2_bitmask(i64x2_replace_lane::<1>(zero, -1)), 0b10); + } + + #[test] + fn test_narrow() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + compare_bytes(i8x16_narrow_i16x8(zero, zero), zero); + compare_bytes(u8x16_narrow_i16x8(zero, zero), zero); + compare_bytes(i8x16_narrow_i16x8(ones, ones), ones); + compare_bytes(u8x16_narrow_i16x8(ones, ones), zero); + + compare_bytes( + i8x16_narrow_i16x8( + i16x8( + 0, + 1, + 2, + -1, + i8::MIN.into(), + i8::MAX.into(), + u8::MIN.into(), + u8::MAX.into(), + ), + i16x8( + i16::MIN, + i16::MAX, + u16::MIN as i16, + u16::MAX as i16, + 0, + 0, + 0, + 0, + ), + ), + i8x16(0, 1, 2, -1, -128, 127, 0, 127, -128, 127, 0, -1, 0, 0, 0, 0), + ); + + compare_bytes( + u8x16_narrow_i16x8( + i16x8( + 0, + 1, + 2, + -1, + i8::MIN.into(), + i8::MAX.into(), + u8::MIN.into(), + 
u8::MAX.into(), + ), + i16x8( + i16::MIN, + i16::MAX, + u16::MIN as i16, + u16::MAX as i16, + 0, + 0, + 0, + 0, + ), + ), + i8x16(0, 1, 2, 0, 0, 127, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0), + ); + + compare_bytes(i16x8_narrow_i32x4(zero, zero), zero); + compare_bytes(u16x8_narrow_i32x4(zero, zero), zero); + compare_bytes(i16x8_narrow_i32x4(ones, ones), ones); + compare_bytes(u16x8_narrow_i32x4(ones, ones), zero); + + compare_bytes( + i16x8_narrow_i32x4( + i32x4(0, -1, i16::MIN.into(), i16::MAX.into()), + i32x4(i32::MIN, i32::MAX, u32::MIN as i32, u32::MAX as i32), + ), + i16x8(0, -1, i16::MIN, i16::MAX, i16::MIN, i16::MAX, 0, -1), + ); + + compare_bytes( + u16x8_narrow_i32x4( + i32x4(u16::MAX.into(), -1, i16::MIN.into(), i16::MAX.into()), + i32x4(i32::MIN, i32::MAX, u32::MIN as i32, u32::MAX as i32), + ), + i16x8(-1, 0, 0, i16::MAX, 0, -1, 0, 0), + ); + } + + #[test] + fn test_extend() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + compare_bytes(i16x8_extend_low_i8x16(zero), zero); + compare_bytes(i16x8_extend_high_i8x16(zero), zero); + compare_bytes(i16x8_extend_low_u8x16(zero), zero); + compare_bytes(i16x8_extend_high_u8x16(zero), zero); + compare_bytes(i16x8_extend_low_i8x16(ones), ones); + compare_bytes(i16x8_extend_high_i8x16(ones), ones); + let halves = u16x8_splat(u8::MAX.into()); + compare_bytes(i16x8_extend_low_u8x16(ones), halves); + compare_bytes(i16x8_extend_high_u8x16(ones), halves); + + compare_bytes(i32x4_extend_low_i16x8(zero), zero); + compare_bytes(i32x4_extend_high_i16x8(zero), zero); + compare_bytes(i32x4_extend_low_u16x8(zero), zero); + compare_bytes(i32x4_extend_high_u16x8(zero), zero); + compare_bytes(i32x4_extend_low_i16x8(ones), ones); + compare_bytes(i32x4_extend_high_i16x8(ones), ones); + let halves = u32x4_splat(u16::MAX.into()); + compare_bytes(i32x4_extend_low_u16x8(ones), halves); + compare_bytes(i32x4_extend_high_u16x8(ones), halves); + + compare_bytes(i64x2_extend_low_i32x4(zero), zero); + 
compare_bytes(i64x2_extend_high_i32x4(zero), zero); + compare_bytes(i64x2_extend_low_u32x4(zero), zero); + compare_bytes(i64x2_extend_high_u32x4(zero), zero); + compare_bytes(i64x2_extend_low_i32x4(ones), ones); + compare_bytes(i64x2_extend_high_i32x4(ones), ones); + let halves = i64x2_splat(u32::MAX.into()); + compare_bytes(u64x2_extend_low_u32x4(ones), halves); + compare_bytes(u64x2_extend_high_u32x4(ones), halves); + } + + #[test] + fn test_dot() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + let two = i32x4_splat(2); + compare_bytes(i32x4_dot_i16x8(zero, zero), zero); + compare_bytes(i32x4_dot_i16x8(ones, ones), two); + } + + macro_rules! test_binop { + ( + $($name:ident => { + $([$($vec1:tt)*] ($op:ident | $f:ident) [$($vec2:tt)*],)* + })* + ) => ($( + #[test] + fn $name() { + unsafe { + $( + let v1 = [$($vec1)*]; + let v2 = [$($vec2)*]; + let v1_v128: v128 = mem::transmute(v1); + let v2_v128: v128 = mem::transmute(v2); + let v3_v128 = super::$f(v1_v128, v2_v128); + let mut v3 = [$($vec1)*]; + let _ignore = v3; + v3 = mem::transmute(v3_v128); + + for (i, actual) in v3.iter().enumerate() { + let expected = v1[i].$op(v2[i]); + assert_eq!(*actual, expected); + } + )* + } + } + )*) + } + + macro_rules! test_unop { + ( + $($name:ident => { + $(($op:ident | $f:ident) [$($vec1:tt)*],)* + })* + ) => ($( + #[test] + fn $name() { + unsafe { + $( + let v1 = [$($vec1)*]; + let v1_v128: v128 = mem::transmute(v1); + let v2_v128 = super::$f(v1_v128); + let mut v2 = [$($vec1)*]; + let _ignore = v2; + v2 = mem::transmute(v2_v128); + + for (i, actual) in v2.iter().enumerate() { + let expected = v1[i].$op(); + assert_eq!(*actual, expected); + } + )* + } + } + )*) + } + + trait Avgr: Sized { + fn avgr(self, other: Self) -> Self; + } + + macro_rules! impl_avgr { + ($($i:ident)*) => ($(impl Avgr for $i { + fn avgr(self, other: Self) -> Self { + ((self as u64 + other as u64 + 1) / 2) as $i + } + })*) + } + + impl_avgr!(u8 u16); + + test_binop! 
{ + test_i8x16_add => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (wrapping_add | i8x16_add) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_add | i8x16_add) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_add | i8x16_add) + [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24], + } + + test_i8x16_add_sat_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i8x16_add_sat) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat) + [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24], + } + + test_i8x16_add_sat_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | u8x16_add_sat) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | u8x16_add_sat) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | u8x16_add_sat) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_sub => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (wrapping_sub | i8x16_sub) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_sub | i8x16_sub) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_sub | 
i8x16_sub) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_sub_sat_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i8x16_sub_sat) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_sub_sat_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | u8x16_sub_sat) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | u8x16_sub_sat) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | u8x16_sub_sat) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_min_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (min | i8x16_min) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_min_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (min | u8x16_min) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | u8x16_min) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16] + (min | u8x16_min) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_max_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (max | i8x16_max) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_max_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (max | u8x16_max) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | u8x16_max) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | u8x16_max) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_avgr_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (avgr | u8x16_avgr) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (avgr | u8x16_avgr) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (avgr | u8x16_avgr) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i16x8_add => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (wrapping_add | i16x8_add) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_add | i16x8_add) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_add_sat_s => { + [0i16, 0, 0, 0, 0, 0, 
0, 0] + (saturating_add | i16x8_add_sat) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (saturating_add | i16x8_add_sat) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_add_sat_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | u16x8_add_sat) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (saturating_add | u16x8_add_sat) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_sub => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (wrapping_sub | i16x8_sub) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_sub | i16x8_sub) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_sub_sat_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i16x8_sub_sat) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (saturating_sub | i16x8_sub_sat) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_sub_sat_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | u16x8_sub_sat) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (saturating_sub | u16x8_sub_sat) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_mul => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (wrapping_mul | i16x8_mul) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_mul | i16x8_mul) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_min_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (min | i16x8_min) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (min | i16x8_min) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_min_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (min | u16x8_min) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (min | u16x8_min) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_max_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (max | i16x8_max) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 
6, 7, 8] + (max | i16x8_max) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_max_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (max | u16x8_max) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (max | u16x8_max) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_avgr_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (avgr | u16x8_avgr) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (avgr | u16x8_avgr) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i32x4_add => { + [0i32, 0, 0, 0] (wrapping_add | i32x4_add) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_add | i32x4_add) + [i32::MAX; 4], + } + + test_i32x4_sub => { + [0i32, 0, 0, 0] (wrapping_sub | i32x4_sub) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_sub | i32x4_sub) + [i32::MAX; 4], + } + + test_i32x4_mul => { + [0i32, 0, 0, 0] (wrapping_mul | i32x4_mul) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_mul | i32x4_mul) + [i32::MAX; 4], + } + + test_i32x4_min_s => { + [0i32, 0, 0, 0] (min | i32x4_min) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (min | i32x4_min) + [i32::MAX; 4], + } + + test_i32x4_min_u => { + [0u32, 0, 0, 0] (min | u32x4_min) [1, 2, 3, 4], + [1u32, 1283, i32::MAX as u32, i32::MIN as u32] + (min | u32x4_min) + [i32::MAX as u32; 4], + } + + test_i32x4_max_s => { + [0i32, 0, 0, 0] (max | i32x4_max) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (max | i32x4_max) + [i32::MAX; 4], + } + + test_i32x4_max_u => { + [0u32, 0, 0, 0] (max | u32x4_max) [1, 2, 3, 4], + [1u32, 1283, i32::MAX as u32, i32::MIN as u32] + (max | u32x4_max) + [i32::MAX as u32; 4], + } + + test_i64x2_add => { + [0i64, 0] (wrapping_add | i64x2_add) [1, 2], + [i64::MIN, i64::MAX] (wrapping_add | i64x2_add) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_add | i64x2_add) [i64::MAX; 2], + [-4i64, -4] (wrapping_add | i64x2_add) [800, 939], + } + + test_i64x2_sub => { + 
[0i64, 0] (wrapping_sub | i64x2_sub) [1, 2], + [i64::MIN, i64::MAX] (wrapping_sub | i64x2_sub) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_sub | i64x2_sub) [i64::MAX; 2], + [-4i64, -4] (wrapping_sub | i64x2_sub) [800, 939], + } + + test_i64x2_mul => { + [0i64, 0] (wrapping_mul | i64x2_mul) [1, 2], + [i64::MIN, i64::MAX] (wrapping_mul | i64x2_mul) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_mul | i64x2_mul) [i64::MAX; 2], + [-4i64, -4] (wrapping_mul | i64x2_mul) [800, 939], + } + + test_f32x4_add => { + [-1.0f32, 2.0, 3.0, 4.0] (add | f32x4_add) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (add | f32x4_add) + [1., 2., 0., 0.], + } + + test_f32x4_sub => { + [-1.0f32, 2.0, 3.0, 4.0] (sub | f32x4_sub) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (sub | f32x4_sub) + [1., 2., 0., 0.], + } + + test_f32x4_mul => { + [-1.0f32, 2.0, 3.0, 4.0] (mul | f32x4_mul) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (mul | f32x4_mul) + [1., 2., 1., 0.], + } + + test_f32x4_div => { + [-1.0f32, 2.0, 3.0, 4.0] (div | f32x4_div) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (div | f32x4_div) + [1., 2., 0., 0.], + } + + test_f32x4_min => { + [-1.0f32, 2.0, 3.0, 4.0] (min | f32x4_min) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (min | f32x4_min) + [1., 2., 0., 0.], + } + + test_f32x4_max => { + [-1.0f32, 2.0, 3.0, 4.0] (max | f32x4_max) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (max | f32x4_max) + [1., 2., 0., 0.], + } + + test_f32x4_pmin => { + [-1.0f32, 2.0, 3.0, 4.0] (min | f32x4_pmin) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (min | f32x4_pmin) + [1., 2., 0., 0.], + } + + test_f32x4_pmax => { + [-1.0f32, 2.0, 3.0, 4.0] (max | f32x4_pmax) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (max | f32x4_pmax) + [1., 2., 0., 0.], + } + + test_f64x2_add => { + [-1.0f64, 2.0] (add | f64x2_add) [1., 2.], + 
[f64::INFINITY, f64::NEG_INFINITY] (add | f64x2_add) [1., 2.], + } + + test_f64x2_sub => { + [-1.0f64, 2.0] (sub | f64x2_sub) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (sub | f64x2_sub) [1., 2.], + } + + test_f64x2_mul => { + [-1.0f64, 2.0] (mul | f64x2_mul) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (mul | f64x2_mul) [1., 2.], + } + + test_f64x2_div => { + [-1.0f64, 2.0] (div | f64x2_div) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (div | f64x2_div) [1., 2.], + } + + test_f64x2_min => { + [-1.0f64, 2.0] (min | f64x2_min) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (min | f64x2_min) [1., 2.], + } + + test_f64x2_max => { + [-1.0f64, 2.0] (max | f64x2_max) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (max | f64x2_max) [1., 2.], + } + + test_f64x2_pmin => { + [-1.0f64, 2.0] (min | f64x2_pmin) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (min | f64x2_pmin) [1., 2.], + } + + test_f64x2_pmax => { + [-1.0f64, 2.0] (max | f64x2_pmax) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (max | f64x2_pmax) [1., 2.], + } + } + + test_unop! 
{ + test_i8x16_abs => { + (wrapping_abs | i8x16_abs) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + (wrapping_abs | i8x16_abs) + [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + (wrapping_abs | i8x16_abs) + [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13], + } + + test_i8x16_neg => { + (wrapping_neg | i8x16_neg) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + (wrapping_neg | i8x16_neg) + [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + (wrapping_neg | i8x16_neg) + [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13], + } + + test_i16x8_abs => { + (wrapping_abs | i16x8_abs) [1i16, 1, 1, 1, 1, 1, 1, 1], + (wrapping_abs | i16x8_abs) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + } + + test_i16x8_neg => { + (wrapping_neg | i16x8_neg) [1i16, 1, 1, 1, 1, 1, 1, 1], + (wrapping_neg | i16x8_neg) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + } + + test_i32x4_abs => { + (wrapping_abs | i32x4_abs) [1i32, 2, 3, 4], + (wrapping_abs | i32x4_abs) [i32::MIN, i32::MAX, 0, 4], + } + + test_i32x4_neg => { + (wrapping_neg | i32x4_neg) [1i32, 2, 3, 4], + (wrapping_neg | i32x4_neg) [i32::MIN, i32::MAX, 0, 4], + } + + test_i64x2_abs => { + (wrapping_abs | i64x2_abs) [1i64, 2], + (wrapping_abs | i64x2_abs) [i64::MIN, i64::MAX], + } + + test_i64x2_neg => { + (wrapping_neg | i64x2_neg) [1i64, 2], + (wrapping_neg | i64x2_neg) [i64::MIN, i64::MAX], + } + + test_f32x4_ceil => { + (ceil | f32x4_ceil) [1.0f32, 2., 2.5, 3.3], + (ceil | f32x4_ceil) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_floor => { + (floor | f32x4_floor) [1.0f32, 2., 2.5, 3.3], + (floor | f32x4_floor) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_trunc => { + (trunc | f32x4_trunc) [1.0f32, 2., 2.5, 3.3], + (trunc | f32x4_trunc) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_nearest => { + (round | f32x4_nearest) [1.0f32, 2., 2.6, 3.3], + (round | f32x4_nearest) [0.0, -0.3, 
f32::INFINITY, -0.0], + } + + test_f32x4_abs => { + (abs | f32x4_abs) [1.0f32, 2., 2.6, 3.3], + (abs | f32x4_abs) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_neg => { + (neg | f32x4_neg) [1.0f32, 2., 2.6, 3.3], + (neg | f32x4_neg) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_sqrt => { + (sqrt | f32x4_sqrt) [1.0f32, 2., 2.6, 3.3], + (sqrt | f32x4_sqrt) [0.0, 0.3, f32::INFINITY, 0.1], + } + + test_f64x2_ceil => { + (ceil | f64x2_ceil) [1.0f64, 2.3], + (ceil | f64x2_ceil) [f64::INFINITY, -0.1], + } + + test_f64x2_floor => { + (floor | f64x2_floor) [1.0f64, 2.3], + (floor | f64x2_floor) [f64::INFINITY, -0.1], + } + + test_f64x2_trunc => { + (trunc | f64x2_trunc) [1.0f64, 2.3], + (trunc | f64x2_trunc) [f64::INFINITY, -0.1], + } + + test_f64x2_nearest => { + (round | f64x2_nearest) [1.0f64, 2.3], + (round | f64x2_nearest) [f64::INFINITY, -0.1], + } + + test_f64x2_abs => { + (abs | f64x2_abs) [1.0f64, 2.3], + (abs | f64x2_abs) [f64::INFINITY, -0.1], + } + + test_f64x2_neg => { + (neg | f64x2_neg) [1.0f64, 2.3], + (neg | f64x2_neg) [f64::INFINITY, -0.1], + } + + test_f64x2_sqrt => { + (sqrt | f64x2_sqrt) [1.0f64, 2.3], + (sqrt | f64x2_sqrt) [f64::INFINITY, 0.1], + } + } + + macro_rules! floating_point { + (f32) => { + true + }; + (f64) => { + true + }; + ($id:ident) => { + false + }; + } + + trait IsNan: Sized { + fn is_nan(self) -> bool { + false + } + } + impl IsNan for i8 {} + impl IsNan for i16 {} + impl IsNan for i32 {} + impl IsNan for i64 {} + + macro_rules! 
test_bop { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + test_bop!( + $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]: + ([$($in_a),*], [$($in_b),*]) => [$($out),*] + ); + + }; + ($id:ident[$ety:ident; $ecount:expr] => $oty:ident | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + #[test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let b_input: [$ety; $ecount] = [$($in_b),*]; + let output: [$oty; $ecount] = [$($out),*]; + + let a_vec_in: v128 = transmute(a_input); + let b_vec_in: v128 = transmute(b_input); + let vec_res: v128 = $binary_op(a_vec_in, b_vec_in); + + let res: [$oty; $ecount] = transmute(vec_res); + + if !floating_point!($ety) { + assert_eq!(res, output); + } else { + for i in 0..$ecount { + let r = res[i]; + let o = output[i]; + assert_eq!(r.is_nan(), o.is_nan()); + if !r.is_nan() { + assert_eq!(r, o); + } + } + } + } + } + } + } + + macro_rules! test_bops { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident]: + ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => { + #[test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = transmute(a_input); + let vec_res: v128 = $binary_op(a_vec_in, $in_b); + + let res: [$ety; $ecount] = transmute(vec_res); + assert_eq!(res, output); + } + } + } + } + + macro_rules! 
test_uop { + ($id:ident[$ety:ident; $ecount:expr] | + $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => { + #[test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = transmute(a_input); + let vec_res: v128 = $unary_op(a_vec_in); + + let res: [$ety; $ecount] = transmute(vec_res); + assert_eq!(res, output); + } + } + } + } + + test_bops!(i8x16[i8; 16] | i8x16_shl[i8x16_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]); + test_bops!(i16x8[i16; 8] | i16x8_shl[i16x8_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => + [0, -2, 4, 6, 8, 10, 12, -2]); + test_bops!(i32x4[i32; 4] | i32x4_shl[i32x4_shl_test]: + ([0, -1, 2, 3], 1) => [0, -2, 4, 6]); + test_bops!(i64x2[i64; 2] | i64x2_shl[i64x2_shl_test]: + ([0, -1], 1) => [0, -2]); + + test_bops!(i8x16[i8; 16] | i8x16_shr[i8x16_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); + test_bops!(i16x8[i16; 8] | i16x8_shr[i16x8_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => + [0, -1, 1, 1, 2, 2, 3, i16::MAX / 2]); + test_bops!(i32x4[i32; 4] | i32x4_shr[i32x4_shr_s_test]: + ([0, -1, 2, 3], 1) => [0, -1, 1, 1]); + test_bops!(i64x2[i64; 2] | i64x2_shr[i64x2_shr_s_test]: + ([0, -1], 1) => [0, -1]); + + test_bops!(i8x16[i8; 16] | u8x16_shr[i8x16_uhr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, i8::MAX, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); + test_bops!(i16x8[i16; 8] | u16x8_shr[i16x8_uhr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => + [0, i16::MAX, 1, 1, 2, 2, 3, i16::MAX / 2]); + test_bops!(i32x4[i32; 4] | u32x4_shr[i32x4_uhr_u_test]: + ([0, -1, 2, 3], 1) => [0, i32::MAX, 1, 1]); + test_bops!(i64x2[i64; 2] | u64x2_shr[i64x2_uhr_u_test]: + ([0, -1], 1) => [0, i64::MAX]); + + #[test] + fn 
v128_bitwise_logical_ops() { + unsafe { + let a: [u32; 4] = [u32::MAX, 0, u32::MAX, 0]; + let b: [u32; 4] = [u32::MAX; 4]; + let c: [u32; 4] = [0; 4]; + + let vec_a: v128 = transmute(a); + let vec_b: v128 = transmute(b); + let vec_c: v128 = transmute(c); + + let r: v128 = v128_and(vec_a, vec_a); + compare_bytes(r, vec_a); + let r: v128 = v128_and(vec_a, vec_b); + compare_bytes(r, vec_a); + let r: v128 = v128_andnot(vec_a, vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128_andnot(vec_a, vec_a); + compare_bytes(r, vec_c); + let r: v128 = v128_andnot(vec_a, vec_c); + compare_bytes(r, vec_a); + let r: v128 = v128_or(vec_a, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128_not(vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128_xor(vec_a, vec_c); + compare_bytes(r, vec_a); + + let r: v128 = v128_bitselect(vec_b, vec_c, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128_bitselect(vec_b, vec_c, vec_c); + compare_bytes(r, vec_c); + let r: v128 = v128_bitselect(vec_b, vec_c, vec_a); + compare_bytes(r, vec_a); + } + } + + macro_rules! 
test_bool_red { + ([$test_id:ident, $any:ident, $all:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { + #[test] + fn $test_id() { + unsafe { + let vec_a: v128 = transmute([$($true),*]); // true + let vec_b: v128 = transmute([$($false),*]); // false + let vec_c: v128 = transmute([$($alt),*]); // alternating + + // TODO + // assert_eq!($any(vec_a), true); + // assert_eq!($any(vec_b), false); + // assert_eq!($any(vec_c), true); + + assert_eq!($all(vec_a), true); + assert_eq!($all(vec_b), false); + assert_eq!($all(vec_c), false); + } + } + } + } + + test_bool_red!( + [i8x16_boolean_reductions, v128_any_true, i8x16_all_true] + | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + [i16x8_boolean_reductions, v128_any_true, i16x8_all_true] + | [1_i16, 1, 1, 1, 1, 1, 1, 1] + | [0_i16, 0, 0, 0, 0, 0, 0, 0] + | [1_i16, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + [i32x4_boolean_reductions, v128_any_true, i32x4_all_true] + | [1_i32, 1, 1, 1] + | [0_i32, 0, 0, 0] + | [1_i32, 0, 1, 0] + ); + test_bool_red!( + [i64x2_boolean_reductions, v128_any_true, i64x2_all_true] + | [1_i64, 1] + | [0_i64, 0] + | [1_i64, 0] + ); + + test_bop!(i8x16[i8; 16] | i8x16_eq[i8x16_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_eq[i16x8_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_eq[i32x4_eq_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | i64x2_eq[i64x2_eq_test]: + ([0, 1], [0, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_eq[f32x4_eq_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] 
=> i64 | f64x2_eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | i8x16_ne[i8x16_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_ne[i16x8_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_ne[i32x4_ne_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_ne[i64x2_ne_test]: + ([0, 1], [0, 2]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_ne[f32x4_ne_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_lt[i8x16_lt_s_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0]); + test_bop!(i8x16[i8; 16] | u8x16_lt[i8x16_lt_u_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_lt[i16x8_lt_s_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | u16x8_lt[i16x8_lt_u_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_lt[i32x4_lt_s_test]: + ([-1, 1, 2, 3], [0, 2, 2, 4]) => [-1, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | u32x4_lt[i32x4_lt_u_test]: + ([-1, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_lt[i64x2_lt_s_test]: + ([-1, 3], [0, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_lt[f32x4_lt_test]: + ([0., 1., 2., 3.], 
[0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_gt[i8x16_gt_s_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i8x16[i8; 16] | u8x16_gt[i8x16_gt_u_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | i16x8_gt[i16x8_gt_s_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | u16x8_gt[i16x8_gt_u_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | i32x4_gt[i32x4_gt_s_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | u32x4_gt[i32x4_gt_u_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_gt[i64x2_gt_s_test]: + ([-1, 2], [0, 1]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_gt[f32x4_gt_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_ge[i8x16_ge_s_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i8x16[i8; 16] | u8x16_ge[i8x16_ge_u_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_ge[i16x8_ge_s_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 
6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | u16x8_ge[i16x8_ge_u_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_ge[i32x4_ge_s_test]: + ([0, 1, 2, -3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | u32x4_ge[i32x4_ge_u_test]: + ([0, 1, 2, -3], [0, 2, 2, 4]) => [-1, 0, -1, -1]); + test_bop!(i64x2[i64; 2] | i64x2_ge[i64x2_ge_s_test]: + ([0, 1], [-1, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_ge[f32x4_ge_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | i8x16_le[i8x16_le_s_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i8x16[i8; 16] | u8x16_le[i8x16_le_u_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | i16x8_le[i16x8_le_s_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | u16x8_le[i16x8_le_u_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | i32x4_le[i32x4_le_s_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | u32x4_le[i32x4_le_u_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | i64x2_le[i64x2_le_s_test]: + ([0, 2], [0, 1]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_le[f32x4_le_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 
0]); + + test_uop!(f32x4[f32; 4] | f32x4_neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); + test_uop!(f32x4[f32; 4] | f32x4_abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]); + test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); + test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., f32::NAN]) + => [0., -3., -4., f32::NAN]); + test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); + test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., f32::NAN]) + => [1., -1., 7., f32::NAN]); + test_bop!(f32x4[f32; 4] | f32x4_add[f32x4_add_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); + test_bop!(f32x4[f32; 4] | f32x4_sub[f32x4_sub_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); + test_bop!(f32x4[f32; 4] | f32x4_mul[f32x4_mul_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); + test_bop!(f32x4[f32; 4] | f32x4_div[f32x4_div_test]: + ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); + + test_uop!(f64x2[f64; 2] | f64x2_neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); + test_uop!(f64x2[f64; 2] | f64x2_abs[f64x2_abs_test]: [0., -1.] 
=> [ 0., 1.]); + test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test]: + ([0., -1.], [1., -3.]) => [0., -3.]); + test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test_nan]: + ([7., 8.], [-4., f64::NAN]) + => [ -4., f64::NAN]); + test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test]: + ([0., -1.], [1., -3.]) => [1., -1.]); + test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test_nan]: + ([7., 8.], [ -4., f64::NAN]) + => [7., f64::NAN]); + test_bop!(f64x2[f64; 2] | f64x2_add[f64x2_add_test]: + ([0., -1.], [1., -3.]) => [1., -4.]); + test_bop!(f64x2[f64; 2] | f64x2_sub[f64x2_sub_test]: + ([0., -1.], [1., -3.]) => [-1., 2.]); + test_bop!(f64x2[f64; 2] | f64x2_mul[f64x2_mul_test]: + ([0., -1.], [1., -3.]) => [0., 3.]); + test_bop!(f64x2[f64; 2] | f64x2_div[f64x2_div_test]: + ([0., -8.], [1., 4.]) => [0., -2.]); + + macro_rules! test_conv { + ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { + #[test] + fn $test_id() { + unsafe { + let from: v128 = transmute($from); + let to: v128 = transmute($to); + + let r: v128 = $conv_id(from); + + compare_bytes(r, to); + } + } + }; + } + + test_conv!( + f32x4_convert_s_i32x4 | f32x4_convert_i32x4 | f32x4 | [1_i32, 2, 3, 4], + [1_f32, 2., 3., 4.] + ); + test_conv!( + f32x4_convert_u_i32x4 | f32x4_convert_u32x4 | f32x4 | [u32::MAX, 2, 3, 4], + [u32::MAX as f32, 2., 3., 4.] 
+ ); + + #[test] + fn test_conversions() { + compare_bytes( + i32x4_trunc_sat_f32x4(f32x4(1., f32::NEG_INFINITY, f32::INFINITY, f32::NAN)), + i32x4(1, i32::MIN, i32::MAX, 0), + ); + compare_bytes( + u32x4_trunc_sat_f32x4(f32x4(1., f32::NEG_INFINITY, f32::INFINITY, f32::NAN)), + u32x4(1, 0, u32::MAX, 0), + ); + compare_bytes(f64x2_convert_low_i32x4(i32x4(1, 2, 3, 4)), f64x2(1., 2.)); + compare_bytes( + f64x2_convert_low_i32x4(i32x4(i32::MIN, i32::MAX, 3, 4)), + f64x2(f64::from(i32::MIN), f64::from(i32::MAX)), + ); + compare_bytes(f64x2_convert_low_u32x4(u32x4(1, 2, 3, 4)), f64x2(1., 2.)); + compare_bytes( + f64x2_convert_low_u32x4(u32x4(u32::MIN, u32::MAX, 3, 4)), + f64x2(f64::from(u32::MIN), f64::from(u32::MAX)), + ); + + compare_bytes( + i32x4_trunc_sat_f64x2_zero(f64x2(1., f64::NEG_INFINITY)), + i32x4(1, i32::MIN, 0, 0), + ); + compare_bytes( + i32x4_trunc_sat_f64x2_zero(f64x2(f64::NAN, f64::INFINITY)), + i32x4(0, i32::MAX, 0, 0), + ); + compare_bytes( + u32x4_trunc_sat_f64x2_zero(f64x2(1., f64::NEG_INFINITY)), + u32x4(1, 0, 0, 0), + ); + compare_bytes( + u32x4_trunc_sat_f64x2_zero(f64x2(f64::NAN, f64::INFINITY)), + u32x4(0, u32::MAX, 0, 0), + ); + } + + #[test] + fn test_popcnt() { + unsafe { + for i in 0..=255 { + compare_bytes( + i8x16_popcnt(u8x16_splat(i)), + u8x16_splat(i.count_ones() as u8), + ) + } + + let vectors = [ + [0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [ + 100, 200, 50, 0, 10, 7, 38, 185, 192, 3, 34, 85, 93, 7, 31, 99, + ], + ]; + + for vector in vectors.iter() { + compare_bytes( + i8x16_popcnt(transmute(*vector)), + i8x16( + vector[0].count_ones() as i8, + vector[1].count_ones() as i8, + vector[2].count_ones() as i8, + vector[3].count_ones() as i8, + vector[4].count_ones() as i8, + vector[5].count_ones() as i8, + vector[6].count_ones() as i8, + vector[7].count_ones() as i8, + vector[8].count_ones() as i8, + vector[9].count_ones() as i8, + vector[10].count_ones() as i8, + vector[11].count_ones() as i8, + vector[12].count_ones() 
as i8, + vector[13].count_ones() as i8, + vector[14].count_ones() as i8, + vector[15].count_ones() as i8, + ), + ) + } + } + } + + #[test] + fn test_promote_demote() { + let tests = [ + [1., 2.], + [f64::NAN, f64::INFINITY], + [100., 201.], + [0., -0.], + [f64::NEG_INFINITY, 0.], + ]; + + for [a, b] in tests { + compare_bytes( + f32x4_demote_f64x2_zero(f64x2(a, b)), + f32x4(a as f32, b as f32, 0., 0.), + ); + compare_bytes( + f64x2_promote_low_f32x4(f32x4(a as f32, b as f32, 0., 0.)), + f64x2(a, b), + ); + } + } + + #[test] + fn test_extmul() { + macro_rules! test { + ($( + $ctor:ident { + from: $from:ident, + to: $to:ident, + low: $low:ident, + high: $high:ident, + } => { + $(([$($a:tt)*] * [$($b:tt)*]))* + } + )*) => ($( + $(unsafe { + let a: [$from; 16 / mem::size_of::<$from>()] = [$($a)*]; + let b: [$from; 16 / mem::size_of::<$from>()] = [$($b)*]; + let low = mem::transmute::<_, [$to; 16 / mem::size_of::<$to>()]>($low($ctor($($a)*), $ctor($($b)*))); + let high = mem::transmute::<_, [$to; 16 / mem::size_of::<$to>()]>($high($ctor($($a)*), $ctor($($b)*))); + + let half = a.len() / 2; + for i in 0..half { + assert_eq!( + (a[i] as $to).wrapping_mul((b[i] as $to)), + low[i], + "expected {} * {}", a[i] as $to, b[i] as $to, + ); + assert_eq!( + (a[half + i] as $to).wrapping_mul((b[half + i] as $to)), + high[i], + "expected {} * {}", a[half + i] as $to, b[half + i] as $to, + ); + } + })* + )*) + } + test! 
{ + i8x16 { + from: i8, + to: i16, + low: i16x8_extmul_low_i8x16, + high: i16x8_extmul_high_i8x16, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [-1, -2, 3, 100, 124, -38, 33, 87, 92, 108, 22, 8, -43, -128, 22, 0] + * + [-5, -2, 6, 10, 45, -4, 4, -2, 0, 88, 92, -102, -98, 83, 73, 54] + ) + } + u8x16 { + from: u8, + to: u16, + low: u16x8_extmul_low_u8x16, + high: u16x8_extmul_high_u8x16, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [1, 2, 3, 100, 124, 38, 33, 87, 92, 198, 22, 8, 43, 128, 22, 0] + * + [5, 200, 6, 10, 45, 248, 4, 2, 0, 2, 92, 102, 234, 83, 73, 54] + ) + } + i16x8 { + from: i16, + to: i32, + low: i32x4_extmul_low_i16x8, + high: i32x4_extmul_high_i16x8, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [-1, 0, i16::MAX, 19931, -2259, 64, 200, 87] + * + [1, 1, i16::MIN, 29391, 105, 2, 100, -2] + ) + } + u16x8 { + from: u16, + to: u32, + low: u32x4_extmul_low_u16x8, + high: u32x4_extmul_high_u16x8, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [1, 0, u16::MAX, 19931, 2259, 64, 200, 87] + * + [1, 1, 3, 29391, 105, 2, 100, 2] + ) + } + i32x4 { + from: i32, + to: i64, + low: i64x2_extmul_low_i32x4, + high: i64x2_extmul_high_i32x4, + } => { + ( + [0, 0, 0, 0] + * + [0, 0, 0, 0] + ) + ( + [-1, 0, i32::MAX, 19931] + * + [1, 1, i32::MIN, 29391] + ) + ( + [i32::MAX, 3003183, 3 << 20, 0xffffff] + * + [i32::MAX, i32::MIN, -40042, 300] + ) + } + u32x4 { + from: u32, + to: u64, + low: u64x2_extmul_low_u32x4, + high: u64x2_extmul_high_u32x4, + } => { + ( + [0, 0, 0, 0] + * + [0, 0, 0, 0] + ) + ( + [1, 0, u32::MAX, 19931] + * + [1, 1, 3, 29391] + ) + ( + [u32::MAX, 3003183, 3 << 20, 0xffffff] + * + [u32::MAX, 3000, 40042, 300] + ) + } + } + } + + #[test] + fn test_q15mulr_sat_s() { + fn test(a: [i16; 8], b: [i16; 8]) { + let a_v = 
i16x8(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); + let b_v = i16x8(b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); + let result = i16x8_q15mulr_sat(a_v, b_v); + let result = unsafe { mem::transmute::(result) }; + + for (i, (a, b)) in a.iter().zip(&b).enumerate() { + assert_eq!( + result[i], + (((*a as i32) * (*b as i32) + 0x4000) >> 15) as i16 + ); + } + } + + test([0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]); + test([1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]); + test( + [-1, 100, 2003, -29494, 12, 128, 994, 1], + [-4049, 8494, -10483, 0, 5, 2222, 883, -9], + ); + } + + #[test] + fn test_extadd() { + macro_rules! test { + ($( + $func:ident { + from: $from:ident, + to: $to:ident, + } => { + $([$($a:tt)*])* + } + )*) => ($( + $(unsafe { + let a: [$from; 16 / mem::size_of::<$from>()] = [$($a)*]; + let a_v = mem::transmute::<_, v128>(a); + let r = mem::transmute::()]>($func(a_v)); + + let half = a.len() / 2; + for i in 0..half { + assert_eq!( + (a[2 * i] as $to).wrapping_add((a[2 * i + 1] as $to)), + r[i], + "failed {} + {} != {}", + a[2 * i] as $to, + a[2 * i + 1] as $to, + r[i], + ); + } + })* + )*) + } + test! 
{ + i16x8_extadd_pairwise_i8x16 { + from: i8, + to: i16, + } => { + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + [-1, -2, 3, 100, 124, -38, 33, 87, 92, 108, 22, 8, -43, -128, 22, 0] + [-5, -2, 6, 10, 45, -4, 4, -2, 0, 88, 92, -102, -98, 83, 73, 54] + } + i16x8_extadd_pairwise_u8x16 { + from: u8, + to: i16, + } => { + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + [1, 2, 3, 100, 124, 38, 33, 87, 92, 198, 22, 8, 43, 128, 22, 0] + [5, 200, 6, 10, 45, 248, 4, 2, 0, 2, 92, 102, 234, 83, 73, 54] + } + i32x4_extadd_pairwise_i16x8 { + from: i16, + to: i32, + } => { + [0, 0, 0, 0, 0, 0, 0, 0] + [-1, 0, i16::MAX, 19931, -2259, 64, 200, 87] + [1, 1, i16::MIN, 29391, 105, 2, 100, -2] + } + i32x4_extadd_pairwise_u16x8 { + from: u16, + to: i32, + } => { + [0, 0, 0, 0, 0, 0, 0, 0] + [1, 0, u16::MAX, 19931, 2259, 64, 200, 87] + [1, 1, 3, 29391, 105, 2, 100, 2] + } + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/abm.rs b/library/stdarch/crates/core_arch/src/x86/abm.rs new file mode 100644 index 000000000000..e6d551760043 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/abm.rs @@ -0,0 +1,62 @@ +//! Advanced Bit Manipulation (ABM) instructions +//! +//! The POPCNT and LZCNT have their own CPUID bits to indicate support. +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +//! System Instructions][amd64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wikipedia_bmi]: +//! 
https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Counts the leading most significant zero bits. +/// +/// When the operand is zero, it returns its size in bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_lzcnt_u32) +#[inline] +#[target_feature(enable = "lzcnt")] +#[cfg_attr(test, assert_instr(lzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _lzcnt_u32(x: u32) -> u32 { + x.leading_zeros() +} + +/// Counts the bits that are set. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_popcnt32) +#[inline] +#[target_feature(enable = "popcnt")] +#[cfg_attr(test, assert_instr(popcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _popcnt32(x: i32) -> i32 { + x.count_ones() as i32 +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "lzcnt")] + unsafe fn test_lzcnt_u32() { + assert_eq!(_lzcnt_u32(0b0101_1010), 25); + } + + #[simd_test(enable = "popcnt")] + unsafe fn test_popcnt32() { + assert_eq!(_popcnt32(0b0101_1010), 4); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs new file mode 100644 index 000000000000..5ba766461653 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -0,0 +1,164 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.addcarry.32"] + fn llvm_addcarry_u32(a: u8, b: u32, c: u32) -> (u8, u32); + #[link_name = "llvm.x86.addcarryx.u32"] + fn llvm_addcarryx_u32(a: u8, b: u32, c: u32, d: *mut u32) -> u8; + #[link_name = "llvm.x86.subborrow.32"] + fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32); +} + +/// Adds unsigned 32-bit integers `a` and `b` with 
unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and the carry-out +/// is returned (carry or overflow flag). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_addcarry_u32) +#[inline] +#[cfg_attr(test, assert_instr(adc))] +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { + let (a, b) = llvm_addcarry_u32(c_in, a, b); + *out = b; + a +} + +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and +/// the carry-out is returned (carry or overflow flag). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_addcarryx_u32) +#[inline] +#[target_feature(enable = "adx")] +#[cfg_attr(test, assert_instr(adc))] +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { + llvm_addcarryx_u32(c_in, a, b, out as *mut _) +} + +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and +/// the carry-out is returned (carry or overflow flag). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_subborrow_u32) +#[inline] +#[cfg_attr(test, assert_instr(sbb))] +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub unsafe fn _subborrow_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { + let (a, b) = llvm_subborrow_u32(c_in, a, b); + *out = b; + a +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[test] + fn test_addcarry_u32() { + unsafe { + let a = u32::MAX; + let mut out = 0; + + let r = _addcarry_u32(0, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u32(0, a, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, a); + + let r = _addcarry_u32(1, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 1); + + let r = _addcarry_u32(1, a, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u32(0, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 7); + + let r = _addcarry_u32(1, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 8); + } + } + + #[simd_test(enable = "adx")] + unsafe fn test_addcarryx_u32() { + let a = u32::MAX; + let mut out = 0; + + let r = _addcarryx_u32(0, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarryx_u32(0, a, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, a); + + let r = _addcarryx_u32(1, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 1); + + let r = _addcarryx_u32(1, a, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarryx_u32(0, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 7); + + let r = _addcarryx_u32(1, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 8); + } + + #[simd_test(enable = "adx")] + unsafe fn test_addcarryx_u32_2() { + unsafe fn add_1_2_3() -> u32 { + let mut out = 0; + _addcarryx_u32(1, 2, 3, &mut out); + out + } + assert_eq!(6, add_1_2_3()); + } + + #[test] + fn test_subborrow_u32() { + unsafe { + let a = 
u32::MAX; + let mut out = 0; + + let r = _subborrow_u32(0, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u32(0, 0, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 0); + + let r = _subborrow_u32(1, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a - 1); + + let r = _subborrow_u32(1, 0, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u32(0, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 4); + + let r = _subborrow_u32(1, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 3); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/aes.rs b/library/stdarch/crates/core_arch/src/x86/aes.rs new file mode 100644 index 000000000000..7db743b2ccd3 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/aes.rs @@ -0,0 +1,171 @@ +//! AES New Instructions (AES-NI) +//! +//! The intrinsics here correspond to those in the `wmmintrin.h` C header. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! 
[intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::x86::__m128i; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.aesni.aesdec"] + fn aesdec(a: __m128i, round_key: __m128i) -> __m128i; + #[link_name = "llvm.x86.aesni.aesdeclast"] + fn aesdeclast(a: __m128i, round_key: __m128i) -> __m128i; + #[link_name = "llvm.x86.aesni.aesenc"] + fn aesenc(a: __m128i, round_key: __m128i) -> __m128i; + #[link_name = "llvm.x86.aesni.aesenclast"] + fn aesenclast(a: __m128i, round_key: __m128i) -> __m128i; + #[link_name = "llvm.x86.aesni.aesimc"] + fn aesimc(a: __m128i) -> __m128i; + #[link_name = "llvm.x86.aesni.aeskeygenassist"] + fn aeskeygenassist(a: __m128i, imm8: u8) -> __m128i; +} + +/// Performs one round of an AES decryption flow on data (state) in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128) +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(test, assert_instr(aesdec))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesdec(a, round_key) } +} + +/// Performs the last round of an AES decryption flow on data (state) in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128) +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(test, assert_instr(aesdeclast))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesdeclast(a, round_key) } +} + +/// Performs one round of an AES encryption flow on data (state) in `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128) +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(test, assert_instr(aesenc))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesenc(a, round_key) } +} + +/// Performs the last round of an AES encryption flow on data (state) in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128) +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(test, assert_instr(aesenclast))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesenclast(a, round_key) } +} + +/// Performs the `InvMixColumns` transformation on `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128) +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(test, assert_instr(aesimc))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_aesimc_si128(a: __m128i) -> __m128i { + unsafe { aesimc(a) } +} + +/// Assist in expanding the AES cipher key. +/// +/// Assist in expanding the AES cipher key by computing steps towards +/// generating a round key for encryption cipher using data from `a` and an +/// 8-bit round constant `IMM8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128) +#[inline] +#[target_feature(enable = "aes")] +#[cfg_attr(test, assert_instr(aeskeygenassist, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_aeskeygenassist_si128(a: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { aeskeygenassist(a, IMM8 as u8) } +} + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __m128i happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "aes")] + unsafe fn test_mm_aesdec_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee); + let r = _mm_aesdec_si128(a, k); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "aes")] + unsafe fn test_mm_aesdeclast_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493); + let r = _mm_aesdeclast_si128(a, k); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "aes")] + unsafe fn test_mm_aesenc_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. 
+ let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333); + let r = _mm_aesenc_si128(a, k); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "aes")] + unsafe fn test_mm_aesenclast_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); + let r = _mm_aesenclast_si128(a, k); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "aes")] + unsafe fn test_mm_aesimc_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055); + let r = _mm_aesimc_si128(a); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "aes")] + unsafe fn test_mm_aeskeygenassist_si128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714138.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let e = _mm_set_epi64x(0x857c266b7c266e85, 0xeac4eea9c4eeacea); + let r = _mm_aeskeygenassist_si128::<5>(a); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs new file mode 100644 index 000000000000..df1cb63be30f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -0,0 +1,5022 @@ +//! Advanced Vector Extensions (AVX) +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. - [AMD64 Architecture +//! Programmer's Manual, Volume 3: General-Purpose and System +//! Instructions][amd64_ref]. +//! +//! 
[Wikipedia][wiki] provides a quick overview of the instructions available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions + +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, + mem, ptr, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Adds packed double-precision (64-bit) floating-point elements +/// in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vaddpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_add(a, b) } +} + +/// Adds packed single-precision (32-bit) floating-point elements in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vaddps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_add(a, b) } +} + +/// Computes the bitwise AND of a packed double-precision (64-bit) +/// floating-point elements in `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd) +#[inline] +#[target_feature(enable = "avx")] +// See https://github.com/rust-lang/stdarch/issues/71 +#[cfg_attr(test, assert_instr(vandp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_and(a, b)) + } +} + +/// Computes the bitwise AND of packed single-precision (32-bit) floating-point +/// elements in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_and(a, b)) + } +} + +/// Computes the bitwise OR packed double-precision (64-bit) floating-point +/// elements in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd) +#[inline] +#[target_feature(enable = "avx")] +// See . +#[cfg_attr(test, assert_instr(vorp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_or(a, b)) + } +} + +/// Computes the bitwise OR packed single-precision (32-bit) floating-point +/// elements in `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_or(a, b)) + } +} + +/// Shuffles double-precision (64-bit) floating-point elements within 128-bit +/// lanes using the control in `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_uimm_bits!(MASK, 8); + unsafe { + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1) + 4, + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 6, + ], + ) + } +} + +/// Shuffles single-precision (32-bit) floating-point elements in `a` within +/// 128-bit lanes using the control in `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 { + static_assert_uimm_bits!(MASK, 8); + unsafe { + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ) + } +} + +/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point +/// elements in `a`, and then AND with `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vandnp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b)) + } +} + +/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point +/// elements in `a` +/// and then AND with `b`. 
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        // (!a) & b: NOT is expressed as XOR with all-ones so the whole
        // computation stays on integer vectors.
        transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
    }
}

/// Compares packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and returns packed maximum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    // Lowered to the raw VMAXPD intrinsic (not a generic SIMD max) so the
    // instruction's exact behavior for NaN/signed-zero operands is kept —
    // see Intel's documentation for the precise semantics.
    unsafe { vmaxpd(a, b) }
}

/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed maximum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    // Raw VMAXPS intrinsic; see note on `_mm256_max_pd`.
    unsafe { vmaxps(a, b) }
}

/// Compares packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and returns packed minimum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    // Raw VMINPD intrinsic; see note on `_mm256_max_pd`.
    unsafe { vminpd(a, b) }
}

/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed minimum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    // Raw VMINPS intrinsic; see note on `_mm256_max_pd`.
    unsafe { vminps(a, b) }
}

/// Multiplies packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    // Plain lane-wise IEEE multiply; maps directly to VMULPD.
    unsafe { simd_mul(a, b) }
}

/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    // Plain lane-wise IEEE multiply; maps directly to VMULPS.
    unsafe { simd_mul(a, b) }
}

/// Alternatively adds and subtracts packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        // Compute both lane-wise sum and difference, then interleave:
        // indices >= 4 select from `sub`, so even result lanes hold a - b
        // and odd result lanes hold a + b (VADDSUBPD semantics).
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [4, 1, 6, 3])
    }
}

/// Alternatively adds and subtracts packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        // Same pattern as `_mm256_addsub_pd`: indices >= 8 pick from `sub`,
        // so even lanes are differences and odd lanes are sums.
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from packed elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    // Plain lane-wise IEEE subtract; maps directly to VSUBPD.
    unsafe { simd_sub(a, b) }
}

/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
/// from packed elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    // Plain lane-wise IEEE subtract; maps directly to VSUBPS.
    unsafe { simd_sub(a, b) }
}

/// Computes the division of each of the 8 packed 32-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    // Plain lane-wise IEEE divide; maps directly to VDIVPS.
    unsafe { simd_div(a, b) }
}

/// Computes the division of each of the 4 packed 64-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    // Plain lane-wise IEEE divide; maps directly to VDIVPD.
    unsafe { simd_div(a, b) }
}

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
///
/// - `0x00`: Round to the nearest whole number.
/// - `0x01`: Round down, toward negative infinity.
/// - `0x02`: Round up, toward positive infinity.
/// - `0x03`: Truncate the values.
///
/// For a complete list of options, check [the LLVM docs][llvm_docs].
+/// +/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_round_pd(a: __m256d) -> __m256d { + static_assert_uimm_bits!(ROUNDING, 4); + unsafe { roundpd256(a, ROUNDING) } +} + +/// Rounds packed double-precision (64-bit) floating point elements in `a` +/// toward positive infinity. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vroundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_ceil_pd(a: __m256d) -> __m256d { + unsafe { simd_ceil(a) } +} + +/// Rounds packed double-precision (64-bit) floating point elements in `a` +/// toward negative infinity. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vroundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_floor_pd(a: __m256d) -> __m256d { + unsafe { simd_floor(a) } +} + +/// Rounds packed single-precision (32-bit) floating point elements in `a` +/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows: +/// +/// - `0x00`: Round to the nearest whole number. +/// - `0x01`: Round down, toward negative infinity. +/// - `0x02`: Round up, toward positive infinity. +/// - `0x03`: Truncate the values. +/// +/// For a complete list of options, check [the LLVM docs][llvm_docs]. 
+/// +/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_round_ps(a: __m256) -> __m256 { + static_assert_uimm_bits!(ROUNDING, 4); + unsafe { roundps256(a, ROUNDING) } +} + +/// Rounds packed single-precision (32-bit) floating point elements in `a` +/// toward positive infinity. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vroundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_ceil_ps(a: __m256) -> __m256 { + unsafe { simd_ceil(a) } +} + +/// Rounds packed single-precision (32-bit) floating point elements in `a` +/// toward negative infinity. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vroundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_floor_ps(a: __m256) -> __m256 { + unsafe { simd_floor(a) } +} + +/// Returns the square root of packed single-precision (32-bit) floating point +/// elements in `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vsqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sqrt_ps(a: __m256) -> __m256 { + unsafe { simd_fsqrt(a) } +} + +/// Returns the square root of packed double-precision (64-bit) floating point +/// elements in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d { + unsafe { simd_fsqrt(a) } +} + +/// Blends packed double-precision (64-bit) floating-point elements from +/// `a` and `b` using control mask `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd) +#[inline] +#[target_feature(enable = "avx")] +// Note: LLVM7 prefers single-precision blend instructions when +// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194 +// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] +#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_uimm_bits!(IMM4, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + ((IMM4 as u32 >> 0) & 1) * 4 + 0, + ((IMM4 as u32 >> 1) & 1) * 4 + 1, + ((IMM4 as u32 >> 2) & 1) * 4 + 2, + ((IMM4 as u32 >> 3) & 1) * 4 + 3, + ], + ) + } +} + +/// Blends packed single-precision (32-bit) floating-point elements from +/// `a` and `b` using control mask `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + simd_shuffle!( + a, + b, + [ + ((IMM8 as u32 >> 0) & 1) * 8 + 0, + ((IMM8 as u32 >> 1) & 1) * 8 + 1, + ((IMM8 as u32 >> 2) & 1) * 8 + 2, + ((IMM8 as u32 >> 3) & 1) * 8 + 3, + ((IMM8 as u32 >> 4) & 1) * 8 + 4, + ((IMM8 as u32 >> 5) & 1) * 8 + 5, + ((IMM8 as u32 >> 6) & 1) * 8 + 6, + ((IMM8 as u32 >> 7) & 1) * 8 + 7, + ], + ) + } +} + +/// Blends packed double-precision (64-bit) floating-point elements from +/// `a` and `b` using `c` as a mask. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vblendvpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO); + transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4())) + } +} + +/// Blends packed single-precision (32-bit) floating-point elements from +/// `a` and `b` using `c` as a mask. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vblendvps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO); + transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8())) + } +} + +/// Conditionally multiplies the packed single-precision (32-bit) floating-point +/// elements in `a` and `b` using the high 4 bits in `imm8`, +/// sum the four products, and conditionally return the sum +/// using the low 4 bits of `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_dp_ps(a: __m256, b: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { vdpps(a, b, IMM8 as i8) } +} + +/// Horizontal addition of adjacent pairs in the two packed vectors +/// of 4 64-bit floating points `a` and `b`. +/// In the result, sums of elements from `a` are returned in even locations, +/// while sums of elements from `b` are returned in odd locations. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vhaddpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { vhaddpd(a, b) } +} + +/// Horizontal addition of adjacent pairs in the two packed vectors +/// of 8 32-bit floating points `a` and `b`. 
+/// In the result, sums of elements from `a` are returned in locations of +/// indices 0, 1, 4, 5; while sums of elements from `b` are locations +/// 2, 3, 6, 7. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vhaddps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { + unsafe { vhaddps(a, b) } +} + +/// Horizontal subtraction of adjacent pairs in the two packed vectors +/// of 4 64-bit floating points `a` and `b`. +/// In the result, sums of elements from `a` are returned in even locations, +/// while sums of elements from `b` are returned in odd locations. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vhsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { vhsubpd(a, b) } +} + +/// Horizontal subtraction of adjacent pairs in the two packed vectors +/// of 8 32-bit floating points `a` and `b`. +/// In the result, sums of elements from `a` are returned in locations of +/// indices 0, 1, 4, 5; while sums of elements from `b` are locations +/// 2, 3, 6, 7. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vhsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { + unsafe { vhsubps(a, b) } +} + +/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point +/// elements in `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vxorp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_xor(a, b)) + } +} + +/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point +/// elements in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_xor(a, b)) + } +} + +/// Equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_EQ_OQ: i32 = 0x00; +/// Less-than (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_LT_OS: i32 = 0x01; +/// Less-than-or-equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_LE_OS: i32 = 0x02; +/// Unordered (non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_UNORD_Q: i32 = 0x03; +/// Not-equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NEQ_UQ: i32 = 0x04; +/// Not-less-than (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NLT_US: i32 = 0x05; +/// Not-less-than-or-equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NLE_US: i32 = 0x06; +/// Ordered (non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_ORD_Q: i32 = 0x07; +/// Equal 
(unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_EQ_UQ: i32 = 0x08; +/// Not-greater-than-or-equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NGE_US: i32 = 0x09; +/// Not-greater-than (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NGT_US: i32 = 0x0a; +/// False (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_FALSE_OQ: i32 = 0x0b; +/// Not-equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NEQ_OQ: i32 = 0x0c; +/// Greater-than-or-equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_GE_OS: i32 = 0x0d; +/// Greater-than (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_GT_OS: i32 = 0x0e; +/// True (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_TRUE_UQ: i32 = 0x0f; +/// Equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_EQ_OS: i32 = 0x10; +/// Less-than (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_LT_OQ: i32 = 0x11; +/// Less-than-or-equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_LE_OQ: i32 = 0x12; +/// Unordered (signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_UNORD_S: i32 = 0x13; +/// Not-equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NEQ_US: i32 = 0x14; +/// Not-less-than (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NLT_UQ: i32 = 0x15; +/// Not-less-than-or-equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NLE_UQ: i32 = 0x16; +/// Ordered (signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub 
const _CMP_ORD_S: i32 = 0x17; +/// Equal (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_EQ_US: i32 = 0x18; +/// Not-greater-than-or-equal (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NGE_UQ: i32 = 0x19; +/// Not-greater-than (unordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NGT_UQ: i32 = 0x1a; +/// False (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_FALSE_OS: i32 = 0x1b; +/// Not-equal (ordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_NEQ_OS: i32 = 0x1c; +/// Greater-than-or-equal (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_GE_OQ: i32 = 0x1d; +/// Greater-than (ordered, non-signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_GT_OQ: i32 = 0x1e; +/// True (unordered, signaling) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _CMP_TRUE_US: i32 = 0x1f; + +/// Compares packed double-precision (64-bit) floating-point +/// elements in `a` and `b` based on the comparison operand +/// specified by `IMM5`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM5, 5); + unsafe { vcmppd(a, b, const { IMM5 as i8 }) } +} + +/// Compares packed double-precision (64-bit) floating-point +/// elements in `a` and `b` based on the comparison operand +/// specified by `IMM5`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_uimm_bits!(IMM5, 5); + unsafe { vcmppd256(a, b, IMM5 as u8) } +} + +/// Compares packed single-precision (32-bit) floating-point +/// elements in `a` and `b` based on the comparison operand +/// specified by `IMM5`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM5, 5); + unsafe { vcmpps(a, b, const { IMM5 as i8 }) } +} + +/// Compares packed single-precision (32-bit) floating-point +/// elements in `a` and `b` based on the comparison operand +/// specified by `IMM5`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { + static_assert_uimm_bits!(IMM5, 5); + unsafe { vcmpps256(a, b, const { IMM5 as u8 }) } +} + +/// Compares the lower double-precision (64-bit) floating-point element in +/// `a` and `b` based on the comparison operand specified by `IMM5`, +/// store the result in the lower element of returned vector, +/// and copies the upper element from `a` to the upper element of returned +/// vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM5, 5); + unsafe { vcmpsd(a, b, IMM5 as i8) } +} + +/// Compares the lower single-precision (32-bit) floating-point element in +/// `a` and `b` based on the comparison operand specified by `IMM5`, +/// store the result in the lower element of returned vector, +/// and copies the upper 3 packed elements from `a` to the upper elements of +/// returned vector. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmp_ss(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM5, 5); + unsafe { vcmpss(a, b, IMM5 as i8) } +} + +/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) +/// floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { + unsafe { simd_cast(a.as_i32x4()) } +} + +/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) +/// floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { + unsafe { simd_cast(a.as_i32x8()) } +} + +/// Converts packed double-precision (64-bit) floating-point elements in `a` +/// to packed single-precision (32-bit) floating-point elements. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { + unsafe { simd_cast(a) } +} + +/// Converts packed single-precision (32-bit) floating-point elements in `a` +/// to packed 32-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i { + unsafe { transmute(vcvtps2dq(a)) } +} + +/// Converts packed single-precision (32-bit) floating-point elements in `a` +/// to packed double-precision (64-bit) floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvtps2pd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtps_pd(a: __m128) -> __m256d { + unsafe { simd_cast(a) } +} + +/// Returns the first element of the input vector of `[4 x double]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64) +#[inline] +#[target_feature(enable = "avx")] +//#[cfg_attr(test, assert_instr(movsd))] FIXME +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 { + unsafe { simd_extract!(a, 0) } +} + +/// Converts packed double-precision (64-bit) floating-point elements in `a` +/// to packed 32-bit integers with truncation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2dq(a)) } +} + +/// Converts packed double-precision (64-bit) floating-point elements in `a` +/// to packed 32-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2dq(a)) } +} + +/// Converts packed single-precision (32-bit) floating-point elements in `a` +/// to packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i { + unsafe { transmute(vcvttps2dq(a)) } +} + +/// Extracts 128 bits (composed of 4 packed single-precision (32-bit) +/// floating-point elements) from `a`, selected with `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extractf128_ps(a: __m256) -> __m128 { + static_assert_uimm_bits!(IMM1, 1); + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ps(), + [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], + ) + } +} + +/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) +/// floating-point elements) from `a`, selected with `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extractf128_pd(a: __m256d) -> __m128d { + static_assert_uimm_bits!(IMM1, 1); + unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) } +} + +/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extractf128_si256(a: __m256i) -> __m128i { + static_assert_uimm_bits!(IMM1, 1); + unsafe { + let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],); + transmute(dst) + } +} + +/// Extracts a 32-bit integer from `a`, selected with `INDEX`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extract_epi32(a: __m256i) -> i32 { + static_assert_uimm_bits!(INDEX, 3); + unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) } +} + +/// Returns the first element of the input vector of `[8 x i32]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32) +#[inline] +#[target_feature(enable = "avx")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { + unsafe { simd_extract!(a.as_i32x8(), 0) } +} + +/// Zeroes the contents of all XMM or YMM registers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vzeroall))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_zeroall() { + unsafe { vzeroall() } +} + +/// Zeroes the upper 128 bits of all YMM registers; +/// the lower 128-bits of the registers are unmodified. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vzeroupper))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_zeroupper() { + unsafe { vzeroupper() } +} + +/// Shuffles single-precision (32-bit) floating-point elements in `a` +/// within 128-bit lanes using the control in `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vpermilps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { + unsafe { vpermilps256(a, b.as_i32x8()) } +} + +/// Shuffles single-precision (32-bit) floating-point elements in `a` +/// using the control in `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vpermilps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { + unsafe { vpermilps(a, b.as_i32x4()) } +} + +/// Shuffles single-precision (32-bit) floating-point elements in `a` +/// within 128-bit lanes using the control in `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute_ps(a: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ps(), + [ + (IMM8 as u32 >> 0) & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ((IMM8 as u32 >> 0) & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + ], + ) + } +} + +/// Shuffles single-precision (32-bit) floating-point elements in `a` +/// using the control in `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_permute_ps(a: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + simd_shuffle!( + a, + _mm_undefined_ps(), + [ + (IMM8 as u32 >> 0) & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ) + } +} + +/// Shuffles double-precision (64-bit) floating-point elements in `a` +/// within 256-bit lanes using the control in `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vpermilpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { + unsafe { vpermilpd256(a, b.as_i64x4()) } +} + +/// Shuffles double-precision (64-bit) floating-point elements in `a` +/// using the control in `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vpermilpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { + unsafe { vpermilpd(a, b.as_i64x2()) } +} + +/// Shuffles double-precision (64-bit) floating-point elements in `a` +/// within 128-bit lanes using the control in `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute_pd(a: __m256d) -> __m256d { + static_assert_uimm_bits!(IMM4, 4); + unsafe { + simd_shuffle!( + a, + _mm256_undefined_pd(), + [ + ((IMM4 as u32 >> 0) & 1), + ((IMM4 as u32 >> 1) & 1), + ((IMM4 as u32 >> 2) & 1) + 2, + ((IMM4 as u32 >> 3) & 1) + 2, + ], + ) + } +} + +/// Shuffles double-precision (64-bit) floating-point elements in `a` +/// using the control in `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_permute_pd(a: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM2, 2); + unsafe { + simd_shuffle!( + a, + _mm_undefined_pd(), + [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1], + ) + } +} + +/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit) +/// floating-point elements) selected by `imm8` from `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute2f128_ps(a: __m256, b: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { vperm2f128ps256(a, b, IMM8 as i8) } +} + +/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit) +/// floating-point elements) selected by `imm8` from `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute2f128_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_uimm_bits!(IMM8, 8); + unsafe { vperm2f128pd256(a, b, IMM8 as i8) } +} + +/// Shuffles 128-bits (composed of integer data) selected by `imm8` +/// from `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute2f128_si256(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) } +} + +/// Broadcasts a single-precision (32-bit) floating-point element from memory +/// to all elements of the returned vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ss) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::trivially_copy_pass_by_ref)] +pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { + _mm256_set1_ps(*f) +} + +/// Broadcasts a single-precision (32-bit) floating-point element from memory +/// to all elements of the returned vector. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_ss) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::trivially_copy_pass_by_ref)] +pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { + _mm_set1_ps(*f) +} + +/// Broadcasts a double-precision (64-bit) floating-point element from memory +/// to all elements of the returned vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_sd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::trivially_copy_pass_by_ref)] +pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { + _mm256_set1_pd(*f) +} + +/// Broadcasts 128 bits from memory (composed of 4 packed single-precision +/// (32-bit) floating-point elements) to all elements of the returned vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vbroadcastf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { + simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) +} + +/// Broadcasts 128 bits from memory (composed of 2 packed double-precision +/// (64-bit) floating-point elements) to all elements of the returned vector. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vbroadcastf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { + simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) +} + +/// Copies `a` to result, then inserts 128 bits (composed of 4 packed +/// single-precision (32-bit) floating-point elements) from `b` into result +/// at the location specified by `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m256 { + static_assert_uimm_bits!(IMM1, 1); + unsafe { + simd_shuffle!( + a, + _mm256_castps128_ps256(b), + [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], + ) + } +} + +/// Copies `a` to result, then inserts 128 bits (composed of 2 packed +/// double-precision (64-bit) floating-point elements) from `b` into result +/// at the location specified by `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> __m256d { + static_assert_uimm_bits!(IMM1, 1); + unsafe { + simd_shuffle!( + a, + _mm256_castpd128_pd256(b), + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + ) + } +} + +/// Copies `a` to result, then inserts 128 bits from `b` into result +/// at the location specified by `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insertf128_si256(a: __m256i, b: __m128i) -> __m256i { + static_assert_uimm_bits!(IMM1, 1); + unsafe { + let dst: i64x4 = simd_shuffle!( + a.as_i64x4(), + _mm256_castsi128_si256(b).as_i64x4(), + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + ); + transmute(dst) + } +} + +/// Copies `a` to result, and inserts the 8-bit integer `i` into result +/// at the location specified by `index`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi8) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insert_epi8(a: __m256i, i: i8) -> __m256i { + static_assert_uimm_bits!(INDEX, 5); + unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) } +} + +/// Copies `a` to result, and inserts the 16-bit integer `i` into result +/// at the location specified by `index`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi16) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insert_epi16(a: __m256i, i: i16) -> __m256i { + static_assert_uimm_bits!(INDEX, 4); + unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) } +} + +/// Copies `a` to result, and inserts the 32-bit integer `i` into result +/// at the location specified by `index`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m256i { + static_assert_uimm_bits!(INDEX, 3); + unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) } +} + +/// Loads 256-bits (composed of 4 packed double-precision (64-bit) +/// floating-point elements) from memory into result. +/// `mem_addr` must be aligned on a 32-byte boundary or a +/// general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovap) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { + *(mem_addr as *const __m256d) +} + +/// Stores 256-bits (composed of 4 packed double-precision (64-bit) +/// floating-point elements) from `a` into memory. 
+/// `mem_addr` must be aligned on a 32-byte boundary or a +/// general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovap) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) { + *(mem_addr as *mut __m256d) = a; +} + +/// Loads 256-bits (composed of 8 packed single-precision (32-bit) +/// floating-point elements) from memory into result. +/// `mem_addr` must be aligned on a 32-byte boundary or a +/// general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { + *(mem_addr as *const __m256) +} + +/// Stores 256-bits (composed of 8 packed single-precision (32-bit) +/// floating-point elements) from `a` into memory. +/// `mem_addr` must be aligned on a 32-byte boundary or a +/// general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) { + *(mem_addr as *mut __m256) = a; +} + +/// Loads 256-bits (composed of 4 packed double-precision (64-bit) +/// floating-point elements) from memory into result. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { + let mut dst = _mm256_undefined_pd(); + ptr::copy_nonoverlapping( + mem_addr as *const u8, + ptr::addr_of_mut!(dst) as *mut u8, + mem::size_of::<__m256d>(), + ); + dst +} + +/// Stores 256-bits (composed of 4 packed double-precision (64-bit) +/// floating-point elements) from `a` into memory. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { + mem_addr.cast::<__m256d>().write_unaligned(a); +} + +/// Loads 256-bits (composed of 8 packed single-precision (32-bit) +/// floating-point elements) from memory into result. +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { + let mut dst = _mm256_undefined_ps(); + ptr::copy_nonoverlapping( + mem_addr as *const u8, + ptr::addr_of_mut!(dst) as *mut u8, + mem::size_of::<__m256>(), + ); + dst +} + +/// Stores 256-bits (composed of 8 packed single-precision (32-bit) +/// floating-point elements) from `a` into memory. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { + mem_addr.cast::<__m256>().write_unaligned(a); +} + +/// Loads 256-bits of integer data from memory into result. +/// `mem_addr` must be aligned on a 32-byte boundary or a +/// general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] // FIXME vmovdqa expected +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i { + *mem_addr +} + +/// Stores 256-bits of integer data from `a` into memory. +/// `mem_addr` must be aligned on a 32-byte boundary or a +/// general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] // FIXME vmovdqa expected +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) { + *mem_addr = a; +} + +/// Loads 256-bits of integer data from memory into result. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { + let mut dst = _mm256_undefined_si256(); + ptr::copy_nonoverlapping( + mem_addr as *const u8, + ptr::addr_of_mut!(dst) as *mut u8, + mem::size_of::<__m256i>(), + ); + dst +} + +/// Stores 256-bits of integer data from `a` into memory. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { + mem_addr.write_unaligned(a); +} + +/// Loads packed double-precision (64-bit) floating-point elements from memory +/// into result using `mask` (elements are zeroed out when the high bit of the +/// corresponding element is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d { + maskloadpd256(mem_addr as *const i8, mask.as_i64x4()) +} + +/// Stores packed double-precision (64-bit) floating-point elements from `a` +/// into memory using `mask`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) { + maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a); +} + +/// Loads packed double-precision (64-bit) floating-point elements from memory +/// into result using `mask` (elements are zeroed out when the high bit of the +/// corresponding element is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d { + maskloadpd(mem_addr as *const i8, mask.as_i64x2()) +} + +/// Stores packed double-precision (64-bit) floating-point elements from `a` +/// into memory using `mask`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { + maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a); +} + +/// Loads packed single-precision (32-bit) floating-point elements from memory +/// into result using `mask` (elements are zeroed out when the high bit of the +/// corresponding element is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 { + maskloadps256(mem_addr as *const i8, mask.as_i32x8()) +} + +/// Stores packed single-precision (32-bit) floating-point elements from `a` +/// into memory using `mask`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) { + maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a); +} + +/// Loads packed single-precision (32-bit) floating-point elements from memory +/// into result using `mask` (elements are zeroed out when the high bit of the +/// corresponding element is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 { + maskloadps(mem_addr as *const i8, mask.as_i32x4()) +} + +/// Stores packed single-precision (32-bit) floating-point elements from `a` +/// into memory using `mask`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmaskmovps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { + maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a); +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements +/// from `a`, and returns the results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movehdup_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovshdup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_movehdup_ps(a: __m256) -> __m256 { + unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements +/// from `a`, and returns the results. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_moveldup_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovsldup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_moveldup_ps(a: __m256) -> __m256 { + unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements +/// from `a`, and returns the results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movedup_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_movedup_pd(a: __m256d) -> __m256d { + unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) } +} + +/// Loads 256-bits of integer data from unaligned memory into result. +/// This intrinsic may perform better than `_mm256_loadu_si256` when the +/// data crosses a cache line boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vlddqu))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i { + transmute(vlddqu(mem_addr as *const i8)) +} + +/// Moves integer data from a 256-bit integer vector to a 32-byte +/// aligned memory location. 
To minimize caching, the data is flagged as +/// non-temporal (unlikely to be used again soon) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovntdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) { + crate::arch::asm!( + vps!("vmovntdq", ",{a}"), + p = in(reg) mem_addr, + a = in(ymm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Moves double-precision values from a 256-bit vector of `[4 x double]` +/// to a 32-byte aligned memory location. To minimize caching, the data is +/// flagged as non-temporal (unlikely to be used again soon). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. 
+#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovntpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { + crate::arch::asm!( + vps!("vmovntpd", ",{a}"), + p = in(reg) mem_addr, + a = in(ymm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Moves single-precision floating point values from a 256-bit vector +/// of `[8 x float]` to a 32-byte aligned memory location. To minimize +/// caching, the data is flagged as non-temporal (unlikely to be used again +/// soon). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovntps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { + crate::arch::asm!( + vps!("vmovntps", ",{a}"), + p = in(reg) mem_addr, + a = in(ymm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Computes the approximate reciprocal of packed single-precision (32-bit) +/// floating-point elements in `a`, and returns the results. The maximum +/// relative error for this approximation is less than 1.5*2^-12. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vrcpps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_rcp_ps(a: __m256) -> __m256 { + unsafe { vrcpps(a) } +} + +/// Computes the approximate reciprocal square root of packed single-precision +/// (32-bit) floating-point elements in `a`, and returns the results. +/// The maximum relative error for this approximation is less than 1.5*2^-12. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vrsqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 { + unsafe { vrsqrtps(a) } +} + +/// Unpacks and interleave double-precision (64-bit) floating-point elements +/// from the high half of each 128-bit lane in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vunpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +} + +/// Unpacks and interleave single-precision (32-bit) floating-point elements +/// from the high half of each 128-bit lane in `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vunpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } +} + +/// Unpacks and interleave double-precision (64-bit) floating-point elements +/// from the low half of each 128-bit lane in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +} + +/// Unpacks and interleave single-precision (32-bit) floating-point elements +/// from the low half of each 128-bit lane in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vunpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } +} + +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and +/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) } +} + +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and +/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// the result is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) } +} + +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and +/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and +/// `CF` values are zero, otherwise return 0. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } +} + +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestzpd256(a, b) } +} + +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestcpd256(a, b) } +} + +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestnzcpd256(a, b) } +} + +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestzpd(a, b) } +} + +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestcpd(a, b) } +} + +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestnzcpd(a, b) } +} + +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestzps256(a, b) } +} + +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestcps256(a, b) } +} + +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestnzcps256(a, b) } +} + +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `ZF` value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestzps(a, b) } +} + +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return the `CF` value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestcps(a, b) } +} + +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) +/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit +/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the +/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise +/// NOT of `a` and then AND with `b`, producing an intermediate value, and set +/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value +/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values +/// are zero, otherwise return 0. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vtestps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestnzcps(a, b) } +} + +/// Sets each bit of the returned mask based on the most significant bit of the +/// corresponding packed double-precision (64-bit) floating-point element in +/// `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovmskpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_movemask_pd(a: __m256d) -> i32 { + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + unsafe { + let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO); + simd_bitmask::(mask).into() + } +} + +/// Sets each bit of the returned mask based on the most significant bit of the +/// corresponding packed single-precision (32-bit) floating-point element in +/// `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vmovmskps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_movemask_ps(a: __m256) -> i32 { + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + unsafe { + let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO); + simd_bitmask::(mask).into() + } +} + +/// Returns vector of type __m256d with all elements set to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vxorp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setzero_pd() -> __m256d { + const { unsafe { mem::zeroed() } } +} + +/// Returns vector of type __m256 with all elements set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_ps) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setzero_ps() -> __m256 { + const { unsafe { mem::zeroed() } } +} + +/// Returns vector of type __m256i with all elements set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vxor))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setzero_si256() -> __m256i { + const { unsafe { mem::zeroed() } } +} + +/// Sets packed double-precision (64-bit) floating-point elements in returned +/// vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_pd) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { + _mm256_setr_pd(d, c, b, a) +} + +/// Sets packed single-precision (32-bit) floating-point elements in returned +/// vector with the supplied values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_ps) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 { + _mm256_setr_ps(h, g, f, e, d, c, b, a) +} + +/// Sets packed 8-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi8) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_epi8( + e00: i8, + e01: i8, + e02: i8, + e03: i8, + e04: i8, + e05: i8, + e06: i8, + e07: i8, + e08: i8, + e09: i8, + e10: i8, + e11: i8, + e12: i8, + e13: i8, + e14: i8, + e15: i8, + e16: i8, + e17: i8, + e18: i8, + e19: i8, + e20: i8, + e21: i8, + e22: i8, + e23: i8, + e24: i8, + e25: i8, + e26: i8, + e27: i8, + e28: i8, + e29: i8, + e30: i8, + e31: i8, +) -> __m256i { + #[rustfmt::skip] + _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e09, e08, + e07, e06, e05, e04, e03, e02, e01, e00, + ) +} + +/// Sets packed 16-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi16) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_epi16( + e00: i16, + e01: i16, + e02: i16, + e03: i16, + e04: i16, + e05: i16, + e06: i16, + e07: i16, + e08: i16, + e09: i16, + e10: i16, + e11: i16, + e12: i16, + e13: i16, + e14: i16, + e15: i16, +) -> __m256i { + #[rustfmt::skip] + _mm256_setr_epi16( + e15, e14, e13, e12, + e11, e10, e09, e08, + e07, e06, e05, e04, + e03, e02, e01, e00, + ) +} + +/// Sets packed 32-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_epi32( + e0: i32, + e1: i32, + e2: i32, + e3: i32, + e4: i32, + e5: i32, + e6: i32, + e7: i32, +) -> __m256i { + _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +} + +/// Sets packed 64-bit integers in returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { + _mm256_setr_epi64x(d, c, b, a) +} + +/// Sets packed double-precision (64-bit) floating-point elements in returned +/// vector with the supplied values in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_pd) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { + __m256d([a, b, c, d]) +} + +/// Sets packed single-precision (32-bit) floating-point elements in returned +/// vector with the supplied values in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_ps) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 { + __m256([a, b, c, d, e, f, g, h]) +} + +/// Sets packed 8-bit integers in returned vector with the supplied values in +/// reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi8) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_epi8( + e00: i8, + e01: i8, + e02: i8, + e03: i8, + e04: i8, + e05: i8, + e06: i8, + e07: i8, + e08: i8, + e09: i8, + e10: i8, + e11: i8, + e12: i8, + e13: i8, + e14: i8, + e15: i8, + e16: i8, + e17: i8, + e18: i8, + e19: i8, + e20: i8, + e21: i8, + e22: i8, + e23: i8, + e24: i8, + e25: i8, + e26: i8, + e27: i8, + e28: i8, + e29: i8, + e30: i8, + e31: i8, +) -> __m256i { + unsafe { + #[rustfmt::skip] + transmute(i8x32::new( + e00, e01, e02, e03, e04, e05, e06, e07, + e08, e09, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31, + )) + } +} + +/// Sets packed 16-bit integers in returned vector with the supplied values in +/// reverse order. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi16) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_epi16( + e00: i16, + e01: i16, + e02: i16, + e03: i16, + e04: i16, + e05: i16, + e06: i16, + e07: i16, + e08: i16, + e09: i16, + e10: i16, + e11: i16, + e12: i16, + e13: i16, + e14: i16, + e15: i16, +) -> __m256i { + unsafe { + #[rustfmt::skip] + transmute(i16x16::new( + e00, e01, e02, e03, + e04, e05, e06, e07, + e08, e09, e10, e11, + e12, e13, e14, e15, + )) + } +} + +/// Sets packed 32-bit integers in returned vector with the supplied values in +/// reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_epi32( + e0: i32, + e1: i32, + e2: i32, + e3: i32, + e4: i32, + e5: i32, + e6: i32, + e7: i32, +) -> __m256i { + unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } +} + +/// Sets packed 64-bit integers in returned vector with the supplied values in +/// reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi64x) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { + unsafe { transmute(i64x4::new(a, b, c, d)) } +} + +/// Broadcasts double-precision (64-bit) floating-point value `a` to all +/// elements of returned vector. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_pd) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set1_pd(a: f64) -> __m256d { + _mm256_setr_pd(a, a, a, a) +} + +/// Broadcasts single-precision (32-bit) floating-point value `a` to all +/// elements of returned vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_ps) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set1_ps(a: f32) -> __m256 { + _mm256_setr_ps(a, a, a, a, a, a, a, a) +} + +/// Broadcasts 8-bit integer `a` to all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastb`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi8) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set1_epi8(a: i8) -> __m256i { + #[rustfmt::skip] + _mm256_setr_epi8( + a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a, + ) +} + +/// Broadcasts 16-bit integer `a` to all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastw`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16) +#[inline] +#[target_feature(enable = "avx")] +//#[cfg_attr(test, assert_instr(vpshufb))] +#[cfg_attr(test, assert_instr(vinsertf128))] +// This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set1_epi16(a: i16) -> __m256i { + _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +} + +/// Broadcasts 32-bit integer `a` to all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastd`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set1_epi32(a: i32) -> __m256i { + _mm256_setr_epi32(a, a, a, a, a, a, a, a) +} + +/// Broadcasts 64-bit integer `a` to all elements of returned vector. +/// This intrinsic may generate the `vpbroadcastq`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))] +#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set1_epi64x(a: i64) -> __m256i { + _mm256_setr_epi64x(a, a, a, a) +} + +/// Cast vector of type __m256d to type __m256. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_ps) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castpd_ps(a: __m256d) -> __m256 { + unsafe { transmute(a) } +} + +/// Cast vector of type __m256 to type __m256d. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_pd) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castps_pd(a: __m256) -> __m256d { + unsafe { transmute(a) } +} + +/// Casts vector of type __m256 to type __m256i. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castps_si256(a: __m256) -> __m256i { + unsafe { transmute(a) } +} + +/// Casts vector of type __m256i to type __m256. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 { + unsafe { transmute(a) } +} + +/// Casts vector of type __m256d to type __m256i. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_si256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castpd_si256(a: __m256d) -> __m256i { + unsafe { transmute(a) } +} + +/// Casts vector of type __m256i to type __m256d. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_pd) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d { + unsafe { transmute(a) } +} + +/// Casts vector of type __m256 to type __m128. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps256_ps128) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castps256_ps128(a: __m256) -> __m128 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} + +/// Casts vector of type __m256d to type __m128d. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd256_pd128) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { + unsafe { simd_shuffle!(a, a, [0, 1]) } +} + +/// Casts vector of type __m256i to type __m128i. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_si128) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i { + unsafe { + let a = a.as_i64x4(); + let dst: i64x2 = simd_shuffle!(a, a, [0, 1]); + transmute(dst) + } +} + +/// Casts vector of type __m128 to type __m256; +/// the upper 128 bits of the result are undefined. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castps128_ps256(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) } +} + +/// Casts vector of type __m128d to type __m256d; +/// the upper 128 bits of the result are undefined. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { + unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) } +} + +/// Casts vector of type __m128i to type __m256i; +/// the upper 128 bits of the result are undefined. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i64x2(); + let undefined = i64x2::ZERO; + let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]); + transmute(dst) + } +} + +/// Constructs a 256-bit floating-point vector of `[8 x float]` from a +/// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain +/// the value of the source vector. The upper 128 bits are set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) } +} + +/// Constructs a 256-bit integer vector from a 128-bit integer vector. +/// The lower 128 bits contain the value of the source vector. The upper +/// 128 bits are set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { + unsafe { + let b = i64x2::ZERO; + let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]); + transmute(dst) + } +} + +/// Constructs a 256-bit floating-point vector of `[4 x double]` from a +/// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits +/// contain the value of the source vector. The upper 128 bits are set +/// to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic is only used for compilation and does not generate any +// instructions, thus it has zero latency. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { + unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) } +} + +/// Returns vector of type `__m256` with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_undefined_ps() -> __m256 { + const { unsafe { mem::zeroed() } } +} + +/// Returns vector of type `__m256d` with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_undefined_pd() -> __m256d { + const { unsafe { mem::zeroed() } } +} + +/// Returns vector of type __m256i with with indeterminate elements. 
+/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_undefined_si256() -> __m256i { + const { unsafe { mem::zeroed() } } +} + +/// Sets packed __m256 returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { + unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } +} + +/// Sets packed __m256d returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { + unsafe { + let hi: __m128 = transmute(hi); + let lo: __m128 = transmute(lo); + transmute(_mm256_set_m128(hi, lo)) + } +} + +/// Sets packed __m256i returned vector with the supplied values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { + unsafe { + let hi: __m128 = transmute(hi); + let lo: __m128 = transmute(lo); + transmute(_mm256_set_m128(hi, lo)) + } +} + +/// Sets packed __m256 returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { + _mm256_set_m128(hi, lo) +} + +/// Sets packed __m256d returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { + _mm256_set_m128d(hi, lo) +} + +/// Sets packed __m256i returned vector with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i) +#[inline] +#[target_feature(enable = "avx")] +#[cfg_attr(test, assert_instr(vinsertf128))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { + _mm256_set_m128i(hi, lo) +} + +/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit) +/// floating-point elements) from memory, and combine them into a 256-bit +/// value. 
+/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 { + let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr)); + _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr)) +} + +/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit) +/// floating-point elements) from memory, and combine them into a 256-bit +/// value. +/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d { + let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr)); + _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr)) +} + +/// Loads two 128-bit values (composed of integer data) from memory, and combine +/// them into a 256-bit value. +/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i { + let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr)); + _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr)) +} + +/// Stores the high and low 128-bit halves (each composed of 4 packed +/// single-precision (32-bit) floating-point elements) from `a` into memory two +/// different 128-bit locations. +/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) { + let lo = _mm256_castps256_ps128(a); + _mm_storeu_ps(loaddr, lo); + let hi = _mm256_extractf128_ps::<1>(a); + _mm_storeu_ps(hiaddr, hi); +} + +/// Stores the high and low 128-bit halves (each composed of 2 packed +/// double-precision (64-bit) floating-point elements) from `a` into memory two +/// different 128-bit locations. +/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) { + let lo = _mm256_castpd256_pd128(a); + _mm_storeu_pd(loaddr, lo); + let hi = _mm256_extractf128_pd::<1>(a); + _mm_storeu_pd(hiaddr, hi); +} + +/// Stores the high and low 128-bit halves (each composed of integer data) from +/// `a` into memory two different 128-bit locations. 
+/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) { + let lo = _mm256_castsi256_si128(a); + _mm_storeu_si128(loaddr, lo); + let hi = _mm256_extractf128_si256::<1>(a); + _mm_storeu_si128(hiaddr, hi); +} + +/// Returns the first element of the input vector of `[8 x float]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32) +#[inline] +#[target_feature(enable = "avx")] +//#[cfg_attr(test, assert_instr(movss))] FIXME +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtss_f32(a: __m256) -> f32 { + unsafe { simd_extract!(a, 0) } +} + +// LLVM intrinsics used in the above functions +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx.round.pd.256"] + fn roundpd256(a: __m256d, b: i32) -> __m256d; + #[link_name = "llvm.x86.avx.round.ps.256"] + fn roundps256(a: __m256, b: i32) -> __m256; + #[link_name = "llvm.x86.avx.dp.ps.256"] + fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256; + #[link_name = "llvm.x86.avx.hadd.pd.256"] + fn vhaddpd(a: __m256d, b: __m256d) -> __m256d; + #[link_name = "llvm.x86.avx.hadd.ps.256"] + fn vhaddps(a: __m256, b: __m256) -> __m256; + #[link_name = "llvm.x86.avx.hsub.pd.256"] + fn vhsubpd(a: __m256d, b: __m256d) -> __m256d; + #[link_name = "llvm.x86.avx.hsub.ps.256"] + fn vhsubps(a: __m256, b: __m256) -> __m256; + #[link_name = "llvm.x86.sse2.cmp.pd"] + fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; + #[link_name = "llvm.x86.avx.cmp.pd.256"] + fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d; + #[link_name = 
"llvm.x86.sse.cmp.ps"] + fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128; + #[link_name = "llvm.x86.avx.cmp.ps.256"] + fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256; + #[link_name = "llvm.x86.sse2.cmp.sd"] + fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; + #[link_name = "llvm.x86.sse.cmp.ss"] + fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128; + #[link_name = "llvm.x86.avx.cvt.ps2dq.256"] + fn vcvtps2dq(a: __m256) -> i32x8; + #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"] + fn vcvttpd2dq(a: __m256d) -> i32x4; + #[link_name = "llvm.x86.avx.cvt.pd2dq.256"] + fn vcvtpd2dq(a: __m256d) -> i32x4; + #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"] + fn vcvttps2dq(a: __m256) -> i32x8; + #[link_name = "llvm.x86.avx.vzeroall"] + fn vzeroall(); + #[link_name = "llvm.x86.avx.vzeroupper"] + fn vzeroupper(); + #[link_name = "llvm.x86.avx.vpermilvar.ps.256"] + fn vpermilps256(a: __m256, b: i32x8) -> __m256; + #[link_name = "llvm.x86.avx.vpermilvar.ps"] + fn vpermilps(a: __m128, b: i32x4) -> __m128; + #[link_name = "llvm.x86.avx.vpermilvar.pd.256"] + fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d; + #[link_name = "llvm.x86.avx.vpermilvar.pd"] + fn vpermilpd(a: __m128d, b: i64x2) -> __m128d; + #[link_name = "llvm.x86.avx.vperm2f128.ps.256"] + fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256; + #[link_name = "llvm.x86.avx.vperm2f128.pd.256"] + fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d; + #[link_name = "llvm.x86.avx.vperm2f128.si.256"] + fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8; + #[link_name = "llvm.x86.avx.maskload.pd.256"] + fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d; + #[link_name = "llvm.x86.avx.maskstore.pd.256"] + fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d); + #[link_name = "llvm.x86.avx.maskload.pd"] + fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d; + #[link_name = "llvm.x86.avx.maskstore.pd"] + fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d); + 
#[link_name = "llvm.x86.avx.maskload.ps.256"] + fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256; + #[link_name = "llvm.x86.avx.maskstore.ps.256"] + fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256); + #[link_name = "llvm.x86.avx.maskload.ps"] + fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128; + #[link_name = "llvm.x86.avx.maskstore.ps"] + fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128); + #[link_name = "llvm.x86.avx.ldu.dq.256"] + fn vlddqu(mem_addr: *const i8) -> i8x32; + #[link_name = "llvm.x86.avx.rcp.ps.256"] + fn vrcpps(a: __m256) -> __m256; + #[link_name = "llvm.x86.avx.rsqrt.ps.256"] + fn vrsqrtps(a: __m256) -> __m256; + #[link_name = "llvm.x86.avx.ptestz.256"] + fn ptestz256(a: i64x4, b: i64x4) -> i32; + #[link_name = "llvm.x86.avx.ptestc.256"] + fn ptestc256(a: i64x4, b: i64x4) -> i32; + #[link_name = "llvm.x86.avx.ptestnzc.256"] + fn ptestnzc256(a: i64x4, b: i64x4) -> i32; + #[link_name = "llvm.x86.avx.vtestz.pd.256"] + fn vtestzpd256(a: __m256d, b: __m256d) -> i32; + #[link_name = "llvm.x86.avx.vtestc.pd.256"] + fn vtestcpd256(a: __m256d, b: __m256d) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.pd.256"] + fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32; + #[link_name = "llvm.x86.avx.vtestz.pd"] + fn vtestzpd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.avx.vtestc.pd"] + fn vtestcpd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.pd"] + fn vtestnzcpd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.avx.vtestz.ps.256"] + fn vtestzps256(a: __m256, b: __m256) -> i32; + #[link_name = "llvm.x86.avx.vtestc.ps.256"] + fn vtestcps256(a: __m256, b: __m256) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.ps.256"] + fn vtestnzcps256(a: __m256, b: __m256) -> i32; + #[link_name = "llvm.x86.avx.vtestz.ps"] + fn vtestzps(a: __m128, b: __m128) -> i32; + #[link_name = "llvm.x86.avx.vtestc.ps"] + fn vtestcps(a: __m128, b: __m128) -> i32; + #[link_name = "llvm.x86.avx.vtestnzc.ps"] + 
fn vtestnzcps(a: __m128, b: __m128) -> i32; + #[link_name = "llvm.x86.avx.min.ps.256"] + fn vminps(a: __m256, b: __m256) -> __m256; + #[link_name = "llvm.x86.avx.max.ps.256"] + fn vmaxps(a: __m256, b: __m256) -> __m256; + #[link_name = "llvm.x86.avx.min.pd.256"] + fn vminpd(a: __m256d, b: __m256d) -> __m256d; + #[link_name = "llvm.x86.avx.max.pd.256"] + fn vmaxpd(a: __m256d, b: __m256d) -> __m256d; +} + +#[cfg(test)] +mod tests { + use crate::hint::black_box; + use crate::ptr; + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_add_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_add_pd(a, b); + let e = _mm256_setr_pd(6., 8., 10., 12.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_add_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_add_ps(a, b); + let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_and_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(0.6); + let r = _mm256_and_pd(a, b); + let e = _mm256_set1_pd(0.5); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_and_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(0.6); + let r = _mm256_and_ps(a, b); + let e = _mm256_set1_ps(0.5); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_or_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(0.6); + let r = _mm256_or_pd(a, b); + let e = _mm256_set1_pd(1.2); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_or_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(0.6); + let r = _mm256_or_ps(a, b); + let e = _mm256_set1_ps(1.2); + assert_eq_m256(r, e); + } + + 
#[simd_test(enable = "avx")] + unsafe fn test_mm256_shuffle_pd() { + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b); + let e = _mm256_setr_pd(4., 3., 8., 7.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_shuffle_ps() { + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b); + let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_andnot_pd() { + let a = _mm256_set1_pd(0.); + let b = _mm256_set1_pd(0.6); + let r = _mm256_andnot_pd(a, b); + assert_eq_m256d(r, b); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_andnot_ps() { + let a = _mm256_set1_ps(0.); + let b = _mm256_set1_ps(0.6); + let r = _mm256_andnot_ps(a, b); + assert_eq_m256(r, b); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_max_pd() { + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_max_pd(a, b); + let e = _mm256_setr_pd(2., 4., 6., 8.); + assert_eq_m256d(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0)); + let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0)); + let wu: [u64; 4] = transmute(w); + let xu: [u64; 4] = transmute(x); + assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]); + assert_eq!(xu, [0u64; 4]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. 
+ let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0)); + let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN)); + let yf: [f64; 4] = transmute(y); + let zf: [f64; 4] = transmute(z); + assert_eq!(yf, [0.0; 4]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_max_ps() { + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_max_ps(a, b); + let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.); + assert_eq_m256(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0)); + let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0)); + let wu: [u32; 8] = transmute(w); + let xu: [u32; 8] = transmute(x); + assert_eq!(wu, [0x8000_0000u32; 8]); + assert_eq!(xu, [0u32; 8]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. + let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0)); + let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN)); + let yf: [f32; 8] = transmute(y); + let zf: [f32; 8] = transmute(z); + assert_eq!(yf, [0.0; 8]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_min_pd() { + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_min_pd(a, b); + let e = _mm256_setr_pd(1., 3., 5., 7.); + assert_eq_m256d(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. 
+ let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0)); + let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0)); + let wu: [u64; 4] = transmute(w); + let xu: [u64; 4] = transmute(x); + assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]); + assert_eq!(xu, [0u64; 4]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. + let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0)); + let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN)); + let yf: [f64; 4] = transmute(y); + let zf: [f64; 4] = transmute(z); + assert_eq!(yf, [0.0; 4]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_min_ps() { + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_min_ps(a, b); + let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.); + assert_eq_m256(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0)); + let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0)); + let wu: [u32; 8] = transmute(w); + let xu: [u32; 8] = transmute(x); + assert_eq!(wu, [0x8000_0000u32; 8]); + assert_eq!(xu, [0u32; 8]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. 
+ let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0)); + let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN)); + let yf: [f32; 8] = transmute(y); + let zf: [f32; 8] = transmute(z); + assert_eq!(yf, [0.0; 8]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_mul_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_mul_pd(a, b); + let e = _mm256_setr_pd(5., 12., 21., 32.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_mul_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_mul_ps(a, b); + let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_addsub_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_addsub_pd(a, b); + let e = _mm256_setr_pd(-4., 8., -4., 12.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_addsub_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_addsub_ps(a, b); + let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_sub_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_sub_pd(a, b); + let e = _mm256_setr_pd(-4., -4., -4., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_sub_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.); + let r = _mm256_sub_ps(a, b); + let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.); + 
assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_round_pd() { + let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); + let result_closest = _mm256_round_pd::<0b0000>(a); + let result_down = _mm256_round_pd::<0b0001>(a); + let result_up = _mm256_round_pd::<0b0010>(a); + let expected_closest = _mm256_setr_pd(2., 2., 4., -1.); + let expected_down = _mm256_setr_pd(1., 2., 3., -2.); + let expected_up = _mm256_setr_pd(2., 3., 4., -1.); + assert_eq_m256d(result_closest, expected_closest); + assert_eq_m256d(result_down, expected_down); + assert_eq_m256d(result_up, expected_up); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_floor_pd() { + let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); + let result_down = _mm256_floor_pd(a); + let expected_down = _mm256_setr_pd(1., 2., 3., -2.); + assert_eq_m256d(result_down, expected_down); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_ceil_pd() { + let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); + let result_up = _mm256_ceil_pd(a); + let expected_up = _mm256_setr_pd(2., 3., 4., -1.); + assert_eq_m256d(result_up, expected_up); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_round_ps() { + let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); + let result_closest = _mm256_round_ps::<0b0000>(a); + let result_down = _mm256_round_ps::<0b0001>(a); + let result_up = _mm256_round_ps::<0b0010>(a); + let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.); + let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); + let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); + assert_eq_m256(result_closest, expected_closest); + assert_eq_m256(result_down, expected_down); + assert_eq_m256(result_up, expected_up); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_floor_ps() { + let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); + let result_down = _mm256_floor_ps(a); + let expected_down = 
_mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); + assert_eq_m256(result_down, expected_down); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_ceil_ps() { + let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); + let result_up = _mm256_ceil_ps(a); + let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); + assert_eq_m256(result_up, expected_up); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_sqrt_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_sqrt_pd(a); + let e = _mm256_setr_pd(2., 3., 4., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_sqrt_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_sqrt_ps(a); + let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_div_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_div_ps(a, b); + let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_div_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_div_pd(a, b); + let e = _mm256_setr_pd(1., 3., 8., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_blend_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_blend_pd::<0x0>(a, b); + assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.)); + let r = _mm256_blend_pd::<0x3>(a, b); + assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.)); + let r = _mm256_blend_pd::<0xF>(a, b); + assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_blend_ps() { + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = 
_mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_blend_ps::<0x0>(a, b); + assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.)); + let r = _mm256_blend_ps::<0x3>(a, b); + assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.)); + let r = _mm256_blend_ps::<0xF>(a, b); + assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_blendv_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64); + let r = _mm256_blendv_pd(a, b, c); + let e = _mm256_setr_pd(4., 9., 2., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_blendv_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + #[rustfmt::skip] + let c = _mm256_setr_ps( + 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32, + ); + let r = _mm256_blendv_ps(a, b, c); + let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_dp_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_dp_ps::<0xFF>(a, b); + let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_hadd_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_hadd_pd(a, b); + let e = _mm256_setr_pd(13., 7., 41., 7.); + assert_eq_m256d(r, e); + + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_hadd_pd(a, b); + let e = _mm256_setr_pd(3., 11., 7., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_hadd_ps() { + let a = 
_mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_hadd_ps(a, b); + let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.); + assert_eq_m256(r, e); + + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_hadd_ps(a, b); + let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_hsub_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_hsub_pd(a, b); + let e = _mm256_setr_pd(-5., 1., -9., -3.); + assert_eq_m256d(r, e); + + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_hsub_pd(a, b); + let e = _mm256_setr_pd(-1., -1., -1., -1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_hsub_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_hsub_ps(a, b); + let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.); + assert_eq_m256(r, e); + + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_hsub_ps(a, b); + let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_xor_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_set1_pd(0.); + let r = _mm256_xor_pd(a, b); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_xor_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_set1_ps(0.); + let r = _mm256_xor_ps(a, b); + assert_eq_m256(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_cmp_pd() { + let a = _mm_setr_pd(4., 9.); + let b 
= _mm_setr_pd(4., 3.); + let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b); + assert!(get_m128d(r, 0).is_nan()); + assert!(get_m128d(r, 1).is_nan()); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cmp_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b); + let e = _mm256_set1_pd(0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_cmp_ps() { + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b); + assert!(get_m128(r, 0).is_nan()); + assert_eq!(get_m128(r, 1), 0.); + assert_eq!(get_m128(r, 2), 0.); + assert_eq!(get_m128(r, 3), 0.); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cmp_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b); + let e = _mm256_set1_ps(0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_cmp_sd() { + let a = _mm_setr_pd(4., 9.); + let b = _mm_setr_pd(4., 3.); + let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b); + assert!(get_m128d(r, 0).is_nan()); + assert_eq!(get_m128d(r, 1), 9.); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_cmp_ss() { + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b); + assert!(get_m128(r, 0).is_nan()); + assert_eq!(get_m128(r, 1), 3.); + assert_eq!(get_m128(r, 2), 2.); + assert_eq!(get_m128(r, 3), 5.); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtepi32_pd() { + let a = _mm_setr_epi32(4, 9, 16, 25); + let r = _mm256_cvtepi32_pd(a); + let e = _mm256_setr_pd(4., 9., 16., 25.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtepi32_ps() { + let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); + let r = _mm256_cvtepi32_ps(a); + let e = _mm256_setr_ps(4., 9., 
16., 25., 4., 9., 16., 25.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtpd_ps() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvtpd_ps(a); + let e = _mm_setr_ps(4., 9., 16., 25.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtps_epi32() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_cvtps_epi32(a); + let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtps_pd() { + let a = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm256_cvtps_pd(a); + let e = _mm256_setr_pd(4., 9., 16., 25.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtsd_f64() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_cvtsd_f64(a); + assert_eq!(r, 1.); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvttpd_epi32() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvttpd_epi32(a); + let e = _mm_setr_epi32(4, 9, 16, 25); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtpd_epi32() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvtpd_epi32(a); + let e = _mm_setr_epi32(4, 9, 16, 25); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvttps_epi32() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_cvttps_epi32(a); + let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extractf128_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_extractf128_ps::<0>(a); + let e = _mm_setr_ps(4., 3., 2., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extractf128_pd() { + let a = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_extractf128_pd::<0>(a); + 
let e = _mm_setr_pd(4., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extractf128_si256() { + let a = _mm256_setr_epi64x(4, 3, 2, 5); + let r = _mm256_extractf128_si256::<0>(a); + let e = _mm_setr_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi32() { + let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); + let r1 = _mm256_extract_epi32::<0>(a); + let r2 = _mm256_extract_epi32::<3>(a); + assert_eq!(r1, -1); + assert_eq!(r2, 3); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtsi256_si32() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtsi256_si32(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx")] + #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri + unsafe fn test_mm256_zeroall() { + _mm256_zeroall(); + } + + #[simd_test(enable = "avx")] + #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri + unsafe fn test_mm256_zeroupper() { + _mm256_zeroupper(); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permutevar_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_permutevar_ps(a, b); + let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_permutevar_ps() { + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_permutevar_ps(a, b); + let e = _mm_setr_ps(3., 2., 5., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permute_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_permute_ps::<0x1b>(a); + let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_permute_ps() { + let a = _mm_setr_ps(4., 3., 2., 5.); 
+ let r = _mm_permute_ps::<0x1b>(a); + let e = _mm_setr_ps(5., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permutevar_pd() { + let a = _mm256_setr_pd(4., 3., 2., 5.); + let b = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_permutevar_pd(a, b); + let e = _mm256_setr_pd(4., 3., 5., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_permutevar_pd() { + let a = _mm_setr_pd(4., 3.); + let b = _mm_setr_epi64x(3, 0); + let r = _mm_permutevar_pd(a, b); + let e = _mm_setr_pd(3., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permute_pd() { + let a = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_permute_pd::<5>(a); + let e = _mm256_setr_pd(3., 4., 5., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_permute_pd() { + let a = _mm_setr_pd(4., 3.); + let r = _mm_permute_pd::<1>(a); + let e = _mm_setr_pd(3., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permute2f128_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_permute2f128_ps::<0x13>(a, b); + let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permute2f128_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_permute2f128_pd::<0x31>(a, b); + let e = _mm256_setr_pd(3., 4., 7., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_permute2f128_si256() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4); + let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8); + let r = _mm256_permute2f128_si256::<0x20>(a, b); + let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn 
test_mm256_broadcast_ss() { + let r = _mm256_broadcast_ss(&3.); + let e = _mm256_set1_ps(3.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_broadcast_ss() { + let r = _mm_broadcast_ss(&3.); + let e = _mm_set1_ps(3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_broadcast_sd() { + let r = _mm256_broadcast_sd(&3.); + let e = _mm256_set1_pd(3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_broadcast_ps() { + let a = _mm_setr_ps(4., 3., 2., 5.); + let r = _mm256_broadcast_ps(&a); + let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_broadcast_pd() { + let a = _mm_setr_pd(4., 3.); + let r = _mm256_broadcast_pd(&a); + let e = _mm256_setr_pd(4., 3., 4., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insertf128_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let b = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm256_insertf128_ps::<0>(a, b); + let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insertf128_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm_setr_pd(5., 6.); + let r = _mm256_insertf128_pd::<0>(a, b); + let e = _mm256_setr_pd(5., 6., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insertf128_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm_setr_epi64x(5, 6); + let r = _mm256_insertf128_si256::<0>(a, b); + let e = _mm256_setr_epi64x(5, 6, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insert_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = 
_mm256_insert_epi8::<31>(a, 0); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insert_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let r = _mm256_insert_epi16::<15>(a, 0); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insert_epi32() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_insert_epi32::<7>(a, 0); + let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_load_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let p = ptr::addr_of!(a) as *const f64; + let r = _mm256_load_pd(p); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_store_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let mut r = _mm256_undefined_pd(); + _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_load_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let p = ptr::addr_of!(a) as *const f32; + let r = _mm256_load_ps(p); + let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_store_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let mut r = _mm256_undefined_ps(); + _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a); + assert_eq_m256(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_loadu_pd() { + let a = &[1.0f64, 2., 3., 4.]; + let p = 
a.as_ptr(); + let r = _mm256_loadu_pd(black_box(p)); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_storeu_pd() { + let a = _mm256_set1_pd(9.); + let mut r = _mm256_undefined_pd(); + _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_loadu_ps() { + let a = &[4., 3., 2., 5., 8., 9., 64., 50.]; + let p = a.as_ptr(); + let r = _mm256_loadu_ps(black_box(p)); + let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_storeu_ps() { + let a = _mm256_set1_ps(9.); + let mut r = _mm256_undefined_ps(); + _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a); + assert_eq_m256(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_load_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let p = ptr::addr_of!(a); + let r = _mm256_load_si256(p); + let e = _mm256_setr_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_store_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let mut r = _mm256_undefined_si256(); + _mm256_store_si256(ptr::addr_of_mut!(r), a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_loadu_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let p = ptr::addr_of!(a); + let r = _mm256_loadu_si256(black_box(p)); + let e = _mm256_setr_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_storeu_si256() { + let a = _mm256_set1_epi8(9); + let mut r = _mm256_undefined_si256(); + _mm256_storeu_si256(ptr::addr_of_mut!(r), a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_maskload_pd() { + let a = &[1.0f64, 2., 3., 4.]; + let p = a.as_ptr(); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let r = 
_mm256_maskload_pd(black_box(p), mask); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_maskstore_pd() { + let mut r = _mm256_set1_pd(0.); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let a = _mm256_setr_pd(1., 2., 3., 4.); + _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_maskload_pd() { + let a = &[1.0f64, 2.]; + let p = a.as_ptr(); + let mask = _mm_setr_epi64x(0, !0); + let r = _mm_maskload_pd(black_box(p), mask); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_maskstore_pd() { + let mut r = _mm_set1_pd(0.); + let mask = _mm_setr_epi64x(0, !0); + let a = _mm_setr_pd(1., 2.); + _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_maskload_ps() { + let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.]; + let p = a.as_ptr(); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let r = _mm256_maskload_ps(black_box(p), mask); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_maskstore_ps() { + let mut r = _mm256_set1_ps(0.); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_maskload_ps() { + let a = &[1.0f32, 2., 3., 4.]; + let p = a.as_ptr(); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let r = _mm_maskload_ps(black_box(p), mask); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); + } + + 
#[simd_test(enable = "avx")] + unsafe fn test_mm_maskstore_ps() { + let mut r = _mm_set1_ps(0.); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let a = _mm_setr_ps(1., 2., 3., 4.); + _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_movehdup_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_movehdup_ps(a); + let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_moveldup_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_moveldup_ps(a); + let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_movedup_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_movedup_pd(a); + let e = _mm256_setr_pd(1., 1., 3., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_lddqu_si256() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let p = ptr::addr_of!(a); + let r = _mm256_lddqu_si256(black_box(p)); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri + unsafe fn test_mm256_stream_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let mut r = _mm256_undefined_si256(); + _mm256_stream_si256(ptr::addr_of_mut!(r), a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx")] + #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri + unsafe fn 
test_mm256_stream_pd() { + #[repr(align(32))] + struct Memory { + pub data: [f64; 4], + } + let a = _mm256_set1_pd(7.0); + let mut mem = Memory { data: [-1.0; 4] }; + + _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); + for i in 0..4 { + assert_eq!(mem.data[i], get_m256d(a, i)); + } + } + + #[simd_test(enable = "avx")] + #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri + unsafe fn test_mm256_stream_ps() { + #[repr(align(32))] + struct Memory { + pub data: [f32; 8], + } + let a = _mm256_set1_ps(7.0); + let mut mem = Memory { data: [-1.0; 8] }; + + _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a); + for i in 0..8 { + assert_eq!(mem.data[i], get_m256(a, i)); + } + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_rcp_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_rcp_ps(a); + #[rustfmt::skip] + let e = _mm256_setr_ps( + 0.99975586, 0.49987793, 0.33325195, 0.24993896, + 0.19995117, 0.16662598, 0.14282227, 0.12496948, + ); + let rel_err = 0.00048828125; + for i in 0..8 { + assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err); + } + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_rsqrt_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_rsqrt_ps(a); + #[rustfmt::skip] + let e = _mm256_setr_ps( + 0.99975586, 0.7069092, 0.5772705, 0.49987793, + 0.44714355, 0.40820313, 0.3779297, 0.3534546, + ); + let rel_err = 0.00048828125; + for i in 0..8 { + assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. 
* rel_err); + } + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_unpackhi_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_unpackhi_pd(a, b); + let e = _mm256_setr_pd(2., 6., 4., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_unpackhi_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_unpackhi_ps(a, b); + let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_unpacklo_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_unpacklo_pd(a, b); + let e = _mm256_setr_pd(1., 5., 3., 7.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_unpacklo_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_unpacklo_ps(a, b); + let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testz_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testz_si256(a, b); + assert_eq!(r, 0); + let b = _mm256_set1_epi64x(0); + let r = _mm256_testz_si256(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testc_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testc_si256(a, b); + assert_eq!(r, 0); + let b = _mm256_set1_epi64x(0); + let r = _mm256_testc_si256(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testnzc_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testnzc_si256(a, b); + 
assert_eq!(r, 1); + let a = _mm256_setr_epi64x(0, 0, 0, 0); + let b = _mm256_setr_epi64x(0, 0, 0, 0); + let r = _mm256_testnzc_si256(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testz_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testz_pd(a, b); + assert_eq!(r, 1); + let a = _mm256_set1_pd(-1.); + let r = _mm256_testz_pd(a, a); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testc_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testc_pd(a, b); + assert_eq!(r, 1); + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(-1.); + let r = _mm256_testc_pd(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testnzc_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testnzc_pd(a, b); + assert_eq!(r, 0); + let a = _mm256_setr_pd(1., -1., -1., -1.); + let b = _mm256_setr_pd(-1., -1., 1., 1.); + let r = _mm256_testnzc_pd(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_testz_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testz_pd(a, b); + assert_eq!(r, 1); + let a = _mm_set1_pd(-1.); + let r = _mm_testz_pd(a, a); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_testc_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testc_pd(a, b); + assert_eq!(r, 1); + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(-1.); + let r = _mm_testc_pd(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_testnzc_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testnzc_pd(a, b); + assert_eq!(r, 0); + let a = _mm_setr_pd(1., -1.); + let b = _mm_setr_pd(-1., -1.); + let r = _mm_testnzc_pd(a, b); + assert_eq!(r, 1); + } + + 
#[simd_test(enable = "avx")] + unsafe fn test_mm256_testz_ps() { + let a = _mm256_set1_ps(1.); + let r = _mm256_testz_ps(a, a); + assert_eq!(r, 1); + let a = _mm256_set1_ps(-1.); + let r = _mm256_testz_ps(a, a); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testc_ps() { + let a = _mm256_set1_ps(1.); + let r = _mm256_testc_ps(a, a); + assert_eq!(r, 1); + let b = _mm256_set1_ps(-1.); + let r = _mm256_testc_ps(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_testnzc_ps() { + let a = _mm256_set1_ps(1.); + let r = _mm256_testnzc_ps(a, a); + assert_eq!(r, 0); + let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.); + let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.); + let r = _mm256_testnzc_ps(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_testz_ps() { + let a = _mm_set1_ps(1.); + let r = _mm_testz_ps(a, a); + assert_eq!(r, 1); + let a = _mm_set1_ps(-1.); + let r = _mm_testz_ps(a, a); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_testc_ps() { + let a = _mm_set1_ps(1.); + let r = _mm_testc_ps(a, a); + assert_eq!(r, 1); + let b = _mm_set1_ps(-1.); + let r = _mm_testc_ps(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm_testnzc_ps() { + let a = _mm_set1_ps(1.); + let r = _mm_testnzc_ps(a, a); + assert_eq!(r, 0); + let a = _mm_setr_ps(1., -1., -1., -1.); + let b = _mm_setr_ps(-1., -1., 1., 1.); + let r = _mm_testnzc_ps(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_movemask_pd() { + let a = _mm256_setr_pd(1., -2., 3., -4.); + let r = _mm256_movemask_pd(a); + assert_eq!(r, 0xA); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_movemask_ps() { + let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.); + let r = _mm256_movemask_ps(a); + assert_eq!(r, 0xAA); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setzero_pd() { + let r 
= _mm256_setzero_pd(); + assert_eq_m256d(r, _mm256_set1_pd(0.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setzero_ps() { + let r = _mm256_setzero_ps(); + assert_eq_m256(r, _mm256_set1_ps(0.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setzero_si256() { + let r = _mm256_setzero_si256(); + assert_eq_m256i(r, _mm256_set1_epi8(0)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_pd() { + let r = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_ps() { + let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_epi8() { + #[rustfmt::skip] + let r = _mm256_set_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 32, 31, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1 + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_epi16() { + #[rustfmt::skip] + let r = _mm256_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 16, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_epi32() { + let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_epi64x() { + let r = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_pd() { + let r = _mm256_setr_pd(1., 2., 3., 4.); + 
assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_ps() { + let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_epi8() { + #[rustfmt::skip] + let r = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32 + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_epi16() { + #[rustfmt::skip] + let r = _mm256_setr_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_epi32() { + let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_epi64x() { + let r = _mm256_setr_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set1_pd() { + let r = _mm256_set1_pd(1.); + assert_eq_m256d(r, _mm256_set1_pd(1.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set1_ps() { + let r = _mm256_set1_ps(1.); + assert_eq_m256(r, _mm256_set1_ps(1.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set1_epi8() { + let r = _mm256_set1_epi8(1); + assert_eq_m256i(r, _mm256_set1_epi8(1)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set1_epi16() { + let r = _mm256_set1_epi16(1); + assert_eq_m256i(r, _mm256_set1_epi16(1)); + } + + 
#[simd_test(enable = "avx")] + unsafe fn test_mm256_set1_epi32() { + let r = _mm256_set1_epi32(1); + assert_eq_m256i(r, _mm256_set1_epi32(1)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set1_epi64x() { + let r = _mm256_set1_epi64x(1); + assert_eq_m256i(r, _mm256_set1_epi64x(1)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castpd_ps() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_castpd_ps(a); + let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castps_pd() { + let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); + let r = _mm256_castps_pd(a); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castps_si256() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_castps_si256(a); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 0, 0, -128, 63, 0, 0, 0, 64, + 0, 0, 64, 64, 0, 0, -128, 64, + 0, 0, -96, 64, 0, 0, -64, 64, + 0, 0, -32, 64, 0, 0, 0, 65, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castsi256_ps() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 0, 0, -128, 63, 0, 0, 0, 64, + 0, 0, 64, 64, 0, 0, -128, 64, + 0, 0, -96, 64, 0, 0, -64, 64, + 0, 0, -32, 64, 0, 0, 0, 65, + ); + let r = _mm256_castsi256_ps(a); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castpd_si256() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_castpd_si256(a); + assert_eq_m256d(transmute(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castsi256_pd() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_castsi256_pd(a); + assert_eq_m256d(r, transmute(a)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castps256_ps128() { + let a = _mm256_setr_ps(1., 
2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_castps256_ps128(a); + assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castpd256_pd128() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_castpd256_pd128(a); + assert_eq_m128d(r, _mm_setr_pd(1., 2.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castsi256_si128() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_castsi256_si128(a); + assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castps128_ps256() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm256_castps128_ps256(a); + assert_eq_m128(_mm256_castps256_ps128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castpd128_pd256() { + let a = _mm_setr_pd(1., 2.); + let r = _mm256_castpd128_pd256(a); + assert_eq_m128d(_mm256_castpd256_pd128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castsi128_si256() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm256_castsi128_si256(a); + assert_eq_m128i(_mm256_castsi256_si128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_zextps128_ps256() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm256_zextps128_ps256(a); + let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_zextsi128_si256() { + let a = _mm_setr_epi64x(1, 2); + let r = _mm256_zextsi128_si256(a); + let e = _mm256_setr_epi64x(1, 2, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_zextpd128_pd256() { + let a = _mm_setr_pd(1., 2.); + let r = _mm256_zextpd128_pd256(a); + let e = _mm256_setr_pd(1., 2., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_m128() { + let hi = _mm_setr_ps(5., 6., 7., 8.); + let lo = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm256_set_m128(hi, lo); + let e = 
_mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_m128d() { + let hi = _mm_setr_pd(3., 4.); + let lo = _mm_setr_pd(1., 2.); + let r = _mm256_set_m128d(hi, lo); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_set_m128i() { + #[rustfmt::skip] + let hi = _mm_setr_epi8( + 17, 18, 19, 20, + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 32, + ); + #[rustfmt::skip] + let lo = _mm_setr_epi8( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + let r = _mm256_set_m128i(hi, lo); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_m128() { + let lo = _mm_setr_ps(1., 2., 3., 4.); + let hi = _mm_setr_ps(5., 6., 7., 8.); + let r = _mm256_setr_m128(lo, hi); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_m128d() { + let lo = _mm_setr_pd(1., 2.); + let hi = _mm_setr_pd(3., 4.); + let r = _mm256_setr_m128d(lo, hi); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_setr_m128i() { + #[rustfmt::skip] + let lo = _mm_setr_epi8( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + #[rustfmt::skip] + let hi = _mm_setr_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm256_setr_m128i(lo, hi); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_loadu2_m128() { 
+ let hi = &[5., 6., 7., 8.]; + let hiaddr = hi.as_ptr(); + let lo = &[1., 2., 3., 4.]; + let loaddr = lo.as_ptr(); + let r = _mm256_loadu2_m128(hiaddr, loaddr); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_loadu2_m128d() { + let hi = &[3., 4.]; + let hiaddr = hi.as_ptr(); + let lo = &[1., 2.]; + let loaddr = lo.as_ptr(); + let r = _mm256_loadu2_m128d(hiaddr, loaddr); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_loadu2_m128i() { + #[rustfmt::skip] + let hi = _mm_setr_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + #[rustfmt::skip] + let lo = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_storeu2_m128() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let mut hi = _mm_undefined_ps(); + let mut lo = _mm_undefined_ps(); + _mm256_storeu2_m128( + ptr::addr_of_mut!(hi) as *mut f32, + ptr::addr_of_mut!(lo) as *mut f32, + a, + ); + assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.)); + assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_storeu2_m128d() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let mut hi = _mm_undefined_pd(); + let mut lo = _mm_undefined_pd(); + _mm256_storeu2_m128d( + ptr::addr_of_mut!(hi) as *mut f64, + ptr::addr_of_mut!(lo) as *mut f64, + a, + ); + assert_eq_m128d(hi, _mm_setr_pd(3., 4.)); + assert_eq_m128d(lo, _mm_setr_pd(1., 2.)); + } + + #[simd_test(enable = "avx")] + unsafe fn 
test_mm256_storeu2_m128i() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let mut hi = _mm_undefined_si128(); + let mut lo = _mm_undefined_si128(); + _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a); + #[rustfmt::skip] + let e_hi = _mm_setr_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32 + ); + #[rustfmt::skip] + let e_lo = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 + ); + + assert_eq_m128i(hi, e_hi); + assert_eq_m128i(lo, e_lo); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtss_f32() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_cvtss_f32(a); + assert_eq!(r, 1.); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs new file mode 100644 index 000000000000..739de2b34126 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -0,0 +1,5724 @@ +//! Advanced Vector Extensions 2 (AVX) +//! +//! AVX2 expands most AVX commands to 256-bit wide vector registers and +//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +//! System Instructions][amd64_ref]. +//! +//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick +//! overview of the instructions available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! 
[wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions +//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate + +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Computes the absolute values of packed 32-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpabsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_abs_epi32(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i32x8(); + let r = simd_select::(simd_lt(a, i32x8::ZERO), simd_neg(a), a); + transmute(r) + } +} + +/// Computes the absolute values of packed 16-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpabsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_abs_epi16(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i16x16(); + let r = simd_select::(simd_lt(a, i16x16::ZERO), simd_neg(a), a); + transmute(r) + } +} + +/// Computes the absolute values of packed 8-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpabsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_abs_epi8(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i8x32(); + let r = simd_select::(simd_lt(a, i8x32::ZERO), simd_neg(a), a); + transmute(r) + } +} + +/// Adds packed 64-bit integers in `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) } +} + +/// Adds packed 32-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) } +} + +/// Adds packed 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) } +} + +/// Adds packed 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) } +} + +/// Adds packed 8-bit integers in `a` and `b` using saturation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) } +} + +/// Adds packed 16-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) } +} + +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) } +} + +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpaddusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) } +} + +/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary +/// result, shifts the result right by `n` bytes, and returns the low 16 bytes. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + + // If palignr is shifting the pair of vectors more than the size of two + // lanes, emit zero. + if IMM8 >= 32 { + return _mm256_setzero_si256(); + } + // If palignr is shifting the pair of input vectors more than one lane, + // but less than two lanes, convert to shifting in zeroes. 
+ let (a, b) = if IMM8 > 16 { + (_mm256_setzero_si256(), a) + } else { + (a, b) + }; + unsafe { + if IMM8 == 16 { + return transmute(a); + } + } + const fn mask(shift: u32, i: u32) -> u32 { + let shift = shift % 16; + let mod_i = i % 16; + if mod_i < (16 - shift) { + i + shift + } else { + i + 16 + shift + } + } + + unsafe { + let r: i8x32 = simd_shuffle!( + b.as_i8x32(), + a.as_i8x32(), + [ + mask(IMM8 as u32, 0), + mask(IMM8 as u32, 1), + mask(IMM8 as u32, 2), + mask(IMM8 as u32, 3), + mask(IMM8 as u32, 4), + mask(IMM8 as u32, 5), + mask(IMM8 as u32, 6), + mask(IMM8 as u32, 7), + mask(IMM8 as u32, 8), + mask(IMM8 as u32, 9), + mask(IMM8 as u32, 10), + mask(IMM8 as u32, 11), + mask(IMM8 as u32, 12), + mask(IMM8 as u32, 13), + mask(IMM8 as u32, 14), + mask(IMM8 as u32, 15), + mask(IMM8 as u32, 16), + mask(IMM8 as u32, 17), + mask(IMM8 as u32, 18), + mask(IMM8 as u32, 19), + mask(IMM8 as u32, 20), + mask(IMM8 as u32, 21), + mask(IMM8 as u32, 22), + mask(IMM8 as u32, 23), + mask(IMM8 as u32, 24), + mask(IMM8 as u32, 25), + mask(IMM8 as u32, 26), + mask(IMM8 as u32, 27), + mask(IMM8 as u32, 28), + mask(IMM8 as u32, 29), + mask(IMM8 as u32, 30), + mask(IMM8 as u32, 31), + ], + ); + transmute(r) + } +} + +/// Computes the bitwise AND of 256 bits (representing integer data) +/// in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) } +} + +/// Computes the bitwise NOT of 256 bits (representing integer data) +/// in `a` and then AND with `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let all_ones = _mm256_set1_epi8(-1); + transmute(simd_and( + simd_xor(a.as_i64x4(), all_ones.as_i64x4()), + b.as_i64x4(), + )) + } +} + +/// Averages packed unsigned 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpavgw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, u32x16>(a.as_u16x16()); + let b = simd_cast::<_, u32x16>(b.as_u16x16()); + let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1)); + transmute(simd_cast::<_, u16x16>(r)) + } +} + +/// Averages packed unsigned 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpavgb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, u16x32>(a.as_u8x32()); + let b = simd_cast::<_, u16x32>(b.as_u8x32()); + let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1)); + transmute(simd_cast::<_, u8x32>(r)) + } +} + +/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM4, 4); + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let r: i32x4 = simd_shuffle!( + a, + b, + [ + [0, 4, 0, 4][IMM4 as usize & 0b11], + [1, 1, 5, 5][IMM4 as usize & 0b11], + [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11], + [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11], + ], + ); + transmute(r) + } +} + +/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r: i32x8 = simd_shuffle!( + a, + b, + [ + [0, 8, 0, 8][IMM8 as usize & 0b11], + [1, 1, 9, 9][IMM8 as usize & 0b11], + [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11], + [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11], + [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11], + [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11], + [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11], + [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11], + ], + ); + transmute(r) + } +} + +/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i16x16(); + let b = b.as_i16x16(); + + let r: i16x16 = simd_shuffle!( + a, + b, + [ + [0, 16, 0, 16][IMM8 as usize & 0b11], + [1, 1, 17, 17][IMM8 as usize & 0b11], + [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11], + [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11], + [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11], + [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11], + [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11], + [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11], + [8, 24, 8, 24][IMM8 as usize & 0b11], + [9, 9, 25, 25][IMM8 as usize & 0b11], + [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11], + [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11], + [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11], + [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11], + [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11], + [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11], + ], + ); + transmute(r) + } +} + +/// Blends packed 8-bit integers from `a` and `b` using `mask`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpblendvb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { + unsafe { + let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO); + transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32())) + } +} + +/// Broadcasts the low packed 8-bit integer from `a` to all elements of +/// the 128-bit returned value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]); + transmute::(ret) + } +} + +/// Broadcasts the low packed 8-bit integer from `a` to all elements of +/// the 256-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]); + transmute::(ret) + } +} + +// N.B., `simd_shuffle4` with integer data types for `a` and `b` is +// often compiled to `vbroadcastss`. +/// Broadcasts the low packed 32-bit integer from `a` to all elements of +/// the 128-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]); + transmute::(ret) + } +} + +// N.B., `simd_shuffle4`` with integer data types for `a` and `b` is +// often compiled to `vbroadcastss`. +/// Broadcasts the low packed 32-bit integer from `a` to all elements of +/// the 256-bit returned value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]); + transmute::(ret) + } +} + +/// Broadcasts the low packed 64-bit integer from `a` to all elements of +/// the 128-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64) +#[inline] +#[target_feature(enable = "avx2")] +// Emits `vmovddup` instead of `vpbroadcastq` +// See https://github.com/rust-lang/stdarch/issues/791 +#[cfg_attr(test, assert_instr(vmovddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); + transmute::(ret) + } +} + +/// Broadcasts the low packed 64-bit integer from `a` to all elements of +/// the 256-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); + transmute::(ret) + } +} + +/// Broadcasts the low double-precision (64-bit) floating-point element +/// from `a` to all elements of the 128-bit returned value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vmovddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) } +} + +/// Broadcasts the low double-precision (64-bit) floating-point element +/// from `a` to all elements of the 256-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { + unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) } +} + +/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in +/// the 256-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsi128_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); + transmute::(ret) + } +} + +// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or +// `vbroadcastf128`. +/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in +/// the 256-bit returned value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); + transmute::(ret) + } +} + +/// Broadcasts the low single-precision (32-bit) floating-point element +/// from `a` to all elements of the 128-bit returned value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_broadcastss_ps(a: __m128) -> __m128 { + unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) } +} + +/// Broadcasts the low single-precision (32-bit) floating-point element +/// from `a` to all elements of the 256-bit returned value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) } +} + +/// Broadcasts the low packed 16-bit integer from a to all elements of +/// the 128-bit returned value +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]); + transmute::(ret) + } +} + +/// Broadcasts the low packed 16-bit integer from a to all elements of +/// the 256-bit returned value +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]); + transmute::(ret) + } +} + +/// Compares packed 64-bit integers in `a` and `b` for equality. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpeqq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compares packed 32-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpeqd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) } +} + +/// Compares packed 16-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpeqw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compares packed 8-bit integers in `a` and `b` for equality. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpeqb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compares packed 64-bit integers in `a` and `b` for greater-than. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpgtq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compares packed 32-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpgtd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) } +} + +/// Compares packed 16-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpgtw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compares packed 8-bit integers in `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpcmpgtb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i8x32(), b.as_i8x32())) } +} + +/// Sign-extend 16-bit integers to 32-bit integers. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_i16x8())) } +} + +/// Sign-extend 16-bit integers to 64-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i16x8(); + let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v64)) + } +} + +/// Sign-extend 32-bit integers to 64-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_i32x4())) } +} + +/// Sign-extend 8-bit integers to 16-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_i8x16())) } +} + +/// Sign-extend 8-bit integers to 32-bit integers. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i8x16(); + let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v64)) + } +} + +/// Sign-extend 8-bit integers to 64-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i8x16(); + let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v32)) + } +} + +/// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit +/// integers, and stores the results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_u16x8())) } +} + +/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit +/// integers. The upper four elements of `a` are unused. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_u16x8(); + let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v64)) + } +} + +/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_u32x4())) } +} + +/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_u8x16())) } +} + +/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit +/// integers. The upper eight elements of `a` are unused. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpmovzxbd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
+    unsafe {
+        let a = a.as_u8x16();
+        // Only the low eight 8-bit lanes are widened to 32 bits.
+        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        transmute::<u32x8, _>(simd_cast(v64))
+    }
+}
+
+/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
+/// integers. The upper twelve elements of `a` are unused.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpmovzxbq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
+    unsafe {
+        let a = a.as_u8x16();
+        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        transmute::<u64x4, _>(simd_cast(v32))
+    }
+}
+
+/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
+    static_assert_uimm_bits!(IMM1, 1);
+    unsafe {
+        let a = a.as_i64x4();
+        let b = i64x4::ZERO;
+        // IMM1 selects the low (0) or high (1) 128-bit half of `a`.
+        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
+        transmute(dst)
+    }
+}
+
+/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vphaddw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) }
+}
+
+/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vphaddd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) }
+}
+
+/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
+/// using saturation.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vphaddsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
+}
+
+/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vphsubw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) }
+}
+
+/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vphsubd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) }
+}
+
+/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
+/// using saturation.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vphsubsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
+    slice: *const i32,
+    offsets: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i32x4::ZERO;
+    // All-ones mask: gather every lane unconditionally.
+    let neg_one = _mm_set1_epi32(-1).as_i32x4();
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
+    src: __m128i,
+    slice: *const i32,
+    offsets: __m128i,
+    mask: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i32x4();
+    let mask = mask.as_i32x4();
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
+    slice: *const i32,
+    offsets: __m256i,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i32x8::ZERO;
+    // All-ones mask: gather every lane unconditionally.
+    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
+    let offsets = offsets.as_i32x8();
+    let slice = slice as *const i8;
+    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
+    src: __m256i,
+    slice: *const i32,
+    offsets: __m256i,
+    mask: __m256i,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i32x8();
+    let mask = mask.as_i32x8();
+    let offsets = offsets.as_i32x8();
+    let slice = slice as *const i8;
+    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm_setzero_ps();
+    // All-ones mask (sign bit set in every lane): gather every lane.
+    let neg_one = _mm_set1_ps(-1.0);
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
+    src: __m128,
+    slice: *const f32,
+    offsets: __m128i,
+    mask: __m128,
+) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    pgatherdps(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm256_setzero_ps();
+    let neg_one = _mm256_set1_ps(-1.0);
+    let offsets = offsets.as_i32x8();
+    let slice = slice as *const i8;
+    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
+    src: __m256,
+    slice: *const f32,
+    offsets: __m256i,
+    mask: __m256,
+) -> __m256 {
+    static_assert_imm8_scale!(SCALE);
+    let offsets = offsets.as_i32x8();
+    let slice = slice as *const i8;
+    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
+    slice: *const i64,
+    offsets: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i64x2::ZERO;
+    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
+    // Only the low two 32-bit offsets are used for a two-lane 64-bit gather.
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
+    src: __m128i,
+    slice: *const i64,
+    offsets: __m128i,
+    mask: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i64x2();
+    let mask = mask.as_i64x2();
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
+    slice: *const i64,
+    offsets: __m128i,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i64x4::ZERO;
+    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
+    src: __m256i,
+    slice: *const i64,
+    offsets: __m128i,
+    mask: __m256i,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i64x4();
+    let mask = mask.as_i64x4();
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm_setzero_pd();
+    let neg_one = _mm_set1_pd(-1.0);
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
+    src: __m128d,
+    slice: *const f64,
+    offsets: __m128i,
+    mask: __m128d,
+) -> __m128d {
+    static_assert_imm8_scale!(SCALE);
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
+    slice: *const f64,
+    offsets: __m128i,
+) -> __m256d {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm256_setzero_pd();
+    let neg_one = _mm256_set1_pd(-1.0);
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
+    src: __m256d,
+    slice: *const f64,
+    offsets: __m128i,
+    mask: __m256d,
+) -> __m256d {
+    static_assert_imm8_scale!(SCALE);
+    let offsets = offsets.as_i32x4();
+    let slice = slice as *const i8;
+    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
+    slice: *const i32,
+    offsets: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i32x4::ZERO;
+    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
+    let offsets = offsets.as_i64x2();
+    let slice = slice as *const i8;
+    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
+    src: __m128i,
+    slice: *const i32,
+    offsets: __m128i,
+    mask: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i32x4();
+    let mask = mask.as_i32x4();
+    let offsets = offsets.as_i64x2();
+    let slice = slice as *const i8;
+    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
+    slice: *const i32,
+    offsets: __m256i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i32x4::ZERO;
+    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
+    let offsets = offsets.as_i64x4();
+    let slice = slice as *const i8;
+    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
+    src: __m128i,
+    slice: *const i32,
+    offsets: __m256i,
+    mask: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i32x4();
+    let mask = mask.as_i32x4();
+    let offsets = offsets.as_i64x4();
+    let slice = slice as *const i8;
+    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm_setzero_ps();
+    let neg_one = _mm_set1_ps(-1.0);
+    let offsets = offsets.as_i64x2();
+    let slice = slice as *const i8;
+    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
+    src: __m128,
+    slice: *const f32,
+    offsets: __m128i,
+    mask: __m128,
+) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    let offsets = offsets.as_i64x2();
+    let slice = slice as *const i8;
+    pgatherqps(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    // Four 64-bit offsets produce only four f32 results, hence the __m128 return.
+    let zero = _mm_setzero_ps();
+    let neg_one = _mm_set1_ps(-1.0);
+    let offsets = offsets.as_i64x4();
+    let slice = slice as *const i8;
+    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
+    src: __m128,
+    slice: *const f32,
+    offsets: __m256i,
+    mask: __m128,
+) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    let offsets = offsets.as_i64x4();
+    let slice = slice as *const i8;
+    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
+    slice: *const i64,
+    offsets: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i64x2::ZERO;
+    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x2();
+    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
+    src: __m128i,
+    slice: *const i64,
+    offsets: __m128i,
+    mask: __m128i,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i64x2();
+    let mask = mask.as_i64x2();
+    let offsets = offsets.as_i64x2();
+    let slice = slice as *const i8;
+    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
+    slice: *const i64,
+    offsets: __m256i,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    let zero = i64x4::ZERO;
+    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x4();
+    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
+    src: __m256i,
+    slice: *const i64,
+    offsets: __m256i,
+    mask: __m256i,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i64x4();
+    let mask = mask.as_i64x4();
+    let offsets = offsets.as_i64x4();
+    let slice = slice as *const i8;
+    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
+    transmute(r)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm_setzero_pd();
+    let neg_one = _mm_set1_pd(-1.0);
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x2();
+    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
+    src: __m128d,
+    slice: *const f64,
+    offsets: __m128i,
+    mask: __m128d,
+) -> __m128d {
+    static_assert_imm8_scale!(SCALE);
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x2();
+    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
+    slice: *const f64,
+    offsets: __m256i,
+) -> __m256d {
+    static_assert_imm8_scale!(SCALE);
+    let zero = _mm256_setzero_pd();
+    let neg_one = _mm256_set1_pd(-1.0);
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x4();
+    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
+}
+
+/// Returns values from `slice` at offsets determined by `offsets * scale`,
+/// where
+/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
+/// that position instead.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
+    src: __m256d,
+    slice: *const f64,
+    offsets: __m256i,
+    mask: __m256d,
+) -> __m256d {
+    static_assert_imm8_scale!(SCALE);
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x4();
+    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
+}
+
+/// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the
+/// location specified by `IMM1`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
+    static_assert_uimm_bits!(IMM1, 1);
+    unsafe {
+        let a = a.as_i64x4();
+        let b = _mm256_castsi128_si256(b).as_i64x4();
+        // IMM1 = 0 replaces the low 128 bits of `a` with `b`; IMM1 = 1 the high.
+        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
+        transmute(dst)
+    }
+}
+
+/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
+/// intermediate signed 32-bit integers. Horizontally add adjacent pairs
+/// of intermediate 32-bit integers.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpmaddwd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
+}
+
+/// Vertically multiplies each unsigned 8-bit integer from `a` with the
+/// corresponding signed 8-bit integer from `b`, producing intermediate
+/// signed 16-bit integers. Horizontally add adjacent pairs of intermediate
+/// signed 16-bit integers
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpmaddubsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
+}
+
+/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
+/// (elements are zeroed out when the highest bit is not set in the
+/// corresponding element).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpmaskmovd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
+    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
+}
+
+/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
+/// (elements are zeroed out when the highest bit is not set in the
+/// corresponding element).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i { + transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8())) +} + +/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` +/// (elements are zeroed out when the highest bit is not set in the +/// corresponding element). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i { + transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2())) +} + +/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` +/// (elements are zeroed out when the highest bit is not set in the +/// corresponding element). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i { + transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4())) +} + +/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` +/// using `mask` (elements are not stored when the highest bit is not set +/// in the corresponding element). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) { + maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4()) +} + +/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` +/// using `mask` (elements are not stored when the highest bit is not set +/// in the corresponding element). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) { + maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8()) +} + +/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` +/// using `mask` (elements are not stored when the highest bit is not set +/// in the corresponding element). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) { + maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2()) +} + +/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` +/// using `mask` (elements are not stored when the highest bit is not set +/// in the corresponding element). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaskmovq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) { + maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4()) +} + +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed +/// maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i16x16(); + let b = b.as_i16x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed 32-bit integers in `a` and `b`, and returns the packed +/// maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaxsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i32x8(); + let b = b.as_i32x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed 8-bit integers in `a` and `b`, and returns the packed +/// maximum values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i8x32(); + let b = b.as_i8x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns +/// the packed maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u16x16(); + let b = b.as_u16x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns +/// the packed maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaxud))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u32x8(); + let b = b.as_u32x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns +/// the packed maximum values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmaxub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u8x32(); + let b = b.as_u8x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed +/// minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpminsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i16x16(); + let b = b.as_i16x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed 32-bit integers in `a` and `b`, and returns the packed +/// minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpminsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i32x8(); + let b = b.as_i32x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed 8-bit integers in `a` and `b`, and returns the packed +/// minimum values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpminsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i8x32(); + let b = b.as_i8x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns +/// the packed minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpminuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u16x16(); + let b = b.as_u16x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns +/// the packed minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpminud))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u32x8(); + let b = b.as_u32x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns +/// the packed minimum values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpminub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u8x32(); + let b = b.as_u8x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Creates mask from the most significant bit of each 8-bit element in `a`, +/// return the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmovmskb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_movemask_epi8(a: __m256i) -> i32 { + unsafe { + let z = i8x32::ZERO; + let m: i8x32 = simd_lt(a.as_i8x32(), z); + simd_bitmask::<_, u32>(m) as i32 + } +} + +/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned +/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit +/// results in dst. Eight SADs are performed for each 128-bit lane using one +/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is +/// selected from `b` starting at on the offset specified in `imm8`. Eight +/// quadruplets are formed from sequential 8-bit integers selected from `a` +/// starting at the offset specified in `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) } +} + +/// Multiplies the low 32-bit integers from each packed 64-bit element in +/// `a` and `b` +/// +/// Returns the 64-bit results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmuldq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4())); + let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4())); + transmute(simd_mul(a, b)) + } +} + +/// Multiplies the low unsigned 32-bit integers from each packed 64-bit +/// element in `a` and `b` +/// +/// Returns the unsigned 64-bit results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmuludq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u64x4(); + let b = b.as_u64x4(); + let mask = u64x4::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) + } +} + +/// Multiplies the packed 16-bit integers in `a` and `b`, producing +/// intermediate 32-bit integers and returning the high 16 bits of the +/// intermediate integers. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmulhw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, i32x16>(a.as_i16x16()); + let b = simd_cast::<_, i32x16>(b.as_i16x16()); + let r = simd_shr(simd_mul(a, b), i32x16::splat(16)); + transmute(simd_cast::(r)) + } +} + +/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing +/// intermediate 32-bit integers and returning the high 16 bits of the +/// intermediate integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, u32x16>(a.as_u16x16()); + let b = simd_cast::<_, u32x16>(b.as_u16x16()); + let r = simd_shr(simd_mul(a, b), u32x16::splat(16)); + transmute(simd_cast::(r)) + } +} + +/// Multiplies the packed 16-bit integers in `a` and `b`, producing +/// intermediate 32-bit integers, and returns the low 16 bits of the +/// intermediate integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmullw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) } +} + +/// Multiplies the packed 32-bit integers in `a` and `b`, producing +/// intermediate 64-bit integers, and returns the low 32 bits of the +/// intermediate integers +/// +/// 
[Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmulld))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiplies packed 16-bit integers in `a` and `b`, producing +/// intermediate signed 32-bit integers. Truncate each intermediate +/// integer to the 18 most significant bits, round by adding 1, and +/// return bits `[16:1]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) } +} + +/// Computes the bitwise OR of 256 bits (representing integer data) in `a` +/// and `b` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } +} + +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// using signed saturation +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpacksswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { 
transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } +} + +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// using signed saturation +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpackssdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } +} + +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// using unsigned saturation +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpackuswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } +} + +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// using unsigned saturation +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpackusdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) } +} + +/// Permutes packed 32-bit integers from `a` according to the content of `b`. +/// +/// The last 3 bits of each integer of `b` are used as addresses into the 8 +/// integers of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpermps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) } +} + +/// Permutes 64-bit integers from `a` using control mask `imm8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let zero = i64x4::ZERO; + let r: i64x4 = simd_shuffle!( + a.as_i64x4(), + zero, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ); + transmute(r) + } +} + +/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) } +} + +/// Shuffles 64-bit floating-point elements in `a` across lanes using the +/// control in `imm8`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permute4x64_pd(a: __m256d) -> __m256d { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + simd_shuffle!( + a, + _mm256_undefined_pd(), + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ) + } +} + +/// Shuffles eight 32-bit floating-point elements in `a` across lanes using +/// the corresponding 32-bit integer index in `idx`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpermps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { + unsafe { permps(a, idx.as_i32x8()) } +} + +/// Computes the absolute differences of packed unsigned 8-bit integers in `a` +/// and `b`, then horizontally sum each consecutive 8 differences to +/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit +/// integers in the low 16 bits of the 64-bit return value +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsadbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) } +} + +/// Shuffles bytes from `a` according to the content of `b`. 
+/// +/// For each of the 128-bit low and high halves of the vectors, the last +/// 4 bits of each byte of `b` are used as addresses into the respective +/// low or high 16 bytes of `a`. That is, the halves are shuffled separately. +/// +/// In addition, if the highest significant bit of a byte of `b` is set, the +/// respective destination byte is set to 0. +/// +/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically +/// equivalent to: +/// +/// ``` +/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] { +/// let mut r = [0; 32]; +/// for i in 0..16 { +/// // if the most significant bit of b is set, +/// // then the destination byte is set to 0. +/// if b[i] & 0x80 == 0u8 { +/// r[i] = a[(b[i] % 16) as usize]; +/// } +/// if b[i + 16] & 0x80 == 0u8 { +/// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize]; +/// } +/// } +/// r +/// } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpshufb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) } +} + +/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in +/// `imm8`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); +/// +/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01); +/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11); +/// +/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4); +/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5); +/// +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0); +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { + static_assert_uimm_bits!(MASK, 8); + unsafe { + let r: i32x8 = simd_shuffle!( + a.as_i32x8(), + a.as_i32x8(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ); + transmute(r) + } +} + +/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using +/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied +/// to the output. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i16x16(); + let r: i16x16 = simd_shuffle!( + a, + a, + [ + 0, + 1, + 2, + 3, + 4 + (IMM8 as u32 & 0b11), + 4 + ((IMM8 as u32 >> 2) & 0b11), + 4 + ((IMM8 as u32 >> 4) & 0b11), + 4 + ((IMM8 as u32 >> 6) & 0b11), + 8, + 9, + 10, + 11, + 12 + (IMM8 as u32 & 0b11), + 12 + ((IMM8 as u32 >> 2) & 0b11), + 12 + ((IMM8 as u32 >> 4) & 0b11), + 12 + ((IMM8 as u32 >> 6) & 0b11), + ], + ); + transmute(r) + } +} + +/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using +/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied +/// to the output. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i16x16(); + let r: i16x16 = simd_shuffle!( + a, + a, + [ + 0 + (IMM8 as u32 & 0b11), + 0 + ((IMM8 as u32 >> 2) & 0b11), + 0 + ((IMM8 as u32 >> 4) & 0b11), + 0 + ((IMM8 as u32 >> 6) & 0b11), + 4, + 5, + 6, + 7, + 8 + (IMM8 as u32 & 0b11), + 8 + ((IMM8 as u32 >> 2) & 0b11), + 8 + ((IMM8 as u32 >> 4) & 0b11), + 8 + ((IMM8 as u32 >> 6) & 0b11), + 12, + 13, + 14, + 15, + ], + ); + transmute(r) + } +} + +/// Negates packed 16-bit integers in `a` when the corresponding signed +/// 16-bit integer in `b` is negative, and returns the results. +/// Results are zeroed out when the corresponding element in `b` is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsignw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) } +} + +/// Negates packed 32-bit integers in `a` when the corresponding signed +/// 32-bit integer in `b` is negative, and returns the results. +/// Results are zeroed out when the corresponding element in `b` is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsignd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) } +} + +/// Negates packed 8-bit integers in `a` when the corresponding signed +/// 8-bit integer in `b` is negative, and returns the results. +/// Results are zeroed out when the corresponding element in `b` is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsignb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) } +} + +/// Shifts packed 16-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) } +} + +/// Shifts packed 32-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpslld))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { + unsafe { 
transmute(pslld(a.as_i32x8(), count.as_i32x4())) } +} + +/// Shifts packed 64-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) } +} + +/// Shifts packed 16-bit integers in `a` left by `IMM8` while +/// shifting in zeros, return the results; +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_slli_epi16(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + if IMM8 >= 16 { + _mm256_setzero_si256() + } else { + transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))) + } + } +} + +/// Shifts packed 32-bit integers in `a` left by `IMM8` while +/// shifting in zeros, return the results; +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_slli_epi32(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32))) + } + } +} + +/// Shifts packed 64-bit integers in `a` left by `IMM8` while +/// shifting in zeros, return the results; +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_slli_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))) + } + } +} + +/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_slli_si256(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_bslli_epi128::(a) +} + +/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    const fn mask(shift: i32, i: u32) -> u32 {
+        let shift = shift as u32 & 0xff;
+        if shift > 15 || i % 16 < shift {
+            0
+        } else {
+            32 + (i - shift)
+        }
+    }
+    unsafe {
+        let a = a.as_i8x32();
+        let r: i8x32 = simd_shuffle!(
+            i8x32::ZERO,
+            a,
+            [
+                mask(IMM8, 0),
+                mask(IMM8, 1),
+                mask(IMM8, 2),
+                mask(IMM8, 3),
+                mask(IMM8, 4),
+                mask(IMM8, 5),
+                mask(IMM8, 6),
+                mask(IMM8, 7),
+                mask(IMM8, 8),
+                mask(IMM8, 9),
+                mask(IMM8, 10),
+                mask(IMM8, 11),
+                mask(IMM8, 12),
+                mask(IMM8, 13),
+                mask(IMM8, 14),
+                mask(IMM8, 15),
+                mask(IMM8, 16),
+                mask(IMM8, 17),
+                mask(IMM8, 18),
+                mask(IMM8, 19),
+                mask(IMM8, 20),
+                mask(IMM8, 21),
+                mask(IMM8, 22),
+                mask(IMM8, 23),
+                mask(IMM8, 24),
+                mask(IMM8, 25),
+                mask(IMM8, 26),
+                mask(IMM8, 27),
+                mask(IMM8, 28),
+                mask(IMM8, 29),
+                mask(IMM8, 30),
+                mask(IMM8, 31),
+            ],
+        );
+        transmute(r)
+    }
+}
+
+/// Shifts packed 32-bit integers in `a` left by the amount
+/// specified by the corresponding element in `count` while
+/// shifting in zeros, and returns the result.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
+}
+
+/// Shifts packed 32-bit integers in `a` left by the amount
+/// specified by the corresponding element in `count` while
+/// shifting in zeros, and returns the result.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } +} + +/// Shifts packed 64-bit integers in `a` left by the amount +/// specified by the corresponding element in `count` while +/// shifting in zeros, and returns the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } +} + +/// Shifts packed 64-bit integers in `a` left by the amount +/// specified by the corresponding element in `count` while +/// shifting in zeros, and returns the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsllvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } +} + +/// Shifts packed 16-bit integers in `a` right by `count` while +/// shifting in sign bits. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsraw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
+    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by `count` while
+/// shifting in sign bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsrad))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
+    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
+}
+
+/// Shifts packed 16-bit integers in `a` right by `IMM8` while
+/// shifting in sign bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by `IMM8` while
+/// shifting in sign bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by the amount specified by the
+/// corresponding element in `count` while shifting in sign bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsravd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by the amount specified by the
+/// corresponding element in `count` while shifting in sign bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[cfg_attr(test, assert_instr(vpsravd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
+    unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
+}
+
+/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srli_si256(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_bsrli_epi128::(a) +} + +/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + const fn mask(shift: i32, i: u32) -> u32 { + let shift = shift as u32 & 0xff; + if shift > 15 || (15 - (i % 16)) < shift { + 0 + } else { + 32 + (i + shift) + } + } + unsafe { + let a = a.as_i8x32(); + let r: i8x32 = simd_shuffle!( + i8x32::ZERO, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + ], + ); + transmute(r) + } +} + +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in +/// zeros. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } +} + +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in +/// zeros. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrld))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) } +} + +/// Shifts packed 64-bit integers in `a` right by `count` while shifting in +/// zeros. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } +} + +/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in +/// zeros +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srli_epi16(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + if IMM8 >= 16 { + _mm256_setzero_si256() + } else { + transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as 
u16))) + } + } +} + +/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in +/// zeros +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srli_epi32(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32))) + } + } +} + +/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in +/// zeros +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srli_epi64(a: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))) + } + } +} + +/// Shifts packed 32-bit integers in `a` right by the amount specified by +/// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } +} + +/// Shifts packed 32-bit integers in `a` right by the amount specified by +/// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlvd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } +} + +/// Shifts packed 64-bit integers in `a` right by the amount specified by +/// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } +} + +/// Shifts packed 64-bit integers in `a` right by the amount specified by +/// the corresponding element in `count` while shifting in zeros, +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsrlvq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) } +} + +/// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr +/// must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
To +/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_load_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vmovntdqa))] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i { + let dst: __m256i; + crate::arch::asm!( + vpl!("vmovntdqa {a}"), + a = out(ymm_reg) dst, + p = in(reg) mem_addr, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) } +} + +/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) } +} + +/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> 
__m256i { + unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) } +} + +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) } +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in +/// `a` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) } +} + +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in +/// `a` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) } +} + +/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit +/// integers in `a` using saturation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) } +} + +/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit +/// integers in `a` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpsubusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) } +} + +/// Unpacks and interleave 8-bit integers from the high half of each +/// 128-bit lane in `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi8( +/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +/// ); +/// let b = _mm256_setr_epi8( +/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, +/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, +/// -30, -31, +/// ); +/// +/// let c = _mm256_unpackhi_epi8(a, b); +/// +/// let expected = _mm256_setr_epi8( +/// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15, +/// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31, +/// -31, +/// ); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + #[rustfmt::skip] + let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63, + ]); + transmute(r) + } +} + +/// Unpacks and interleave 8-bit integers from the low half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi8( +/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +/// ); +/// let b = _mm256_setr_epi8( +/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, +/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, +/// -30, -31, +/// ); +/// +/// let c = _mm256_unpacklo_epi8(a, b); +/// +/// let expected = _mm256_setr_epi8( +/// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17, +/// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23, +/// ); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + #[rustfmt::skip] + let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55, + ]); + transmute(r) + } +} + +/// Unpacks and interleave 16-bit integers from the high half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi16( +/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +/// ); +/// let b = _mm256_setr_epi16( +/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, +/// ); +/// +/// let c = _mm256_unpackhi_epi16(a, b); +/// +/// let expected = _mm256_setr_epi16( +/// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15, +/// ); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i16x16 = simd_shuffle!( + a.as_i16x16(), + b.as_i16x16(), + [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31], + ); + transmute(r) + } +} + +/// Unpacks and interleave 16-bit integers from the low half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// +/// let a = _mm256_setr_epi16( +/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +/// ); +/// let b = _mm256_setr_epi16( +/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, +/// ); +/// +/// let c = _mm256_unpacklo_epi16(a, b); +/// +/// let expected = _mm256_setr_epi16( +/// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11, +/// ); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i16x16 = simd_shuffle!( + a.as_i16x16(), + b.as_i16x16(), + [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27], + ); + transmute(r) + } +} + +/// Unpacks and interleave 32-bit integers from the high half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); +/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7); +/// +/// let c = _mm256_unpackhi_epi32(a, b); +/// +/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vunpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); + transmute(r) + } +} + +/// Unpacks and interleave 32-bit integers from the low half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); +/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7); +/// +/// let c = _mm256_unpacklo_epi32(a, b); +/// +/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vunpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); + transmute(r) + } +} + +/// Unpacks and interleave 64-bit integers from the high half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi64x(0, 1, 2, 3); +/// let b = _mm256_setr_epi64x(0, -1, -2, -3); +/// +/// let c = _mm256_unpackhi_epi64(a, b); +/// +/// let expected = _mm256_setr_epi64x(1, -1, 3, -3); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vunpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); + transmute(r) + } +} + +/// Unpacks and interleave 64-bit integers from the low half of each +/// 128-bit lane of `a` and `b`. 
+/// +/// ```rust +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("avx2") { +/// # #[target_feature(enable = "avx2")] +/// # unsafe fn worker() { +/// let a = _mm256_setr_epi64x(0, 1, 2, 3); +/// let b = _mm256_setr_epi64x(0, -1, -2, -3); +/// +/// let c = _mm256_unpacklo_epi64(a, b); +/// +/// let expected = _mm256_setr_epi64x(0, 0, 2, -2); +/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); +/// +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); + transmute(r) + } +} + +/// Computes the bitwise XOR of 256 bits (representing integer data) +/// in `a` and `b` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } +} + +/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit +/// integer containing the zero-extended integer data. +/// +/// See [LLVM commit D20468](https://reviews.llvm.org/D20468). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8) +#[inline] +#[target_feature(enable = "avx2")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extract_epi8(a: __m256i) -> i32 { + static_assert_uimm_bits!(INDEX, 5); + unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 } +} + +/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit +/// integer containing the zero-extended integer data. +/// +/// See [LLVM commit D20468](https://reviews.llvm.org/D20468). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16) +#[inline] +#[target_feature(enable = "avx2")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extract_epi16(a: __m256i) -> i32 { + static_assert_uimm_bits!(INDEX, 4); + unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx2.phadd.w"] + fn phaddw(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx2.phadd.d"] + fn phaddd(a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.phadd.sw"] + fn phaddsw(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx2.phsub.w"] + fn phsubw(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx2.phsub.d"] + fn phsubd(a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.phsub.sw"] + fn phsubsw(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx2.pmadd.wd"] + fn pmaddwd(a: i16x16, b: i16x16) -> i32x8; + #[link_name = "llvm.x86.avx2.pmadd.ub.sw"] + fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16; + #[link_name = "llvm.x86.avx2.maskload.d"] + fn maskloadd(mem_addr: *const i8, mask: i32x4) -> 
i32x4; + #[link_name = "llvm.x86.avx2.maskload.d.256"] + fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.maskload.q"] + fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx2.maskload.q.256"] + fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx2.maskstore.d"] + fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4); + #[link_name = "llvm.x86.avx2.maskstore.d.256"] + fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8); + #[link_name = "llvm.x86.avx2.maskstore.q"] + fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2); + #[link_name = "llvm.x86.avx2.maskstore.q.256"] + fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4); + #[link_name = "llvm.x86.avx2.mpsadbw"] + fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16; + #[link_name = "llvm.x86.avx2.pmul.hr.sw"] + fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx2.packsswb"] + fn packsswb(a: i16x16, b: i16x16) -> i8x32; + #[link_name = "llvm.x86.avx2.packssdw"] + fn packssdw(a: i32x8, b: i32x8) -> i16x16; + #[link_name = "llvm.x86.avx2.packuswb"] + fn packuswb(a: i16x16, b: i16x16) -> u8x32; + #[link_name = "llvm.x86.avx2.packusdw"] + fn packusdw(a: i32x8, b: i32x8) -> u16x16; + #[link_name = "llvm.x86.avx2.psad.bw"] + fn psadbw(a: u8x32, b: u8x32) -> u64x4; + #[link_name = "llvm.x86.avx2.psign.b"] + fn psignb(a: i8x32, b: i8x32) -> i8x32; + #[link_name = "llvm.x86.avx2.psign.w"] + fn psignw(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx2.psign.d"] + fn psignd(a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.psll.w"] + fn psllw(a: i16x16, count: i16x8) -> i16x16; + #[link_name = "llvm.x86.avx2.psll.d"] + fn pslld(a: i32x8, count: i32x4) -> i32x8; + #[link_name = "llvm.x86.avx2.psll.q"] + fn psllq(a: i64x4, count: i64x2) -> i64x4; + #[link_name = "llvm.x86.avx2.psllv.d"] + fn psllvd(a: i32x4, count: i32x4) -> i32x4; + #[link_name = 
"llvm.x86.avx2.psllv.d.256"] + fn psllvd256(a: i32x8, count: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.psllv.q"] + fn psllvq(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx2.psllv.q.256"] + fn psllvq256(a: i64x4, count: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx2.psra.w"] + fn psraw(a: i16x16, count: i16x8) -> i16x16; + #[link_name = "llvm.x86.avx2.psra.d"] + fn psrad(a: i32x8, count: i32x4) -> i32x8; + #[link_name = "llvm.x86.avx2.psrav.d"] + fn psravd(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.psrav.d.256"] + fn psravd256(a: i32x8, count: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.psrl.w"] + fn psrlw(a: i16x16, count: i16x8) -> i16x16; + #[link_name = "llvm.x86.avx2.psrl.d"] + fn psrld(a: i32x8, count: i32x4) -> i32x8; + #[link_name = "llvm.x86.avx2.psrl.q"] + fn psrlq(a: i64x4, count: i64x2) -> i64x4; + #[link_name = "llvm.x86.avx2.psrlv.d"] + fn psrlvd(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.psrlv.d.256"] + fn psrlvd256(a: i32x8, count: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.psrlv.q"] + fn psrlvq(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx2.psrlv.q.256"] + fn psrlvq256(a: i64x4, count: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx2.pshuf.b"] + fn pshufb(a: u8x32, b: u8x32) -> u8x32; + #[link_name = "llvm.x86.avx2.permd"] + fn permd(a: u32x8, b: u32x8) -> u32x8; + #[link_name = "llvm.x86.avx2.permps"] + fn permps(a: __m256, b: i32x8) -> __m256; + #[link_name = "llvm.x86.avx2.vperm2i128"] + fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4; + #[link_name = "llvm.x86.avx2.gather.d.d"] + fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4; + #[link_name = "llvm.x86.avx2.gather.d.d.256"] + fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8; + #[link_name = "llvm.x86.avx2.gather.d.q"] + fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2; 
+ #[link_name = "llvm.x86.avx2.gather.d.q.256"] + fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4; + #[link_name = "llvm.x86.avx2.gather.q.d"] + fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4; + #[link_name = "llvm.x86.avx2.gather.q.d.256"] + fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4; + #[link_name = "llvm.x86.avx2.gather.q.q"] + fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2; + #[link_name = "llvm.x86.avx2.gather.q.q.256"] + fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4; + #[link_name = "llvm.x86.avx2.gather.d.pd"] + fn pgatherdpd( + src: __m128d, + slice: *const i8, + offsets: i32x4, + mask: __m128d, + scale: i8, + ) -> __m128d; + #[link_name = "llvm.x86.avx2.gather.d.pd.256"] + fn vpgatherdpd( + src: __m256d, + slice: *const i8, + offsets: i32x4, + mask: __m256d, + scale: i8, + ) -> __m256d; + #[link_name = "llvm.x86.avx2.gather.q.pd"] + fn pgatherqpd( + src: __m128d, + slice: *const i8, + offsets: i64x2, + mask: __m128d, + scale: i8, + ) -> __m128d; + #[link_name = "llvm.x86.avx2.gather.q.pd.256"] + fn vpgatherqpd( + src: __m256d, + slice: *const i8, + offsets: i64x4, + mask: __m256d, + scale: i8, + ) -> __m256d; + #[link_name = "llvm.x86.avx2.gather.d.ps"] + fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8) + -> __m128; + #[link_name = "llvm.x86.avx2.gather.d.ps.256"] + fn vpgatherdps( + src: __m256, + slice: *const i8, + offsets: i32x8, + mask: __m256, + scale: i8, + ) -> __m256; + #[link_name = "llvm.x86.avx2.gather.q.ps"] + fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8) + -> __m128; + #[link_name = "llvm.x86.avx2.gather.q.ps.256"] + fn vpgatherqps( + src: __m128, + slice: *const i8, + offsets: i64x4, + mask: __m128, + scale: i8, + ) -> __m128; +} + 
+#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_abs_epi32() { + #[rustfmt::skip] + let a = _mm256_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm256_abs_epi32(a); + #[rustfmt::skip] + let e = _mm256_setr_epi32( + 0, 1, 1, i32::MAX, + i32::MAX.wrapping_add(1), 100, 100, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_abs_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, -1, 2, -2, 3, -3, 4, + -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32, + ); + let r = _mm256_abs_epi16(a); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 0, 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_abs_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 0, 1, -1, 2, -2, 3, -3, 4, + -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32, + 0, 1, -1, 2, -2, 3, -3, 4, + -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32, + ); + let r = _mm256_abs_epi8(a); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 0, 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32, + 0, 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_add_epi64() { + let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000); + let b = _mm256_setr_epi64x(-1, 0, 1, 2); + let r = _mm256_add_epi64(a, b); + let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_add_epi32() { + let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6); + let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_add_epi32(a, b); + let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx2")] + unsafe fn test_mm256_add_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let r = _mm256_add_epi16(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_add_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm256_add_epi8(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi8( + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + ); + let r = _mm256_adds_epi8(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epi8_saturate_positive() { + let a = _mm256_set1_epi8(0x7F); + let b = _mm256_set1_epi8(1); + let r = 
_mm256_adds_epi8(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epi8_saturate_negative() { + let a = _mm256_set1_epi8(-0x80); + let b = _mm256_set1_epi8(-1); + let r = _mm256_adds_epi8(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi16( + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + ); + let r = _mm256_adds_epi16(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epi16_saturate_positive() { + let a = _mm256_set1_epi16(0x7FFF); + let b = _mm256_set1_epi16(1); + let r = _mm256_adds_epi16(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epi16_saturate_negative() { + let a = _mm256_set1_epi16(-0x8000); + let b = _mm256_set1_epi16(-1); + let r = _mm256_adds_epi16(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epu8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi8( + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + ); + let r = _mm256_adds_epu8(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epu8_saturate() { + let a = 
_mm256_set1_epi8(!0); + let b = _mm256_set1_epi8(1); + let r = _mm256_adds_epu8(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epu16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi16( + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + ); + let r = _mm256_adds_epu16(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_adds_epu16_saturate() { + let a = _mm256_set1_epi16(!0); + let b = _mm256_set1_epi16(1); + let r = _mm256_adds_epu16(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_and_si256() { + let a = _mm256_set1_epi8(5); + let b = _mm256_set1_epi8(3); + let got = _mm256_and_si256(a, b); + assert_eq_m256i(got, _mm256_set1_epi8(1)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_andnot_si256() { + let a = _mm256_set1_epi8(5); + let b = _mm256_set1_epi8(3); + let got = _mm256_andnot_si256(a, b); + assert_eq_m256i(got, _mm256_set1_epi8(2)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_avg_epu8() { + let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9)); + let r = _mm256_avg_epu8(a, b); + assert_eq_m256i(r, _mm256_set1_epi8(6)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_avg_epu16() { + let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); + let r = _mm256_avg_epu16(a, b); + assert_eq_m256i(r, _mm256_set1_epi16(6)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_blend_epi32() { + let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9)); + let e = _mm_setr_epi32(9, 3, 3, 3); + let r = _mm_blend_epi32::<0x01>(a, b); + assert_eq_m128i(r, e); + + let r = _mm_blend_epi32::<0x0E>(b, a); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx2")] + unsafe fn test_mm256_blend_epi32() { + let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9)); + let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3); + let r = _mm256_blend_epi32::<0x01>(a, b); + assert_eq_m256i(r, e); + + let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9); + let r = _mm256_blend_epi32::<0x82>(a, b); + assert_eq_m256i(r, e); + + let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3); + let r = _mm256_blend_epi32::<0x7C>(a, b); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_blend_epi16() { + let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); + let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3); + let r = _mm256_blend_epi16::<0x01>(a, b); + assert_eq_m256i(r, e); + + let r = _mm256_blend_epi16::<0xFE>(b, a); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_blendv_epi8() { + let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2)); + let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1); + let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2); + let r = _mm256_blendv_epi8(a, b, mask); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastb_epi8() { + let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a); + let res = _mm_broadcastb_epi8(a); + assert_eq_m128i(res, _mm_set1_epi8(0x2a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastb_epi8() { + let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a); + let res = _mm256_broadcastb_epi8(a); + assert_eq_m256i(res, _mm256_set1_epi8(0x2a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastd_epi32() { + let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0); + let res = _mm_broadcastd_epi32(a); + assert_eq_m128i(res, _mm_set1_epi32(0x2a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastd_epi32() { + let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0); + let res = 
_mm256_broadcastd_epi32(a); + assert_eq_m256i(res, _mm256_set1_epi32(0x2a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastq_epi64() { + let a = _mm_setr_epi64x(0x1ffffffff, 0); + let res = _mm_broadcastq_epi64(a); + assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastq_epi64() { + let a = _mm_setr_epi64x(0x1ffffffff, 0); + let res = _mm256_broadcastq_epi64(a); + assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastsd_pd() { + let a = _mm_setr_pd(6.88, 3.44); + let res = _mm_broadcastsd_pd(a); + assert_eq_m128d(res, _mm_set1_pd(6.88)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastsd_pd() { + let a = _mm_setr_pd(6.88, 3.44); + let res = _mm256_broadcastsd_pd(a); + assert_eq_m256d(res, _mm256_set1_pd(6.88f64)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastsi128_si256() { + let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210); + let res = _mm_broadcastsi128_si256(a); + let retval = _mm256_setr_epi64x( + 0x0987654321012334, + 0x5678909876543210, + 0x0987654321012334, + 0x5678909876543210, + ); + assert_eq_m256i(res, retval); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastsi128_si256() { + let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210); + let res = _mm256_broadcastsi128_si256(a); + let retval = _mm256_setr_epi64x( + 0x0987654321012334, + 0x5678909876543210, + 0x0987654321012334, + 0x5678909876543210, + ); + assert_eq_m256i(res, retval); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastss_ps() { + let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0); + let res = _mm_broadcastss_ps(a); + assert_eq_m128(res, _mm_set1_ps(6.88)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastss_ps() { + let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0); + let res = _mm256_broadcastss_ps(a); + assert_eq_m256(res, 
_mm256_set1_ps(6.88)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_broadcastw_epi16() { + let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b); + let res = _mm_broadcastw_epi16(a); + assert_eq_m128i(res, _mm_set1_epi16(0x22b)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_broadcastw_epi16() { + let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b); + let res = _mm256_broadcastw_epi16(a); + assert_eq_m256i(res, _mm256_set1_epi16(0x22b)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpeq_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi8( + 31, 30, 2, 28, 27, 26, 25, 24, + 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, + ); + let r = _mm256_cmpeq_epi8(a, b); + assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpeq_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi16( + 15, 14, 2, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, + ); + let r = _mm256_cmpeq_epi16(a, b); + assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpeq_epi32() { + let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0); + let r = _mm256_cmpeq_epi32(a, b); + let e = _mm256_set1_epi32(0); + let e = _mm256_insert_epi32::<2>(e, !0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpeq_epi64() { + let a = _mm256_setr_epi64x(0, 1, 2, 3); + let b = _mm256_setr_epi64x(3, 2, 2, 0); + let r = _mm256_cmpeq_epi64(a, b); + assert_eq_m256i(r, 
_mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpgt_epi8() { + let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5); + let b = _mm256_set1_epi8(0); + let r = _mm256_cmpgt_epi8(a, b); + assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpgt_epi16() { + let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5); + let b = _mm256_set1_epi16(0); + let r = _mm256_cmpgt_epi16(a, b); + assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpgt_epi32() { + let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5); + let b = _mm256_set1_epi32(0); + let r = _mm256_cmpgt_epi32(a, b); + assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cmpgt_epi64() { + let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5); + let b = _mm256_set1_epi64x(0); + let r = _mm256_cmpgt_epi64(a, b); + assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepi8_epi16() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); + #[rustfmt::skip] + let r = _mm256_setr_epi16( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); + assert_eq_m256i(r, _mm256_cvtepi8_epi16(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepi8_epi32() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); + let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3); + assert_eq_m256i(r, _mm256_cvtepi8_epi32(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepi8_epi64() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); + let 
r = _mm256_setr_epi64x(0, 0, -1, 1); + assert_eq_m256i(r, _mm256_cvtepi8_epi64(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepi16_epi32() { + let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3); + let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3); + assert_eq_m256i(r, _mm256_cvtepi16_epi32(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepi16_epi64() { + let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3); + let r = _mm256_setr_epi64x(0, 0, -1, 1); + assert_eq_m256i(r, _mm256_cvtepi16_epi64(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepi32_epi64() { + let a = _mm_setr_epi32(0, 0, -1, 1); + let r = _mm256_setr_epi64x(0, 0, -1, 1); + assert_eq_m256i(r, _mm256_cvtepi32_epi64(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepu16_epi32() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m256i(r, _mm256_cvtepu16_epi32(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepu16_epi64() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_setr_epi64x(0, 1, 2, 3); + assert_eq_m256i(r, _mm256_cvtepu16_epi64(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepu32_epi64() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let r = _mm256_setr_epi64x(0, 1, 2, 3); + assert_eq_m256i(r, _mm256_cvtepu32_epi64(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepu8_epi16() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let r = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + assert_eq_m256i(r, _mm256_cvtepu8_epi16(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepu8_epi32() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + 
assert_eq_m256i(r, _mm256_cvtepu8_epi32(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_cvtepu8_epi64() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let r = _mm256_setr_epi64x(0, 1, 2, 3); + assert_eq_m256i(r, _mm256_cvtepu8_epi64(a)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_extracti128_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_extracti128_si256::<1>(a); + let e = _mm_setr_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_hadd_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_hadd_epi16(a, b); + let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_hadd_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_hadd_epi32(a, b); + let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_hadds_epi16() { + let a = _mm256_set1_epi16(2); + let a = _mm256_insert_epi16::<0>(a, 0x7fff); + let a = _mm256_insert_epi16::<1>(a, 1); + let b = _mm256_set1_epi16(4); + let r = _mm256_hadds_epi16(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 0x7FFF, 4, 4, 4, 8, 8, 8, 8, + 4, 4, 4, 4, 8, 8, 8, 8, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_hsub_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_hsub_epi16(a, b); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_hsub_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_hsub_epi32(a, b); + let e = _mm256_set1_epi32(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn 
test_mm256_hsubs_epi16() { + let a = _mm256_set1_epi16(2); + let a = _mm256_insert_epi16::<0>(a, 0x7fff); + let a = _mm256_insert_epi16::<1>(a, -1); + let b = _mm256_set1_epi16(4); + let r = _mm256_hsubs_epi16(a, b); + let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_madd_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_madd_epi16(a, b); + let e = _mm256_set1_epi32(16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_inserti128_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm_setr_epi64x(7, 8); + let r = _mm256_inserti128_si256::<1>(a, b); + let e = _mm256_setr_epi64x(1, 2, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_maddubs_epi16() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_maddubs_epi16(a, b); + let e = _mm256_set1_epi16(16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_maskload_epi32() { + let nums = [1, 2, 3, 4]; + let a = &nums as *const i32; + let mask = _mm_setr_epi32(-1, 0, 0, -1); + let r = _mm_maskload_epi32(a, mask); + let e = _mm_setr_epi32(1, 0, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_maskload_epi32() { + let nums = [1, 2, 3, 4, 5, 6, 7, 8]; + let a = &nums as *const i32; + let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0); + let r = _mm256_maskload_epi32(a, mask); + let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_maskload_epi64() { + let nums = [1_i64, 2_i64]; + let a = &nums as *const i64; + let mask = _mm_setr_epi64x(0, -1); + let r = _mm_maskload_epi64(a, mask); + let e = _mm_setr_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn 
test_mm256_maskload_epi64() { + let nums = [1_i64, 2_i64, 3_i64, 4_i64]; + let a = &nums as *const i64; + let mask = _mm256_setr_epi64x(0, -1, -1, 0); + let r = _mm256_maskload_epi64(a, mask); + let e = _mm256_setr_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_maskstore_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let mut arr = [-1, -1, -1, -1]; + let mask = _mm_setr_epi32(-1, 0, 0, -1); + _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a); + let e = [1, -1, -1, 4]; + assert_eq!(arr, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_maskstore_epi32() { + let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8); + let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1]; + let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0); + _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a); + let e = [1, -1, -1, 42, -1, 6, 7, -1]; + assert_eq!(arr, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_maskstore_epi64() { + let a = _mm_setr_epi64x(1_i64, 2_i64); + let mut arr = [-1_i64, -1_i64]; + let mask = _mm_setr_epi64x(0, -1); + _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a); + let e = [-1, 2]; + assert_eq!(arr, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_maskstore_epi64() { + let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64); + let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64]; + let mask = _mm256_setr_epi64x(0, -1, -1, 0); + _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a); + let e = [-1, 2, 3, -1]; + assert_eq!(arr, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_max_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_max_epi16(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_max_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_max_epi32(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + 
unsafe fn test_mm256_max_epi8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_max_epi8(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_max_epu16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_max_epu16(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_max_epu32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_max_epu32(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_max_epu8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_max_epu8(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_min_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_min_epi16(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_min_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_min_epi32(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_min_epi8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_min_epi8(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_min_epu16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_min_epu16(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_min_epu32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_min_epu32(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_min_epu8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_min_epu8(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn 
test_mm256_movemask_epi8() { + let a = _mm256_set1_epi8(-1); + let r = _mm256_movemask_epi8(a); + let e = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mpsadbw_epu8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_mpsadbw_epu8::<0>(a, b); + let e = _mm256_set1_epi16(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mul_epi32() { + let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2); + let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_mul_epi32(a, b); + let e = _mm256_setr_epi64x(0, 0, 10, 14); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mul_epu32() { + let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2); + let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_mul_epu32(a, b); + let e = _mm256_setr_epi64x(0, 0, 10, 14); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mulhi_epi16() { + let a = _mm256_set1_epi16(6535); + let b = _mm256_set1_epi16(6535); + let r = _mm256_mulhi_epi16(a, b); + let e = _mm256_set1_epi16(651); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mulhi_epu16() { + let a = _mm256_set1_epi16(6535); + let b = _mm256_set1_epi16(6535); + let r = _mm256_mulhi_epu16(a, b); + let e = _mm256_set1_epi16(651); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mullo_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_mullo_epi16(a, b); + let e = _mm256_set1_epi16(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mullo_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_mullo_epi32(a, b); + let e = _mm256_set1_epi32(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mulhrs_epi16() { + let a = _mm256_set1_epi16(2); 
+ let b = _mm256_set1_epi16(4); + let r = _mm256_mullo_epi16(a, b); + let e = _mm256_set1_epi16(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_or_si256() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(0); + let r = _mm256_or_si256(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_packs_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_packs_epi16(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 2, 2, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4, + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_packs_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_packs_epi32(a, b); + let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_packus_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(4); + let r = _mm256_packus_epi16(a, b); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 2, 2, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4, + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_packus_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(4); + let r = _mm256_packus_epi32(a, b); + let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sad_epu8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_sad_epu8(a, b); + let e = _mm256_set1_epi64x(16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_shufflehi_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 11, 22, 33, 44, + 4, 
5, 6, 7, 55, 66, 77, 88, + ); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 0, 1, 2, 3, 44, 22, 22, 11, + 4, 5, 6, 7, 88, 66, 66, 55, + ); + let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_shufflelo_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + 11, 22, 33, 44, 0, 1, 2, 3, + 55, 66, 77, 88, 4, 5, 6, 7, + ); + #[rustfmt::skip] + let e = _mm256_setr_epi16( + 44, 22, 22, 11, 0, 1, 2, 3, + 88, 66, 66, 55, 4, 5, 6, 7, + ); + let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sign_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(-1); + let r = _mm256_sign_epi16(a, b); + let e = _mm256_set1_epi16(-2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sign_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(-1); + let r = _mm256_sign_epi32(a, b); + let e = _mm256_set1_epi32(-2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sign_epi8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(-1); + let r = _mm256_sign_epi8(a, b); + let e = _mm256_set1_epi8(-2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sll_epi16() { + let a = _mm256_set1_epi16(0xFF); + let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); + let r = _mm256_sll_epi16(a, b); + assert_eq_m256i(r, _mm256_set1_epi16(0xFF0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sll_epi32() { + let a = _mm256_set1_epi32(0xFFFF); + let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); + let r = _mm256_sll_epi32(a, b); + assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sll_epi64() { + let a = _mm256_set1_epi64x(0xFFFFFFFF); + let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4); + let r = 
_mm256_sll_epi64(a, b); + assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_slli_epi16() { + assert_eq_m256i( + _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)), + _mm256_set1_epi16(0xFF0), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_slli_epi32() { + assert_eq_m256i( + _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)), + _mm256_set1_epi32(0xFFFF0), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_slli_epi64() { + assert_eq_m256i( + _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)), + _mm256_set1_epi64x(0xFFFFFFFF0), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_slli_si256() { + let a = _mm256_set1_epi64x(0xFFFFFFFF); + let r = _mm256_slli_si256::<3>(a); + assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_sllv_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(1); + let r = _mm_sllv_epi32(a, b); + let e = _mm_set1_epi32(4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sllv_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(1); + let r = _mm256_sllv_epi32(a, b); + let e = _mm256_set1_epi32(4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_sllv_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(1); + let r = _mm_sllv_epi64(a, b); + let e = _mm_set1_epi64x(4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sllv_epi64() { + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(1); + let r = _mm256_sllv_epi64(a, b); + let e = _mm256_set1_epi64x(4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sra_epi16() { + let a = _mm256_set1_epi16(-1); + let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); + let r = _mm256_sra_epi16(a, b); + assert_eq_m256i(r, _mm256_set1_epi16(-1)); + } + + 
#[simd_test(enable = "avx2")] + unsafe fn test_mm256_sra_epi32() { + let a = _mm256_set1_epi32(-1); + let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1); + let r = _mm256_sra_epi32(a, b); + assert_eq_m256i(r, _mm256_set1_epi32(-1)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srai_epi16() { + assert_eq_m256i( + _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)), + _mm256_set1_epi16(-1), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srai_epi32() { + assert_eq_m256i( + _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)), + _mm256_set1_epi32(-1), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_srav_epi32() { + let a = _mm_set1_epi32(4); + let count = _mm_set1_epi32(1); + let r = _mm_srav_epi32(a, count); + let e = _mm_set1_epi32(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srav_epi32() { + let a = _mm256_set1_epi32(4); + let count = _mm256_set1_epi32(1); + let r = _mm256_srav_epi32(a, count); + let e = _mm256_set1_epi32(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srli_si256() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm256_srli_si256::<3>(a); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 0, 0, 0, + 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 0, 0, 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srl_epi16() { + let a = _mm256_set1_epi16(0xFF); + let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); + let r = _mm256_srl_epi16(a, b); + assert_eq_m256i(r, _mm256_set1_epi16(0xF)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srl_epi32() { + let a = _mm256_set1_epi32(0xFFFF); + let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); + let r = _mm256_srl_epi32(a, b); + 
assert_eq_m256i(r, _mm256_set1_epi32(0xFFF)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srl_epi64() { + let a = _mm256_set1_epi64x(0xFFFFFFFF); + let b = _mm_setr_epi64x(4, 0); + let r = _mm256_srl_epi64(a, b); + assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srli_epi16() { + assert_eq_m256i( + _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)), + _mm256_set1_epi16(0xF), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srli_epi32() { + assert_eq_m256i( + _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)), + _mm256_set1_epi32(0xFFF), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srli_epi64() { + assert_eq_m256i( + _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)), + _mm256_set1_epi64x(0xFFFFFFF), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_srlv_epi32() { + let a = _mm_set1_epi32(2); + let count = _mm_set1_epi32(1); + let r = _mm_srlv_epi32(a, count); + let e = _mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srlv_epi32() { + let a = _mm256_set1_epi32(2); + let count = _mm256_set1_epi32(1); + let r = _mm256_srlv_epi32(a, count); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_srlv_epi64() { + let a = _mm_set1_epi64x(2); + let count = _mm_set1_epi64x(1); + let r = _mm_srlv_epi64(a, count); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_srlv_epi64() { + let a = _mm256_set1_epi64x(2); + let count = _mm256_set1_epi64x(1); + let r = _mm256_srlv_epi64(a, count); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_stream_load_si256() { + let a = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _); + assert_eq_m256i(a, r); + } + + 
#[simd_test(enable = "avx2")] + unsafe fn test_mm256_sub_epi16() { + let a = _mm256_set1_epi16(4); + let b = _mm256_set1_epi16(2); + let r = _mm256_sub_epi16(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sub_epi32() { + let a = _mm256_set1_epi32(4); + let b = _mm256_set1_epi32(2); + let r = _mm256_sub_epi32(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sub_epi64() { + let a = _mm256_set1_epi64x(4); + let b = _mm256_set1_epi64x(2); + let r = _mm256_sub_epi64(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_sub_epi8() { + let a = _mm256_set1_epi8(4); + let b = _mm256_set1_epi8(2); + let r = _mm256_sub_epi8(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_subs_epi16() { + let a = _mm256_set1_epi16(4); + let b = _mm256_set1_epi16(2); + let r = _mm256_subs_epi16(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_subs_epi8() { + let a = _mm256_set1_epi8(4); + let b = _mm256_set1_epi8(2); + let r = _mm256_subs_epi8(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_subs_epu16() { + let a = _mm256_set1_epi16(4); + let b = _mm256_set1_epi16(2); + let r = _mm256_subs_epu16(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_subs_epu8() { + let a = _mm256_set1_epi8(4); + let b = _mm256_set1_epi8(2); + let r = _mm256_subs_epu8(a, b); + assert_eq_m256i(r, b); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_xor_si256() { + let a = _mm256_set1_epi8(5); + let b = _mm256_set1_epi8(3); + let r = _mm256_xor_si256(a, b); + assert_eq_m256i(r, _mm256_set1_epi8(6)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_alignr_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 
23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi8( + -1, -2, -3, -4, -5, -6, -7, -8, + -9, -10, -11, -12, -13, -14, -15, -16, + -17, -18, -19, -20, -21, -22, -23, -24, + -25, -26, -27, -28, -29, -30, -31, -32, + ); + let r = _mm256_alignr_epi8::<33>(a, b); + assert_eq_m256i(r, _mm256_set1_epi8(0)); + + let r = _mm256_alignr_epi8::<17>(a, b); + #[rustfmt::skip] + let expected = _mm256_setr_epi8( + 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 0, + 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 0, + ); + assert_eq_m256i(r, expected); + + let r = _mm256_alignr_epi8::<4>(a, b); + #[rustfmt::skip] + let expected = _mm256_setr_epi8( + -5, -6, -7, -8, -9, -10, -11, -12, + -13, -14, -15, -16, 1, 2, 3, 4, + -21, -22, -23, -24, -25, -26, -27, -28, + -29, -30, -31, -32, 17, 18, 19, 20, + ); + assert_eq_m256i(r, expected); + + let r = _mm256_alignr_epi8::<15>(a, b); + #[rustfmt::skip] + let expected = _mm256_setr_epi8( + -16, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + -32, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + assert_eq_m256i(r, expected); + + let r = _mm256_alignr_epi8::<0>(a, b); + assert_eq_m256i(r, b); + + let r = _mm256_alignr_epi8::<16>(a, b); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + #[rustfmt::skip] + let b = _mm256_setr_epi8( + 4, 128u8 as i8, 4, 3, 24, 12, 6, 19, + 12, 5, 5, 10, 4, 1, 8, 0, + 4, 128u8 as i8, 4, 3, 24, 12, 6, 19, + 12, 5, 5, 10, 4, 1, 8, 0, + ); + #[rustfmt::skip] + let expected = _mm256_setr_epi8( + 5, 0, 5, 4, 9, 13, 7, 4, + 13, 6, 6, 11, 5, 2, 9, 1, + 21, 0, 21, 20, 25, 29, 23, 20, + 29, 22, 22, 27, 21, 18, 25, 17, + ); + let r = _mm256_shuffle_epi8(a, b); + assert_eq_m256i(r, expected); + } + + 
#[simd_test(enable = "avx2")] + unsafe fn test_mm256_permutevar8x32_epi32() { + let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800); + let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); + let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500); + let r = _mm256_permutevar8x32_epi32(a, b); + assert_eq_m256i(r, expected); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_permute4x64_epi64() { + let a = _mm256_setr_epi64x(100, 200, 300, 400); + let expected = _mm256_setr_epi64x(400, 100, 200, 100); + let r = _mm256_permute4x64_epi64::<0b00010011>(a); + assert_eq_m256i(r, expected); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_permute2x128_si256() { + let a = _mm256_setr_epi64x(100, 200, 500, 600); + let b = _mm256_setr_epi64x(300, 400, 700, 800); + let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b); + let e = _mm256_setr_epi64x(700, 800, 500, 600); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_permute4x64_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a); + let e = _mm256_setr_pd(4., 1., 2., 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_permutevar8x32_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); + let r = _mm256_permutevar8x32_ps(a, b); + let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i32gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); + assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i32gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is 
word-addressing + let r = _mm_mask_i32gather_epi32::<4>( + _mm_set1_epi32(256), + arr.as_ptr(), + _mm_setr_epi32(0, 16, 64, 96), + _mm_setr_epi32(-1, -1, -1, 0), + ); + assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i32gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let r = + _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); + assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i32gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let r = _mm256_mask_i32gather_epi32::<4>( + _mm256_set1_epi32(256), + arr.as_ptr(), + _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0), + _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0), + ); + assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i32gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); + assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i32gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm_mask_i32gather_ps::<4>( + _mm_set1_ps(256.0), + arr.as_ptr(), + _mm_setr_epi32(0, 16, 64, 96), + _mm_setr_ps(-1.0, -1.0, -1.0, 0.0), + ); + assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i32gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = + _mm256_i32gather_ps::<4>(arr.as_ptr(), 
_mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); + assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i32gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm256_mask_i32gather_ps::<4>( + _mm256_set1_ps(256.0), + arr.as_ptr(), + _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0), + _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), + ); + assert_eq_m256( + r, + _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0), + ); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i32gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)); + assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i32gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm_mask_i32gather_epi64::<8>( + _mm_set1_epi64x(256), + arr.as_ptr(), + _mm_setr_epi32(16, 16, 16, 16), + _mm_setr_epi64x(-1, 0), + ); + assert_eq_m128i(r, _mm_setr_epi64x(16, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i32gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); + assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i32gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm256_mask_i32gather_epi64::<8>( + _mm256_set1_epi64x(256), + arr.as_ptr(), + _mm_setr_epi32(0, 16, 64, 96), + _mm256_setr_epi64x(-1, -1, -1, 
0), + ); + assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i32gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)); + assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i32gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm_mask_i32gather_pd::<8>( + _mm_set1_pd(256.0), + arr.as_ptr(), + _mm_setr_epi32(16, 16, 16, 16), + _mm_setr_pd(-1.0, 0.0), + ); + assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i32gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); + assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i32gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm256_mask_i32gather_pd::<8>( + _mm256_set1_pd(256.0), + arr.as_ptr(), + _mm_setr_epi32(0, 16, 64, 96), + _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0), + ); + assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i64gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); + assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i64gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let 
r = _mm_mask_i64gather_epi32::<4>( + _mm_set1_epi32(256), + arr.as_ptr(), + _mm_setr_epi64x(0, 16), + _mm_setr_epi32(-1, 0, -1, 0), + ); + assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i64gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); + assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i64gather_epi32() { + let arr: [i32; 128] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + let r = _mm256_mask_i64gather_epi32::<4>( + _mm_set1_epi32(256), + arr.as_ptr(), + _mm256_setr_epi64x(0, 16, 64, 96), + _mm_setr_epi32(-1, -1, -1, 0), + ); + assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i64gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); + assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i64gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm_mask_i64gather_ps::<4>( + _mm_set1_ps(256.0), + arr.as_ptr(), + _mm_setr_epi64x(0, 16), + _mm_setr_ps(-1.0, 0.0, -1.0, 0.0), + ); + assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i64gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); + assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn 
test_mm256_mask_i64gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing for f32s + let r = _mm256_mask_i64gather_ps::<4>( + _mm_set1_ps(256.0), + arr.as_ptr(), + _mm256_setr_epi64x(0, 16, 64, 96), + _mm_setr_ps(-1.0, -1.0, -1.0, 0.0), + ); + assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i64gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); + assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i64gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm_mask_i64gather_epi64::<8>( + _mm_set1_epi64x(256), + arr.as_ptr(), + _mm_setr_epi64x(16, 16), + _mm_setr_epi64x(-1, 0), + ); + assert_eq_m128i(r, _mm_setr_epi64x(16, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i64gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); + assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i64gather_epi64() { + let arr: [i64; 128] = core::array::from_fn(|i| i as i64); + // A multiplier of 8 is word-addressing for i64s + let r = _mm256_mask_i64gather_epi64::<8>( + _mm256_set1_epi64x(256), + arr.as_ptr(), + _mm256_setr_epi64x(0, 16, 64, 96), + _mm256_setr_epi64x(-1, -1, -1, 0), + ); + assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_i64gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + 
let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); + assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm_mask_i64gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm_mask_i64gather_pd::<8>( + _mm_set1_pd(256.0), + arr.as_ptr(), + _mm_setr_epi64x(16, 16), + _mm_setr_pd(-1.0, 0.0), + ); + assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_i64gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); + assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0)); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_mask_i64gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing for f64s + let r = _mm256_mask_i64gather_pd::<8>( + _mm256_set1_pd(256.0), + arr.as_ptr(), + _mm256_setr_epi64x(0, 16, 64, 96), + _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0), + ); + assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0)); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi8() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + -1, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ); + let r1 = _mm256_extract_epi8::<0>(a); + let r2 = _mm256_extract_epi8::<3>(a); + assert_eq!(r1, 0xFF); + assert_eq!(r2, 3); + } + + #[simd_test(enable = "avx2")] + unsafe fn test_mm256_extract_epi16() { + #[rustfmt::skip] + let a = _mm256_setr_epi16( + -1, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let r1 = _mm256_extract_epi16::<0>(a); + let r2 = _mm256_extract_epi16::<3>(a); + assert_eq!(r1, 0xFFFF); + assert_eq!(r2, 3); + } +} diff --git 
a/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs b/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs new file mode 100644 index 000000000000..85afd91fba7b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs @@ -0,0 +1,1977 @@ +//! [AVX512BF16 intrinsics]. +//! +//! [AVX512BF16 intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769&avx512techs=AVX512_BF16 + +use crate::arch::asm; +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.128"] + fn cvtne2ps2bf16(a: f32x4, b: f32x4) -> i16x8; + #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.256"] + fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16; + #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"] + fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32; + #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"] + fn cvtneps2bf16_256(a: f32x8) -> i16x8; + #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"] + fn cvtneps2bf16_512(a: f32x16) -> i16x16; + #[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"] + fn dpbf16ps(a: f32x4, b: i16x8, c: i16x8) -> f32x4; + #[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"] + fn dpbf16ps_256(a: f32x8, b: i16x16, c: i16x16) -> f32x8; + #[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"] + fn dpbf16ps_512(a: f32x16, b: i16x32, c: i16x32) -> f32x16; +} + +/// Convert packed single-precision (32-bit) floating-point elements in two 128-bit vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results in a +/// 128-bit wide vector. 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651&avx512techs=AVX512_BF16&text=_mm_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh { + unsafe { transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4())) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results +/// in single vector dst using writemask k (elements are copied from src when the +/// corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651&avx512techs=AVX512_BF16&text=_mm_mask_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh { + unsafe { + let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x8())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results +/// in single vector dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651&avx512techs=AVX512_BF16&text=_mm_maskz_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh { + unsafe { + let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two 256-bit vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results in a +/// 256-bit wide vector. +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654&avx512techs=AVX512_BF16&text=_mm256_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh { + unsafe { transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8())) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b +/// to packed BF16 (16-bit) floating-point elements and store the results in single vector +/// dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654&avx512techs=AVX512_BF16&text=_mm256_mask_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm256_mask_cvtne2ps_pbh(src: __m256bh, k: __mmask16, a: __m256, b: __m256) -> __m256bh { + unsafe { + let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x16())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b +/// to packed BF16 (16-bit) floating-point elements, and store the results in single vector +/// dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654&avx512techs=AVX512_BF16&text=_mm256_maskz_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh { + unsafe { + let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two 512-bit vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results in a +/// 512-bit wide vector. 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657&avx512techs=AVX512_BF16&text=_mm512_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh { + unsafe { transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16())) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results +/// in single vector dst using writemask k (elements are copied from src when the +/// corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657&avx512techs=AVX512_BF16&text=_mm512_mask_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm512_mask_cvtne2ps_pbh(src: __m512bh, k: __mmask32, a: __m512, b: __m512) -> __m512bh { + unsafe { + let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x32())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in two vectors +/// a and b to packed BF16 (16-bit) floating-point elements, and store the results +/// in single vector dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657&avx512techs=AVX512_BF16&text=_mm512_maskz_cvtne2ps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] +pub fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh { + unsafe { + let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, cvt, u16x32::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst. +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm256_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +pub fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh { + unsafe { transmute(cvtneps2bf16_256(a.as_f32x8())) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm256_mask_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +pub fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh { + unsafe { + let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x8())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm256_maskz_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +pub fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh { + unsafe { + let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst. 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm512_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +pub fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh { + unsafe { transmute(cvtneps2bf16_512(a.as_f32x16())) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm512_mask_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +pub fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh { + unsafe { + let cvt = _mm512_cvtneps_pbh(a).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x16())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm512_maskz_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh { + unsafe { + let cvt = _mm512_cvtneps_pbh(a).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) + } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst. +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 { + unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i16x8(), b.as_i16x8())) } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm_mask_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 { + unsafe { + let rst = _mm_dpbf16_ps(src, a, b).as_f32x4(); + transmute(simd_select_bitmask(k, rst, src.as_f32x4())) + } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm_maskz_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 { + unsafe { + let rst = _mm_dpbf16_ps(src, a, b).as_f32x4(); + let zero = _mm_set1_ps(0.0_f32).as_f32x4(); + transmute(simd_select_bitmask(k, rst, zero)) + } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst. 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm256_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 { + unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i16x16(), b.as_i16x16())) } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm256_mask_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 { + unsafe { + let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); + transmute(simd_select_bitmask(k, rst, src.as_f32x8())) + } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm256_maskz_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 { + unsafe { + let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); + transmute(simd_select_bitmask(k, rst, f32x8::ZERO)) + } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst.Compute dot-product of BF16 (16-bit) +/// floating-point pairs in a and b, accumulating the intermediate single-precision (32-bit) +/// floating-point elements with elements in src, and store the results in dst. +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm512_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 { + unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i16x32(), b.as_i16x32())) } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). 
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm512_mask_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 { + unsafe { + let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); + transmute(simd_select_bitmask(k, rst, src.as_f32x16())) + } +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in src, and store the results in dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654,1657,1660&avx512techs=AVX512_BF16&text=_mm512_maskz_dpbf16_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr("vdpbf16ps"))] +pub fn _mm512_maskz_dpbf16_ps(k: __mmask16, src: __m512, a: __m512bh, b: __m512bh) -> __m512 { + unsafe { + let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); + transmute(simd_select_bitmask(k, rst, f32x16::ZERO)) + } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 { + unsafe { _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a)))) } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 { + unsafe { + let cvt = _mm512_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16())) + } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out +/// when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 { + unsafe { + let cvt = _mm512_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO)) + } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 { + unsafe { _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a)))) } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 { + unsafe { + let cvt = _mm256_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8())) + } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out +/// when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 { + unsafe { + let cvt = _mm256_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO)) + } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point +/// elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 { + unsafe { _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a)))) } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point +/// elements, and store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 { + unsafe { + let cvt = _mm_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4())) + } +} + +/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point +/// elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpbh_ps) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 { + unsafe { + let cvt = _mm_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO)) + } +} + +/// Converts a single BF16 (16-bit) floating-point element in a to a single-precision (32-bit) floating-point +/// element, and store the result in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsbh_ss) +#[inline] +#[target_feature(enable = "avx512bf16,avx512f")] +#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] +pub fn _mm_cvtsbh_ss(a: bf16) -> f32 { + f32::from_bits((a.to_bits() as u32) << 16) +} + +/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "vcvtneps2bf16 {dst}, {src}", + dst = lateout(xmm_reg) dst, + src = in(xmm_reg) a, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } +} + +/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh { + unsafe { + let mut dst = src; + asm!( + "vcvtneps2bf16 {dst}{{{k}}},{src}", + dst = inlateout(xmm_reg) dst, + src = in(xmm_reg) a, + k = in(kreg) k, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } +} + +/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out +/// when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtneps_pbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[cfg_attr(test, assert_instr("vcvtneps2bf16"))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}", + dst = lateout(xmm_reg) dst, + src = in(xmm_reg) a, + k = in(kreg) k, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } +} + +/// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point +/// element, and store the result in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtness_sbh) +#[inline] +#[target_feature(enable = "avx512bf16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] +pub fn _mm_cvtness_sbh(a: f32) -> bf16 { + unsafe { + let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0); + bf16::from_bits(value) + } +} + +#[cfg(test)] +mod tests { + use crate::core_arch::simd::u16x4; + use crate::{ + core_arch::x86::*, + mem::{transmute, transmute_copy}, + }; + use stdarch_test::simd_test; + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_cvtne2ps_pbh() { + let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32]; + let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32]; + let a: __m128 = transmute(a_array); + let b: __m128 = transmute(b_array); + let c: __m128bh = _mm_cvtne2ps_pbh(a, b); + let result: [u16; 8] = transmute(c.as_u16x8()); + #[rustfmt::skip] + let expected_result: [u16; 8] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_mask_cvtne2ps_pbh() { + let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32]; + let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32]; + #[rustfmt::skip] + let src_array: [u16; 8] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + ]; + let src: __m128bh = transmute(src_array); + let a: __m128 = transmute(a_array); + let b: __m128 = transmute(b_array); + let k: __mmask8 = 0b1111_1111; + let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b); + let result: [u16; 8] = transmute(c.as_u16x8()); + #[rustfmt::skip] 
+ let expected_result: [u16; 8] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + ]; + assert_eq!(result, expected_result); + let k = 0b0000_0000; + let c = _mm_mask_cvtne2ps_pbh(src, k, a, b); + let result: [u16; 8] = transmute(c.as_u16x8()); + let expected_result = src_array; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_maskz_cvtne2ps_pbh() { + let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32]; + let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32]; + let a: __m128 = transmute(a_array); + let b: __m128 = transmute(b_array); + let k: __mmask8 = 0b1111_1111; + let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b); + let result: [u16; 8] = transmute(c.as_u16x8()); + #[rustfmt::skip] + let expected_result: [u16; 8] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + ]; + assert_eq!(result, expected_result); + let k = 0b0011_1100; + let c = _mm_maskz_cvtne2ps_pbh(k, a, b); + let result: [u16; 8] = transmute(c.as_u16x8()); + #[rustfmt::skip] + let expected_result: [u16; 8] = [ + 0, + 0, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0, + 0, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_cvtne2ps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let b_array = [ + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + ]; + let a: __m256 = transmute(a_array); + let b: __m256 = transmute(b_array); + let c: 
__m256bh = _mm256_cvtne2ps_pbh(a, b); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_mask_cvtne2ps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let b_array = [ + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + ]; + let src_array: [u16; 16] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + ]; + let src: __m256bh = transmute(src_array); + let a: __m256 = transmute(a_array); + let b: __m256 = transmute(b_array); + let k: __mmask16 = 0xffff; + let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 
0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0; + let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b); + let result: [u16; 16] = transmute(c.as_u16x16()); + let expected_result = src_array; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_maskz_cvtne2ps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let b_array = [ + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + ]; + let a: __m256 = transmute(a_array); + let b: __m256 = transmute(b_array); + let k: __mmask16 = 0xffff; + let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0b0110_1100_0011_0110; + let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0, + 0, + 0, + 0, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn 
test_mm512_cvtne2ps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let b_array = [ + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + ]; + let a: __m512 = transmute(a_array); + let b: __m512 = transmute(b_array); + let c: __m512bh = _mm512_cvtne2ps_pbh(a, b); + let result: [u16; 32] = transmute(c.as_u16x32()); + #[rustfmt::skip] + let expected_result: [u16; 32] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_mask_cvtne2ps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let b_array = [ + -178.125_f32, + -10.5_f32, + 
-3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + ]; + let src_array: [u16; 32] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + ]; + let src: __m512bh = transmute(src_array); + let a: __m512 = transmute(a_array); + let b: __m512 = transmute(b_array); + let k: __mmask32 = 0xffffffff; + let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b); + let result: [u16; 32] = transmute(c.as_u16x32()); + #[rustfmt::skip] + let expected_result: [u16; 32] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 
0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask32 = 0; + let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b); + let result: [u16; 32] = transmute(c.as_u16x32()); + let expected_result = src_array; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_maskz_cvtne2ps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let b_array = [ + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + -178.125_f32, + -10.5_f32, + -3.75_f32, + -50.25_f32, + -16.5_f32, + -255.11_f32, + -1000.158_f32, + -575.575_f32, + ]; + let a: __m512 = transmute(a_array); + let b: __m512 = transmute(b_array); + let k: __mmask32 = 0xffffffff; + let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b); + let result: [u16; 32] = transmute(c.as_u16x32()); + #[rustfmt::skip] + let expected_result: [u16; 32] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 
0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110; + let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b); + let result: [u16; 32] = transmute(c.as_u16x32()); + #[rustfmt::skip] + let expected_result: [u16; 32] = [ + 0, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0, + 0b1_10000011_0000100, + 0, + 0b1_10001000_1111010, + 0, + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0, + 0, + 0, + 0b1_10000110_1111111, + 0, + 0b1_10001000_0010000, + 0, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0, + 0b0_10000011_0000100, + 0, + 0, + 0b0_10001000_0010000, + 0, + 0b0_10000010_0101000, + 0, + 0b0_10000100_1001001, + 0, + 0, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_cvtneps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let a: __m256 = transmute(a_array); + let c: __m128bh = _mm256_cvtneps_pbh(a); + let result: [u16; 8] = transmute(c.as_u16x8()); + #[rustfmt::skip] + let expected_result: [u16; 8] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_mask_cvtneps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let src_array: [u16; 8] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + ]; + let src: __m128bh = 
transmute(src_array); + let a: __m256 = transmute(a_array); + let k: __mmask8 = 0xff; + let b = _mm256_mask_cvtneps_pbh(src, k, a); + let result: [u16; 8] = transmute(b.as_u16x8()); + #[rustfmt::skip] + let expected_result: [u16; 8] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0x0; + let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a); + let result: [u16; 8] = transmute(b.as_u16x8()); + let expected_result: [u16; 8] = src_array; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_maskz_cvtneps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let a: __m256 = transmute(a_array); + let k: __mmask8 = 0xff; + let b = _mm256_maskz_cvtneps_pbh(k, a); + let result: [u16; 8] = transmute(b.as_u16x8()); + #[rustfmt::skip] + let expected_result: [u16; 8] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0x6; + let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a); + let result: [u16; 8] = transmute(b.as_u16x8()); + let expected_result: [u16; 8] = + [0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_cvtneps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 
]; + let a: __m512 = transmute(a_array); + let c: __m256bh = _mm512_cvtneps_pbh(a); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_mask_cvtneps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let src_array: [u16; 16] = [ + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + 0b1_10000110_0110010, + 0b1_10000010_0101000, + 0b1_10000000_1110000, + 0b1_10000100_1001001, + 0b1_10000011_0000100, + 0b1_10000110_1111111, + 0b1_10001000_1111010, + 0b1_10001000_0010000, + ]; + let src: __m256bh = transmute(src_array); + let a: __m512 = transmute(a_array); + let k: __mmask16 = 0xffff; + let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 
0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0; + let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a); + let result: [u16; 16] = transmute(c.as_u16x16()); + let expected_result = src_array; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_maskz_cvtneps_pbh() { + #[rustfmt::skip] + let a_array = [ + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + 178.125_f32, + 10.5_f32, + 3.75_f32, + 50.25_f32, + 16.5_f32, + 255.11_f32, + 1000.158_f32, + 575.575_f32, + ]; + let a: __m512 = transmute(a_array); + let k: __mmask16 = 0xffff; + let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + 0b0_10000110_0110010, + 0b0_10000010_0101000, + 0b0_10000000_1110000, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0b0_10001000_0010000, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0x653a; + let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a); + let result: [u16; 16] = transmute(c.as_u16x16()); + #[rustfmt::skip] + let expected_result: [u16; 16] = [ + 0, + 0b0_10000010_0101000, + 0, + 0b0_10000100_1001001, + 0b0_10000011_0000100, + 0b0_10000110_1111111, + 0, + 0, + 0b0_10000110_0110010, + 0, + 0b0_10000000_1110000, + 0, + 0, + 0b0_10000110_1111111, + 0b0_10001000_1111010, + 0, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_dpbf16_ps() { + let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32]; + let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32]; 
+ let a1: __m128 = transmute(a_array); + let b1: __m128 = transmute(b_array); + let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]); + let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1); + let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1); + let c: __m128 = _mm_dpbf16_ps(src, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_mask_dpbf16_ps() { + let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32]; + let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32]; + let a1: __m128 = transmute(a_array); + let b1: __m128 = transmute(b_array); + let k: __mmask8 = 0xf3; + let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]); + let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1); + let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1); + let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0xff; + let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0; + let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_maskz_dpbf16_ps() { + let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32]; + let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32]; + let a1: __m128 = transmute(a_array); + let b1: __m128 = transmute(b_array); + let k: __mmask8 = 0xf3; + let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 
4.0_f32]); + let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1); + let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1); + let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0xff; + let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0; + let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 4] = transmute(c.as_f32x4()); + let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_dpbf16_ps() { + #[rustfmt::skip] + let a_array = [ + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + ]; + let b_array = [ + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + ]; + let a1: __m256 = transmute(a_array); + let b1: __m256 = transmute(b_array); + #[rustfmt::skip] + let src: __m256 = transmute([ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]); + let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1); + let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1); + let c: __m256 = _mm256_dpbf16_ps(src, a, b); + let result: [f32; 8] = transmute(c.as_f32x8()); + #[rustfmt::skip] + let expected_result: [f32; 8] = [ + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_mask_dpbf16_ps() { + #[rustfmt::skip] + let a_array = [ + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + ]; + let b_array = [ + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, 
-1.0_f32, -1.0_f32, + ]; + let a1: __m256 = transmute(a_array); + let b1: __m256 = transmute(b_array); + let k: __mmask8 = 0x33; + #[rustfmt::skip] + let src: __m256 = transmute([ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]); + let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1); + let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1); + let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 8] = transmute(c.as_f32x8()); + #[rustfmt::skip] + let expected_result: [f32; 8] = [ + -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, + ]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0xff; + let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 8] = transmute(c.as_f32x8()); + #[rustfmt::skip] + let expected_result: [f32; 8] = [ + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + ]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0; + let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 8] = transmute(c.as_f32x8()); + #[rustfmt::skip] + let expected_result: [f32; 8] = [ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_maskz_dpbf16_ps() { + #[rustfmt::skip] + let a_array = [ + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + ]; + let b_array = [ + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + ]; + let a1: __m256 = transmute(a_array); + let b1: __m256 = transmute(b_array); + let k: __mmask8 = 0x33; + #[rustfmt::skip] + let src: __m256 = transmute([ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]); + let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1); + let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1); + let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 
8] = transmute(c.as_f32x8()); + #[rustfmt::skip] + let expected_result: [f32; 8] = [ + -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, + ]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0xff; + let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 8] = transmute(c.as_f32x8()); + #[rustfmt::skip] + let expected_result: [f32; 8] = [ + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + ]; + assert_eq!(result, expected_result); + let k: __mmask8 = 0; + let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 8] = transmute(c.as_f32x8()); + let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_dpbf16_ps() { + #[rustfmt::skip] + let a_array = [ + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + ]; + let b_array = [ + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + ]; + let a1: __m512 = transmute(a_array); + let b1: __m512 = transmute(b_array); + let src: __m512 = transmute([ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, + 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]); + let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1); + let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1); + let c: __m512 = _mm512_dpbf16_ps(src, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + ]; + assert_eq!(result, expected_result); + } + + 
#[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_mask_dpbf16_ps() { + #[rustfmt::skip] + let a_array = [ + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + ]; + let b_array = [ + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + ]; + let a1: __m512 = transmute(a_array); + let b1: __m512 = transmute(b_array); + let k: __mmask16 = 0x3333; + #[rustfmt::skip] + let src: __m512 = transmute([ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, + 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]); + let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1); + let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1); + let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, + -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0xffff; + let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0; + let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, + 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 
3.0_f32, 4.0_f32, + ]; + assert_eq!(result, expected_result); + } + + #[simd_test(enable = "avx512bf16,avx512f")] + unsafe fn test_mm512_maskz_dpbf16_ps() { + #[rustfmt::skip] + let a_array = [ + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, + ]; + let b_array = [ + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, + ]; + let a1: __m512 = transmute(a_array); + let b1: __m512 = transmute(b_array); + let k: __mmask16 = 0x3333; + #[rustfmt::skip] + let src: __m512 = transmute([ + 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, + 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, + ]); + let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1); + let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1); + let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, + 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0xffff; + let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, + ]; + assert_eq!(result, expected_result); + let k: __mmask16 = 0; + let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b); + let result: [f32; 16] = transmute(c.as_f32x16()); + #[rustfmt::skip] + let expected_result: [f32; 16] = [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + ]; + 
assert_eq!(result, expected_result); + } + + const BF16_ONE: u16 = 0b0_01111111_0000000; + const BF16_TWO: u16 = 0b0_10000000_0000000; + const BF16_THREE: u16 = 0b0_10000000_1000000; + const BF16_FOUR: u16 = 0b0_10000001_0000000; + const BF16_FIVE: u16 = 0b0_10000001_0100000; + const BF16_SIX: u16 = 0b0_10000001_1000000; + const BF16_SEVEN: u16 = 0b0_10000001_1100000; + const BF16_EIGHT: u16 = 0b0_10000010_0000000; + + #[simd_test(enable = "avx512bf16")] + unsafe fn test_mm512_cvtpbh_ps() { + let a = __m256bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let r = _mm512_cvtpbh_ps(a); + let e = _mm512_setr_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512bf16")] + unsafe fn test_mm512_mask_cvtpbh_ps() { + let a = __m256bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let src = _mm512_setr_ps( + 9., 10., 11., 12., 13., 14., 15., 16., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let k = 0b1010_1010_1010_1010; + let r = _mm512_mask_cvtpbh_ps(src, k, a); + let e = _mm512_setr_ps( + 9., 2., 11., 4., 13., 6., 15., 8., 9., 2., 11., 4., 13., 6., 15., 8., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512bf16")] + unsafe fn test_mm512_maskz_cvtpbh_ps() { + let a = __m256bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let k = 0b1010_1010_1010_1010; + let r = _mm512_maskz_cvtpbh_ps(k, a); + let e = _mm512_setr_ps( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 2., 0., 4., 0., 6., 0., 8., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = 
"avx512bf16,avx512vl")] + unsafe fn test_mm256_cvtpbh_ps() { + let a = __m128bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let r = _mm256_cvtpbh_ps(a); + let e = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_mask_cvtpbh_ps() { + let a = __m128bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let k = 0b1010_1010; + let r = _mm256_mask_cvtpbh_ps(src, k, a); + let e = _mm256_setr_ps(9., 2., 11., 4., 13., 6., 15., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm256_maskz_cvtpbh_ps() { + let a = __m128bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let k = 0b1010_1010; + let r = _mm256_maskz_cvtpbh_ps(k, a); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_cvtpbh_ps() { + let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]); + let r = _mm_cvtpbh_ps(a); + let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_mask_cvtpbh_ps() { + let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]); + let src = _mm_setr_ps(9., 10., 11., 12.); + let k = 0b1010; + let r = _mm_mask_cvtpbh_ps(src, k, a); + let e = _mm_setr_ps(9., 2., 11., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_maskz_cvtpbh_ps() { + let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]); + let k = 0b1010; + let r = _mm_maskz_cvtpbh_ps(k, a); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = 
"avx512bf16")] + unsafe fn test_mm_cvtsbh_ss() { + let r = _mm_cvtsbh_ss(bf16::from_bits(BF16_ONE)); + assert_eq!(r, 1.); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_cvtneps_pbh() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r: u16x4 = transmute_copy(&_mm_cvtneps_pbh(a)); + let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR); + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_mask_cvtneps_pbh() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]); + let k = 0b1010; + let r: u16x4 = transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a)); + let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR); + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_maskz_cvtneps_pbh() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let k = 0b1010; + let r: u16x4 = transmute_copy(&_mm_maskz_cvtneps_pbh(k, a)); + let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR); + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bf16,avx512vl")] + unsafe fn test_mm_cvtness_sbh() { + let r = _mm_cvtness_sbh(1.); + assert_eq!(r.to_bits(), BF16_ONE); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs b/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs new file mode 100644 index 000000000000..1cbf0faea09f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs @@ -0,0 +1,806 @@ +//! Bit-oriented Algorithms (BITALG) +//! +//! The intrinsics here correspond to those in the `immintrin.h` C header. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! 
[intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::simd::i8x16; +use crate::core_arch::simd::i8x32; +use crate::core_arch::simd::i8x64; +use crate::core_arch::simd::i16x8; +use crate::core_arch::simd::i16x16; +use crate::core_arch::simd::i16x32; +use crate::core_arch::x86::__m128i; +use crate::core_arch::x86::__m256i; +use crate::core_arch::x86::__m512i; +use crate::core_arch::x86::__mmask8; +use crate::core_arch::x86::__mmask16; +use crate::core_arch::x86::__mmask32; +use crate::core_arch::x86::__mmask64; +use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask}; +use crate::mem::transmute; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"] + fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64; + #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"] + fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32; + #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"] + fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16; +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i16x32())) } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x32()), + i16x32::ZERO, + )) + } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x32()), + src.as_i16x32(), + )) + } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i16x16())) } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x16()), + i16x16::ZERO, + )) + } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x16()), + src.as_i16x16(), + )) + } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i16x8())) } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x8()), + i16x8::ZERO, + )) + } +} + +/// For each packed 16-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntw))] +pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x8()), + src.as_i16x8(), + )) + } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i8x64())) } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x64()), + i8x64::ZERO, + )) + } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x64()), + src.as_i8x64(), + )) + } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i8x32())) } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x32()), + i8x32::ZERO, + )) + } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x32()), + src.as_i8x32(), + )) + } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i8x16())) } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x16()), + i8x16::ZERO, + )) + } +} + +/// For each packed 8-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntb))] +pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x16()), + src.as_i8x16(), + )) + } +} + +/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. +/// It then selects these bits and packs them into the output. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufbitqmb))] +pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 { + unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) } +} + +/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. +/// It then selects these bits and packs them into the output. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask) +#[inline] +#[target_feature(enable = "avx512bitalg")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufbitqmb))] +pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 { + unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) } +} + +/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. +/// It then selects these bits and packs them into the output. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufbitqmb))] +pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 { + unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) } +} + +/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. +/// It then selects these bits and packs them into the output. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufbitqmb))] +pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 { + unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) } +} + +/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. +/// It then selects these bits and packs them into the output. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufbitqmb))] +pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 { + unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) } +} + +/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. +/// It then selects these bits and packs them into the output. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask) +#[inline] +#[target_feature(enable = "avx512bitalg,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufbitqmb))] +pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 { + unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) } +} + +#[cfg(test)] +mod tests { + // Some of the constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __mXXXi happens to be defined in terms of signed integers. 
+ #![allow(overflowing_literals)] + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_popcnt_epi16() { + let test_data = _mm512_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, + 1024, 2048, + ); + let actual_result = _mm512_popcnt_epi16(test_data); + let reference_result = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_maskz_popcnt_epi16() { + let test_data = _mm512_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, + 1024, 2048, + ); + let mask = 0xFF_FF_00_00; + let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data); + let reference_result = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_mask_popcnt_epi16() { + let test_data = _mm512_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, + 1024, 2048, + ); + let mask = 0xFF_FF_00_00; + let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data); + let reference_result = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2, + 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = 
"avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi16() { + let test_data = _mm256_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, + ); + let actual_result = _mm256_popcnt_epi16(test_data); + let reference_result = + _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_maskz_popcnt_epi16() { + let test_data = _mm256_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, + ); + let mask = 0xFF_00; + let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data); + let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_mask_popcnt_epi16() { + let test_data = _mm256_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, + ); + let mask = 0xFF_00; + let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data); + let reference_result = _mm256_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF, + ); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi16() { + let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); + let actual_result = _mm_popcnt_epi16(test_data); + let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_maskz_popcnt_epi16() { + let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); + let mask = 0xF0; + let actual_result 
= _mm_maskz_popcnt_epi16(mask, test_data); + let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_mask_popcnt_epi16() { + let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); + let mask = 0xF0; + let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data); + let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_popcnt_epi8() { + let test_data = _mm512_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, + 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, + 225, 21, 249, 211, 155, 228, 70, + ); + let actual_result = _mm512_popcnt_epi8(test_data); + let reference_result = _mm512_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, + 2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4, + 3, 6, 5, 5, 4, 3, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_maskz_popcnt_epi8() { + let test_data = _mm512_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, + 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, + 225, 21, 249, 211, 155, 228, 70, + ); + let mask = 0xFF_FF_FF_FF_00_00_00_00; + let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data); + let reference_result = _mm512_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 
4, 7, 5, 6, 5, + 2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_mask_popcnt_epi8() { + let test_data = _mm512_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, + 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, + 225, 21, 249, 211, 155, 228, 70, + ); + let mask = 0xFF_FF_FF_FF_00_00_00_00; + let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data); + let reference_result = _mm512_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, + 2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, + 251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi8() { + let test_data = _mm256_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, + ); + let actual_result = _mm256_popcnt_epi8(test_data); + let reference_result = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, + 2, 4, 4, + ); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_maskz_popcnt_epi8() { + let test_data = _mm256_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143, + 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70, + ); + let mask = 0xFF_FF_00_00; + let actual_result = 
_mm256_maskz_popcnt_epi8(mask, test_data); + let reference_result = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, + ); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_mask_popcnt_epi8() { + let test_data = _mm256_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143, + 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70, + ); + let mask = 0xFF_FF_00_00; + let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data); + let reference_result = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137, + 90, 225, 21, 249, 211, 155, 228, 70, + ); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi8() { + let test_data = _mm_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, + ); + let actual_result = _mm_popcnt_epi8(test_data); + let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_maskz_popcnt_epi8() { + let test_data = _mm_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70, + ); + let mask = 0xFF_00; + let actual_result = _mm_maskz_popcnt_epi8(mask, test_data); + let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_mask_popcnt_epi8() { + let test_data = _mm_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70, + ); + let mask = 0xFF_00; + let actual_result = _mm_mask_popcnt_epi8(test_data, mask, 
test_data); + let reference_result = + _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_bitshuffle_epi64_mask() { + let test_indices = _mm512_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, + 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, + 58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, + ); + let test_data = _mm512_setr_epi64( + 0xFF_FF_FF_FF_00_00_00_00, + 0xFF_00_FF_00_FF_00_FF_00, + 0xFF_00_00_00_00_00_00_00, + 0xAC_00_00_00_00_00_00_00, + 0xFF_FF_FF_FF_00_00_00_00, + 0xFF_00_FF_00_FF_00_FF_00, + 0xFF_00_00_00_00_00_00_00, + 0xAC_00_00_00_00_00_00_00, + ); + let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices); + let reference_result = 0xF0 << 0 + | 0x03 << 8 + | 0xFF << 16 + | 0xAC << 24 + | 0xF0 << 32 + | 0x03 << 40 + | 0xFF << 48 + | 0xAC << 56; + + assert_eq!(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_mask_bitshuffle_epi64_mask() { + let test_indices = _mm512_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, + 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, + 58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, + ); + let test_data = _mm512_setr_epi64( + 0xFF_FF_FF_FF_00_00_00_00, + 0xFF_00_FF_00_FF_00_FF_00, + 0xFF_00_00_00_00_00_00_00, + 0xAC_00_00_00_00_00_00_00, + 0xFF_FF_FF_FF_00_00_00_00, + 0xFF_00_FF_00_FF_00_FF_00, + 0xFF_00_00_00_00_00_00_00, + 0xAC_00_00_00_00_00_00_00, + ); + let mask = 0xFF_FF_FF_FF_00_00_00_00; + let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices); + let reference_result = 0x00 << 0 + | 0x00 << 8 + | 0x00 << 16 + | 0x00 << 24 + | 0xF0 << 32 
+ | 0x03 << 40 + | 0xFF << 48 + | 0xAC << 56; + + assert_eq!(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_bitshuffle_epi64_mask() { + let test_indices = _mm256_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, + 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, + ); + let test_data = _mm256_setr_epi64x( + 0xFF_FF_FF_FF_00_00_00_00, + 0xFF_00_FF_00_FF_00_FF_00, + 0xFF_00_00_00_00_00_00_00, + 0xAC_00_00_00_00_00_00_00, + ); + let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices); + let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24; + + assert_eq!(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_mask_bitshuffle_epi64_mask() { + let test_indices = _mm256_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, + 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, + ); + let test_data = _mm256_setr_epi64x( + 0xFF_FF_FF_FF_00_00_00_00, + 0xFF_00_FF_00_FF_00_FF_00, + 0xFF_00_00_00_00_00_00_00, + 0xAC_00_00_00_00_00_00_00, + ); + let mask = 0xFF_FF_00_00; + let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices); + let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24; + + assert_eq!(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_bitshuffle_epi64_mask() { + let test_indices = _mm_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, + ); + let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00); + let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices); + let reference_result = 0xFF << 0 | 0xAC << 8; + + assert_eq!(actual_result, reference_result); + } + + #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn 
test_mm_mask_bitshuffle_epi64_mask() { + let test_indices = _mm_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, + ); + let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00); + let mask = 0xFF_00; + let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices); + let reference_result = 0x00 << 0 | 0xAC << 8; + + assert_eq!(actual_result, reference_result); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs new file mode 100644 index 000000000000..8139b8cd6f3c --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -0,0 +1,21108 @@ +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, + ptr, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm512_abs_epi16(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let cmp: i16x32 = simd_gt(a, i16x32::ZERO); + transmute(simd_select(cmp, a, simd_neg(a))) + } +} + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, abs, src.as_i16x32())) + } +} + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, abs, i16x32::ZERO)) + } +} + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, abs, src.as_i16x16())) + } +} + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, abs, i16x16::ZERO)) + } +} + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, abs, src.as_i16x8())) + } +} + +/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsw))] +pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, abs, i16x8::ZERO)) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm512_abs_epi8(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let cmp: i8x64 = simd_gt(a, i8x64::ZERO); + transmute(simd_select(cmp, a, simd_neg(a))) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, abs, src.as_i8x64())) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, abs, i8x64::ZERO)) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, abs, src.as_i8x32())) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, abs, i8x32::ZERO)) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, abs, src.as_i8x16())) + } +} + +/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst 
using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsb))] +pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, abs, i8x16::ZERO)) + } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, src.as_i16x32())) + } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, i16x32::ZERO)) + } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, src.as_i16x16())) + } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, i16x16::ZERO)) + } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, src.as_i16x8())) + } +} + +/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddw))] +pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, i16x8::ZERO)) + } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, src.as_i8x64())) + } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, i8x64::ZERO)) + } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, src.as_i8x32())) + } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, i8x32::ZERO)) + } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, src.as_i8x16())) + } +} + +/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddb))] +pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, i8x16::ZERO)) + } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, add, src.as_u16x32())) + } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, add, u16x32::ZERO)) + } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, add, src.as_u16x16())) + } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, add, u16x16::ZERO)) + } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, add, src.as_u16x8())) + } +} + +/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusw))] +pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, add, u16x8::ZERO)) + } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, add, src.as_u8x64())) + } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, add, u8x64::ZERO)) + } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, add, src.as_u8x32())) + } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, add, u8x32::ZERO)) + } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, add, src.as_u8x16())) + } +} + +/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddusb))] +pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, add, u8x16::ZERO)) + } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, src.as_i16x32())) + } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, i16x32::ZERO)) + } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, src.as_i16x16())) + } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, i16x16::ZERO)) + } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, src.as_i16x8())) + } +} + +/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsw))] +pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, i16x8::ZERO)) + } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, src.as_i8x64())) + } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, i8x64::ZERO)) + } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, src.as_i8x32())) + } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, i8x32::ZERO)) + } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, src.as_i8x16())) + } +} + +/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddsb))] +pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, i8x16::ZERO)) + } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, src.as_i16x32())) + } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) + } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, src.as_i16x16())) + } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) + } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, src.as_i16x8())) + } +} + +/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubw))] +pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) + } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, src.as_i8x64())) + } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) + } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, src.as_i8x32())) + } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) + } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, src.as_i8x16())) + } +} + +/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubb))] +pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) + } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, sub, src.as_u16x32())) + } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, sub, u16x32::ZERO)) + } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, sub, src.as_u16x16())) + } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, sub, u16x16::ZERO)) + } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, sub, src.as_u16x8())) + } +} + +/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusw))] +pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, sub, u16x8::ZERO)) + } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, sub, src.as_u8x64())) + } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, sub, u8x64::ZERO)) + } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, sub, src.as_u8x32())) + } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, sub, u8x32::ZERO)) + } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, sub, src.as_u8x16())) + } +} + +/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubusb))] +pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, sub, u8x16::ZERO)) + } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, src.as_i16x32())) + } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) + } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, src.as_i16x16())) + } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) + } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, src.as_i16x8())) + } +} + +/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsw))] +pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) + } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, src.as_i8x64())) + } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) + } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, src.as_i8x32())) + } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) + } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, src.as_i8x16())) + } +} + +/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubsb))] +pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, u32x32>(a.as_u16x32()); + let b = simd_cast::<_, u32x32>(b.as_u16x32()); + let r = simd_shr(simd_mul(a, b), u32x32::splat(16)); + transmute(simd_cast::(r)) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, mul, src.as_u16x32())) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, mul, u16x32::ZERO)) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, mul, src.as_u16x16())) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, mul, u16x16::ZERO)) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, mul, src.as_u16x8())) + } +} + +/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhuw))] +pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, mul, u16x8::ZERO)) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, i32x32>(a.as_i16x32()); + let b = simd_cast::<_, i32x32>(b.as_i16x32()); + let r = simd_shr(simd_mul(a, b), i32x32::splat(16)); + transmute(simd_cast::(r)) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, src.as_i16x32())) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, src.as_i16x16())) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, src.as_i16x8())) + } +} + +/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhw))] +pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, src.as_i16x32())) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, src.as_i16x16())) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, src.as_i16x8())) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulhrsw))] +pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) + } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, src.as_i16x32())) + } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) + } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, src.as_i16x16())) + } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) + } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, src.as_i16x8())) + } +} + +/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmullw))] +pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u16x32(); + let b = b.as_u16x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, max, src.as_u16x32())) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, max, u16x32::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, max, src.as_u16x16())) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, max, u16x16::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, max, src.as_u16x8())) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxuw))] +pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, max, u16x8::ZERO)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u8x64(); + let b = b.as_u8x64(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, max, src.as_u8x64())) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, max, u8x64::ZERO)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, max, src.as_u8x32())) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, max, u8x32::ZERO)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, max, src.as_u8x16())) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxub))] +pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, max, u8x16::ZERO)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, max, src.as_i16x32())) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, max, i16x32::ZERO)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, max, src.as_i16x16())) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, max, i16x16::ZERO)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, max, src.as_i16x8())) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsw))] +pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, max, i16x8::ZERO)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, max, src.as_i8x64())) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, max, i8x64::ZERO)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, max, src.as_i8x32())) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, max, i8x32::ZERO)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, max, src.as_i8x16())) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsb))] +pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, max, i8x16::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u16x32(); + let b = b.as_u16x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, min, src.as_u16x32())) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, min, u16x32::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, min, src.as_u16x16())) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, min, u16x16::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, min, src.as_u16x8())) + } +} + +/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuw))] +pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, min, u16x8::ZERO)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u8x64(); + let b = b.as_u8x64(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, min, src.as_u8x64())) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, min, u8x64::ZERO)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, min, src.as_u8x32())) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, min, u8x32::ZERO)) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, min, src.as_u8x16())) + } +} + +/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminub))] +pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, min, u8x16::ZERO)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, min, src.as_i16x32())) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, min, i16x32::ZERO)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, min, src.as_i16x16())) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, min, i16x16::ZERO)) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, min, src.as_i16x8())) + } +} + +/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsw))] +pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, min, i16x8::ZERO)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, min, src.as_i8x64())) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, min, i8x64::ZERO)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, min, src.as_i8x32())) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, min, i8x32::ZERO)) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, min, src.as_i8x16())) + } +} + +/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsb))] +pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, min, i8x16::ZERO)) + } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_u16x32(), b.as_u16x32())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_u16x16(), b.as_u16x16())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_u16x8(), b.as_u16x8())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_cmplt_epu8_mask&expand=1068) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_lt(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_u8x32(), b.as_u8x32())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_u8x16(), b.as_u8x16())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_i16x32(), b.as_i16x32())) } +} + +/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_lt(a.as_i8x64(), b.as_i8x64())) } +} + +/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_i8x16(), b.as_i8x16())) } +} + +/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_u16x32(), b.as_u16x32())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_u16x16(), b.as_u16x16())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_u16x8(), b.as_u16x8())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_gt(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_u8x32(), b.as_u8x32())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_u8x16(), b.as_u8x16())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_i16x32(), b.as_i16x32())) } +} + +/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_gt(a.as_i8x64(), b.as_i8x64())) } +} + +/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_i8x16(), b.as_i8x16())) } +} + +/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_u16x32(), b.as_u16x32())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_u16x16(), b.as_u16x16())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_u16x8(), b.as_u16x8())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_le(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_u8x32(), b.as_u8x32())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_u8x16(), b.as_u8x16())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_i16x32(), b.as_i16x32())) } +} + +/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_le(a.as_i8x64(), b.as_i8x64())) } +} + +/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_i8x16(), b.as_i8x16())) } +} + +/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_u16x32(), b.as_u16x32())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_u16x16(), b.as_u16x16())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_u16x8(), b.as_u16x8())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ge(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_u8x32(), b.as_u8x32())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_u8x16(), b.as_u8x16())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_i16x32(), b.as_i16x32())) } +} + +/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ge(a.as_i8x64(), b.as_i8x64())) } +} + +/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_i8x16(), b.as_i8x16())) } +} + +/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_u16x32(), b.as_u16x32())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_u16x16(), b.as_u16x16())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_u16x8(), b.as_u16x8())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_eq(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_u8x32(), b.as_u8x32())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_u8x16(), b.as_u8x16())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_i16x32(), b.as_i16x32())) } +} + +/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_eq(a.as_i8x64(), b.as_i8x64())) } +} + +/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_i8x16(), b.as_i8x16())) } +} + +/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_u16x32(), b.as_u16x32())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_u16x16(), b.as_u16x16())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_u16x8(), b.as_u16x8())) } +} + +/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ne(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_u8x32(), b.as_u8x32())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_u8x16(), b.as_u8x16())) } +} + +/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_i16x32(), b.as_i16x32())) } +} + +/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_i16x16(), b.as_i16x16())) } +} + +/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ne(a.as_i8x64(), b.as_i8x64())) } +} + +/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_i8x32(), b.as_i8x32())) } +} + +/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_i8x16(), b.as_i8x16())) } +} + +/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x32(); + let b = b.as_u16x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x32::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_mask_cmp_epu16_mask( + k1: __mmask32, + a: __m512i, + b: __m512i, +) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x32(); + let b = b.as_u16x32(); + let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x16(); + let b = b.as_u16x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x16::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_mask_cmp_epu16_mask( + k1: __mmask16, + a: __m256i, + b: __m256i, +) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x16(); + let b = b.as_u16x16(); + let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand 
specified by imm8, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x8(); + let b = b.as_u16x8(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x8::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x8(); + let b = b.as_u16x8(); + let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x64::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x64::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_mask_cmp_epu8_mask( + k1: __mmask64, + a: __m512i, + b: __m512i, +) -> __mmask64 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x64::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the 
results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x32::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_mask_cmp_epu8_mask( + k1: __mmask32, + a: __m256i, + b: __m256i, +) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x16::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results 
in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x32(); + let b = b.as_i16x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x32::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_mask_cmp_epi16_mask( + k1: __mmask32, + a: __m512i, + b: __m512i, +) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x32(); + let b = b.as_i16x32(); + let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 16-bit integers in a and b based on the comparison operand 
specified by imm8, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x16(); + let b = b.as_i16x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x16::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_mask_cmp_epi16_mask( + k1: __mmask16, + a: __m256i, + b: __m256i, +) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x16(); + let b = b.as_i16x16(); + let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x8(); + let b = b.as_i16x8(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x8::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x8(); + let b = b.as_i16x8(); + let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results 
in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x64(); + let b = b.as_i8x64(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x64::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x64::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm512_mask_cmp_epi8_mask( + k1: __mmask64, + a: __m512i, + b: __m512i, +) -> __mmask64 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x64(); + let b = b.as_i8x64(); + let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x64::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, 
and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x32(); + let b = b.as_i8x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x32::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm256_mask_cmp_epi8_mask( + k1: __mmask32, + a: __m256i, + b: __m256i, +) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x32(); + let b = b.as_i8x32(); + let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x16(); + let b = b.as_i8x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x16::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x16(); + let b = b.as_i8x16(); + let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_add_unordered(a.as_i16x16()) } +} + +/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } +} + +/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_add_unordered(a.as_i16x8()) } +} + +/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } +} + +/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_add_unordered(a.as_i8x32()) } +} + +/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } +} + +/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_add_unordered(a.as_i8x16()) } +} + +/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } +} + +/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_and(a.as_i16x16()) } +} + +/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i16x16(), + _mm256_set1_epi64x(-1).as_i16x16(), + )) + } +} + +/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_and(a.as_i16x8()) } +} + +/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i16x8(), + _mm_set1_epi64x(-1).as_i16x8(), + )) + } +} + +/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_and(a.as_i8x32()) } +} + +/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i8x32(), + _mm256_set1_epi64x(-1).as_i8x32(), + )) + } +} + +/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_and(a.as_i8x16()) } +} + +/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i8x16(), + _mm_set1_epi64x(-1).as_i8x16(), + )) + } +} + +/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_max(a.as_i16x16()) } +} + +/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) } +} + +/// Reduce the packed 16-bit integers in a by maximum. 
Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_max(a.as_i16x8()) } +} + +/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) } +} + +/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_max(a.as_i8x32()) } +} + +/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) } +} + +/// Reduce the packed 8-bit integers in a by maximum. 
Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_max(a.as_i8x16()) } +} + +/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) } +} + +/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 { + unsafe { simd_reduce_max(a.as_u16x16()) } +} + +/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) } +} + +/// Reduce the packed unsigned 16-bit integers in a by maximum. 
Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 { + unsafe { simd_reduce_max(a.as_u16x8()) } +} + +/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) } +} + +/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 { + unsafe { simd_reduce_max(a.as_u8x32()) } +} + +/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) } +} + +/// Reduce the packed unsigned 8-bit integers in a by maximum. 
Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 { + unsafe { simd_reduce_max(a.as_u8x16()) } +} + +/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) } +} + +/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_min(a.as_i16x16()) } +} + +/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) } +} + +/// Reduce the packed 16-bit integers in a by minimum. 
Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_min(a.as_i16x8()) } +} + +/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) } +} + +/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_min(a.as_i8x32()) } +} + +/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) } +} + +/// Reduce the packed 8-bit integers in a by minimum. 
Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_min(a.as_i8x16()) } +} + +/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) } +} + +/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 { + unsafe { simd_reduce_min(a.as_u16x16()) } +} + +/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) } +} + +/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 { + unsafe { simd_reduce_min(a.as_u16x8()) } +} + +/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) } +} + +/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 { + unsafe { simd_reduce_min(a.as_u8x32()) } +} + +/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. 
Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) } +} + +/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 { + unsafe { simd_reduce_min(a.as_u8x16()) } +} + +/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) } +} + +/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_mul_unordered(a.as_i16x16()) } +} + +/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) } +} + +/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_mul_unordered(a.as_i16x8()) } +} + +/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) } +} + +/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_mul_unordered(a.as_i8x32()) } +} + +/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) } +} + +/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_mul_unordered(a.as_i8x16()) } +} + +/// Reduce the packed 8-bit integers in a by multiplication using mask k. 
Returns the product of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) } +} + +/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_or(a.as_i16x16()) } +} + +/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } +} + +/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_or(a.as_i16x8()) } +} + +/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. 
Returns the bitwise OR of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } +} + +/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_or(a.as_i8x32()) } +} + +/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } +} + +/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_or(a.as_i8x16()) } +} + +/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. 
Returns the bitwise OR of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } +} + +/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 +pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i { + ptr::read_unaligned(mem_addr as *const __m512i) +} + +/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 +pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i { + ptr::read_unaligned(mem_addr as *const __m256i) +} + +/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 +pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i { + ptr::read_unaligned(mem_addr as *const __m128i) +} + +/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 +pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i { + ptr::read_unaligned(mem_addr as *const __m512i) +} + +/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 +pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i { + ptr::read_unaligned(mem_addr as *const __m256i) +} + +/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 +pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i { + ptr::read_unaligned(mem_addr as *const __m128i) +} + +/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 +pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) { + ptr::write_unaligned(mem_addr as *mut __m512i, a); +} + +/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 +pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) { + ptr::write_unaligned(mem_addr as *mut __m256i, a); +} + +/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 +pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut __m128i, a); +} + +/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 +pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) { + ptr::write_unaligned(mem_addr as *mut __m512i, a); +} + +/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 +pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) { + ptr::write_unaligned(mem_addr as *mut __m256i, a); +} + +/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 +pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut __m128i, a); +} + +/// Load packed 16-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16) +#[inline] +#[target_feature(enable = "avx512bw")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i { + transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k)) +} + +/// Load packed 16-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16) +#[inline] +#[target_feature(enable = "avx512bw")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i { + _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load packed 8-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). 
+/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i { + transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k)) +} + +/// Load packed 8-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i { + _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load packed 16-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i { + transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k)) +} + +/// Load packed 16-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i { + _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load packed 8-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i { + transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k)) +} + +/// Load packed 8-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). 
+/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i { + _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load packed 16-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i { + transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k)) +} + +/// Load packed 16-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i { + _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr) +} + +/// Load packed 8-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i { + transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k)) +} + +/// Load packed 8-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i { + _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr) +} + +/// Store packed 16-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16) +#[inline] +#[target_feature(enable = "avx512bw")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) { + storedqu16_512(mem_addr, a.as_i16x32(), mask) +} + +/// Store packed 8-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) { + storedqu8_512(mem_addr, a.as_i8x64(), mask) +} + +/// Store packed 16-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) { + storedqu16_256(mem_addr, a.as_i16x16(), mask) +} + +/// Store packed 8-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) { + storedqu8_256(mem_addr, a.as_i8x32(), mask) +} + +/// Store packed 16-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) { + storedqu16_128(mem_addr, a.as_i16x8(), mask) +} + +/// Store packed 8-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) { + storedqu8_128(mem_addr, a.as_i8x16(), mask) +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_madd_epi16(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, madd, src.as_i32x16())) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_madd_epi16(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, madd, i32x16::ZERO)) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_madd_epi16(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, madd, src.as_i32x8())) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_madd_epi16(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, madd, i32x8::ZERO)) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_madd_epi16(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, madd, src.as_i32x4())) + } +} + +/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddwd))] +pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_madd_epi16(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, madd, i32x4::ZERO)) + } +} + +/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) } +} + +/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, madd, src.as_i16x32())) + } +} + +/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, madd, i16x32::ZERO)) + } +} + +/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, madd, src.as_i16x16())) + } +} + +/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, madd, i16x16::ZERO)) + } +} + +/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_maddubs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, madd, src.as_i16x8())) + } +} + +/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaddubsw))] +pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_maddubs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, madd, i16x8::ZERO)) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, src.as_i16x32())) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packs_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, src.as_i16x16())) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packs_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, src.as_i16x8())) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackssdw))] +pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpacksswb))] +pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpacksswb))] +pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, src.as_i8x64())) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpacksswb))] +pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpacksswb))]
+pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let pack = _mm256_packs_epi16(a, b).as_i8x32();
+        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
+    }
+}
+
+/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpacksswb))]
+pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let pack = _mm256_packs_epi16(a, b).as_i8x32();
+        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
+    }
+}
+
+/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpacksswb))] +pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, src.as_i8x16())) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpacksswb))] +pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, src.as_i16x32())) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, src.as_i16x16())) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, src.as_i16x8())) + } +} + +/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackusdw))] +pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, src.as_i8x64())) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi16(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, pack, src.as_i8x32())) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi16(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, src.as_i8x16())) + } +} + +/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpackuswb))] +pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, u32x32>(a.as_u16x32()); + let b = simd_cast::<_, u32x32>(b.as_u16x32()); + let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1)); + transmute(simd_cast::<_, u16x32>(r)) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, avg, src.as_u16x32())) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, avg, u16x32::ZERO)) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, avg, src.as_u16x16())) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, avg, u16x16::ZERO)) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, avg, src.as_u16x8())) + } +} + +/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgw))] +pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, avg, u16x8::ZERO)) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, u16x64>(a.as_u8x64()); + let b = simd_cast::<_, u16x64>(b.as_u8x64()); + let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1)); + transmute(simd_cast::<_, u8x64>(r)) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, avg, src.as_u8x64())) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, avg, u8x64::ZERO)) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, avg, src.as_u8x32())) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, avg, u8x32::ZERO)) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, avg, src.as_u8x16())) + } +} + +/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpavgb))] +pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, avg, u8x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllw))] +pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllw))] +pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllw))] +pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllw))] +pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllw))] +pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllw))] +pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw))]
+pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_sll_epi16(a, count).as_i16x8();
+        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(1)]
+pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        if IMM8 >= 16 {
+            _mm512_setzero_si512()
+        } else {
+            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
+        }
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = if IMM8 >= 16 {
+            u16x32::ZERO
+        } else {
+            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
+        };
+        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        if IMM8 >= 16 {
+            _mm512_setzero_si512()
+        } else {
+            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
+            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
+        }
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = if IMM8 >= 16 {
+            u16x16::ZERO
+        } else {
+            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
+        };
+        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        if IMM8 >= 16 {
+            _mm256_setzero_si256()
+        } else {
+            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
+            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
+        }
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = if IMM8 >= 16 {
+            u16x8::ZERO
+        } else {
+            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
+        };
+        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
+    }
+}
+
+/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        if IMM8 >= 16 {
+            _mm_setzero_si128()
+        } else {
+            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
+            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
+        }
+    }
+}
+
+/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllvw))] +pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlw))] +pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlw))] +pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlw))] +pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlw))] +pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlw))] +pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlw))] +pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw))]
+pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_srl_epi16(a, count).as_i16x8();
+        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(1)]
+pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        if IMM8 >= 16 {
+            _mm512_setzero_si512()
+        } else {
+            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
+        }
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = if IMM8 >= 16 {
+            u16x32::ZERO
+        } else {
+            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
+        };
+        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        //imm8 should be u32, it seems the document to verify is incorrect
+        if IMM8 >= 16 {
+            _mm512_setzero_si512()
+        } else {
+            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
+            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
+        }
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = _mm256_srli_epi16::<IMM8>(a);
+        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = _mm256_srli_epi16::<IMM8>(a);
+        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = _mm_srli_epi16::<IMM8>(a);
+        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = _mm_srli_epi16::<IMM8>(a);
+        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvw))] +pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraw))] +pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraw))] +pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraw))] +pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraw))] +pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraw))] +pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraw))] +pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw))]
+pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_sra_epi16(a, count).as_i16x8();
+        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
+        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
+        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
+        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
+        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
+        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
+        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
+    }
+}
+
+/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravw))] +pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w +pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2w))] +pub fn _mm512_mask_permutex2var_epi16( + a: __m512i, + k: __mmask32, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); + transmute(simd_select_bitmask(k, permute, a.as_i16x32())) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w +pub fn _mm512_maskz_permutex2var_epi16( + k: __mmask32, + a: __m512i, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); + transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2w))] +pub fn _mm512_mask2_permutex2var_epi16( + a: __m512i, + idx: __m512i, + k: __mmask32, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); + transmute(simd_select_bitmask(k, permute, idx.as_i16x32())) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w +pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2w))] +pub fn _mm256_mask_permutex2var_epi16( + a: __m256i, + k: __mmask16, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); + transmute(simd_select_bitmask(k, permute, a.as_i16x16())) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w +pub fn _mm256_maskz_permutex2var_epi16( + k: __mmask16, + a: __m256i, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); + transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2w))] +pub fn _mm256_mask2_permutex2var_epi16( + a: __m256i, + idx: __m256i, + k: __mmask16, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); + transmute(simd_select_bitmask(k, permute, idx.as_i16x16())) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w +pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2w))] +pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); + transmute(simd_select_bitmask(k, permute, a.as_i16x8())) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w +pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); + transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) + } +} + +/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2w))] +pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); + transmute(simd_select_bitmask(k, permute, idx.as_i16x8())) + } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm512_mask_permutexvar_epi16( + src: __m512i, + k: __mmask32, + idx: __m512i, + a: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); + transmute(simd_select_bitmask(k, permute, src.as_i16x32())) + } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); + transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) + } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i { + unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm256_mask_permutexvar_epi16( + src: __m256i, + k: __mmask16, + idx: __m256i, + a: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); + transmute(simd_select_bitmask(k, permute, src.as_i16x16())) + } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); + transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) + } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i { + unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); + transmute(simd_select_bitmask(k, permute, src.as_i16x8())) + } +} + +/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermw))] +pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); + transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) + } +} + +/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw +pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) } +} + +/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw +pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) } +} + +/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw +pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) } +} + +/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb +pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) } +} + +/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb +pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) } +} + +/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb +pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { + unsafe { + let a = _mm512_castsi128_si512(a).as_i16x32(); + let ret: i16x32 = simd_shuffle!( + a, + a, + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + ], + ); + transmute(ret) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, broadcast, src.as_i16x32())) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO)) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i16x16())) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO)) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i16x8())) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO)) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { + unsafe { + let a = _mm512_castsi128_si512(a).as_i8x64(); + let ret: i8x64 = simd_shuffle!( + a, + a, + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + ); + transmute(ret) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, broadcast, src.as_i8x64())) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO)) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, broadcast, src.as_i8x32())) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO)) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i8x16())) + } +} + +/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastb))] +pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO)) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + #[rustfmt::skip] + let r: i16x32 = simd_shuffle!( + a, + b, + [ + 4, 32 + 4, 5, 32 + 5, + 6, 32 + 6, 7, 32 + 7, + 12, 32 + 12, 13, 32 + 13, + 14, 32 + 14, 15, 32 + 15, + 20, 32 + 20, 21, 32 + 21, + 22, 32 + 22, 23, 32 + 23, + 28, 32 + 28, 29, 32 + 29, + 30, 32 + 30, 31, 32 + 31, + ], + ); + transmute(r) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32())) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO)) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16())) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO)) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8())) + } +} + +/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhwd))] +pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO)) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + #[rustfmt::skip] + let r: i8x64 = simd_shuffle!( + a, + b, + [ + 8, 64 + 8, 9, 64 + 9, + 10, 64 + 10, 11, 64 + 11, + 12, 64 + 12, 13, 64 + 13, + 14, 64 + 14, 15, 64 + 15, + 24, 64 + 24, 25, 64 + 25, + 26, 64 + 26, 27, 64 + 27, + 28, 64 + 28, 29, 64 + 29, + 30, 64 + 30, 31, 64 + 31, + 40, 64 + 40, 41, 64 + 41, + 42, 64 + 42, 43, 64 + 43, + 44, 64 + 44, 45, 64 + 45, + 46, 64 + 46, 47, 64 + 47, + 56, 64 + 56, 57, 64 + 57, + 58, 64 + 58, 59, 64 + 59, + 60, 64 + 60, 61, 64 + 61, + 62, 64 + 62, 63, 64 + 63, + ], + ); + transmute(r) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64())) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO)) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32())) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO)) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16())) + } +} + +/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpckhbw))] +pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO)) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + #[rustfmt::skip] + let r: i16x32 = simd_shuffle!( + a, + b, + [ + 0, 32+0, 1, 32+1, + 2, 32+2, 3, 32+3, + 8, 32+8, 9, 32+9, + 10, 32+10, 11, 32+11, + 16, 32+16, 17, 32+17, + 18, 32+18, 19, 32+19, + 24, 32+24, 25, 32+25, + 26, 32+26, 27, 32+27 + ], + ); + transmute(r) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32())) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO)) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16())) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO)) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8())) + } +} + +/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklwd))] +pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO)) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + #[rustfmt::skip] + let r: i8x64 = simd_shuffle!( + a, + b, + [ + 0, 64+0, 1, 64+1, + 2, 64+2, 3, 64+3, + 4, 64+4, 5, 64+5, + 6, 64+6, 7, 64+7, + 16, 64+16, 17, 64+17, + 18, 64+18, 19, 64+19, + 20, 64+20, 21, 64+21, + 22, 64+22, 23, 64+23, + 32, 64+32, 33, 64+33, + 34, 64+34, 35, 64+35, + 36, 64+36, 37, 64+37, + 38, 64+38, 39, 64+39, + 48, 64+48, 49, 64+49, + 50, 64+50, 51, 64+51, + 52, 64+52, 53, 64+53, + 54, 64+54, 55, 64+55, + ], + ); + transmute(r) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64())) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO)) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32())) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO)) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16())) + } +} + +/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklbw))] +pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO)) + } +} + +/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i16x32(); + transmute(simd_select_bitmask(k, mov, src.as_i16x32())) + } +} + +/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i16x32(); + transmute(simd_select_bitmask(k, mov, i16x32::ZERO)) + } +} + +/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i16x16(); + transmute(simd_select_bitmask(k, mov, src.as_i16x16())) + } +} + +/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i16x16(); + transmute(simd_select_bitmask(k, mov, i16x16::ZERO)) + } +} + +/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i16x8(); + transmute(simd_select_bitmask(k, mov, src.as_i16x8())) + } +} + +/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu16))] +pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i16x8(); + transmute(simd_select_bitmask(k, mov, i16x8::ZERO)) + } +} + +/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i8x64(); + transmute(simd_select_bitmask(k, mov, src.as_i8x64())) + } +} + +/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i8x64(); + transmute(simd_select_bitmask(k, mov, i8x64::ZERO)) + } +} + +/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i8x32(); + transmute(simd_select_bitmask(k, mov, src.as_i8x32())) + } +} + +/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i8x32(); + transmute(simd_select_bitmask(k, mov, i8x32::ZERO)) + } +} + +/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i8x16(); + transmute(simd_select_bitmask(k, mov, src.as_i8x16())) + } +} + +/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqu8))] +pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i8x16(); + transmute(simd_select_bitmask(k, mov, i8x16::ZERO)) + } +} + +/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i { + unsafe { + let r = _mm512_set1_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, r, src.as_i16x32())) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { + unsafe { + let r = _mm512_set1_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, r, i16x32::ZERO)) + } +} + +/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i { + unsafe { + let r = _mm256_set1_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, r, src.as_i16x16())) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { + unsafe { + let r = _mm256_set1_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, r, i16x16::ZERO)) + } +} + +/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i { + unsafe { + let r = _mm_set1_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, r, src.as_i16x8())) + } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastw))] +pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { + unsafe { + let r = _mm_set1_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, r, i16x8::ZERO)) + } +} + +/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] +pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i { + unsafe { + let r = _mm512_set1_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, r, src.as_i8x64())) + } +} + +/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] +pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { + unsafe { + let r = _mm512_set1_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, r, i8x64::ZERO)) + } +} + +/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] +pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i { + unsafe { + let r = _mm256_set1_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, r, src.as_i8x32())) + } +} + +/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] +pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { + unsafe { + let r = _mm256_set1_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, r, i8x32::ZERO)) + } +} + +/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] +pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { + unsafe { + let r = _mm_set1_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, r, src.as_i8x16())) + } +} + +/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] +pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { + unsafe { + let r = _mm_set1_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, r, i8x16::ZERO)) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i16x32(); + let r: i16x32 = simd_shuffle!( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + 4, + 5, + 6, + 7, + (IMM8 as u32 & 0b11) + 8, + ((IMM8 as u32 >> 2) & 0b11) + 8, + ((IMM8 as u32 >> 4) & 0b11) + 8, + ((IMM8 as u32 >> 6) & 0b11) + 8, + 12, + 13, + 14, + 15, + (IMM8 as u32 & 0b11) + 16, + ((IMM8 as u32 >> 2) & 0b11) + 16, + ((IMM8 as u32 >> 4) & 0b11) + 16, + ((IMM8 as u32 >> 6) & 0b11) + 16, + 20, + 21, + 22, + 23, + (IMM8 as u32 & 0b11) + 24, + ((IMM8 as u32 >> 2) & 0b11) + 24, + ((IMM8 as u32 >> 4) & 0b11) + 24, + ((IMM8 as u32 >> 6) & 0b11) + 24, + 28, + 29, + 30, + 31, + ], + ); + transmute(r) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. 
Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>( + src: __m512i, + k: __mmask32, + a: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflelo_epi16::<IMM8>(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflelo_epi16::<IMM8>(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. 
Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>( + src: __m256i, + k: __mmask16, + a: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflelo_epi16::<IMM8>(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflelo_epi16::<IMM8>(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. 
Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflelo_epi16::<IMM8>(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) + } +} + +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflelo_epi16::<IMM8>(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i16x32(); + let r: i16x32 = simd_shuffle!( + a, + a, + [ + 0, + 1, + 2, + 3, + (IMM8 as u32 & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + 8, + 9, + 10, + 11, + (IMM8 as u32 & 0b11) + 12, + ((IMM8 as u32 >> 2) & 0b11) + 12, + ((IMM8 as u32 >> 4) & 0b11) + 12, + ((IMM8 as u32 >> 6) & 0b11) + 12, + 16, + 17, + 18, + 19, + (IMM8 as u32 & 0b11) + 20, + ((IMM8 as u32 >> 2) & 0b11) + 20, + ((IMM8 as u32 >> 4) & 0b11) + 20, + ((IMM8 as u32 >> 6) & 0b11) + 20, + 24, + 25, + 26, + 27, + (IMM8 as u32 & 0b11) + 28, + ((IMM8 as u32 >> 2) & 0b11) + 28, + ((IMM8 as u32 >> 4) & 0b11) + 28, + ((IMM8 as u32 >> 6) & 0b11) + 28, + ], + ); + transmute(r) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_shufflehi_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_shufflehi_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_shufflehi_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) + } +} + +/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) } +} + +/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, shuffle, src.as_i8x64())) + } +} + +/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO)) + } +} + +/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, shuffle, src.as_i8x32())) + } +} + +/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO)) + } +} + +/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, shuffle, src.as_i8x16())) + } +} + +/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufb))] +pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO)) + } +} + +/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmw))] +pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpneq_epi16_mask(and, zero) +} + +/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmw))] +pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpneq_epi16_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmw))] +pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpneq_epi16_mask(and, zero) +} + +/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmw))] +pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpneq_epi16_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmw))] +pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpneq_epi16_mask(and, zero) +} + +/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmw))] +pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpneq_epi16_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmb))] +pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpneq_epi8_mask(and, zero) +} + +/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmb))] +pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpneq_epi8_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmb))] +pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpneq_epi8_mask(and, zero) +} + +/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmb))] +pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpneq_epi8_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmb))] +pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpneq_epi8_mask(and, zero) +} + +/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmb))] +pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpneq_epi8_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmw))] +pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpeq_epi16_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmw))] +pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpeq_epi16_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmw))] +pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpeq_epi16_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmw))] +pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpeq_epi16_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmw))] +pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpeq_epi16_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmw))] +pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpeq_epi16_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmb))] +pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpeq_epi8_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmb))] +pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { + let and = _mm512_and_si512(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpeq_epi8_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmb))] +pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpeq_epi8_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmb))] +pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpeq_epi8_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmb))] +pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpeq_epi8_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmb))] +pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpeq_epi8_mask(k, and, zero) +} + +/// Store 64-bit mask from a into memory. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(mov))] //should be kmovq +pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) { + ptr::write(mem_addr as *mut __mmask64, a); +} + +/// Store 32-bit mask from a into memory. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(mov))] //should be kmovd +pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) { + ptr::write(mem_addr as *mut __mmask32, a); +} + +/// Load 64-bit mask from memory into k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(mov))] //should be kmovq +pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 { + ptr::read(mem_addr as *const __mmask64) +} + +/// Load 32-bit mask from memory into k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(mov))] //should be kmovd +pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 { + ptr::read(mem_addr as *const __mmask32) +} + +/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsadbw))] +pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) } +} + +/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub fn _mm512_dbsad_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = vdbpsadbw(a, b, IMM8); + transmute(r) + } +} + +/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub fn _mm512_mask_dbsad_epu8( + src: __m512i, + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = vdbpsadbw(a, b, IMM8); + transmute(simd_select_bitmask(k, r, src.as_u16x32())) + } +} + +/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x64();
+        let b = b.as_u8x64();
+        let r = vdbpsadbw(a, b, IMM8);
+        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
+    }
+}
+
+/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x32();
+        let b = b.as_u8x32();
+        let r = vdbpsadbw256(a, b, IMM8);
+        transmute(r)
+    }
+}
+
+/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(4)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x32();
+        let b = b.as_u8x32();
+        let r = vdbpsadbw256(a, b, IMM8);
+        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
+    }
+}
+
+/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x32();
+        let b = b.as_u8x32();
+        let r = vdbpsadbw256(a, b, IMM8);
+        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
+    }
+}
+
+/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x16();
+        let b = b.as_u8x16();
+        let r = vdbpsadbw128(a, b, IMM8);
+        transmute(r)
+    }
+}
+
+/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(4)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x16();
+        let b = b.as_u8x16();
+        let r = vdbpsadbw128(a, b, IMM8);
+        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
+    }
+}
+
+/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
+pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_u8x16();
+        let b = b.as_u8x16();
+        let r = vdbpsadbw128(a, b, IMM8);
+        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
+    }
+}
+
+/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovw2m))]
+pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
+    let filter = _mm512_set1_epi16(1 << 15);
+    let a = _mm512_and_si512(a, filter);
+    _mm512_cmpeq_epi16_mask(a, filter)
+}
+
+/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovw2m))]
+pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
+    let filter = _mm256_set1_epi16(1 << 15);
+    let a = _mm256_and_si256(a, filter);
+    _mm256_cmpeq_epi16_mask(a, filter)
+}
+
+/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovw2m))] +pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 { + let filter = _mm_set1_epi16(1 << 15); + let a = _mm_and_si128(a, filter); + _mm_cmpeq_epi16_mask(a, filter) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovb2m))] +pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 { + let filter = _mm512_set1_epi8(1 << 7); + let a = _mm512_and_si512(a, filter); + _mm512_cmpeq_epi8_mask(a, filter) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than +// using vpmovb2m plus converting the mask register to a standard register. +pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 { + let filter = _mm256_set1_epi8(1 << 7); + let a = _mm256_and_si256(a, filter); + _mm256_cmpeq_epi8_mask(a, filter) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than +// using vpmovb2m plus converting the mask register to a standard register. +pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 { + let filter = _mm_set1_epi8(1 << 7); + let a = _mm_and_si128(a, filter); + _mm_cmpeq_epi8_mask(a, filter) +} + +/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovm2w))] +pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i { + unsafe { + let one = _mm512_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ) + .as_i16x32(); + transmute(simd_select_bitmask(k, one, i16x32::ZERO)) + } +} + +/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovm2w))] +pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i { + unsafe { + let one = _mm256_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ) + .as_i16x16(); + transmute(simd_select_bitmask(k, one, i16x16::ZERO)) + } +} + +/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovm2w))] +pub fn _mm_movm_epi16(k: __mmask8) -> __m128i { + unsafe { + let one = _mm_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ) + .as_i16x8(); + transmute(simd_select_bitmask(k, one, i16x8::ZERO)) + } +} + +/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovm2b))] +pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i { + unsafe { + let one = + _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) + .as_i8x64(); + transmute(simd_select_bitmask(k, one, i8x64::ZERO)) + } +} + +/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovm2b))] +pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i { + unsafe { + let one = + _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) + .as_i8x32(); + transmute(simd_select_bitmask(k, one, i8x32::ZERO)) + } +} + +/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovm2b))] +pub fn _mm_movm_epi8(k: __mmask16) -> __m128i { + unsafe { + let one = + _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) + .as_i8x16(); + transmute(simd_select_bitmask(k, one, i8x16::ZERO)) + } +} + +/// Convert 32-bit mask a into an integer value, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _cvtmask32_u32(a: __mmask32) -> u32 { + a +} + +/// Convert integer value a into an 32-bit mask, and store the result in k. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _cvtu32_mask32(a: u32) -> __mmask32 { + a +} + +/// Add 32-bit masks in a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { + a + b +} + +/// Add 64-bit masks in a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { + a + b +} + +/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { + a & b +} + +/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { + a & b +} + +/// Compute the bitwise NOT of 32-bit mask a, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _knot_mask32(a: __mmask32) -> __mmask32 { + !a +} + +/// Compute the bitwise NOT of 64-bit mask a, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _knot_mask64(a: __mmask64) -> __mmask64 { + !a +} + +/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { + _knot_mask32(a) & b +} + +/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { + _knot_mask64(a) & b +} + +/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { + a | b +} + +/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { + a | b +} + +/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { + a ^ b +} + +/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { + a ^ b +} + +/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { + _knot_mask32(a ^ b) +} + +/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { + _knot_mask64(a ^ b) +} + +/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 { + let tmp = _kor_mask32(a, b); + *all_ones = (tmp == 0xffffffff) as u8; + (tmp == 0) as u8 +} + +/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. 
If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 { + let tmp = _kor_mask64(a, b); + *all_ones = (tmp == 0xffffffff_ffffffff) as u8; + (tmp == 0) as u8 +} + +/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise +/// store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { + (_kor_mask32(a, b) == 0xffffffff) as u8 +} + +/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise +/// store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { + (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8 +} + +/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+    (_kor_mask32(a, b) == 0) as u8
+}
+
+/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
+/// store 0 in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+    (_kor_mask64(a, b) == 0) as u8
+}
+
+/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
+    a << COUNT
+}
+
+/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
+    a << COUNT
+}
+
+/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
+    a >> COUNT
+}
+
+/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
+    a >> COUNT
+}
+
+/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
+/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
+/// zeros, store 1 in and_not, otherwise store 0 in and_not.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
+    *and_not = (_kandn_mask32(a, b) == 0) as u8;
+    (_kand_mask32(a, b) == 0) as u8
+}
+
+/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
+/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
+/// zeros, store 1 in and_not, otherwise store 0 in and_not.
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 { + *and_not = (_kandn_mask64(a, b) == 0) as u8; + (_kand_mask64(a, b) == 0) as u8 +} + +/// Compute the bitwise NOT of 32-bit mask a and then AND with 16-bit mask b, if the result is all +/// zeros, store 1 in dst, otherwise store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { + (_kandn_mask32(a, b) == 0) as u8 +} + +/// Compute the bitwise NOT of 64-bit mask a and then AND with 8-bit mask b, if the result is all +/// zeros, store 1 in dst, otherwise store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { + (_kandn_mask64(a, b) == 0) as u8 +} + +/// Compute the bitwise AND of 32-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { + (_kand_mask32(a, b) == 0) as u8 +} + +/// Compute the bitwise AND of 64-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { + (_kand_mask64(a, b) == 0) as u8 +} + +/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd +pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 { + ((a & 0xffff) << 16) | (b & 0xffff) +} + +/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq +pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 { + ((a & 0xffffffff) << 32) | (b & 0xffffffff) +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i { + unsafe { + let a = a.as_i16x32(); + transmute::(simd_cast(a)) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, convert, src.as_i8x32())) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, convert, i8x32::ZERO)) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i { + unsafe { + let a = a.as_i16x16(); + transmute::(simd_cast(a)) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, convert, src.as_i8x16())) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let v256: i16x16 = simd_shuffle!( + a, + i16x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ); + transmute::(simd_cast(v256)) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi8(a).as_i8x16(); + let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; + transmute(simd_select_bitmask(k, convert, src.as_i8x16())) + } +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi8(a).as_i8x16(); + let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) + } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { + unsafe { + transmute(vpmovswb( + a.as_i16x32(), + i8x32::ZERO, + 0b11111111_11111111_11111111_11111111, + )) + } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { + unsafe { + transmute(vpmovuswb( + a.as_u16x32(), + u8x32::ZERO, + 0b11111111_11111111_11111111_11111111, + )) + } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { + unsafe { + transmute(vpmovuswb256( + a.as_u16x16(), + u8x16::ZERO, + 0b11111111_11111111, + )) + } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i8x32(); + transmute::(simd_cast(a)) + } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, src.as_i16x32())) + } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, src.as_i16x16())) + } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, src.as_i16x8())) + } +} + +/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbw))] +pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i { + unsafe { + let a = a.as_u8x32(); + transmute::(simd_cast(a)) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, src.as_i16x32())) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, src.as_i16x16())) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, src.as_i16x8())) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbw))] +pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } +} + +/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_bslli_epi128(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + const fn mask(shift: i32, i: u32) -> u32 { + let shift = shift as u32 & 0xff; + if shift > 15 || i % 16 < shift { + 0 + } else { + 64 + (i - shift) + } + } + let a = a.as_i8x64(); + let zero = i8x64::ZERO; + let r: i8x64 = simd_shuffle!( + zero, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + mask(IMM8, 32), + mask(IMM8, 33), + mask(IMM8, 34), + mask(IMM8, 35), + mask(IMM8, 36), + mask(IMM8, 37), + mask(IMM8, 38), + mask(IMM8, 39), + mask(IMM8, 40), + mask(IMM8, 41), + mask(IMM8, 42), + mask(IMM8, 43), + mask(IMM8, 44), + mask(IMM8, 45), + mask(IMM8, 46), + mask(IMM8, 47), + mask(IMM8, 48), + mask(IMM8, 49), + mask(IMM8, 50), + mask(IMM8, 51), + mask(IMM8, 52), + mask(IMM8, 53), + mask(IMM8, 54), + mask(IMM8, 55), + mask(IMM8, 56), + mask(IMM8, 57), + mask(IMM8, 58), + mask(IMM8, 59), + mask(IMM8, 60), + mask(IMM8, 61), + mask(IMM8, 62), + mask(IMM8, 63), + ], + ); + transmute(r) + } +} + +/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + const fn mask(shift: i32, i: u32) -> u32 { + let shift = shift as u32 & 0xff; + if shift > 15 || (15 - (i % 16)) < shift { + 0 + } else { + 64 + (i + shift) + } + } + let a = a.as_i8x64(); + let zero = i8x64::ZERO; + let r: i8x64 = simd_shuffle!( + zero, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + mask(IMM8, 32), + mask(IMM8, 33), + mask(IMM8, 34), + mask(IMM8, 35), + mask(IMM8, 36), + mask(IMM8, 37), + mask(IMM8, 38), + mask(IMM8, 39), + mask(IMM8, 40), + mask(IMM8, 41), + mask(IMM8, 42), + mask(IMM8, 43), + mask(IMM8, 44), + mask(IMM8, 45), + mask(IMM8, 46), + mask(IMM8, 47), + mask(IMM8, 48), + mask(IMM8, 49), + mask(IMM8, 50), + mask(IMM8, 51), + mask(IMM8, 52), + mask(IMM8, 53), + mask(IMM8, 54), + mask(IMM8, 55), + mask(IMM8, 56), + mask(IMM8, 57), + mask(IMM8, 58), + mask(IMM8, 59), + mask(IMM8, 60), + mask(IMM8, 61), + mask(IMM8, 62), + mask(IMM8, 63), + ], + ); + transmute(r) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 
bytes, and store the low 16 bytes in dst. +/// Unlike [`_mm_alignr_epi8`], [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to the temporary result, +/// this concatenation happens in 4 steps, where each step builds 32-byte temporary result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m512i { + const fn mask(shift: u32, i: u32) -> u32 { + let shift = shift % 16; + let mod_i = i % 16; + if mod_i < (16 - shift) { + i + shift + } else { + i + 48 + shift + } + } + + // If palignr is shifting the pair of vectors more than the size of two + // lanes, emit zero. + if IMM8 >= 32 { + return _mm512_setzero_si512(); + } + // If palignr is shifting the pair of input vectors more than one lane, + // but less than two lanes, convert to shifting in zeroes. 
+ let (a, b) = if IMM8 > 16 { + (_mm512_setzero_si512(), a) + } else { + (a, b) + }; + unsafe { + if IMM8 == 16 { + return transmute(a); + } + + let r: i8x64 = simd_shuffle!( + b.as_i8x64(), + a.as_i8x64(), + [ + mask(IMM8 as u32, 0), + mask(IMM8 as u32, 1), + mask(IMM8 as u32, 2), + mask(IMM8 as u32, 3), + mask(IMM8 as u32, 4), + mask(IMM8 as u32, 5), + mask(IMM8 as u32, 6), + mask(IMM8 as u32, 7), + mask(IMM8 as u32, 8), + mask(IMM8 as u32, 9), + mask(IMM8 as u32, 10), + mask(IMM8 as u32, 11), + mask(IMM8 as u32, 12), + mask(IMM8 as u32, 13), + mask(IMM8 as u32, 14), + mask(IMM8 as u32, 15), + mask(IMM8 as u32, 16), + mask(IMM8 as u32, 17), + mask(IMM8 as u32, 18), + mask(IMM8 as u32, 19), + mask(IMM8 as u32, 20), + mask(IMM8 as u32, 21), + mask(IMM8 as u32, 22), + mask(IMM8 as u32, 23), + mask(IMM8 as u32, 24), + mask(IMM8 as u32, 25), + mask(IMM8 as u32, 26), + mask(IMM8 as u32, 27), + mask(IMM8 as u32, 28), + mask(IMM8 as u32, 29), + mask(IMM8 as u32, 30), + mask(IMM8 as u32, 31), + mask(IMM8 as u32, 32), + mask(IMM8 as u32, 33), + mask(IMM8 as u32, 34), + mask(IMM8 as u32, 35), + mask(IMM8 as u32, 36), + mask(IMM8 as u32, 37), + mask(IMM8 as u32, 38), + mask(IMM8 as u32, 39), + mask(IMM8 as u32, 40), + mask(IMM8 as u32, 41), + mask(IMM8 as u32, 42), + mask(IMM8 as u32, 43), + mask(IMM8 as u32, 44), + mask(IMM8 as u32, 45), + mask(IMM8 as u32, 46), + mask(IMM8 as u32, 47), + mask(IMM8 as u32, 48), + mask(IMM8 as u32, 49), + mask(IMM8 as u32, 50), + mask(IMM8 as u32, 51), + mask(IMM8 as u32, 52), + mask(IMM8 as u32, 53), + mask(IMM8 as u32, 54), + mask(IMM8 as u32, 55), + mask(IMM8 as u32, 56), + mask(IMM8 as u32, 57), + mask(IMM8 as u32, 58), + mask(IMM8 as u32, 59), + mask(IMM8 as u32, 60), + mask(IMM8 as u32, 61), + mask(IMM8 as u32, 62), + mask(IMM8 as u32, 63), + ], + ); + transmute(r) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using 
writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_alignr_epi8( + src: __m512i, + k: __mmask64, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64())) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_alignr_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO)) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub fn _mm256_mask_alignr_epi8( + src: __m256i, + k: __mmask32, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32())) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub fn _mm256_maskz_alignr_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO)) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub fn _mm_mask_alignr_epi8( + src: __m128i, + k: __mmask16, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16())) + } +} + +/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub fn _mm_maskz_alignr_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO)) + } +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { + vpmovswbmem(mem_addr, a.as_i16x32(), k); +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { + vpmovswbmem256(mem_addr, a.as_i16x16(), k); +} + +/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovswb))] +pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovswbmem128(mem_addr, a.as_i16x8(), k); +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { + vpmovwbmem(mem_addr, a.as_i16x32(), k); +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { + vpmovwbmem256(mem_addr, a.as_i16x16(), k); +} + +/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovwb))] +pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovwbmem128(mem_addr, a.as_i16x8(), k); +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { + vpmovuswbmem(mem_addr, a.as_i16x32(), k); +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { + vpmovuswbmem256(mem_addr, a.as_i16x16(), k); +} + +/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045) +#[inline] +#[target_feature(enable = "avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovuswb))] +pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovuswbmem128(mem_addr, a.as_i16x8(), k); +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"] + fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32; + + #[link_name = "llvm.x86.avx512.pmaddw.d.512"] + fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16; + #[link_name = "llvm.x86.avx512.pmaddubs.w.512"] + fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32; + + #[link_name = "llvm.x86.avx512.packssdw.512"] + fn vpackssdw(a: i32x16, b: i32x16) -> i16x32; + #[link_name = "llvm.x86.avx512.packsswb.512"] + fn vpacksswb(a: i16x32, b: i16x32) -> i8x64; + #[link_name = "llvm.x86.avx512.packusdw.512"] + fn vpackusdw(a: i32x16, b: i32x16) -> u16x32; + #[link_name = "llvm.x86.avx512.packuswb.512"] + fn vpackuswb(a: i16x32, b: i16x32) -> u8x64; + + #[link_name = "llvm.x86.avx512.psll.w.512"] + fn vpsllw(a: i16x32, count: i16x8) -> i16x32; + + #[link_name = 
"llvm.x86.avx512.psllv.w.512"] + fn vpsllvw(a: i16x32, b: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.psllv.w.256"] + fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.psllv.w.128"] + fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.avx512.psrl.w.512"] + fn vpsrlw(a: i16x32, count: i16x8) -> i16x32; + + #[link_name = "llvm.x86.avx512.psrlv.w.512"] + fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.psrlv.w.256"] + fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.psrlv.w.128"] + fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.avx512.psra.w.512"] + fn vpsraw(a: i16x32, count: i16x8) -> i16x32; + + #[link_name = "llvm.x86.avx512.psrav.w.512"] + fn vpsravw(a: i16x32, count: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.psrav.w.256"] + fn vpsravw256(a: i16x16, count: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.psrav.w.128"] + fn vpsravw128(a: i16x8, count: i16x8) -> i16x8; + + #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"] + fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"] + fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"] + fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.avx512.permvar.hi.512"] + fn vpermw(a: i16x32, idx: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.permvar.hi.256"] + fn vpermw256(a: i16x16, idx: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.permvar.hi.128"] + fn vpermw128(a: i16x8, idx: i16x8) -> i16x8; + + #[link_name = "llvm.x86.avx512.pshuf.b.512"] + fn vpshufb(a: i8x64, b: i8x64) -> i8x64; + + #[link_name = "llvm.x86.avx512.psad.bw.512"] + fn vpsadbw(a: u8x64, b: u8x64) -> u64x8; + + #[link_name = "llvm.x86.avx512.dbpsadbw.512"] + fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32; + #[link_name = 
"llvm.x86.avx512.dbpsadbw.256"] + fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16; + #[link_name = "llvm.x86.avx512.dbpsadbw.128"] + fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8; + + #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"] + fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32; + #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"] + fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"] + fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16; + + #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"] + fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32; + #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"] + fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"] + fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16; + + #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"] + fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32); + #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"] + fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"] + fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"] + fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32); + #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"] + fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"] + fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"] + fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32); + #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"] + fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"] + fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.loadu.b.128"] 
+ fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.loadu.w.128"] + fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.loadu.b.256"] + fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32; + #[link_name = "llvm.x86.avx512.mask.loadu.w.256"] + fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.loadu.b.512"] + fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64; + #[link_name = "llvm.x86.avx512.mask.loadu.w.512"] + fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32; + + #[link_name = "llvm.x86.avx512.mask.storeu.b.128"] + fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.storeu.w.128"] + fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.b.256"] + fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32); + #[link_name = "llvm.x86.avx512.mask.storeu.w.256"] + fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.storeu.b.512"] + fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64); + #[link_name = "llvm.x86.avx512.mask.storeu.w.512"] + fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32); + +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::hint::black_box; + use crate::mem::{self}; + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_abs_epi16() { + let a = _mm512_set1_epi16(-1); + let r = _mm512_abs_epi16(a); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_abs_epi16() { + let a = _mm512_set1_epi16(-1); + let r = _mm512_mask_abs_epi16(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a); + 
#[rustfmt::skip] + let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_abs_epi16() { + let a = _mm512_set1_epi16(-1); + let r = _mm512_maskz_abs_epi16(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_abs_epi16() { + let a = _mm256_set1_epi16(-1); + let r = _mm256_mask_abs_epi16(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a); + let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_abs_epi16() { + let a = _mm256_set1_epi16(-1); + let r = _mm256_maskz_abs_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_abs_epi16() { + let a = _mm_set1_epi16(-1); + let r = _mm_mask_abs_epi16(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_abs_epi16(a, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_abs_epi16() { + let a = _mm_set1_epi16(-1); + let r = _mm_maskz_abs_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_abs_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m128i(r, e); 
+ } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_abs_epi8() { + let a = _mm512_set1_epi8(-1); + let r = _mm512_abs_epi8(a); + let e = _mm512_set1_epi8(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_abs_epi8() { + let a = _mm512_set1_epi8(-1); + let r = _mm512_mask_abs_epi8(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_abs_epi8() { + let a = _mm512_set1_epi8(-1); + let r = _mm512_maskz_abs_epi8(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_abs_epi8() { + let a = _mm256_set1_epi8(-1); + let r = _mm256_mask_abs_epi8(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a); + #[rustfmt::skip] + let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_abs_epi8() { + let a = _mm256_set1_epi8(-1); + let r = 
_mm256_maskz_abs_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_abs_epi8() { + let a = _mm_set1_epi8(-1); + let r = _mm_mask_abs_epi8(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a); + let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_abs_epi8() { + let a = _mm_set1_epi8(-1); + let r = _mm_maskz_abs_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_abs_epi8(0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_add_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_add_epi16(a, b); + let e = _mm512_set1_epi16(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_add_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_mask_add_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_add_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_maskz_add_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_add_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(2); + let r = _mm256_mask_add_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_add_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(2); + let r = _mm256_maskz_add_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_add_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(2); + let r = _mm_mask_add_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_add_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_add_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(2); + let r = _mm_maskz_add_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_add_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_add_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_add_epi8(a, b); + let e = _mm512_set1_epi8(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512bw")] + unsafe fn test_mm512_mask_add_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_mask_add_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_add_epi8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_add_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_maskz_add_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_add_epi8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_add_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(2); + let r = _mm256_mask_add_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_add_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(2); + let r = _mm256_maskz_add_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, 
b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_add_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(2); + let r = _mm_mask_add_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b); + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_add_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(2); + let r = _mm_maskz_add_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_adds_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(u16::MAX as i16); + let r = _mm512_adds_epu16(a, b); + let e = _mm512_set1_epi16(u16::MAX as i16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_adds_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(u16::MAX as i16); + let r = _mm512_mask_adds_epu16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_adds_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(u16::MAX as i16); + let r = _mm512_maskz_adds_epu16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_adds_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(u16::MAX as i16); + let r = _mm256_mask_adds_epu16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_adds_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(u16::MAX as i16); + let r = _mm256_maskz_adds_epu16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_adds_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(u16::MAX as i16); + let r = _mm_mask_adds_epu16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_adds_epu16(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_adds_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(u16::MAX as i16); + let r = _mm_maskz_adds_epu16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm_maskz_adds_epu16(0b00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_adds_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(u8::MAX as i8); + let r = _mm512_adds_epu8(a, b); + let e = _mm512_set1_epi8(u8::MAX as i8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_adds_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(u8::MAX as i8); + let r = _mm512_mask_adds_epu8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_adds_epu8( + a, + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_adds_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(u8::MAX as i8); + let r = _mm512_maskz_adds_epu8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_adds_epu8( + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_adds_epu8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(u8::MAX as i8); + let r = 
_mm256_mask_adds_epu8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_adds_epu8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(u8::MAX as i8); + let r = _mm256_maskz_adds_epu8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_adds_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(u8::MAX as i8); + let r = _mm_mask_adds_epu8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_adds_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(u8::MAX as i8); + let r = _mm_maskz_adds_epu8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_adds_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(i16::MAX); + let r = 
_mm512_adds_epi16(a, b); + let e = _mm512_set1_epi16(i16::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_adds_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(i16::MAX); + let r = _mm512_mask_adds_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_adds_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(i16::MAX); + let r = _mm512_maskz_adds_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_adds_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(i16::MAX); + let r = _mm256_mask_adds_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_adds_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(i16::MAX); + let r = _mm256_maskz_adds_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, 
i16::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_adds_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(i16::MAX); + let r = _mm_mask_adds_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_adds_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_adds_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(i16::MAX); + let r = _mm_maskz_adds_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_adds_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_adds_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(i8::MAX); + let r = _mm512_adds_epi8(a, b); + let e = _mm512_set1_epi8(i8::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_adds_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(i8::MAX); + let r = _mm512_mask_adds_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_adds_epi8( + a, + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_adds_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(i8::MAX); + let r = _mm512_maskz_adds_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_adds_epi8( + 
0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_adds_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(i8::MAX); + let r = _mm256_mask_adds_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_adds_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(i8::MAX); + let r = _mm256_maskz_adds_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_adds_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(i8::MAX); + let r = _mm_mask_adds_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_adds_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(i8::MAX); + let r = 
_mm_maskz_adds_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_sub_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_sub_epi16(a, b); + let e = _mm512_set1_epi16(-1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_sub_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_mask_sub_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_sub_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_maskz_sub_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_sub_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(2); + let r = _mm256_mask_sub_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_sub_epi16() { + let a = 
_mm256_set1_epi16(1); + let b = _mm256_set1_epi16(2); + let r = _mm256_maskz_sub_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_sub_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(2); + let r = _mm_mask_sub_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_sub_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_sub_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(2); + let r = _mm_maskz_sub_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sub_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_sub_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_sub_epi8(a, b); + let e = _mm512_set1_epi8(-1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_sub_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_mask_sub_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_sub_epi8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_sub_epi8() { + let a = 
_mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_maskz_sub_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sub_epi8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_sub_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(2); + let r = _mm256_mask_sub_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_sub_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(2); + let r = _mm256_maskz_sub_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_sub_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(2); + let r = _mm_mask_sub_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b); + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn 
test_mm_maskz_sub_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(2); + let r = _mm_maskz_sub_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_subs_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(u16::MAX as i16); + let r = _mm512_subs_epu16(a, b); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_subs_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(u16::MAX as i16); + let r = _mm512_mask_subs_epu16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_subs_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(u16::MAX as i16); + let r = _mm512_maskz_subs_epu16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_subs_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(u16::MAX as i16); + let r = _mm256_mask_subs_epu16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_subs_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(u16::MAX as i16); + let r = _mm256_maskz_subs_epu16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_subs_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(u16::MAX as i16); + let r = _mm_mask_subs_epu16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_subs_epu16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_subs_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(u16::MAX as i16); + let r = _mm_maskz_subs_epu16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_subs_epu16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_subs_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(u8::MAX as i8); + let r = _mm512_subs_epu8(a, b); + let e = _mm512_set1_epi8(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_subs_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(u8::MAX as i8); + let r = _mm512_mask_subs_epu8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_subs_epu8( + a, + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + 
assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_subs_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(u8::MAX as i8); + let r = _mm512_maskz_subs_epu8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_subs_epu8( + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_subs_epu8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(u8::MAX as i8); + let r = _mm256_mask_subs_epu8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_subs_epu8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(u8::MAX as i8); + let r = _mm256_maskz_subs_epu8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_subs_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(u8::MAX as i8); + let r = _mm_mask_subs_epu8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b); + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_subs_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(u8::MAX as i8); + let r = _mm_maskz_subs_epu8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_subs_epi16() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(i16::MAX); + let r = _mm512_subs_epi16(a, b); + let e = _mm512_set1_epi16(i16::MIN); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_subs_epi16() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(i16::MAX); + let r = _mm512_mask_subs_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_subs_epi16() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(i16::MAX); + let r = _mm512_maskz_subs_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_subs_epi16() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(i16::MAX); + let r = _mm256_mask_subs_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = 
_mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_subs_epi16() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(i16::MAX); + let r = _mm256_maskz_subs_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_subs_epi16() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(i16::MAX); + let r = _mm_mask_subs_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_subs_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_subs_epi16() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(i16::MAX); + let r = _mm_maskz_subs_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_subs_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_subs_epi8() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(i8::MAX); + let r = _mm512_subs_epi8(a, b); + let e = _mm512_set1_epi8(i8::MIN); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_subs_epi8() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(i8::MAX); + let r = _mm512_mask_subs_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_subs_epi8( + a, + 
0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_subs_epi8() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(i8::MAX); + let r = _mm512_maskz_subs_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_subs_epi8( + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_subs_epi8() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(i8::MAX); + let r = _mm256_mask_subs_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_subs_epi8() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(i8::MAX); + let r = _mm256_maskz_subs_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = 
_mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_subs_epi8() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(i8::MAX); + let r = _mm_mask_subs_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_subs_epi8() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(i8::MAX); + let r = _mm_maskz_subs_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mulhi_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mulhi_epu16(a, b); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_mulhi_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mask_mulhi_epu16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_mulhi_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_mulhi_epu16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_mulhi_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_mask_mulhi_epu16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_mulhi_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_mulhi_epu16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_mulhi_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_mask_mulhi_epu16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_mulhi_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_mulhi_epu16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mulhi_epu16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mulhi_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mulhi_epi16(a, b); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + 
} + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_mulhi_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mask_mulhi_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_mulhi_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_mulhi_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_mulhi_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_mask_mulhi_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_mulhi_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_mulhi_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_mulhi_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_mask_mulhi_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + 
let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_mulhi_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_mulhi_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mulhi_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mulhrs_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mulhrs_epi16(a, b); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_mulhrs_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mask_mulhrs_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_mulhrs_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_mulhrs_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_mulhrs_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_mask_mulhrs_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b); 
+ let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_mulhrs_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_mulhrs_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_mulhrs_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_mask_mulhrs_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_mulhrs_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_mulhrs_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mullo_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mullo_epi16(a, b); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_mullo_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mask_mullo_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn 
test_mm512_maskz_mullo_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_mullo_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_mullo_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_mask_mullo_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_mullo_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_mullo_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_mullo_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_mask_mullo_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_mullo_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_mullo_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mullo_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] 
+ unsafe fn test_mm512_max_epu16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epu16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epu16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epu16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epu16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epu16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 
0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_max_epu16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_max_epu16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_max_epu16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epu16(a, 0b00001111, a, b); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_max_epu16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epu16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_max_epu8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epu8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epu8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epu8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epu8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epu8() { 
+ #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epu8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epu8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epu8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_max_epu8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epu8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_max_epu8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_max_epu8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_max_epu8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_max_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epi16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 
11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_max_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b); + let e = 
_mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_max_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_max_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_max_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_max_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + 
let r = _mm512_max_epi8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epi8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epi8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_max_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_max_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 
0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_max_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_max_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_min_epu16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epu16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epu16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epu16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epu16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epu16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_min_epu16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_min_epu16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b); + let e = 
_mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_min_epu16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epu16(a, 0b00001111, a, b); + let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_min_epu16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epu16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_min_epu8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epu8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epu8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epu8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epu8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epu8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epu8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epu8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epu8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_min_epu8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epu8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_min_epu8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_min_epu8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epu8() { + let a = 
_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_min_epu8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_min_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epi16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi16(15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_min_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_min_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_min_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); + 
let r = _mm_maskz_min_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_min_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epi8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epi8( + a, + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 
6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epi8( + 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_min_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 
0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_min_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_mask_min_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_maskz_min_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmplt_epu16_mask() { + let a = _mm512_set1_epi16(-2); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmplt_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = 
"avx512bw")] + unsafe fn test_mm512_mask_cmplt_epu16_mask() { + let a = _mm512_set1_epi16(-2); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmplt_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmplt_epu16_mask() { + let a = _mm256_set1_epi16(-2); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmplt_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epu16_mask() { + let a = _mm256_set1_epi16(-2); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmplt_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmplt_epu16_mask() { + let a = _mm_set1_epi16(-2); + let b = _mm_set1_epi16(-1); + let m = _mm_cmplt_epu16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmplt_epu16_mask() { + let a = _mm_set1_epi16(-2); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmplt_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmplt_epu8_mask() { + let a = _mm512_set1_epi8(-2); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmplt_epu8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmplt_epu8_mask() { + let a = _mm512_set1_epi8(-2); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmplt_epu8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmplt_epu8_mask() { + let a = _mm256_set1_epi8(-2); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmplt_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epu8_mask() { + let a = _mm256_set1_epi8(-2); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmplt_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmplt_epu8_mask() { + let a = _mm_set1_epi8(-2); + let b = _mm_set1_epi8(-1); + let m = _mm_cmplt_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmplt_epu8_mask() { + let a = _mm_set1_epi8(-2); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmplt_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmplt_epi16_mask() { + let a = _mm512_set1_epi16(-2); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmplt_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmplt_epi16_mask() { + let a = _mm512_set1_epi16(-2); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmplt_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmplt_epi16_mask() { + let a = _mm256_set1_epi16(-2); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmplt_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epi16_mask() { + let a = 
_mm256_set1_epi16(-2); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmplt_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmplt_epi16_mask() { + let a = _mm_set1_epi16(-2); + let b = _mm_set1_epi16(-1); + let m = _mm_cmplt_epi16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmplt_epi16_mask() { + let a = _mm_set1_epi16(-2); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmplt_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmplt_epi8_mask() { + let a = _mm512_set1_epi8(-2); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmplt_epi8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmplt_epi8_mask() { + let a = _mm512_set1_epi8(-2); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmplt_epi8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmplt_epi8_mask() { + let a = _mm256_set1_epi8(-2); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmplt_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epi8_mask() { + let a = _mm256_set1_epi8(-2); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmplt_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn 
test_mm_cmplt_epi8_mask() { + let a = _mm_set1_epi8(-2); + let b = _mm_set1_epi8(-1); + let m = _mm_cmplt_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmplt_epi8_mask() { + let a = _mm_set1_epi8(-2); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmplt_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpgt_epu16_mask() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(1); + let m = _mm512_cmpgt_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpgt_epu16_mask() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpgt_epu16_mask() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(1); + let m = _mm256_cmpgt_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epu16_mask() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpgt_epu16_mask() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(1); + let m = _mm_cmpgt_epu16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epu16_mask() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(1); + let mask = 0b01010101; + let r = _mm_mask_cmpgt_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + 
} + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpgt_epu8_mask() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(1); + let m = _mm512_cmpgt_epu8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpgt_epu8_mask() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpgt_epu8_mask() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(1); + let m = _mm256_cmpgt_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epu8_mask() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpgt_epu8_mask() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(1); + let m = _mm_cmpgt_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epu8_mask() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpgt_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpgt_epi16_mask() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmpgt_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + 
#[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpgt_epi16_mask() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpgt_epi16_mask() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmpgt_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epi16_mask() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpgt_epi16_mask() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(-1); + let m = _mm_cmpgt_epi16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epi16_mask() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmpgt_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpgt_epi8_mask() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmpgt_epi8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpgt_epi8_mask() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + 
+ #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpgt_epi8_mask() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmpgt_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epi8_mask() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpgt_epi8_mask() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(-1); + let m = _mm_cmpgt_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epi8_mask() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpgt_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmple_epu16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmple_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmple_epu16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmple_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmple_epu16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmple_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmple_epu16_mask() { + let a = 
_mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmple_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmple_epu16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let m = _mm_cmple_epu16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmple_epu16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmple_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmple_epu8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmple_epu8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmple_epu8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmple_epu8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmple_epu8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmple_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmple_epu8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmple_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn 
test_mm_cmple_epu8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let m = _mm_cmple_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmple_epu8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmple_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmple_epi16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmple_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmple_epi16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmple_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmple_epi16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmple_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmple_epi16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmple_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmple_epi16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let m = _mm_cmple_epi16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmple_epi16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmple_epi16_mask(mask, a, b); + assert_eq!(r, 
0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmple_epi8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmple_epi8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmple_epi8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmple_epi8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmple_epi8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmple_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmple_epi8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmple_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmple_epi8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let m = _mm_cmple_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmple_epi8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmple_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpge_epu16_mask() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let m = _mm512_cmpge_epu16_mask(a, b); + assert_eq!(m, 
0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpge_epu16_mask() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpge_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpge_epu16_mask() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let m = _mm256_cmpge_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epu16_mask() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpge_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpge_epu16_mask() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let m = _mm_cmpge_epu16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpge_epu16_mask() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let mask = 0b01010101; + let r = _mm_mask_cmpge_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpge_epu8_mask() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let m = _mm512_cmpge_epu8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpge_epu8_mask() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpge_epu8_mask(mask, a, b); + assert_eq!( + r, + 
0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpge_epu8_mask() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let m = _mm256_cmpge_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epu8_mask() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpge_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpge_epu8_mask() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let m = _mm_cmpge_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpge_epu8_mask() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpge_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpge_epi16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmpge_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpge_epi16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpge_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpge_epi16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmpge_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = 
"avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epi16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpge_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpge_epi16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let m = _mm_cmpge_epi16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpge_epi16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmpge_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpge_epi8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmpge_epi8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpge_epi8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpge_epi8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpge_epi8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmpge_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epi8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpge_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + 
} + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpge_epi8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let m = _mm_cmpge_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpge_epi8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpge_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpeq_epu16_mask() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let m = _mm512_cmpeq_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpeq_epu16_mask() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpeq_epu16_mask() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let m = _mm256_cmpeq_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epu16_mask() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpeq_epu16_mask() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let m = _mm_cmpeq_epu16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epu16_mask() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let mask = 0b01010101; + let r = 
_mm_mask_cmpeq_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpeq_epu8_mask() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let m = _mm512_cmpeq_epu8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpeq_epu8_mask() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpeq_epu8_mask() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let m = _mm256_cmpeq_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epu8_mask() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpeq_epu8_mask() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let m = _mm_cmpeq_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epu8_mask() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpeq_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpeq_epi16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let m = 
_mm512_cmpeq_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpeq_epi16_mask() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpeq_epi16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmpeq_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epi16_mask() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpeq_epi16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let m = _mm_cmpeq_epi16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epi16_mask() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmpeq_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpeq_epi8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmpeq_epi8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpeq_epi8_mask() { + let a = _mm512_set1_epi8(-1); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b); + 
assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpeq_epi8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmpeq_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epi8_mask() { + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpeq_epi8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let m = _mm_cmpeq_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epi8_mask() { + let a = _mm_set1_epi8(-1); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpeq_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpneq_epu16_mask() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(1); + let m = _mm512_cmpneq_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpneq_epu16_mask() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpneq_epu16_mask() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(1); + let m = _mm256_cmpneq_epu16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epu16_mask() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpneq_epu16_mask() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(1); + let m = _mm_cmpneq_epu16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epu16_mask() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(1); + let mask = 0b01010101; + let r = _mm_mask_cmpneq_epu16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpneq_epu8_mask() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(1); + let m = _mm512_cmpneq_epu8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpneq_epu8_mask() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpneq_epu8_mask() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(1); + let m = _mm256_cmpneq_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epu8_mask() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b); + assert_eq!(r, 
0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpneq_epu8_mask() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(1); + let m = _mm_cmpneq_epu8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epu8_mask() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpneq_epu8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpneq_epi16_mask() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(-1); + let m = _mm512_cmpneq_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpneq_epi16_mask() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpneq_epi16_mask() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(-1); + let m = _mm256_cmpneq_epi16_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epi16_mask() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(-1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpneq_epi16_mask() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(-1); + let m = _mm_cmpneq_epi16_mask(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epi16_mask() { + let a = _mm_set1_epi16(1); + let b = 
_mm_set1_epi16(-1); + let mask = 0b01010101; + let r = _mm_mask_cmpneq_epi16_mask(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmpneq_epi8_mask() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(-1); + let m = _mm512_cmpneq_epi8_mask(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmpneq_epi8_mask() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmpneq_epi8_mask() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(-1); + let m = _mm256_cmpneq_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epi8_mask() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(-1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmpneq_epi8_mask() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(-1); + let m = _mm_cmpneq_epi8_mask(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epi8_mask() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(-1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmpneq_epi8_mask(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmp_epu16_mask() { + let a = 
_mm512_set1_epi16(0); + let b = _mm512_set1_epi16(1); + let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmp_epu16_mask() { + let a = _mm512_set1_epi16(0); + let b = _mm512_set1_epi16(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmp_epu16_mask() { + let a = _mm256_set1_epi16(0); + let b = _mm256_set1_epi16(1); + let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmp_epu16_mask() { + let a = _mm256_set1_epi16(0); + let b = _mm256_set1_epi16(1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmp_epu16_mask() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmp_epu16_mask() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let mask = 0b01010101; + let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmp_epu8_mask() { + let a = _mm512_set1_epi8(0); + let b = _mm512_set1_epi8(1); + let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmp_epu8_mask() { + let a = _mm512_set1_epi8(0); + let b = _mm512_set1_epi8(1); + 
let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmp_epu8_mask() { + let a = _mm256_set1_epi8(0); + let b = _mm256_set1_epi8(1); + let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmp_epu8_mask() { + let a = _mm256_set1_epi8(0); + let b = _mm256_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmp_epu8_mask() { + let a = _mm_set1_epi8(0); + let b = _mm_set1_epi8(1); + let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmp_epu8_mask() { + let a = _mm_set1_epi8(0); + let b = _mm_set1_epi8(1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmp_epi16_mask() { + let a = _mm512_set1_epi16(0); + let b = _mm512_set1_epi16(1); + let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmp_epi16_mask() { + let a = _mm512_set1_epi16(0); + let b = _mm512_set1_epi16(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = 
"avx512bw,avx512vl")] + unsafe fn test_mm256_cmp_epi16_mask() { + let a = _mm256_set1_epi16(0); + let b = _mm256_set1_epi16(1); + let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmp_epi16_mask() { + let a = _mm256_set1_epi16(0); + let b = _mm256_set1_epi16(1); + let mask = 0b01010101_01010101; + let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmp_epi16_mask() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmp_epi16_mask() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let mask = 0b01010101; + let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cmp_epi8_mask() { + let a = _mm512_set1_epi8(0); + let b = _mm512_set1_epi8(1); + let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!( + m, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + ); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cmp_epi8_mask() { + let a = _mm512_set1_epi8(0); + let b = _mm512_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; + let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!( + r, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 + ); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cmp_epi8_mask() { + let a = _mm256_set1_epi8(0); + let b = _mm256_set1_epi8(1); + let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111_11111111_11111111); + } + 
+ #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cmp_epi8_mask() { + let a = _mm256_set1_epi8(0); + let b = _mm256_set1_epi8(1); + let mask = 0b01010101_01010101_01010101_01010101; + let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101_01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cmp_epi8_mask() { + let a = _mm_set1_epi8(0); + let b = _mm_set1_epi8(1); + let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11111111_11111111); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cmp_epi8_mask() { + let a = _mm_set1_epi8(0); + let b = _mm_set1_epi8(1); + let mask = 0b01010101_01010101; + let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01010101_01010101); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_add_epi16() { + let a = _mm256_set1_epi16(1); + let e = _mm256_reduce_add_epi16(a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_add_epi16() { + let a = _mm256_set1_epi16(1); + let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_add_epi16() { + let a = _mm_set1_epi16(1); + let e = _mm_reduce_add_epi16(a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_add_epi16() { + let a = _mm_set1_epi16(1); + let e = _mm_mask_reduce_add_epi16(0b11110000, a); + assert_eq!(4, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_add_epi8() { + let a = _mm256_set1_epi8(1); + let e = _mm256_reduce_add_epi8(a); + assert_eq!(32, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_add_epi8() { + let a = _mm256_set1_epi8(1); + let e = 
_mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_add_epi8() { + let a = _mm_set1_epi8(1); + let e = _mm_reduce_add_epi8(a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_add_epi8() { + let a = _mm_set1_epi8(1); + let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_and_epi16() { + let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm256_reduce_and_epi16(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_and_epi16() { + let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_and_epi16() { + let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2); + let e = _mm_reduce_and_epi16(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_and_epi16() { + let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2); + let e = _mm_mask_reduce_and_epi16(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_and_epi8() { + let a = _mm256_set_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 2, + ); + let e = _mm256_reduce_and_epi8(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_and_epi8() { + let a = _mm256_set_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 2, + ); + let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a); + assert_eq!(1, e); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_and_epi8() { + let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm_reduce_and_epi8(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_and_epi8() { + let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_mul_epi16() { + let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm256_reduce_mul_epi16(a); + assert_eq!(256, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_mul_epi16() { + let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_mul_epi16() { + let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1); + let e = _mm_reduce_mul_epi16(a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_mul_epi16() { + let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2); + let e = _mm_mask_reduce_mul_epi16(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_mul_epi8() { + let a = _mm256_set_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, + ); + let e = _mm256_reduce_mul_epi8(a); + assert_eq!(64, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_mul_epi8() { + let a = _mm256_set_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, + ); + let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a); + assert_eq!(1, e); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_mul_epi8() { + let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2); + let e = _mm_reduce_mul_epi8(a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_mul_epi8() { + let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2); + let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_max_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i16 = _mm256_reduce_max_epi16(a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_max_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_max_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: i16 = _mm_reduce_max_epi16(a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_max_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_max_epi8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: i8 = _mm256_reduce_max_epi8(a); + assert_eq!(31, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_max_epi8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: i8 = 
_mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_max_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i8 = _mm_reduce_max_epi8(a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_max_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_max_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u16 = _mm256_reduce_max_epu16(a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_max_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_max_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16 = _mm_reduce_max_epu16(a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_max_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_max_epu8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: u8 = _mm256_reduce_max_epu8(a); + assert_eq!(31, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_max_epu8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_max_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8 = _mm_reduce_max_epu8(a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_max_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_min_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i16 = _mm256_reduce_min_epi16(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_min_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_min_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: i16 = _mm_reduce_min_epi16(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_min_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_min_epi8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: i8 = _mm256_reduce_min_epi8(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn 
test_mm256_mask_reduce_min_epi8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_min_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i8 = _mm_reduce_min_epi8(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_min_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_min_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u16 = _mm256_reduce_min_epu16(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_min_epu16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_min_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16 = _mm_reduce_min_epu16(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_min_epu16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_min_epu8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: u8 = 
_mm256_reduce_min_epu8(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_min_epu8() { + let a = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ); + let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_min_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8 = _mm_reduce_min_epu8(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_min_epu8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_or_epi16() { + let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm256_reduce_or_epi16(a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_or_epi16() { + let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_or_epi16() { + let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2); + let e = _mm_reduce_or_epi16(a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_or_epi16() { + let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2); + let e = _mm_mask_reduce_or_epi16(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_reduce_or_epi8() { + let a = _mm256_set_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 
2, + ); + let e = _mm256_reduce_or_epi8(a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_reduce_or_epi8() { + let a = _mm256_set_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 2, + ); + let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_reduce_or_epi8() { + let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm_reduce_or_epi8(a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_reduce_or_epi8() { + let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_loadu_epi16() { + #[rustfmt::skip] + let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let r = _mm512_loadu_epi16(&a[0]); + #[rustfmt::skip] + let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_loadu_epi16() { + let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let r = _mm256_loadu_epi16(&a[0]); + let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_loadu_epi16() { + let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let r = _mm_loadu_epi16(&a[0]); + let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_loadu_epi8() { + #[rustfmt::skip] + let a: [i8; 64] = 
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let r = _mm512_loadu_epi8(&a[0]); + #[rustfmt::skip] + let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_loadu_epi8() { + #[rustfmt::skip] + let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let r = _mm256_loadu_epi8(&a[0]); + #[rustfmt::skip] + let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_loadu_epi8() { + let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let r = _mm_loadu_epi8(&a[0]); + let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_storeu_epi16() { + let a = _mm512_set1_epi16(9); + let mut r = _mm512_undefined_epi32(); + _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_storeu_epi16() { + let a = _mm256_set1_epi16(9); + let mut r = _mm256_set1_epi32(0); + _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_storeu_epi16() { + let a = _mm_set1_epi16(9); + let mut r = _mm_set1_epi32(0); + 
_mm_storeu_epi16(&mut r as *mut _ as *mut i16, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_storeu_epi8() { + let a = _mm512_set1_epi8(9); + let mut r = _mm512_undefined_epi32(); + _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_storeu_epi8() { + let a = _mm256_set1_epi8(9); + let mut r = _mm256_set1_epi32(0); + _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_storeu_epi8() { + let a = _mm_set1_epi8(9); + let mut r = _mm_set1_epi32(0); + _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f,avx512bw")] + unsafe fn test_mm512_mask_loadu_epi16() { + let src = _mm512_set1_epi16(42); + let a = &[ + 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b10101010_11001100_11101000_11001010; + let r = _mm512_mask_loadu_epi16(src, m, black_box(p)); + let e = &[ + 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, + 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, + ]; + let e = _mm512_loadu_epi16(e.as_ptr()); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw")] + unsafe fn test_mm512_maskz_loadu_epi16() { + let a = &[ + 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b10101010_11001100_11101000_11001010; + let r = _mm512_maskz_loadu_epi16(m, black_box(p)); + let e = &[ + 0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, + 26, 0, 28, 0, 30, 0, 32, + ]; + let e = _mm512_loadu_epi16(e.as_ptr()); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512bw")] + unsafe fn test_mm512_mask_storeu_epi16() { + let mut r = [42_i16; 32]; + let a = &[ + 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let a = _mm512_loadu_epi16(a.as_ptr()); + let m = 0b10101010_11001100_11101000_11001010; + _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a); + let e = &[ + 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, + 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, + ]; + let e = _mm512_loadu_epi16(e.as_ptr()); + assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512bw")] + unsafe fn test_mm512_mask_loadu_epi8() { + let src = _mm512_set1_epi8(42); + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ]; + let p = a.as_ptr(); + let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; + let r = _mm512_mask_loadu_epi8(src, m, black_box(p)); + let e = &[ + 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, + 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42, + ]; + let e = _mm512_loadu_epi8(e.as_ptr()); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw")] + unsafe fn test_mm512_maskz_loadu_epi8() { + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ]; + let p = a.as_ptr(); + let m = 
0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; + let r = _mm512_maskz_loadu_epi8(m, black_box(p)); + let e = &[ + 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, + 26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0, + ]; + let e = _mm512_loadu_epi8(e.as_ptr()); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw")] + unsafe fn test_mm512_mask_storeu_epi8() { + let mut r = [42_i8; 64]; + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ]; + let a = _mm512_loadu_epi8(a.as_ptr()); + let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; + _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a); + let e = &[ + 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, + 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42, + ]; + let e = _mm512_loadu_epi8(e.as_ptr()); + assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_loadu_epi16() { + let src = _mm256_set1_epi16(42); + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm256_mask_loadu_epi16(src, m, black_box(p)); + let e = &[ + 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, + ]; + let e = _mm256_loadu_epi16(e.as_ptr()); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_loadu_epi16() { + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm256_maskz_loadu_epi16(m, black_box(p)); + let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16]; + let e = _mm256_loadu_epi16(e.as_ptr()); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_storeu_epi16() { + let mut r = [42_i16; 16]; + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let a = _mm256_loadu_epi16(a.as_ptr()); + let m = 0b11101000_11001010; + _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a); + let e = &[ + 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, + ]; + let e = _mm256_loadu_epi16(e.as_ptr()); + assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_loadu_epi8() { + let src = _mm256_set1_epi8(42); + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b10101010_11001100_11101000_11001010; + let r = _mm256_mask_loadu_epi8(src, m, black_box(p)); + let e = &[ + 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, + 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, + ]; + let e = _mm256_loadu_epi8(e.as_ptr()); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_loadu_epi8() { + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b10101010_11001100_11101000_11001010; + let r = _mm256_maskz_loadu_epi8(m, black_box(p)); + let e = &[ + 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, + 26, 0, 28, 0, 30, 0, 32, + ]; + let e = _mm256_loadu_epi8(e.as_ptr()); + assert_eq_m256i(r, e); + } + + #[simd_test(enable 
= "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_storeu_epi8() { + let mut r = [42_i8; 32]; + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let a = _mm256_loadu_epi8(a.as_ptr()); + let m = 0b10101010_11001100_11101000_11001010; + _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a); + let e = &[ + 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, + 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, + ]; + let e = _mm256_loadu_epi8(e.as_ptr()); + assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm_mask_loadu_epi16() { + let src = _mm_set1_epi16(42); + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm_mask_loadu_epi16(src, m, black_box(p)); + let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8]; + let e = _mm_loadu_epi16(e.as_ptr()); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm_maskz_loadu_epi16() { + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm_maskz_loadu_epi16(m, black_box(p)); + let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8]; + let e = _mm_loadu_epi16(e.as_ptr()); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm_mask_storeu_epi16() { + let mut r = [42_i16; 8]; + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; + let a = _mm_loadu_epi16(a.as_ptr()); + let m = 0b11001010; + _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a); + let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8]; + let e = _mm_loadu_epi16(e.as_ptr()); + assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm_mask_loadu_epi8() { + let src = _mm_set1_epi8(42); + let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 
0b11101000_11001010; + let r = _mm_mask_loadu_epi8(src, m, black_box(p)); + let e = &[ + 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, + ]; + let e = _mm_loadu_epi8(e.as_ptr()); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm_maskz_loadu_epi8() { + let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm_maskz_loadu_epi8(m, black_box(p)); + let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16]; + let e = _mm_loadu_epi8(e.as_ptr()); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512bw,avx512vl")] + unsafe fn test_mm_mask_storeu_epi8() { + let mut r = [42_i8; 16]; + let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let a = _mm_loadu_epi8(a.as_ptr()); + let m = 0b11101000_11001010; + _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a); + let e = &[ + 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, + ]; + let e = _mm_loadu_epi8(e.as_ptr()); + assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_madd_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_madd_epi16(a, b); + let e = _mm512_set1_epi32(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_madd_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mask_madd_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b); + let e = _mm512_set_epi32( + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 2, + 2, + 2, + 2, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_madd_epi16() { + let a = 
_mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_madd_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_madd_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_mask_madd_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b); + let e = _mm256_set_epi32( + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 1 << 16 | 1, + 2, + 2, + 2, + 2, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_madd_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_madd_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_madd_epi16(0b00001111, a, b); + let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_madd_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_mask_madd_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_madd_epi16(a, 0b00001111, a, b); + let e = _mm_set_epi32(2, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_madd_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_madd_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_madd_epi16(0b00001111, a, b); + let e = _mm_set_epi32(2, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maddubs_epi16() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_maddubs_epi16(a, b); + let e = _mm512_set1_epi16(2); 
+ assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_maddubs_epi16() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let src = _mm512_set1_epi16(1); + let r = _mm512_mask_maddubs_epi16(src, 0, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_maddubs_epi16() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_maskz_maddubs_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_maddubs_epi16() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let src = _mm256_set1_epi16(1); + let r = _mm256_mask_maddubs_epi16(src, 0, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_maddubs_epi16() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let r = _mm256_maskz_maddubs_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_maddubs_epi16() 
{ + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let src = _mm_set1_epi16(1); + let r = _mm_mask_maddubs_epi16(src, 0, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_add_epi16(src, 0b00000001, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_maddubs_epi16() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let r = _mm_maskz_maddubs_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_maddubs_epi16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_packs_epi32() { + let a = _mm512_set1_epi32(i32::MAX); + let b = _mm512_set1_epi32(1); + let r = _mm512_packs_epi32(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, + 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_packs_epi32() { + let a = _mm512_set1_epi32(i32::MAX); + let b = _mm512_set1_epi32(1 << 16 | 1); + let r = _mm512_mask_packs_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_packs_epi32() { + let a = _mm512_set1_epi32(i32::MAX); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_packs_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b); + 
#[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_packs_epi32() { + let a = _mm256_set1_epi32(i32::MAX); + let b = _mm256_set1_epi32(1 << 16 | 1); + let r = _mm256_mask_packs_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_packs_epi32() { + let a = _mm256_set1_epi32(i32::MAX); + let b = _mm256_set1_epi32(1); + let r = _mm256_maskz_packs_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_packs_epi32() { + let a = _mm_set1_epi32(i32::MAX); + let b = _mm_set1_epi32(1 << 16 | 1); + let r = _mm_mask_packs_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_packs_epi32(b, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_packs_epi32() { + let a = _mm_set1_epi32(i32::MAX); + let b = _mm_set1_epi32(1); + let r = _mm_maskz_packs_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_packs_epi32(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_packs_epi16() { + let 
a = _mm512_set1_epi16(i16::MAX); + let b = _mm512_set1_epi16(1); + let r = _mm512_packs_epi16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, + 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, + 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, + 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_packs_epi16() { + let a = _mm512_set1_epi16(i16::MAX); + let b = _mm512_set1_epi16(1 << 8 | 1); + let r = _mm512_mask_packs_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_packs_epi16( + b, + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_packs_epi16() { + let a = _mm512_set1_epi16(i16::MAX); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_packs_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_packs_epi16( + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_packs_epi16() { + let a = 
_mm256_set1_epi16(i16::MAX); + let b = _mm256_set1_epi16(1 << 8 | 1); + let r = _mm256_mask_packs_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_packs_epi16() { + let a = _mm256_set1_epi16(i16::MAX); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_packs_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_packs_epi16() { + let a = _mm_set1_epi16(i16::MAX); + let b = _mm_set1_epi16(1 << 8 | 1); + let r = _mm_mask_packs_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_packs_epi16() { + let a = _mm_set1_epi16(i16::MAX); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_packs_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_packus_epi32() { + let a = _mm512_set1_epi32(-1); + let b = _mm512_set1_epi32(1); + let r = 
_mm512_packus_epi32(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_packus_epi32() { + let a = _mm512_set1_epi32(-1); + let b = _mm512_set1_epi32(1 << 16 | 1); + let r = _mm512_mask_packus_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_packus_epi32() { + let a = _mm512_set1_epi32(-1); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_packus_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_packus_epi32() { + let a = _mm256_set1_epi32(-1); + let b = _mm256_set1_epi32(1 << 16 | 1); + let r = _mm256_mask_packus_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_packus_epi32() { + let a = _mm256_set1_epi32(-1); + let b = _mm256_set1_epi32(1); + let r = _mm256_maskz_packus_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + 
assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_packus_epi32() { + let a = _mm_set1_epi32(-1); + let b = _mm_set1_epi32(1 << 16 | 1); + let r = _mm_mask_packus_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_packus_epi32(b, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_packus_epi32() { + let a = _mm_set1_epi32(-1); + let b = _mm_set1_epi32(1); + let r = _mm_maskz_packus_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_packus_epi32(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_packus_epi16() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(1); + let r = _mm512_packus_epi16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_packus_epi16() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(1 << 8 | 1); + let r = _mm512_mask_packus_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_packus_epi16( + b, + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_packus_epi16() { + let a = _mm512_set1_epi16(-1); + let b = _mm512_set1_epi16(1); + let r = 
_mm512_maskz_packus_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_packus_epi16( + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_packus_epi16() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(1 << 8 | 1); + let r = _mm256_mask_packus_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_packus_epi16() { + let a = _mm256_set1_epi16(-1); + let b = _mm256_set1_epi16(1); + let r = _mm256_maskz_packus_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_packus_epi16() { + let a = _mm_set1_epi16(-1); + let b = _mm_set1_epi16(1 << 8 | 1); + let r = _mm_mask_packus_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b); + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_packus_epi16() { + let a = _mm_set1_epi16(-1); + 
let b = _mm_set1_epi16(1); + let r = _mm_maskz_packus_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_avg_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_avg_epu16(a, b); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_avg_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_mask_avg_epu16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_avg_epu16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1); + let r = _mm512_maskz_avg_epu16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_avg_epu16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1); + let r = _mm256_mask_avg_epu16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b); + let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_avg_epu16() { + let a = _mm256_set1_epi16(1); + let b = 
_mm256_set1_epi16(1); + let r = _mm256_maskz_avg_epu16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_avg_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_mask_avg_epu16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_avg_epu16(a, 0b00001111, a, b); + let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_avg_epu16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1); + let r = _mm_maskz_avg_epu16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_avg_epu16(0b00001111, a, b); + let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_avg_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_avg_epu8(a, b); + let e = _mm512_set1_epi8(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_avg_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_mask_avg_epu8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_avg_epu8( + a, + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_avg_epu8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_maskz_avg_epu8(0, a, b); + 
assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_avg_epu8( + 0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_avg_epu8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let r = _mm256_mask_avg_epu8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_avg_epu8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let r = _mm256_maskz_avg_epu8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_avg_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let r = _mm_mask_avg_epu8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b); + let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_avg_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let r = _mm_maskz_avg_epu8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let 
r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_sll_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm512_sll_epi16(a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_sll_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm512_mask_sll_epi16(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_sll_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm512_maskz_sll_epi16(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_sll_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm256_mask_sll_epi16(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_sll_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm256_maskz_sll_epi16(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_sll_epi16() { + let a = 
_mm_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm_mask_sll_epi16(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sll_epi16(a, 0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_sll_epi16() { + let a = _mm_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm_maskz_sll_epi16(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sll_epi16(0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_slli_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let r = _mm512_slli_epi16::<1>(a); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_slli_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let r = _mm512_mask_slli_epi16::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_slli_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let r = _mm512_maskz_slli_epi16::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_slli_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let r = _mm256_mask_slli_epi16::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_slli_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let r = _mm256_maskz_slli_epi16::<1>(0, 
a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_slli_epi16() { + let a = _mm_set1_epi16(1 << 15); + let r = _mm_mask_slli_epi16::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_slli_epi16() { + let a = _mm_set1_epi16(1 << 15); + let r = _mm_maskz_slli_epi16::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_slli_epi16::<1>(0b11111111, a); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_sllv_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let count = _mm512_set1_epi16(2); + let r = _mm512_sllv_epi16(a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_sllv_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let count = _mm512_set1_epi16(2); + let r = _mm512_mask_sllv_epi16(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_sllv_epi16() { + let a = _mm512_set1_epi16(1 << 15); + let count = _mm512_set1_epi16(2); + let r = _mm512_maskz_sllv_epi16(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_sllv_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let count = _mm256_set1_epi16(2); + let r = 
_mm256_sllv_epi16(a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_sllv_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let count = _mm256_set1_epi16(2); + let r = _mm256_mask_sllv_epi16(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_sllv_epi16() { + let a = _mm256_set1_epi16(1 << 15); + let count = _mm256_set1_epi16(2); + let r = _mm256_maskz_sllv_epi16(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_sllv_epi16() { + let a = _mm_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm_sllv_epi16(a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_sllv_epi16() { + let a = _mm_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm_mask_sllv_epi16(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_sllv_epi16() { + let a = _mm_set1_epi16(1 << 15); + let count = _mm_set1_epi16(2); + let r = _mm_maskz_sllv_epi16(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sllv_epi16(0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_srl_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm512_srl_epi16(a, count); + let e = _mm512_set1_epi16(0); 
+ assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_srl_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm512_mask_srl_epi16(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_srl_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm512_maskz_srl_epi16(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_srl_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm256_mask_srl_epi16(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_srl_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm256_maskz_srl_epi16(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_srl_epi16() { + let a = _mm_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm_mask_srl_epi16(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srl_epi16(a, 0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_srl_epi16() { + let a = _mm_set1_epi16(1 << 1); + let count = 
_mm_set1_epi16(2); + let r = _mm_maskz_srl_epi16(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srl_epi16(0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_srli_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let r = _mm512_srli_epi16::<2>(a); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_srli_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let r = _mm512_mask_srli_epi16::<2>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_srli_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let r = _mm512_maskz_srli_epi16::<2>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_srli_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let r = _mm256_mask_srli_epi16::<2>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_srli_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let r = _mm256_maskz_srli_epi16::<2>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_srli_epi16() { + let a = _mm_set1_epi16(1 << 1); + let r = _mm_mask_srli_epi16::<2>(a, 0, a); + assert_eq_m128i(r, a); + let r = 
_mm_mask_srli_epi16::<2>(a, 0b11111111, a); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_srli_epi16() { + let a = _mm_set1_epi16(1 << 1); + let r = _mm_maskz_srli_epi16::<2>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srli_epi16::<2>(0b11111111, a); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_srlv_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let count = _mm512_set1_epi16(2); + let r = _mm512_srlv_epi16(a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_srlv_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let count = _mm512_set1_epi16(2); + let r = _mm512_mask_srlv_epi16(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_srlv_epi16() { + let a = _mm512_set1_epi16(1 << 1); + let count = _mm512_set1_epi16(2); + let r = _mm512_maskz_srlv_epi16(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_srlv_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let count = _mm256_set1_epi16(2); + let r = _mm256_srlv_epi16(a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_srlv_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let count = _mm256_set1_epi16(2); + let r = _mm256_mask_srlv_epi16(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, 
count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_srlv_epi16() { + let a = _mm256_set1_epi16(1 << 1); + let count = _mm256_set1_epi16(2); + let r = _mm256_maskz_srlv_epi16(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_srlv_epi16() { + let a = _mm_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm_srlv_epi16(a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_srlv_epi16() { + let a = _mm_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm_mask_srlv_epi16(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_srlv_epi16() { + let a = _mm_set1_epi16(1 << 1); + let count = _mm_set1_epi16(2); + let r = _mm_maskz_srlv_epi16(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srlv_epi16(0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_sra_epi16() { + let a = _mm512_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm512_sra_epi16(a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_sra_epi16() { + let a = _mm512_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm512_mask_sra_epi16(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_sra_epi16() { + let a = _mm512_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm512_maskz_sra_epi16(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_sra_epi16() { + let a = _mm256_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm256_mask_sra_epi16(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_sra_epi16() { + let a = _mm256_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm256_maskz_sra_epi16(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_sra_epi16() { + let a = _mm_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm_mask_sra_epi16(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sra_epi16(a, 0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_sra_epi16() { + let a = _mm_set1_epi16(8); + let count = _mm_set1_epi16(1); + let r = _mm_maskz_sra_epi16(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sra_epi16(0b11111111, a, count); + let e = _mm_set1_epi16(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_srai_epi16() { + let a = _mm512_set1_epi16(8); + let r = _mm512_srai_epi16::<2>(a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_srai_epi16() { + let a = _mm512_set1_epi16(8); + let r = _mm512_mask_srai_epi16::<2>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_srai_epi16() { + let a = _mm512_set1_epi16(8); + let r = _mm512_maskz_srai_epi16::<2>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_srai_epi16() { + let a = _mm256_set1_epi16(8); + let r = _mm256_mask_srai_epi16::<2>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_srai_epi16() { + let a = _mm256_set1_epi16(8); + let r = _mm256_maskz_srai_epi16::<2>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_srai_epi16() { + let a = _mm_set1_epi16(8); + let r = _mm_mask_srai_epi16::<2>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_srai_epi16() { + let a = _mm_set1_epi16(8); + let r = _mm_maskz_srai_epi16::<2>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srai_epi16::<2>(0b11111111, a); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn 
test_mm512_srav_epi16() { + let a = _mm512_set1_epi16(8); + let count = _mm512_set1_epi16(2); + let r = _mm512_srav_epi16(a, count); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_srav_epi16() { + let a = _mm512_set1_epi16(8); + let count = _mm512_set1_epi16(2); + let r = _mm512_mask_srav_epi16(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_srav_epi16() { + let a = _mm512_set1_epi16(8); + let count = _mm512_set1_epi16(2); + let r = _mm512_maskz_srav_epi16(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_srav_epi16() { + let a = _mm256_set1_epi16(8); + let count = _mm256_set1_epi16(2); + let r = _mm256_srav_epi16(a, count); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_srav_epi16() { + let a = _mm256_set1_epi16(8); + let count = _mm256_set1_epi16(2); + let r = _mm256_mask_srav_epi16(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_srav_epi16() { + let a = _mm256_set1_epi16(8); + let count = _mm256_set1_epi16(2); + let r = _mm256_maskz_srav_epi16(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + 
unsafe fn test_mm_srav_epi16() { + let a = _mm_set1_epi16(8); + let count = _mm_set1_epi16(2); + let r = _mm_srav_epi16(a, count); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_srav_epi16() { + let a = _mm_set1_epi16(8); + let count = _mm_set1_epi16(2); + let r = _mm_mask_srav_epi16(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srav_epi16(a, 0b11111111, a, count); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_srav_epi16() { + let a = _mm_set1_epi16(8); + let count = _mm_set1_epi16(2); + let r = _mm_maskz_srav_epi16(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srav_epi16(0b11111111, a, count); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_permutex2var_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm512_set1_epi16(100); + let r = _mm512_permutex2var_epi16(a, idx, b); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_permutex2var_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 
1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm512_set1_epi16(100); + let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_permutex2var_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm512_set1_epi16(100); + let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask2_permutex2var_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm512_set1_epi16(100); + let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, 
b); + assert_eq_m512i(r, idx); + let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_permutex2var_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); + let b = _mm256_set1_epi16(100); + let r = _mm256_permutex2var_epi16(a, idx, b); + let e = _mm256_set_epi16( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); + let b = _mm256_set1_epi16(100); + let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b); + let e = _mm256_set_epi16( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); + let b = _mm256_set1_epi16(100); + let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, 
b); + let e = _mm256_set_epi16( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); + let b = _mm256_set1_epi16(100); + let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b); + assert_eq_m256i(r, idx); + let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b); + #[rustfmt::skip] + let e = _mm256_set_epi16( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_permutex2var_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm_set1_epi16(100); + let r = _mm_permutex2var_epi16(a, idx, b); + let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_permutex2var_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm_set1_epi16(100); + let r = _mm_mask_permutex2var_epi16(a, 0, idx, b); + assert_eq_m128i(r, a); + let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b); + let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm_set1_epi16(100); + let r = _mm_maskz_permutex2var_epi16(0, a, idx, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm_maskz_permutex2var_epi16(0b11111111, a, idx, b); + let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm_set1_epi16(100); + let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b); + assert_eq_m128i(r, idx); + let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b); + let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_permutexvar_epi16() { + let idx = _mm512_set1_epi16(1); + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_permutexvar_epi16(idx, a); + let e = _mm512_set1_epi16(30); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_permutexvar_epi16() { + let idx = _mm512_set1_epi16(1); + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a); + let e = _mm512_set1_epi16(30); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_permutexvar_epi16() { + let idx = _mm512_set1_epi16(1); + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_maskz_permutexvar_epi16(0, idx, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, 
idx, a); + let e = _mm512_set1_epi16(30); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_permutexvar_epi16() { + let idx = _mm256_set1_epi16(1); + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_permutexvar_epi16(idx, a); + let e = _mm256_set1_epi16(14); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_epi16() { + let idx = _mm256_set1_epi16(1); + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a); + let e = _mm256_set1_epi16(14); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_epi16() { + let idx = _mm256_set1_epi16(1); + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_permutexvar_epi16(0, idx, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a); + let e = _mm256_set1_epi16(14); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_permutexvar_epi16() { + let idx = _mm_set1_epi16(1); + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_permutexvar_epi16(idx, a); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_permutexvar_epi16() { + let idx = _mm_set1_epi16(1); + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_mask_permutexvar_epi16(a, 0, idx, a); + assert_eq_m128i(r, a); + let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_permutexvar_epi16() { + let 
idx = _mm_set1_epi16(1); + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_maskz_permutexvar_epi16(0, idx, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_blend_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(2); + let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_blend_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(2); + let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b); + let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_blend_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(2); + let r = _mm_mask_blend_epi16(0b11110000, a, b); + let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_blend_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(2); + let r = _mm512_mask_blend_epi8( + 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_blend_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(2); + let r = 
_mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_blend_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(2); + let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b); + let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_broadcastw_epi16() { + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_broadcastw_epi16(a); + let e = _mm512_set1_epi16(24); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_broadcastw_epi16() { + let src = _mm512_set1_epi16(1); + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_broadcastw_epi16(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(24); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_broadcastw_epi16() { + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_maskz_broadcastw_epi16(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(24); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_broadcastw_epi16() { + let src = _mm256_set1_epi16(1); + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm256_mask_broadcastw_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(24); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_broadcastw_epi16() { + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm256_maskz_broadcastw_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a); + let e = _mm256_set1_epi16(24); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_broadcastw_epi16() { + let src = _mm_set1_epi16(1); + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm_mask_broadcastw_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a); + let e = _mm_set1_epi16(24); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_broadcastw_epi16() { + let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm_maskz_broadcastw_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_broadcastw_epi16(0b11111111, a); + let e = _mm_set1_epi16(24); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_broadcastb_epi8() { + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_broadcastb_epi8(a); + let e = _mm512_set1_epi8(32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_broadcastb_epi8() { + let src = _mm512_set1_epi8(1); + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_mask_broadcastb_epi8(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcastb_epi8( + src, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + ); + let e = _mm512_set1_epi8(32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_broadcastb_epi8() { + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 
31, 32, + ); + let r = _mm512_maskz_broadcastb_epi8(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcastb_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + ); + let e = _mm512_set1_epi8(32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_broadcastb_epi8() { + let src = _mm256_set1_epi8(1); + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm256_mask_broadcastb_epi8(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_broadcastb_epi8() { + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm256_maskz_broadcastb_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_broadcastb_epi8() { + let src = _mm_set1_epi8(1); + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm_mask_broadcastb_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a); + let e = _mm_set1_epi8(32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_broadcastb_epi8() { + let a = _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm_maskz_broadcastb_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a); + let e = _mm_set1_epi8(32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512bw")] + unsafe fn test_mm512_unpackhi_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + let r = _mm512_unpackhi_epi16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12, + 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_unpackhi_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + let r = _mm512_mask_unpackhi_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12, + 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_unpackhi_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + let r = _mm512_maskz_unpackhi_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12, + 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_epi16() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi16( + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + ); + let r = _mm256_mask_unpackhi_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b); + let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_epi16() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi16( + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + ); + let r = _mm256_maskz_unpackhi_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b); + let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_unpackhi_epi16() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); + let r = _mm_mask_unpackhi_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b); + let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_epi16() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); + let r = 
_mm_maskz_unpackhi_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b); + let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_unpackhi_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + #[rustfmt::skip] + let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); + let r = _mm512_unpackhi_epi8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, + 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24, + 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40, + 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_unpackhi_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + #[rustfmt::skip] + let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 
123, 124, 125, 126, 127, 0); + let r = _mm512_mask_unpackhi_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpackhi_epi8( + a, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, + 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24, + 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40, + 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_unpackhi_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + #[rustfmt::skip] + let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); + let r = _mm512_maskz_unpackhi_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpackhi_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, + 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24, + 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40, + 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_epi8() { + #[rustfmt::skip] + let a = 
_mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); + let r = _mm256_mask_unpackhi_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, + 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); + let r = _mm256_maskz_unpackhi_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, + 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_unpackhi_epi8() { + let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_set_epi8( + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + ); + let r = _mm_mask_unpackhi_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b); + let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_epi8() { + let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_set_epi8( + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + ); + let r = _mm_maskz_unpackhi_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b); + let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_unpacklo_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + let r = _mm512_unpacklo_epi16(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16, + 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_unpacklo_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + let r = _mm512_mask_unpacklo_epi16(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16, + 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_unpacklo_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + let r = _mm512_maskz_unpacklo_epi16(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16, + 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_epi16() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi16( + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + ); + let r = _mm256_mask_unpacklo_epi16(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b); + let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_epi16() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi16( + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + ); + let r = _mm256_maskz_unpacklo_epi16(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b); + let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_unpacklo_epi16() { + let a = 
_mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); + let r = _mm_mask_unpacklo_epi16(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b); + let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_epi16() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); + let r = _mm_maskz_unpacklo_epi16(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b); + let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_unpacklo_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + #[rustfmt::skip] + let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); + let r = _mm512_unpacklo_epi8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, + 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, + 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_unpacklo_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + #[rustfmt::skip] + let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); + let r = _mm512_mask_unpacklo_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpacklo_epi8( + a, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, + 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, + 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_unpacklo_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + #[rustfmt::skip] + let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); + let r = _mm512_maskz_unpacklo_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpacklo_epi8( + 
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, + 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, + 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); + let r = _mm256_mask_unpacklo_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + #[rustfmt::skip] + let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); + let r = _mm256_maskz_unpacklo_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + 89, 25, 90, 26, 91, 27, 92, 28, 93, 
29, 94, 30, 95, 31, 96, 32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_unpacklo_epi8() { + let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_set_epi8( + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + ); + let r = _mm_mask_unpacklo_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b); + let e = _mm_set_epi8( + 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_epi8() { + let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_set_epi8( + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + ); + let r = _mm_maskz_unpacklo_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b); + let e = _mm_set_epi8( + 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_mov_epi16() { + let src = _mm512_set1_epi16(1); + let a = _mm512_set1_epi16(2); + let r = _mm512_mask_mov_epi16(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_mov_epi16() { + let a = _mm512_set1_epi16(2); + let r = _mm512_maskz_mov_epi16(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_mov_epi16() { + let src = _mm256_set1_epi16(1); + let a = _mm256_set1_epi16(2); + let r = _mm256_mask_mov_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = 
_mm256_mask_mov_epi16(src, 0b11111111_11111111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_mov_epi16() { + let a = _mm256_set1_epi16(2); + let r = _mm256_maskz_mov_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_mov_epi16() { + let src = _mm_set1_epi16(1); + let a = _mm_set1_epi16(2); + let r = _mm_mask_mov_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_mov_epi16(src, 0b11111111, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_mov_epi16() { + let a = _mm_set1_epi16(2); + let r = _mm_maskz_mov_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mov_epi16(0b11111111, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_mov_epi8() { + let src = _mm512_set1_epi8(1); + let a = _mm512_set1_epi8(2); + let r = _mm512_mask_mov_epi8(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_mov_epi8( + src, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + ); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_mov_epi8() { + let a = _mm512_set1_epi8(2); + let r = _mm512_maskz_mov_epi8(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mov_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + ); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_mov_epi8() { + let src = _mm256_set1_epi8(1); + let a = _mm256_set1_epi8(2); + let r = _mm256_mask_mov_epi8(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a); + assert_eq_m256i(r, a); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_mov_epi8() { + let a = _mm256_set1_epi8(2); + let r = _mm256_maskz_mov_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_mov_epi8() { + let src = _mm_set1_epi8(1); + let a = _mm_set1_epi8(2); + let r = _mm_mask_mov_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_mov_epi8() { + let a = _mm_set1_epi8(2); + let r = _mm_maskz_mov_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mov_epi8(0b11111111_11111111, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_set1_epi16() { + let src = _mm512_set1_epi16(2); + let a: i16 = 11; + let r = _mm512_mask_set1_epi16(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_set1_epi16() { + let a: i16 = 11; + let r = _mm512_maskz_set1_epi16(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_set1_epi16() { + let src = _mm256_set1_epi16(2); + let a: i16 = 11; + let r = _mm256_mask_set1_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_set1_epi16() { + let a: i16 = 
11; + let r = _mm256_maskz_set1_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a); + let e = _mm256_set1_epi16(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_set1_epi16() { + let src = _mm_set1_epi16(2); + let a: i16 = 11; + let r = _mm_mask_set1_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_set1_epi16(src, 0b11111111, a); + let e = _mm_set1_epi16(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_set1_epi16() { + let a: i16 = 11; + let r = _mm_maskz_set1_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_set1_epi16(0b11111111, a); + let e = _mm_set1_epi16(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_set1_epi8() { + let src = _mm512_set1_epi8(2); + let a: i8 = 11; + let r = _mm512_mask_set1_epi8(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_set1_epi8( + src, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + ); + let e = _mm512_set1_epi8(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_set1_epi8() { + let a: i8 = 11; + let r = _mm512_maskz_set1_epi8(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_set1_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + ); + let e = _mm512_set1_epi8(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_set1_epi8() { + let src = _mm256_set1_epi8(2); + let a: i8 = 11; + let r = _mm256_mask_set1_epi8(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn 
test_mm256_maskz_set1_epi8() { + let a: i8 = 11; + let r = _mm256_maskz_set1_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_set1_epi8() { + let src = _mm_set1_epi8(2); + let a: i8 = 11; + let r = _mm_mask_set1_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a); + let e = _mm_set1_epi8(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_set1_epi8() { + let a: i8 = 11; + let r = _mm_maskz_set1_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_set1_epi8(0b11111111_11111111, a); + let e = _mm_set1_epi8(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_shufflelo_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, + 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, + ); + let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_shufflelo_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>( + a, + 0b11111111_11111111_11111111_11111111, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, + 16, 17, 18, 19, 23, 22, 22, 20, 
24, 25, 26, 27, 31, 30, 30, 28, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_shufflelo_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = + _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, + 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_shufflelo_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); + let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_shufflelo_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a); + let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_shufflelo_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a); + let e = 
_mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_shufflelo_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_shufflehi_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, + 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, + ); + let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_shufflehi_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>( + a, + 0b11111111_11111111_11111111_11111111, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, + 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_shufflehi_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); + assert_eq_m512i(r, 
_mm512_setzero_si512()); + let r = + _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, + 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_shufflehi_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); + let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_shufflehi_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a); + let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_shufflehi_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a); + let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_shufflehi_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a); + let e = _mm_set_epi16(3, 2, 2, 0, 
4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let b = _mm512_set1_epi8(1); + let r = _mm512_shuffle_epi8(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let b = _mm512_set1_epi8(1); + let r = _mm512_mask_shuffle_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shuffle_epi8( + a, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let b = _mm512_set1_epi8(1); + let r = _mm512_maskz_shuffle_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shuffle_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let b = _mm256_set1_epi8(1); + let r = _mm256_mask_shuffle_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let b = _mm256_set1_epi8(1); + let r = _mm256_maskz_shuffle_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 30, 30, 30, 30, 30, 
30, 30, 30, 30, 30, 30, 30, 30, 30, 30); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_shuffle_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set1_epi8(1); + let r = _mm_mask_shuffle_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b); + let e = _mm_set_epi8( + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_set1_epi8(1); + let r = _mm_maskz_shuffle_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b); + let e = _mm_set_epi8( + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_test_epi16_mask() { + let a = _mm512_set1_epi16(1 << 0); + let b = _mm512_set1_epi16(1 << 0 | 1 << 1); + let r = _mm512_test_epi16_mask(a, b); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_test_epi16_mask() { + let a = _mm512_set1_epi16(1 << 0); + let b = _mm512_set1_epi16(1 << 0 | 1 << 1); + let r = _mm512_mask_test_epi16_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_test_epi16_mask() { + let a = _mm256_set1_epi16(1 << 0); + let b = _mm256_set1_epi16(1 << 0 | 1 << 1); + let r = _mm256_test_epi16_mask(a, b); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + 
#[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_test_epi16_mask() { + let a = _mm256_set1_epi16(1 << 0); + let b = _mm256_set1_epi16(1 << 0 | 1 << 1); + let r = _mm256_mask_test_epi16_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_test_epi16_mask() { + let a = _mm_set1_epi16(1 << 0); + let b = _mm_set1_epi16(1 << 0 | 1 << 1); + let r = _mm_test_epi16_mask(a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_test_epi16_mask() { + let a = _mm_set1_epi16(1 << 0); + let b = _mm_set1_epi16(1 << 0 | 1 << 1); + let r = _mm_mask_test_epi16_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_test_epi16_mask(0b11111111, a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_test_epi8_mask() { + let a = _mm512_set1_epi8(1 << 0); + let b = _mm512_set1_epi8(1 << 0 | 1 << 1); + let r = _mm512_test_epi8_mask(a, b); + let e: __mmask64 = + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_test_epi8_mask() { + let a = _mm512_set1_epi8(1 << 0); + let b = _mm512_set1_epi8(1 << 0 | 1 << 1); + let r = _mm512_mask_test_epi8_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_test_epi8_mask( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + let e: __mmask64 = + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_test_epi8_mask() { + let a = _mm256_set1_epi8(1 << 0); + let b = _mm256_set1_epi8(1 << 0 | 1 << 1); + let r = _mm256_test_epi8_mask(a, b); + let e: 
__mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_test_epi8_mask() { + let a = _mm256_set1_epi8(1 << 0); + let b = _mm256_set1_epi8(1 << 0 | 1 << 1); + let r = _mm256_mask_test_epi8_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_test_epi8_mask() { + let a = _mm_set1_epi8(1 << 0); + let b = _mm_set1_epi8(1 << 0 | 1 << 1); + let r = _mm_test_epi8_mask(a, b); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_test_epi8_mask() { + let a = _mm_set1_epi8(1 << 0); + let b = _mm_set1_epi8(1 << 0 | 1 << 1); + let r = _mm_mask_test_epi8_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_testn_epi16_mask() { + let a = _mm512_set1_epi16(1 << 0); + let b = _mm512_set1_epi16(1 << 0 | 1 << 1); + let r = _mm512_testn_epi16_mask(a, b); + let e: __mmask32 = 0b00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_testn_epi16_mask() { + let a = _mm512_set1_epi16(1 << 0); + let b = _mm512_set1_epi16(1 << 0 | 1 << 1); + let r = _mm512_mask_testn_epi16_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b); + let e: __mmask32 = 0b00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_testn_epi16_mask() { + let a = _mm256_set1_epi16(1 << 0); + let b = _mm256_set1_epi16(1 << 0 | 1 << 1); + let r = 
_mm256_testn_epi16_mask(a, b); + let e: __mmask16 = 0b00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_testn_epi16_mask() { + let a = _mm256_set1_epi16(1 << 0); + let b = _mm256_set1_epi16(1 << 0 | 1 << 1); + let r = _mm256_mask_testn_epi16_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b); + let e: __mmask16 = 0b00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_testn_epi16_mask() { + let a = _mm_set1_epi16(1 << 0); + let b = _mm_set1_epi16(1 << 0 | 1 << 1); + let r = _mm_testn_epi16_mask(a, b); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_testn_epi16_mask() { + let a = _mm_set1_epi16(1 << 0); + let b = _mm_set1_epi16(1 << 0 | 1 << 1); + let r = _mm_mask_testn_epi16_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_testn_epi16_mask(0b11111111, a, b); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_testn_epi8_mask() { + let a = _mm512_set1_epi8(1 << 0); + let b = _mm512_set1_epi8(1 << 0 | 1 << 1); + let r = _mm512_testn_epi8_mask(a, b); + let e: __mmask64 = + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_testn_epi8_mask() { + let a = _mm512_set1_epi8(1 << 0); + let b = _mm512_set1_epi8(1 << 0 | 1 << 1); + let r = _mm512_mask_testn_epi8_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_testn_epi8_mask( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + let e: __mmask64 = + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_testn_epi8_mask() { + let a = 
_mm256_set1_epi8(1 << 0); + let b = _mm256_set1_epi8(1 << 0 | 1 << 1); + let r = _mm256_testn_epi8_mask(a, b); + let e: __mmask32 = 0b00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_testn_epi8_mask() { + let a = _mm256_set1_epi8(1 << 0); + let b = _mm256_set1_epi8(1 << 0 | 1 << 1); + let r = _mm256_mask_testn_epi8_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b); + let e: __mmask32 = 0b00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_testn_epi8_mask() { + let a = _mm_set1_epi8(1 << 0); + let b = _mm_set1_epi8(1 << 0 | 1 << 1); + let r = _mm_testn_epi8_mask(a, b); + let e: __mmask16 = 0b00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_testn_epi8_mask() { + let a = _mm_set1_epi8(1 << 0); + let b = _mm_set1_epi8(1 << 0 | 1 << 1); + let r = _mm_mask_testn_epi8_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b); + let e: __mmask16 = 0b00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_store_mask64() { + let a: __mmask64 = + 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000; + let mut r = 0; + _store_mask64(&mut r, a); + assert_eq!(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_store_mask32() { + let a: __mmask32 = 0b11111111_00000000_11111111_00000000; + let mut r = 0; + _store_mask32(&mut r, a); + assert_eq!(r, a); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_load_mask64() { + let p: __mmask64 = + 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000; + let r = _load_mask64(&p); + let e: __mmask64 = + 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000; + assert_eq!(r, e); + } + + 
#[simd_test(enable = "avx512bw")] + unsafe fn test_load_mask32() { + let p: __mmask32 = 0b11111111_00000000_11111111_00000000; + let r = _load_mask32(&p); + let e: __mmask32 = 0b11111111_00000000_11111111_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_sad_epu8() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(4); + let r = _mm512_sad_epu8(a, b); + let e = _mm512_set1_epi64(16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_dbsad_epu8() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(4); + let r = _mm512_dbsad_epu8::<0>(a, b); + let e = _mm512_set1_epi16(8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_dbsad_epu8() { + let src = _mm512_set1_epi16(1); + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(4); + let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b); + let e = _mm512_set1_epi16(8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_dbsad_epu8() { + let a = _mm512_set1_epi8(2); + let b = _mm512_set1_epi8(4); + let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b); + let e = _mm512_set1_epi16(8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_dbsad_epu8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_dbsad_epu8::<0>(a, b); + let e = _mm256_set1_epi16(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_dbsad_epu8() { + let src = _mm256_set1_epi16(1); + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b); + 
assert_eq_m256i(r, src); + let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b); + let e = _mm256_set1_epi16(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_dbsad_epu8() { + let a = _mm256_set1_epi8(2); + let b = _mm256_set1_epi8(4); + let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b); + let e = _mm256_set1_epi16(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_dbsad_epu8() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(4); + let r = _mm_dbsad_epu8::<0>(a, b); + let e = _mm_set1_epi16(8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_dbsad_epu8() { + let src = _mm_set1_epi16(1); + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(4); + let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b); + let e = _mm_set1_epi16(8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_dbsad_epu8() { + let a = _mm_set1_epi8(2); + let b = _mm_set1_epi8(4); + let r = _mm_maskz_dbsad_epu8::<0>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b); + let e = _mm_set1_epi16(8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_movepi16_mask() { + let a = _mm512_set1_epi16(1 << 15); + let r = _mm512_movepi16_mask(a); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_movepi16_mask() { + let a = _mm256_set1_epi16(1 << 15); + let r = _mm256_movepi16_mask(a); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe 
fn test_mm_movepi16_mask() { + let a = _mm_set1_epi16(1 << 15); + let r = _mm_movepi16_mask(a); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_movepi8_mask() { + let a = _mm512_set1_epi8(1 << 7); + let r = _mm512_movepi8_mask(a); + let e: __mmask64 = + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_movepi8_mask() { + let a = _mm256_set1_epi8(1 << 7); + let r = _mm256_movepi8_mask(a); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_movepi8_mask() { + let a = _mm_set1_epi8(1 << 7); + let r = _mm_movepi8_mask(a); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_movm_epi16() { + let a: __mmask32 = 0b11111111_11111111_11111111_11111111; + let r = _mm512_movm_epi16(a); + let e = _mm512_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_movm_epi16() { + let a: __mmask16 = 0b11111111_11111111; + let r = _mm256_movm_epi16(a); + let e = _mm256_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_movm_epi16() { + let a: __mmask8 = 0b11111111; + let r = _mm_movm_epi16(a); + let e = _mm_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + 
| 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_movm_epi8() { + let a: __mmask64 = + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; + let r = _mm512_movm_epi8(a); + let e = + _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_movm_epi8() { + let a: __mmask32 = 0b11111111_11111111_11111111_11111111; + let r = _mm256_movm_epi8(a); + let e = + _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_movm_epi8() { + let a: __mmask16 = 0b11111111_11111111; + let r = _mm_movm_epi8(a); + let e = + _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_cvtmask32_u32() { + let a: __mmask32 = 0b11001100_00110011_01100110_10011001; + let r = _cvtmask32_u32(a); + let e: u32 = 0b11001100_00110011_01100110_10011001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_cvtu32_mask32() { + let a: u32 = 0b11001100_00110011_01100110_10011001; + let r = _cvtu32_mask32(a); + let e: __mmask32 = 0b11001100_00110011_01100110_10011001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kadd_mask32() { + let a: __mmask32 = 11; + let b: __mmask32 = 22; + let r = _kadd_mask32(a, b); + let e: __mmask32 = 33; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kadd_mask64() { + let a: __mmask64 = 11; + let b: __mmask64 = 22; + let r = _kadd_mask64(a, b); + let e: __mmask64 = 33; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kand_mask32() { + let a: __mmask32 = 
0b11001100_00110011_11001100_00110011; + let b: __mmask32 = 0b11001100_00110011_11001100_00110011; + let r = _kand_mask32(a, b); + let e: __mmask32 = 0b11001100_00110011_11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kand_mask64() { + let a: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let b: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let r = _kand_mask64(a, b); + let e: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_knot_mask32() { + let a: __mmask32 = 0b11001100_00110011_11001100_00110011; + let r = _knot_mask32(a); + let e: __mmask32 = 0b00110011_11001100_00110011_11001100; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_knot_mask64() { + let a: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let r = _knot_mask64(a); + let e: __mmask64 = + 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kandn_mask32() { + let a: __mmask32 = 0b11001100_00110011_11001100_00110011; + let b: __mmask32 = 0b11001100_00110011_11001100_00110011; + let r = _kandn_mask32(a, b); + let e: __mmask32 = 0b00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kandn_mask64() { + let a: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let b: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let r = _kandn_mask64(a, b); + let e: __mmask64 = + 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kor_mask32() { + let a: 
__mmask32 = 0b00110011_11001100_00110011_11001100; + let b: __mmask32 = 0b11001100_00110011_11001100_00110011; + let r = _kor_mask32(a, b); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kor_mask64() { + let a: __mmask64 = + 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; + let b: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let r = _kor_mask64(a, b); + let e: __mmask64 = + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kxor_mask32() { + let a: __mmask32 = 0b00110011_11001100_00110011_11001100; + let b: __mmask32 = 0b11001100_00110011_11001100_00110011; + let r = _kxor_mask32(a, b); + let e: __mmask32 = 0b11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kxor_mask64() { + let a: __mmask64 = + 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; + let b: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let r = _kxor_mask64(a, b); + let e: __mmask64 = + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kxnor_mask32() { + let a: __mmask32 = 0b00110011_11001100_00110011_11001100; + let b: __mmask32 = 0b11001100_00110011_11001100_00110011; + let r = _kxnor_mask32(a, b); + let e: __mmask32 = 0b00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kxnor_mask64() { + let a: __mmask64 = + 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; + let b: __mmask64 = + 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; + let r = _kxnor_mask64(a, b); + let e: __mmask64 = + 
0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kortest_mask32_u8() { + let a: __mmask32 = 0b0110100101101001_0110100101101001; + let b: __mmask32 = 0b1011011010110110_1011011010110110; + let mut all_ones: u8 = 0; + let r = _kortest_mask32_u8(a, b, &mut all_ones); + assert_eq!(r, 0); + assert_eq!(all_ones, 1); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kortest_mask64_u8() { + let a: __mmask64 = 0b0110100101101001_0110100101101001; + let b: __mmask64 = 0b1011011010110110_1011011010110110; + let mut all_ones: u8 = 0; + let r = _kortest_mask64_u8(a, b, &mut all_ones); + assert_eq!(r, 0); + assert_eq!(all_ones, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kortestc_mask32_u8() { + let a: __mmask32 = 0b0110100101101001_0110100101101001; + let b: __mmask32 = 0b1011011010110110_1011011010110110; + let r = _kortestc_mask32_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kortestc_mask64_u8() { + let a: __mmask64 = 0b0110100101101001_0110100101101001; + let b: __mmask64 = 0b1011011010110110_1011011010110110; + let r = _kortestc_mask64_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kortestz_mask32_u8() { + let a: __mmask32 = 0b0110100101101001_0110100101101001; + let b: __mmask32 = 0b1011011010110110_1011011010110110; + let r = _kortestz_mask32_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kortestz_mask64_u8() { + let a: __mmask64 = 0b0110100101101001_0110100101101001; + let b: __mmask64 = 0b1011011010110110_1011011010110110; + let r = _kortestz_mask64_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kshiftli_mask32() { + let a: __mmask32 = 0b0110100101101001_0110100101101001; + let r = _kshiftli_mask32::<3>(a); + let e: __mmask32 = 0b0100101101001011_0100101101001000; + 
assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kshiftli_mask64() { + let a: __mmask64 = 0b0110100101101001_0110100101101001; + let r = _kshiftli_mask64::<3>(a); + let e: __mmask64 = 0b0110100101101001011_0100101101001000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kshiftri_mask32() { + let a: __mmask32 = 0b0110100101101001_0110100101101001; + let r = _kshiftri_mask32::<3>(a); + let e: __mmask32 = 0b0000110100101101_0010110100101101; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_kshiftri_mask64() { + let a: __mmask64 = 0b0110100101101001011_0100101101001000; + let r = _kshiftri_mask64::<3>(a); + let e: __mmask64 = 0b0110100101101001_0110100101101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_ktest_mask32_u8() { + let a: __mmask32 = 0b0110100100111100_0110100100111100; + let b: __mmask32 = 0b1001011011000011_1001011011000011; + let mut and_not: u8 = 0; + let r = _ktest_mask32_u8(a, b, &mut and_not); + assert_eq!(r, 1); + assert_eq!(and_not, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_ktestc_mask32_u8() { + let a: __mmask32 = 0b0110100100111100_0110100100111100; + let b: __mmask32 = 0b1001011011000011_1001011011000011; + let r = _ktestc_mask32_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_ktestz_mask32_u8() { + let a: __mmask32 = 0b0110100100111100_0110100100111100; + let b: __mmask32 = 0b1001011011000011_1001011011000011; + let r = _ktestz_mask32_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_ktest_mask64_u8() { + let a: __mmask64 = 0b0110100100111100_0110100100111100; + let b: __mmask64 = 0b1001011011000011_1001011011000011; + let mut and_not: u8 = 0; + let r = _ktest_mask64_u8(a, b, &mut and_not); + assert_eq!(r, 1); + assert_eq!(and_not, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_ktestc_mask64_u8() { + let a: 
__mmask64 = 0b0110100100111100_0110100100111100; + let b: __mmask64 = 0b1001011011000011_1001011011000011; + let r = _ktestc_mask64_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_ktestz_mask64_u8() { + let a: __mmask64 = 0b0110100100111100_0110100100111100; + let b: __mmask64 = 0b1001011011000011_1001011011000011; + let r = _ktestz_mask64_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_kunpackw() { + let a: u32 = 0x00110011; + let b: u32 = 0x00001011; + let r = _mm512_kunpackw(a, b); + let e: u32 = 0x00111011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_kunpackd() { + let a: u64 = 0x11001100_00110011; + let b: u64 = 0x00101110_00001011; + let r = _mm512_kunpackd(a, b); + let e: u64 = 0x00110011_00001011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cvtepi16_epi8() { + let a = _mm512_set1_epi16(2); + let r = _mm512_cvtepi16_epi8(a); + let e = _mm256_set1_epi8(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtepi16_epi8() { + let src = _mm256_set1_epi8(1); + let a = _mm512_set1_epi16(2); + let r = _mm512_mask_cvtepi16_epi8(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_cvtepi16_epi8() { + let a = _mm512_set1_epi16(2); + let r = _mm512_maskz_cvtepi16_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cvtepi16_epi8() { + let a = _mm256_set1_epi16(2); + let r = _mm256_cvtepi16_epi8(a); + let e = _mm_set1_epi8(2); + assert_eq_m128i(r, e); + 
} + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtepi16_epi8() { + let src = _mm_set1_epi8(1); + let a = _mm256_set1_epi16(2); + let r = _mm256_mask_cvtepi16_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a); + let e = _mm_set1_epi8(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi16_epi8() { + let a = _mm256_set1_epi16(2); + let r = _mm256_maskz_cvtepi16_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a); + let e = _mm_set1_epi8(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cvtepi16_epi8() { + let a = _mm_set1_epi16(2); + let r = _mm_cvtepi16_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtepi16_epi8() { + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm_set1_epi16(2); + let r = _mm_mask_cvtepi16_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_cvtepi16_epi8() { + let a = _mm_set1_epi16(2); + let r = _mm_maskz_cvtepi16_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi16_epi8(0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cvtsepi16_epi8() { + let a = _mm512_set1_epi16(i16::MAX); + let r = _mm512_cvtsepi16_epi8(a); + let e = _mm256_set1_epi8(i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn 
test_mm512_mask_cvtsepi16_epi8() { + let src = _mm256_set1_epi8(1); + let a = _mm512_set1_epi16(i16::MAX); + let r = _mm512_mask_cvtsepi16_epi8(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cvtsepi16_epi8() { + let a = _mm256_set1_epi16(i16::MAX); + let r = _mm256_cvtsepi16_epi8(a); + let e = _mm_set1_epi8(i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi16_epi8() { + let src = _mm_set1_epi8(1); + let a = _mm256_set1_epi16(i16::MAX); + let r = _mm256_mask_cvtsepi16_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a); + let e = _mm_set1_epi8(i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi16_epi8() { + let a = _mm256_set1_epi16(i16::MAX); + let r = _mm256_maskz_cvtsepi16_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a); + let e = _mm_set1_epi8(i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cvtsepi16_epi8() { + let a = _mm_set1_epi16(i16::MAX); + let r = _mm_cvtsepi16_epi8(a); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtsepi16_epi8() { + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm_set1_epi16(i16::MAX); + let r = _mm_mask_cvtsepi16_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 
i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi16_epi8() { + let a = _mm_set1_epi16(i16::MAX); + let r = _mm_maskz_cvtsepi16_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_cvtsepi16_epi8() { + let a = _mm512_set1_epi16(i16::MAX); + let r = _mm512_maskz_cvtsepi16_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cvtusepi16_epi8() { + let a = _mm512_set1_epi16(i16::MIN); + let r = _mm512_cvtusepi16_epi8(a); + let e = _mm256_set1_epi8(-1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtusepi16_epi8() { + let src = _mm256_set1_epi8(1); + let a = _mm512_set1_epi16(i16::MIN); + let r = _mm512_mask_cvtusepi16_epi8(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(-1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_cvtusepi16_epi8() { + let a = _mm512_set1_epi16(i16::MIN); + let r = _mm512_maskz_cvtusepi16_epi8(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a); + let e = _mm256_set1_epi8(-1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_cvtusepi16_epi8() { + let a = _mm256_set1_epi16(i16::MIN); + let r = 
_mm256_cvtusepi16_epi8(a); + let e = _mm_set1_epi8(-1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi16_epi8() { + let src = _mm_set1_epi8(1); + let a = _mm256_set1_epi16(i16::MIN); + let r = _mm256_mask_cvtusepi16_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a); + let e = _mm_set1_epi8(-1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi16_epi8() { + let a = _mm256_set1_epi16(i16::MIN); + let r = _mm256_maskz_cvtusepi16_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a); + let e = _mm_set1_epi8(-1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_cvtusepi16_epi8() { + let a = _mm_set1_epi16(i16::MIN); + let r = _mm_cvtusepi16_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtusepi16_epi8() { + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm_set1_epi16(i16::MIN); + let r = _mm_mask_cvtusepi16_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi16_epi8() { + let a = _mm_set1_epi16(i16::MIN); + let r = _mm_maskz_cvtusepi16_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cvtepi8_epi16() { + let a = _mm256_set1_epi8(2); + let r 
= _mm512_cvtepi8_epi16(a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtepi8_epi16() { + let src = _mm512_set1_epi16(1); + let a = _mm256_set1_epi8(2); + let r = _mm512_mask_cvtepi8_epi16(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_cvtepi8_epi16() { + let a = _mm256_set1_epi8(2); + let r = _mm512_maskz_cvtepi8_epi16(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtepi8_epi16() { + let src = _mm256_set1_epi16(1); + let a = _mm_set1_epi8(2); + let r = _mm256_mask_cvtepi8_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi8_epi16() { + let a = _mm_set1_epi8(2); + let r = _mm256_maskz_cvtepi8_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtepi8_epi16() { + let src = _mm_set1_epi16(1); + let a = _mm_set1_epi8(2); + let r = _mm_mask_cvtepi8_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_cvtepi8_epi16() { + let a = _mm_set1_epi8(2); + let r = _mm_maskz_cvtepi8_epi16(0, a); + 
assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi8_epi16(0b11111111, a); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_cvtepu8_epi16() { + let a = _mm256_set1_epi8(2); + let r = _mm512_cvtepu8_epi16(a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtepu8_epi16() { + let src = _mm512_set1_epi16(1); + let a = _mm256_set1_epi8(2); + let r = _mm512_mask_cvtepu8_epi16(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_cvtepu8_epi16() { + let a = _mm256_set1_epi8(2); + let r = _mm512_maskz_cvtepu8_epi16(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a); + let e = _mm512_set1_epi16(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtepu8_epi16() { + let src = _mm256_set1_epi16(1); + let a = _mm_set1_epi8(2); + let r = _mm256_mask_cvtepu8_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu8_epi16() { + let a = _mm_set1_epi8(2); + let r = _mm256_maskz_cvtepu8_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a); + let e = _mm256_set1_epi16(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtepu8_epi16() { + let src = _mm_set1_epi16(1); + let a = _mm_set1_epi8(2); + let r = _mm_mask_cvtepu8_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = 
_mm_mask_cvtepu8_epi16(src, 0b11111111, a); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_maskz_cvtepu8_epi16() { + let a = _mm_set1_epi8(2); + let r = _mm_maskz_cvtepu8_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu8_epi16(0b11111111, a); + let e = _mm_set1_epi16(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_bslli_epi128() { + #[rustfmt::skip] + let a = _mm512_set_epi8( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + ); + let r = _mm512_bslli_epi128::<9>(a); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_bsrli_epi128() { + #[rustfmt::skip] + let a = _mm512_set_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ); + let r = _mm512_bsrli_epi128::<3>(a); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_alignr_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + ); + let b = _mm512_set1_epi8(1); + let r = _mm512_alignr_epi8::<14>(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_alignr_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + ); + let b = _mm512_set1_epi8(1); + let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_alignr_epi8::<14>( + a, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_maskz_alignr_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + ); + let b = _mm512_set1_epi8(1); + let r = _mm512_maskz_alignr_epi8::<14>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_alignr_epi8::<14>( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 0, 
1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_alignr_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + ); + let b = _mm256_set1_epi8(1); + let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_alignr_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + ); + let b = _mm256_set1_epi8(1); + let r = _mm256_maskz_alignr_epi8::<14>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_alignr_epi8() { + let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); + let b = _mm_set1_epi8(1); + let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b); + let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn 
test_mm_maskz_alignr_epi8() { + let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); + let b = _mm_set1_epi8(1); + let r = _mm_maskz_alignr_epi8::<14>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b); + let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() { + let a = _mm512_set1_epi16(i16::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtsepi16_storeu_epi8( + &mut r as *mut _ as *mut i8, + 0b11111111_11111111_11111111_11111111, + a, + ); + let e = _mm256_set1_epi8(i8::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() { + let a = _mm256_set1_epi16(i16::MAX); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() { + let a = _mm_set1_epi16(i16::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() { + let a = _mm512_set1_epi16(8); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtepi16_storeu_epi8( + &mut r as *mut _ as *mut i8, + 0b11111111_11111111_11111111_11111111, + a, + ); + let e = _mm256_set1_epi8(8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() { + let a = _mm256_set1_epi16(8); + let mut r = 
_mm_undefined_si128(); + _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtepi16_storeu_epi8() { + let a = _mm_set1_epi16(8); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() { + let a = _mm512_set1_epi16(i16::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtusepi16_storeu_epi8( + &mut r as *mut _ as *mut i8, + 0b11111111_11111111_11111111_11111111, + a, + ); + let e = _mm256_set1_epi8(u8::MAX as i8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() { + let a = _mm256_set1_epi16(i16::MAX); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512bw,avx512vl")] + unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() { + let a = _mm_set1_epi16(i16::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512cd.rs b/library/stdarch/crates/core_arch/src/x86/avx512cd.rs new file mode 100644 index 000000000000..78735fcc90f5 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512cd.rs @@ -0,0 +1,1232 @@ +use crate::core_arch::{simd::*, x86::*}; +use 
crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastmw_epi32&expand=553) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d +pub fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i { + _mm512_set1_epi32(k as i32) +} + +/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastmw_epi32&expand=552) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d +pub fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i { + _mm256_set1_epi32(k as i32) +} + +/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastmw_epi32&expand=551) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d +pub fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i { + _mm_set1_epi32(k as i32) +} + +/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastmb_epi64&expand=550) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q +pub fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i { + _mm512_set1_epi64(k as i64) +} + +/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastmb_epi64&expand=549) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q +pub fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i { + _mm256_set1_epi64x(k as i64) +} + +/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastmb_epi64&expand=548) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q +pub fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i { + _mm_set1_epi64x(k as i64) +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_conflict_epi32&expand=1248) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm512_conflict_epi32(a: __m512i) -> __m512i { + unsafe { transmute(vpconflictd(a.as_i32x16())) } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_conflict_epi32&expand=1249) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, conflict, src.as_i32x16())) + } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_conflict_epi32&expand=1250) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, conflict, i32x16::ZERO)) + } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_conflict_epi32&expand=1245) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm256_conflict_epi32(a: __m256i) -> __m256i { + unsafe { transmute(vpconflictd256(a.as_i32x8())) } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_conflict_epi32&expand=1246) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, conflict, src.as_i32x8())) + } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_conflict_epi32&expand=1247) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, conflict, i32x8::ZERO)) + } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_conflict_epi32&expand=1242) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm_conflict_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpconflictd128(a.as_i32x4())) } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_conflict_epi32&expand=1243) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, conflict, src.as_i32x4())) + } +} + +/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_conflict_epi32&expand=1244) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictd))] +pub fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, conflict, i32x4::ZERO)) + } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_conflict_epi64&expand=1257) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm512_conflict_epi64(a: __m512i) -> __m512i { + unsafe { transmute(vpconflictq(a.as_i64x8())) } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_conflict_epi64&expand=1258) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, conflict, src.as_i64x8())) + } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_conflict_epi64&expand=1259) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, conflict, i64x8::ZERO)) + } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_conflict_epi64&expand=1254) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm256_conflict_epi64(a: __m256i) -> __m256i { + unsafe { transmute(vpconflictq256(a.as_i64x4())) } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_conflict_epi64&expand=1255) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, conflict, src.as_i64x4())) + } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_conflict_epi64&expand=1256) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, conflict, i64x4::ZERO)) + } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_conflict_epi64&expand=1251) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm_conflict_epi64(a: __m128i) -> __m128i { + unsafe { transmute(vpconflictq128(a.as_i64x2())) } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_conflict_epi64&expand=1252) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, conflict, src.as_i64x2())) + } +} + +/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_conflict_epi64&expand=1253) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpconflictq))] +pub fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, conflict, i64x2::ZERO)) + } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_lzcnt_epi32&expand=3491) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctlz(a.as_i32x16())) } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_lzcnt_epi32&expand=3492) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, zerocount, src.as_i32x16())) + } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_lzcnt_epi32&expand=3493) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO)) + } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lzcnt_epi32&expand=3488) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctlz(a.as_i32x8())) } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_lzcnt_epi32&expand=3489) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, zerocount, src.as_i32x8())) + } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_lzcnt_epi32&expand=3490) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO)) + } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lzcnt_epi32&expand=3485) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm_lzcnt_epi32(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctlz(a.as_i32x4())) } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_lzcnt_epi32&expand=3486) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, zerocount, src.as_i32x4())) + } +} + +/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_lzcnt_epi32&expand=3487) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntd))] +pub fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO)) + } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_lzcnt_epi64&expand=3500) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctlz(a.as_i64x8())) } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_lzcnt_epi64&expand=3501) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, zerocount, src.as_i64x8())) + } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_lzcnt_epi64&expand=3502) +#[inline] +#[target_feature(enable = "avx512cd")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO)) + } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lzcnt_epi64&expand=3497) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctlz(a.as_i64x4())) } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_lzcnt_epi64&expand=3498) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, zerocount, src.as_i64x4())) + } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_lzcnt_epi64&expand=3499) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO)) + } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lzcnt_epi64&expand=3494) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm_lzcnt_epi64(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctlz(a.as_i64x2())) } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_lzcnt_epi64&expand=3495) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, zerocount, src.as_i64x2())) + } +} + +/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_lzcnt_epi64&expand=3496) +#[inline] +#[target_feature(enable = "avx512cd,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vplzcntq))] +pub fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO)) + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.conflict.d.512"] + fn vpconflictd(a: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.conflict.d.256"] + fn vpconflictd256(a: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.conflict.d.128"] + fn vpconflictd128(a: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx512.conflict.q.512"] + fn vpconflictq(a: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.conflict.q.256"] + fn vpconflictq256(a: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx512.conflict.q.128"] + fn vpconflictq128(a: i64x2) -> i64x2; +} + +#[cfg(test)] +mod tests { + + use crate::core_arch::x86::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_broadcastmw_epi32() { + let a: __mmask16 = 2; + let r = _mm512_broadcastmw_epi32(a); + let e = _mm512_set1_epi32(2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_broadcastmw_epi32() { + let a: __mmask16 = 2; + let r = _mm256_broadcastmw_epi32(a); + let e = _mm256_set1_epi32(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_broadcastmw_epi32() { + let a: __mmask16 = 2; + let r = _mm_broadcastmw_epi32(a); + let e = _mm_set1_epi32(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_broadcastmb_epi64() { + let a: __mmask8 = 2; + let r = _mm512_broadcastmb_epi64(a); + let e = _mm512_set1_epi64(2); + 
assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_broadcastmb_epi64() { + let a: __mmask8 = 2; + let r = _mm256_broadcastmb_epi64(a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_broadcastmb_epi64() { + let a: __mmask8 = 2; + let r = _mm_broadcastmb_epi64(a); + let e = _mm_set1_epi64x(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_conflict_epi32() { + let a = _mm512_set1_epi32(1); + let r = _mm512_conflict_epi32(a); + let e = _mm512_set_epi32( + 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_mask_conflict_epi32() { + let a = 
_mm512_set1_epi32(1); + let r = _mm512_mask_conflict_epi32(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_conflict_epi32(a, 0b11111111_11111111, a); + let e = _mm512_set_epi32( + 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_maskz_conflict_epi32() { + let a = _mm512_set1_epi32(1); + let r = _mm512_maskz_conflict_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_conflict_epi32(0b11111111_11111111, a); + let e = _mm512_set_epi32( + 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 
<< 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_conflict_epi32() { + let a = _mm256_set1_epi32(1); + let r = _mm256_conflict_epi32(a); + let e = _mm256_set_epi32( + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_mask_conflict_epi32() { + let a = _mm256_set1_epi32(1); + let r = _mm256_mask_conflict_epi32(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_conflict_epi32(a, 0b11111111, a); + let e = _mm256_set_epi32( + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 
2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_maskz_conflict_epi32() { + let a = _mm256_set1_epi32(1); + let r = _mm256_maskz_conflict_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_conflict_epi32(0b11111111, a); + let e = _mm256_set_epi32( + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_conflict_epi32() { + let a = _mm_set1_epi32(1); + let r = _mm_conflict_epi32(a); + let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_mask_conflict_epi32() { + let a = _mm_set1_epi32(1); + let r = _mm_mask_conflict_epi32(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_conflict_epi32(a, 0b00001111, a); + let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_maskz_conflict_epi32() { + let a = _mm_set1_epi32(1); + let r = _mm_maskz_conflict_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_conflict_epi32(0b00001111, a); + let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_conflict_epi64() { + let a = _mm512_set1_epi64(1); + let r = _mm512_conflict_epi64(a); + let e = _mm512_set_epi64( + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, 
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_mask_conflict_epi64() { + let a = _mm512_set1_epi64(1); + let r = _mm512_mask_conflict_epi64(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_conflict_epi64(a, 0b11111111, a); + let e = _mm512_set_epi64( + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_maskz_conflict_epi64() { + let a = _mm512_set1_epi64(1); + let r = _mm512_maskz_conflict_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_conflict_epi64(0b11111111, a); + let e = _mm512_set_epi64( + 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, + 1 << 2 | 1 << 1 | 1 << 0, + 1 << 1 | 1 << 0, + 1 << 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_conflict_epi64() { + let a = _mm256_set1_epi64x(1); + let r = _mm256_conflict_epi64(a); + let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_mask_conflict_epi64() { + let a = _mm256_set1_epi64x(1); + let r = _mm256_mask_conflict_epi64(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_conflict_epi64(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn 
test_mm256_maskz_conflict_epi64() { + let a = _mm256_set1_epi64x(1); + let r = _mm256_maskz_conflict_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_conflict_epi64(0b00001111, a); + let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_conflict_epi64() { + let a = _mm_set1_epi64x(1); + let r = _mm_conflict_epi64(a); + let e = _mm_set_epi64x(1 << 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_mask_conflict_epi64() { + let a = _mm_set1_epi64x(1); + let r = _mm_mask_conflict_epi64(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_conflict_epi64(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_maskz_conflict_epi64() { + let a = _mm_set1_epi64x(1); + let r = _mm_maskz_conflict_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_conflict_epi64(0b00000011, a); + let e = _mm_set_epi64x(1 << 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_lzcnt_epi32() { + let a = _mm512_set1_epi32(1); + let r = _mm512_lzcnt_epi32(a); + let e = _mm512_set1_epi32(31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_mask_lzcnt_epi32() { + let a = _mm512_set1_epi32(1); + let r = _mm512_mask_lzcnt_epi32(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_lzcnt_epi32(a, 0b11111111_11111111, a); + let e = _mm512_set1_epi32(31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_maskz_lzcnt_epi32() { + let a = _mm512_set1_epi32(2); + let r = _mm512_maskz_lzcnt_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_lzcnt_epi32(0b11111111_11111111, a); + let e = _mm512_set1_epi32(30); + 
assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_lzcnt_epi32() { + let a = _mm256_set1_epi32(1); + let r = _mm256_lzcnt_epi32(a); + let e = _mm256_set1_epi32(31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_mask_lzcnt_epi32() { + let a = _mm256_set1_epi32(1); + let r = _mm256_mask_lzcnt_epi32(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_lzcnt_epi32(a, 0b11111111, a); + let e = _mm256_set1_epi32(31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_maskz_lzcnt_epi32() { + let a = _mm256_set1_epi32(1); + let r = _mm256_maskz_lzcnt_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_lzcnt_epi32(0b11111111, a); + let e = _mm256_set1_epi32(31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_lzcnt_epi32() { + let a = _mm_set1_epi32(1); + let r = _mm_lzcnt_epi32(a); + let e = _mm_set1_epi32(31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_mask_lzcnt_epi32() { + let a = _mm_set1_epi32(1); + let r = _mm_mask_lzcnt_epi32(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_lzcnt_epi32(a, 0b00001111, a); + let e = _mm_set1_epi32(31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_maskz_lzcnt_epi32() { + let a = _mm_set1_epi32(1); + let r = _mm_maskz_lzcnt_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_lzcnt_epi32(0b00001111, a); + let e = _mm_set1_epi32(31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_lzcnt_epi64() { + let a = _mm512_set1_epi64(1); + let r = _mm512_lzcnt_epi64(a); + let e = _mm512_set1_epi64(63); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_mask_lzcnt_epi64() { + let a = _mm512_set1_epi64(1); + let r = 
_mm512_mask_lzcnt_epi64(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_lzcnt_epi64(a, 0b11111111, a); + let e = _mm512_set1_epi64(63); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd")] + unsafe fn test_mm512_maskz_lzcnt_epi64() { + let a = _mm512_set1_epi64(2); + let r = _mm512_maskz_lzcnt_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_lzcnt_epi64(0b11111111, a); + let e = _mm512_set1_epi64(62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_lzcnt_epi64() { + let a = _mm256_set1_epi64x(1); + let r = _mm256_lzcnt_epi64(a); + let e = _mm256_set1_epi64x(63); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_mask_lzcnt_epi64() { + let a = _mm256_set1_epi64x(1); + let r = _mm256_mask_lzcnt_epi64(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_lzcnt_epi64(a, 0b00001111, a); + let e = _mm256_set1_epi64x(63); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm256_maskz_lzcnt_epi64() { + let a = _mm256_set1_epi64x(1); + let r = _mm256_maskz_lzcnt_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_lzcnt_epi64(0b00001111, a); + let e = _mm256_set1_epi64x(63); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_lzcnt_epi64() { + let a = _mm_set1_epi64x(1); + let r = _mm_lzcnt_epi64(a); + let e = _mm_set1_epi64x(63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_mask_lzcnt_epi64() { + let a = _mm_set1_epi64x(1); + let r = _mm_mask_lzcnt_epi64(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_lzcnt_epi64(a, 0b00001111, a); + let e = _mm_set1_epi64x(63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512cd,avx512vl")] + unsafe fn test_mm_maskz_lzcnt_epi64() { + let a = _mm_set1_epi64x(1); + let r = _mm_maskz_lzcnt_epi64(0, a); + 
assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_lzcnt_epi64(0b00001111, a); + let e = _mm_set1_epi64x(63); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512dq.rs b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs new file mode 100644 index 000000000000..c90ec894f217 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs @@ -0,0 +1,10955 @@ +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, + mem::transmute, +}; + +// And // + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let and = _mm_and_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, and, src.as_f64x2())) + } +} + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let and = _mm_and_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, and, f64x2::ZERO)) + } +} + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let and = _mm256_and_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, and, src.as_f64x4())) + } +} + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let and = _mm256_and_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, and, f64x4::ZERO)) + } +} + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandp))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } +} + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let and = _mm512_and_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, and, src.as_f64x8())) + } +} + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let and = _mm512_and_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, and, f64x8::ZERO)) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let and = _mm_and_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, and, src.as_f32x4())) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let and = _mm_and_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, and, f32x4::ZERO)) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let and = _mm256_and_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, and, src.as_f32x8())) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let and = _mm256_and_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, and, f32x8::ZERO)) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(simd_and( + transmute::<_, u32x16>(a), + transmute::<_, u32x16>(b), + )) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let and = _mm512_and_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, and, src.as_f32x16())) + } +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let and = _mm512_and_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, and, f32x16::ZERO)) + } +} + +// Andnot + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let andnot = _mm_andnot_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, andnot, src.as_f64x2())) + } +} + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let andnot = _mm_andnot_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, andnot, f64x2::ZERO)) + } +} + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let andnot = _mm256_andnot_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, andnot, src.as_f64x4())) + } +} + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let andnot = _mm256_andnot_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, andnot, f64x4::ZERO)) + } +} + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandnp))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) } +} + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandnpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let andnot = _mm512_andnot_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, andnot, src.as_f64x8())) + } +} + +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandnpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let andnot = _mm512_andnot_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, andnot, f64x8::ZERO)) + } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let andnot = _mm_andnot_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, andnot, src.as_f32x4())) + } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let andnot = _mm_andnot_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, andnot, f32x4::ZERO)) + } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let andnot = _mm256_andnot_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, andnot, src.as_f32x8())) + } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let andnot = _mm256_andnot_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, andnot, f32x8::ZERO)) + } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { + unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let andnot = _mm512_andnot_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, andnot, src.as_f32x16())) + } +} + +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vandnps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let andnot = _mm512_andnot_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, andnot, f32x16::ZERO)) + } +} + +// Or + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let or = _mm_or_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, or, src.as_f64x2())) + } +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let or = _mm_or_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, or, f64x2::ZERO)) + } +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let or = _mm256_or_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, or, src.as_f64x4())) + } +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let or = _mm256_or_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, or, f64x4::ZERO)) + } +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vorp))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let or = _mm512_or_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, or, src.as_f64x8())) + } +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let or = _mm512_or_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, or, f64x8::ZERO)) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let or = _mm_or_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, or, src.as_f32x4())) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let or = _mm_or_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, or, f32x4::ZERO)) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let or = _mm256_or_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, or, src.as_f32x8())) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let or = _mm256_or_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, or, f32x8::ZERO)) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(simd_or( + transmute::<_, u32x16>(a), + transmute::<_, u32x16>(b), + )) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let or = _mm512_or_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, or, src.as_f32x16())) + } +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let or = _mm512_or_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, or, f32x16::ZERO)) + } +} + +// Xor + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let xor = _mm_xor_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, xor, src.as_f64x2())) + } +} + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let xor = _mm_xor_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, xor, f64x2::ZERO)) + } +} + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let xor = _mm256_xor_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, xor, src.as_f64x4())) + } +} + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let xor = _mm256_xor_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, xor, f64x4::ZERO)) + } +} + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vxorp))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } +} + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vxorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let xor = _mm512_xor_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, xor, src.as_f64x8())) + } +} + +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vxorpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let xor = _mm512_xor_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, xor, f64x8::ZERO)) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let xor = _mm_xor_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, xor, src.as_f32x4())) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let xor = _mm_xor_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, xor, f32x4::ZERO)) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let xor = _mm256_xor_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, xor, src.as_f32x8())) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let xor = _mm256_xor_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, xor, f32x8::ZERO)) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(simd_xor( + transmute::<_, u32x16>(a), + transmute::<_, u32x16>(b), + )) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let xor = _mm512_xor_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, xor, src.as_f32x16())) + } +} + +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vxorps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let xor = _mm512_xor_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, xor, f32x16::ZERO)) + } +} + +// Broadcast + +/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 { + unsafe { + let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vbroadcastf32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 { + unsafe { + let b = _mm256_broadcast_f32x2(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } +} + +/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vbroadcastf32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { + unsafe { + let b = _mm256_broadcast_f32x2(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) + } +} + +/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 { + unsafe { + let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vbroadcastf32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x2(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, src.as_f32x16())) + } +} + +/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vbroadcastf32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x2(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, f32x16::ZERO)) + } +} + +/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 { + unsafe { + let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); + transmute(b) + } +} + +/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x8(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, src.as_f32x16())) + } +} + +/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all +/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x8(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, f32x16::ZERO)) + } +} + +/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all +/// elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d { + unsafe { + let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all +/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let b = _mm256_broadcast_f64x2(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, src.as_f64x4())) + } +} + +/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all +/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let b = _mm256_broadcast_f64x2(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) + } +} + +/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all +/// elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { + unsafe { + let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all +/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let b = _mm512_broadcast_f64x2(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } +} + +/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all +/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let b = _mm512_broadcast_f64x2(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k +/// (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vbroadcasti32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let b = _mm_broadcast_i32x2(a).as_i32x4(); + transmute(simd_select_bitmask(k, b, src.as_i32x4())) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k +/// (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vbroadcasti32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let b = _mm_broadcast_i32x2(a).as_i32x4(); + transmute(simd_select_bitmask(k, b, i32x4::ZERO)) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i32x4(); + let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k +/// (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vbroadcasti32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let b = _mm256_broadcast_i32x2(a).as_i32x8(); + transmute(simd_select_bitmask(k, b, src.as_i32x8())) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k +/// (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vbroadcasti32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let b = _mm256_broadcast_i32x2(a).as_i32x8(); + transmute(simd_select_bitmask(k, b, i32x8::ZERO)) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i32x4(); + let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k +/// (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vbroadcasti32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i32x2(a).as_i32x16(); + transmute(simd_select_bitmask(k, b, src.as_i32x16())) + } +} + +/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k +/// (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vbroadcasti32x2))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i32x2(a).as_i32x16(); + transmute(simd_select_bitmask(k, b, i32x16::ZERO)) + } +} + +/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i32x8(); + let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); + transmute(b) + } +} + +/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k +/// (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i32x8(a).as_i32x16(); + transmute(simd_select_bitmask(k, b, src.as_i32x16())) + } +} + +/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k +/// (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i32x8(a).as_i32x16(); + transmute(simd_select_bitmask(k, b, i32x16::ZERO)) + } +} + +/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i64x2(); + let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k +/// (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let b = _mm256_broadcast_i64x2(a).as_i64x4(); + transmute(simd_select_bitmask(k, b, src.as_i64x4())) + } +} + +/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k +/// (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let b = _mm256_broadcast_i64x2(a).as_i64x4(); + transmute(simd_select_bitmask(k, b, i64x4::ZERO)) + } +} + +/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i64x2(); + let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } +} + +/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k +/// (elements are copied from src if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i64x2(a).as_i64x8(); + transmute(simd_select_bitmask(k, b, src.as_i64x8())) + } +} + +/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k +/// (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i64x2(a).as_i64x8(); + transmute(simd_select_bitmask(k, b, i64x8::ZERO)) + } +} + +// Extract + +/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, +/// selected with IMM8, and stores the result in dst. 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        match IMM8 & 1 {
+            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
+            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
+        }
+    }
+}
+
+/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
+/// if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m512) -> __m256 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_extractf32x8_ps::<IMM8>(a);
+        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
+    }
+}
+
+/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_extractf32x8_ps::<IMM8>(a);
+        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        match IMM8 & 1 {
+            0 => simd_shuffle!(a, a, [0, 1]),
+            _ => simd_shuffle!(a, a, [2, 3]),
+        }
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
+/// if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
+    src: __m128d,
+    k: __mmask8,
+    a: __m256d,
+) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm256_extractf64x2_pd::<IMM8>(a);
+        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm256_extractf64x2_pd::<IMM8>(a);
+        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        match IMM8 & 3 {
+            0 => simd_shuffle!(a, a, [0, 1]),
+            1 => simd_shuffle!(a, a, [2, 3]),
+            2 => simd_shuffle!(a, a, [4, 5]),
+            _ => simd_shuffle!(a, a, [6, 7]),
+        }
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
+/// if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
+    src: __m128d,
+    k: __mmask8,
+    a: __m512d,
+) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
+        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
+        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
+    }
+}
+
+/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
+/// the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let a = a.as_i32x16();
+        let b: i32x8 = match IMM8 & 1 {
+            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
+            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
+        };
+        transmute(b)
+    }
+}
+
+/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
+        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
+    }
+}
+
+/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
+        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let a = a.as_i64x4();
+        match IMM8 & 1 {
+            0 => simd_shuffle!(a, a, [0, 1]),
+            _ => simd_shuffle!(a, a, [2, 3]),
+        }
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let a = a.as_i64x8();
+        match IMM8 & 3 {
+            0 => simd_shuffle!(a, a, [0, 1]),
+            1 => simd_shuffle!(a, a, [2, 3]),
+            2 => simd_shuffle!(a, a, [4, 5]),
+            _ => simd_shuffle!(a, a, [6, 7]),
+        }
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
+    }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
+    }
+}
+
+// Insert
+
+/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
+/// elements) from b into dst at the location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_castps256_ps512(b);
+        match IMM8 & 1 {
+            0 => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
+                )
+            }
+            _ => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
+                )
+            }
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_insertf32x8<const IMM8: i32>(
+    src: __m512,
+    k: __mmask16,
+    a: __m512,
+    b: __m256,
+) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm512_insertf32x8::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m256) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
+        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into dst at the location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm256_castpd128_pd256(b);
+        match IMM8 & 1 {
+            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
+            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_mask_insertf64x2<const IMM8: i32>(
+    src: __m256d,
+    k: __mmask8,
+    a: __m256d,
+    b: __m128d,
+) -> __m256d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm256_insertf64x2::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m128d) -> __m256d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
+        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into dst at the location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let b = _mm512_castpd128_pd512(b);
+        match IMM8 & 3 {
+            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
+            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
+            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
+            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_insertf64x2<const IMM8: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+    b: __m128d,
+) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let c = _mm512_insertf64x2::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m128d) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
+        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
+/// location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let a = a.as_i32x16();
+        let b = _mm512_castsi256_si512(b).as_i32x16();
+        let r: i32x16 = match IMM8 & 1 {
+            0 => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
+                )
+            }
+            _ => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
+                )
+            }
+        };
+        transmute(r)
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
+/// the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_inserti32x8<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m256i,
+) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm512_inserti32x8::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m256i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
+        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
+/// location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let a = a.as_i64x4();
+        let b = _mm256_castsi128_si256(b).as_i64x4();
+        match IMM8 & 1 {
+            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
+            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
+/// the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_mask_inserti64x2<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m128i,
+) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm256_inserti64x2::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
+/// location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let a = a.as_i64x8();
+        let b = _mm512_castsi128_si512(b).as_i64x8();
+        match IMM8 & 3 {
+            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
+            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
+            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
+            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
+/// the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_inserti64x2<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m128i,
+) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let c = _mm512_inserti64x2::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m128i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
+    }
+}
+
+// Convert
+
+/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst. 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
+        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+/// Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
+        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2pd))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
+    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
+}
+
+/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set).
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepi64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, src.as_f64x2())) + } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepi64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) + } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { + unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepi64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, src.as_f64x4())) + } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepi64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) + } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { + unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepi64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } +} + +/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepi64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst. 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
+    src: __m256,
+    k: __mmask8,
+    a: __m512i,
+) -> __m256 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
+        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+/// Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
+        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ps))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
+    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
+}
+
+/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set).
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { + _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a) +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { + unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepi64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, src.as_f32x4())) + } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepi64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, f32x4::ZERO)) + } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { + unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { + unsafe { + let b = _mm512_cvtepi64_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } +} + +/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtqq2ps))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
+    unsafe {
+        let b = _mm512_cvtepi64_ps(a).as_f32x8();
+        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
+        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+/// Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
+        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
+    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set).
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepu64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, src.as_f64x2())) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepu64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { + unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepu64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, src.as_f64x4())) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepu64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { + unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepu64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtuqq2pd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepu64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst. 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
+    src: __m256,
+    k: __mmask8,
+    a: __m512i,
+) -> __m256 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
+        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
+        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
+    }
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
+    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { + _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a) +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { + unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepu64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, src.as_f32x4())) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepu64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, f32x4::ZERO)) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { + unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtuqq2ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { + unsafe { + let b = _mm512_cvtepu64_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
+    unsafe {
+        let b = _mm512_cvtepu64_ps(a).as_f32x8();
+        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512d,
+) -> __m512i {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2qq))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
+    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { + _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { + _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { + _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { + _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvtpd2qq_512( + a.as_f64x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { + _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst. 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m256,
+) -> __m512i {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
+    }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// Rounding is done according to the ROUNDING parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2qq))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
+    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
+/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i { + _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i { + _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i { + _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i { + _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvtps2qq_512( + a.as_f32x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i { + _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. 
Rounding is done according to the ROUNDING parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvt_roundpd_epu64(a: __m512d) -> __m512i { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundpd_epu64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvt_roundpd_epu64( + src: __m512i, + k: __mmask8, + a: __m512d, +) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING)) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// Rounding is done according to the ROUNDING parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvt_roundpd_epu64(k: __mmask8, a: __m512d) -> __m512i { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundpd_epu64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i { + _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i { + _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i { + _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i { + _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i { + _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvtpd2uqq_512( + a.as_f64x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i { + _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. 
Rounding is done according to the ROUNDING parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvt_roundps_epu64(a: __m256) -> __m512i { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundps_epu64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
Rounding is done according to the ROUNDING parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvt_roundps_epu64( + src: __m512i, + k: __mmask8, + a: __m256, +) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// Rounding is done according to the ROUNDING parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvt_roundps_epu64(k: __mmask8, a: __m256) -> __m512i { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundps_epu64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvtps_epu64(a: __m128) -> __m128i { + _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i { + _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i { + _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i { + _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i { + _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is +/// not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvtps2uqq_512( + a.as_f32x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvtps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i { + _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// to the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtt_roundpd_epi64(a: __m512d) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundpd_epi64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtt_roundpd_epi64( + src: __m512i, + k: __mmask8, + a: __m512d, +) -> __m512i { + unsafe { + static_assert_sae!(SAE); + transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE)) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtt_roundpd_epi64(k: __mmask8, a: __m512d) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundpd_epi64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i { + _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i { + _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i { + _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i { + _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i { + _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvttpd2qq_512( + a.as_f64x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i { + _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// to the sae parameter. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtt_roundps_epi64(a: __m256) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundps_epi64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvtt_roundps_epi64( + src: __m512i, + k: __mmask8, + a: __m256, +) -> __m512i { + unsafe { + static_assert_sae!(SAE); + transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvtt_roundps_epi64(k: __mmask8, a: __m256) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundps_epi64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvttps_epi64(a: __m128) -> __m128i { + _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i { + _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i { + _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i { + _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i { + _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvttps2qq_512( + a.as_f32x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2qq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i { + _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// to the sae parameter. 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
+/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512d,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
+/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
+    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
+/// bit is not set).
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { + _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { + _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { + _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttpd2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { + _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
+    unsafe {
+        transmute(vcvttpd2uqq_512(
+            a.as_f64x8(),
+            src.as_u64x8(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
+/// bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
+    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// to the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
+/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m256,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
+    }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
+/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2uqq))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_cvttps_epu64(a: __m128) -> __m128i {
+    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
+/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
+/// corresponding bit is not set).
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { + _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i { + _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { + _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i { + _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvttps2uqq_512( + a.as_f32x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers +/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vcvttps2uqq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { + _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a) +} + +// Multiply-Low + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst`. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from +/// `src` if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let b = _mm_mullo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, b, src.as_i64x2())) + } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if +/// the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let b = _mm_mullo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) + } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst`. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from +/// `src` if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let b = _mm256_mullo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, b, src.as_i64x4())) + } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if +/// the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let b = _mm256_mullo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, b, i64x4::ZERO)) + } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst`. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from +/// `src` if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let b = _mm512_mullo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, b, src.as_i64x8())) + } +} + +/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store +/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if +/// the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vpmullq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let b = _mm512_mullo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, b, i64x8::ZERO)) + } +} + +// Mask Registers + +/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _cvtmask8_u32(a: __mmask8) -> u32 { + a as u32 +} + +/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _cvtu32_mask8(a: u32) -> __mmask8 { + a as __mmask8 +} + +/// Add 16-bit masks a and b, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + a + b +} + +/// Add 8-bit masks a and b, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { + a + b +} + +/// Bitwise AND of 8-bit masks a and b, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { + a & b +} + +/// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { + _knot_mask8(a) & b +} + +/// Bitwise NOT of 8-bit mask a, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _knot_mask8(a: __mmask8) -> __mmask8 { + a ^ 0b11111111 +} + +/// Bitwise OR of 8-bit masks a and b, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { + a | b +} + +/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { + _knot_mask8(_kxor_mask8(a, b)) +} + +/// Bitwise XOR of 8-bit masks a and b, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { + a ^ b +} + +/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 { + let tmp = _kor_mask8(a, b); + *all_ones = (tmp == 0xff) as u8; + (tmp == 0) as u8 +} + +/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise +/// store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { + (_kor_mask8(a, b) == 0xff) as u8 +} + +/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
+    (_kor_mask8(a, b) == 0) as u8
+}
+
+/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
+    a << COUNT
+}
+
+/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
+    a >> COUNT
+}
+
+/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
+/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
+/// zeros, store 1 in and_not, otherwise store 0 in and_not.
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 { + *and_not = (_kandn_mask16(a, b) == 0) as u8; + (_kand_mask16(a, b) == 0) as u8 +} + +/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst, +/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all +/// zeros, store 1 in and_not, otherwise store 0 in and_not. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 { + *and_not = (_kandn_mask8(a, b) == 0) as u8; + (_kand_mask8(a, b) == 0) as u8 +} + +/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b, if the result is all +/// zeros, store 1 in dst, otherwise store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { + (_kandn_mask16(a, b) == 0) as u8 +} + +/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b, if the result is all +/// zeros, store 1 in dst, otherwise store 0 in dst. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { + (_kandn_mask8(a, b) == 0) as u8 +} + +/// Compute the bitwise AND of 16-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { + (_kand_mask16(a, b) == 0) as u8 +} + +/// Compute the bitwise AND of 8-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise +/// store 0 in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { + (_kand_mask8(a, b) == 0) as u8 +} + +/// Load 8-bit mask from memory +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 { + *mem_addr +} + +/// Store 8-bit mask to memory +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn 
_store_mask8(mem_addr: *mut __mmask8, a: __mmask8) { + *mem_addr = a; +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit +/// integer in a. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { + let zero = _mm_setzero_si128(); + _mm_cmplt_epi32_mask(a, zero) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit +/// integer in a. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { + let zero = _mm256_setzero_si256(); + _mm256_cmplt_epi32_mask(a, zero) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit +/// integer in a. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { + let zero = _mm512_setzero_si512(); + _mm512_cmplt_epi32_mask(a, zero) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit +/// integer in a. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { + let zero = _mm_setzero_si128(); + _mm_cmplt_epi64_mask(a, zero) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit +/// integer in a. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 { + let zero = _mm256_setzero_si256(); + _mm256_cmplt_epi64_mask(a, zero) +} + +/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit +/// integer in a. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617) +#[inline] +#[target_feature(enable = "avx512dq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { + let zero = _mm512_setzero_si512(); + _mm512_cmplt_epi64_mask(a, zero) +} + +/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding +/// bit in k. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovm2d))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_movm_epi32(k: __mmask8) -> __m128i { + let ones = _mm_set1_epi32(-1); + _mm_maskz_mov_epi32(k, ones) +} + +/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding +/// bit in k. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovm2d))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i { + let ones = _mm256_set1_epi32(-1); + _mm256_maskz_mov_epi32(k, ones) +} + +/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding +/// bit in k. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vpmovm2d))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i { + let ones = _mm512_set1_epi32(-1); + _mm512_maskz_mov_epi32(k, ones) +} + +/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding +/// bit in k. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovm2q))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_movm_epi64(k: __mmask8) -> __m128i { + let ones = _mm_set1_epi64x(-1); + _mm_maskz_mov_epi64(k, ones) +} + +/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding +/// bit in k. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovm2q))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i { + let ones = _mm256_set1_epi64x(-1); + _mm256_maskz_mov_epi64(k, ones) +} + +/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding +/// bit in k. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vpmovm2q))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i { + let ones = _mm512_set1_epi64(-1); + _mm512_maskz_mov_epi64(k, ones) +} + +// Range + +/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed +/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. +/// Lower 2 bits of IMM8 specifies the operation control: +/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        static_assert_sae!(SAE);
+        transmute(vrangepd_512(
+            a.as_f64x8(),
+            b.as_f64x8(),
+            IMM8,
+            src.as_f64x8(),
+            k,
+            SAE,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_range_pd<const IMM8: i32>(
+    src: __m128d,
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        transmute(vrangepd_128(
+            a.as_f64x2(),
+            b.as_f64x2(),
+            IMM8,
+            src.as_f64x2(),
+            k,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_mask_range_pd<const IMM8: i32>(
+    src: __m256d,
+    k: __mmask8,
+    a: __m256d,
+    b: __m256d,
+) -> __m256d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        transmute(vrangepd_256(
+            a.as_f64x4(),
+            b.as_f64x4(),
+            IMM8,
+            src.as_f64x4(),
+            k,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_range_pd<const IMM8: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        transmute(vrangepd_512(
+            a.as_f64x8(),
+            b.as_f64x8(),
+            IMM8,
+            src.as_f64x8(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
+    src: __m512,
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        static_assert_sae!(SAE);
+        transmute(vrangeps_512(
+            a.as_f32x16(),
+            b.as_f32x16(),
+            IMM8,
+            src.as_f32x16(),
+            k,
+            SAE,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+) -> __m512 {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_range_ps<const IMM8: i32>(
+    src: __m128,
+    k: __mmask8,
+    a: __m128,
+    b: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        transmute(vrangeps_128(
+            a.as_f32x4(),
+            b.as_f32x4(),
+            IMM8,
+            src.as_f32x4(),
+            k,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
+/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control: +/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_range_ps( + src: __m256, + k: __mmask8, + a: __m256, + b: __m256, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangeps_256( + a.as_f32x8(), + b.as_f32x8(), + IMM8, + src.as_f32x8(), + k, + )) + } +} + +/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed +/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out if the corresponding mask bit is not set). +/// Lower 2 bits of IMM8 specifies the operation control: +/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. +/// Upper 2 bits of IMM8 specifies the sign control: +/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_range_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 4); + _mm256_mask_range_ps::(_mm256_setzero_ps(), k, a, b) +} + +/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed +/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. +/// Lower 2 bits of IMM8 specifies the operation control: +/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. +/// Upper 2 bits of IMM8 specifies the sign control: +/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_range_ps(a: __m512, b: __m512) -> __m512 { + static_assert_uimm_bits!(IMM8, 4); + _mm512_mask_range_ps::(_mm512_setzero_ps(), 0xffff, a, b) +} + +/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed +/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). +/// Lower 2 bits of IMM8 specifies the operation control: +/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+/// Upper 2 bits of IMM8 specifies the sign control: +/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_range_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangeps_512( + a.as_f32x16(), + b.as_f32x16(), + IMM8, + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed +/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out if the corresponding mask bit is not set). +/// Lower 2 bits of IMM8 specifies the operation control: +/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. +/// Upper 2 bits of IMM8 specifies the sign control: +/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_range_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + static_assert_uimm_bits!(IMM8, 4); + _mm512_mask_range_ps::(_mm512_setzero_ps(), k, a, b) +} + +/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower +/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element +/// of dst, and copy the upper element from a to the upper element of dst. +/// Lower 2 bits of IMM8 specifies the operation control: +/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. +/// Upper 2 bits of IMM8 specifies the sign control: +/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
+/// upper element from a to the upper element of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
+    src: __m128d,
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        static_assert_sae!(SAE);
+        transmute(vrangesd(
+            a.as_f64x2(),
+            b.as_f64x2(),
+            src.as_f64x2(),
+            k,
+            IMM8,
+            SAE,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
+/// element from a to the upper element of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
+/// upper element from a to the upper element of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_range_sd<const IMM8: i32>(
+    src: __m128d,
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        transmute(vrangesd(
+            a.as_f64x2(),
+            b.as_f64x2(),
+            src.as_f64x2(),
+            k,
+            IMM8,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
+/// element from a to the upper element of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
+/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
+/// upper 3 packed elements from a to the upper elements of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
+    src: __m128,
+    k: __mmask8,
+    a: __m128,
+    b: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        static_assert_sae!(SAE);
+        transmute(vrangess(
+            a.as_f32x4(),
+            b.as_f32x4(),
+            src.as_f32x4(),
+            k,
+            IMM8,
+            SAE,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
+/// 3 packed elements from a to the upper elements of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
+    k: __mmask8,
+    a: __m128,
+    b: __m128,
+) -> __m128 {
+    static_assert_uimm_bits!(IMM8, 4);
+    static_assert_sae!(SAE);
+    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
+/// upper 3 packed elements from a to the upper elements of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_range_ss<const IMM8: i32>(
+    src: __m128,
+    k: __mmask8,
+    a: __m128,
+    b: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 4);
+        transmute(vrangess(
+            a.as_f32x4(),
+            b.as_f32x4(),
+            src.as_f32x4(),
+            k,
+            IMM8,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
+/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
+/// 3 packed elements from a to the upper elements of dst.
+/// Lower 2 bits of IMM8 specifies the operation control:
+/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+/// Upper 2 bits of IMM8 specifies the sign control:
+/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    static_assert_uimm_bits!(IMM8, 4);
+    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
+}
+
+// Reduce
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst.
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
+/// copied from src to dst if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        static_assert_sae!(SAE);
+        transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
+    }
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
+/// zeroed out if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
+    k: __mmask8,
+    a: __m512d,
+) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst.
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
+/// copied from src to dst if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
+    }
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
+/// zeroed out if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst.
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
+/// copied from src to dst if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
+    }
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
+/// zeroed out if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst.
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
+/// copied from src to dst if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        transmute(vreducepd_512(
+            a.as_f64x8(),
+            IMM8,
+            src.as_f64x8(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
+/// zeroed out if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
+}
+
+/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst.
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
+}
+
+/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
+/// copied from src to dst if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
+    src: __m512,
+    k: __mmask16,
+    a: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        static_assert_sae!(SAE);
+        transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
+    }
+}
+
+/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
+/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
+/// zeroed out if the corresponding mask bit is not set).
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_reduce_round_ps( + k: __mmask16, + a: __m512, +) -> __m512 { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm512_mask_reduce_round_ps::(_mm512_setzero_ps(), k, a) +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_ps(a: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_reduce_ps::(_mm_undefined_ps(), 0xff, a) +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are +/// copied from src to dst if the corresponding mask bit is not set). 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k)) + } +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are +/// zeroed out if the corresponding mask bit is not set). 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_reduce_ps(k: __mmask8, a: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_reduce_ps::(_mm_setzero_ps(), k, a) +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst. 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_reduce_ps(a: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_reduce_ps::(_mm256_undefined_ps(), 0xff, a) +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are +/// copied from src to dst if the corresponding mask bit is not set). 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_reduce_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k)) + } +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are +/// zeroed out if the corresponding mask bit is not set). 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_reduce_ps(k: __mmask8, a: __m256) -> __m256 { + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_reduce_ps::(_mm256_setzero_ps(), k, a) +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst. 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_ps(a: __m512) -> __m512 { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_reduce_ps::(_mm512_undefined_ps(), 0xffff, a) +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are +/// copied from src to dst if the corresponding mask bit is not set). 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreduceps_512( + a.as_f32x16(), + IMM8, + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by +/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are +/// zeroed out if the corresponding mask bit is not set). 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_reduce_ps(k: __mmask16, a: __m512) -> __m512 { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_reduce_ps::(_mm512_setzero_ps(), k, a) +} + +/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy +/// the upper element from a to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm_mask_reduce_round_sd::(_mm_undefined_pd(), 0xff, a, b) +} + +/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask +/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + transmute(vreducesd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + IMM8, + SAE, + )) + } +} + +/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask +/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_reduce_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm_mask_reduce_round_sd::(_mm_setzero_pd(), k, a, b) +} + +/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using, and +/// copy the upper element from a. +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_reduce_sd::(_mm_undefined_pd(), 0xff, a, b) +} + +/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask +/// k (the element is copied from src when mask bit 0 is not set), and copy the 
upper element from a +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducesd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + IMM8, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask +/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a +/// to the upper element of dst. 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_reduce_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_reduce_sd::(_mm_setzero_pd(), k, a, b) +} + +/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy +/// the upper element from a. +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm_mask_reduce_round_ss::(_mm_undefined_ps(), 0xff, a, b) +} + +/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask +/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a. +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + transmute(vreducess( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + IMM8, + SAE, + )) + } +} + +/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask +/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a. +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_reduce_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm_mask_reduce_round_ss::(_mm_setzero_ps(), k, a, b) +} + +/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy +/// the upper element from a. +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_reduce_ss(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_reduce_ss::(_mm_undefined_ps(), 0xff, a, b) +} + +/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask +/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element 
from a. +/// to the upper element of dst. +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_reduce_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducess( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + IMM8, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b +/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask +/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a. +/// to the upper element of dst. 
+/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_maskz_reduce_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_reduce_ss::(_mm_setzero_ps(), k, a, b) +} + +// FP-Class + +/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_fpclass_pd_mask(a: __m128d) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_fpclass_pd_mask::(0xff, a) +} + +/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_fpclass_pd_mask(k1: __mmask8, a: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1)) + } +} + +/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_fpclass_pd_mask(a: __m256d) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_fpclass_pd_mask::(0xff, a) +} + +/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_fpclass_pd_mask(k1: __mmask8, a: __m256d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1)) + } +} + +/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_fpclass_pd_mask(a: __m512d) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_fpclass_pd_mask::(0xff, a) +} + +/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_fpclass_pd_mask(k1: __mmask8, a: __m512d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1)) + } +} + +/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_fpclass_ps_mask(a: __m128) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_fpclass_ps_mask::(0xff, a) +} + +/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_fpclass_ps_mask(k1: __mmask8, a: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1)) + } +} + +/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_fpclass_ps_mask(a: __m256) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_fpclass_ps_mask::(0xff, a) +} + +/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508) +#[inline] +#[target_feature(enable = "avx512dq,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_fpclass_ps_mask(k1: __mmask8, a: __m256) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1)) + } +} + +/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_fpclass_ps_mask(a: __m512) -> __mmask16 { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_fpclass_ps_mask::(0xffff, a) +} + +/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_fpclass_ps_mask(k1: __mmask16, a: __m512) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1)) + } +} + +/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_fpclass_sd_mask(a: __m128d) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_fpclass_sd_mask::(0xff, a) +} + +/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_fpclass_sd_mask(k1: __mmask8, a: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vfpclasssd(a.as_f64x2(), IMM8, k1) + } +} + +/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_fpclass_ss_mask(a: __m128) -> __mmask8 { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_fpclass_ss_mask::(0xff, a) +} + +/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// - 0x01 // QNaN +/// - 0x02 // Positive Zero +/// - 0x04 // Negative Zero +/// - 0x08 // Positive Infinity +/// - 0x10 // Negative Infinity +/// - 0x20 // Denormal +/// - 0x40 // Negative +/// - 0x80 // SNaN +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516) +#[inline] +#[target_feature(enable = "avx512dq")] +#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_mask_fpclass_ss_mask(k1: __mmask8, a: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vfpclassss(a.as_f32x4(), IMM8, k1) + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"] + fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"] + fn vcvtqq2pd_256(a: 
i64x4, rounding: i32) -> f64x4; + #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"] + fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"] + fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4; + #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"] + fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"] + fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8; + + #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"] + fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"] + fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4; + #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"] + fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"] + fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4; + #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"] + fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"] + fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8; + + #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"] + fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"] + fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"] + fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"] + fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"] + fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"] + fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"] + fn 
vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; + #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"] + fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; + #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"] + fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; + + #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"] + fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; + #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"] + fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; + #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"] + fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; + + #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"] + fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"] + fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"] + fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"] + fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"] + fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"] + fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"] + fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; + #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"] + fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"] + fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; + + #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"] + fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; + #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"] + fn 
vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; + #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"] + fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; + + #[link_name = "llvm.x86.avx512.mask.range.pd.128"] + fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.range.pd.256"] + fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.range.pd.512"] + fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.range.ps.128"] + fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.range.ps.256"] + fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.range.ps.512"] + fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) + -> f32x16; + + #[link_name = "llvm.x86.avx512.mask.range.sd"] + fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.range.ss"] + fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"] + fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"] + fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"] + fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"] + fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"] + fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; + #[link_name = 
"llvm.x86.avx512.mask.reduce.ps.512"] + fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16; + + #[link_name = "llvm.x86.avx512.mask.reduce.sd"] + fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.reduce.ss"] + fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"] + fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; + #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"] + fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8; + #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"] + fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8; + + #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"] + fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; + #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"] + fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8; + #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"] + fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16; + + #[link_name = "llvm.x86.avx512.mask.fpclass.sd"] + fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; + #[link_name = "llvm.x86.avx512.mask.fpclass.ss"] + fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; +} + +#[cfg(test)] +mod tests { + use super::*; + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::mem::transmute; + + const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) }; + const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) }; + + const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) }; + const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) }; + const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) }; + const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) }; + + const OPRND1_32: f32 = unsafe { 
transmute(0x33333333_u32) }; + const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) }; + + const AND_32: f32 = unsafe { transmute(0x11111111_u32) }; + const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) }; + const OR_32: f32 = unsafe { transmute(0x77777777_u32) }; + const XOR_32: f32 = unsafe { transmute(0x66666666_u32) }; + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_and_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_and_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., AND_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_and_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_and_pd(0b01, a, b); + let e = _mm_set_pd(0.0, AND_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_and_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_and_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., AND_64, 3., AND_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_and_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_and_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_and_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_and_pd(a, b); + let e = _mm512_set1_pd(AND_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_and_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_and_pd(src, 0b01010101, a, b); 
+ let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_and_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_and_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_and_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_and_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., AND_32, 3., AND_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_and_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_and_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_and_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_and_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_and_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_and_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_and_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_and_ps(a, b); + let e = _mm512_set1_ps(AND_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_and_ps() { + let 
a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps( + 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32, + 15., AND_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_and_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_and_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps( + 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., + AND_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_andnot_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_andnot_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., ANDN_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_andnot_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_andnot_pd(0b01, a, b); + let e = _mm_set_pd(0.0, ANDN_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_andnot_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_andnot_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_andnot_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_andnot_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + 
unsafe fn test_mm512_andnot_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_andnot_pd(a, b); + let e = _mm512_set1_pd(ANDN_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_andnot_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_andnot_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_andnot_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_andnot_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_andnot_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_andnot_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_andnot_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_andnot_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_andnot_ps() { + let a = 
_mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_andnot_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_andnot_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_andnot_ps(a, b); + let e = _mm512_set1_ps(ANDN_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_andnot_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps( + 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13., + ANDN_32, 15., ANDN_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_andnot_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps( + 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., + ANDN_32, 0., ANDN_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_or_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_or_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., OR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_or_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_or_pd(0b01, a, b); + let e = _mm_set_pd(0.0, OR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_or_pd() { + let a = 
_mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_or_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., OR_64, 3., OR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_or_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_or_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_or_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_or_pd(a, b); + let e = _mm512_set1_pd(OR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_or_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_or_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_or_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_or_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_or_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_or_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., OR_32, 3., OR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_or_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_or_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32); + assert_eq_m128(r, e); + } + + 
#[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_or_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_or_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_or_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_or_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_or_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_or_ps(a, b); + let e = _mm512_set1_ps(OR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_or_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps( + 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15., + OR_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_or_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_or_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps( + 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_xor_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_xor_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., XOR_64); + 
assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_xor_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_xor_pd(0b01, a, b); + let e = _mm_set_pd(0.0, XOR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_xor_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_xor_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., XOR_64, 3., XOR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_xor_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_xor_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_xor_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_xor_pd(a, b); + let e = _mm512_set1_pd(XOR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_xor_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_xor_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_xor_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_xor_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_xor_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 
4.); + let r = _mm_mask_xor_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., XOR_32, 3., XOR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_xor_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_xor_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_xor_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_xor_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_xor_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_xor_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_xor_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_xor_ps(a, b); + let e = _mm512_set1_ps(XOR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_xor_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps( + 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32, + 15., XOR_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_xor_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = 
_mm512_maskz_xor_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps( + 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., + XOR_32, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_broadcast_f32x2() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_broadcast_f32x2(a); + let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_broadcast_f32x2() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.); + let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a); + let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_broadcast_f32x2() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_maskz_broadcast_f32x2(0b01101001, a); + let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_broadcast_f32x2() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm512_broadcast_f32x2(a); + let e = _mm512_set_ps( + 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_broadcast_f32x2() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm512_set_ps( + 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., + ); + let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a); + let e = _mm512_set_ps( + 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_broadcast_f32x2() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a); + let e = _mm512_set_ps( + 0., 4., 3., 
0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_broadcast_f32x8() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_broadcast_f32x8(a); + let e = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_broadcast_f32x8() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_ps( + 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., + ); + let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a); + let e = _mm512_set_ps( + 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_broadcast_f32x8() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a); + let e = _mm512_set_ps( + 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_broadcast_f64x2() { + let a = _mm_set_pd(1., 2.); + let r = _mm256_broadcast_f64x2(a); + let e = _mm256_set_pd(1., 2., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_broadcast_f64x2() { + let a = _mm_set_pd(1., 2.); + let b = _mm256_set_pd(3., 4., 5., 6.); + let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a); + let e = _mm256_set_pd(3., 2., 1., 6.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_broadcast_f64x2() { + let a = _mm_set_pd(1., 2.); + let r = _mm256_maskz_broadcast_f64x2(0b0110, a); + let e = _mm256_set_pd(0., 2., 1., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn 
test_mm512_broadcast_f64x2() { + let a = _mm_set_pd(1., 2.); + let r = _mm512_broadcast_f64x2(a); + let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_broadcast_f64x2() { + let a = _mm_set_pd(1., 2.); + let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); + let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a); + let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_broadcast_f64x2() { + let a = _mm_set_pd(1., 2.); + let r = _mm512_maskz_broadcast_f64x2(0b01101001, a); + let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_broadcast_i32x2(a); + let e = _mm_set_epi32(3, 4, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm_set_epi32(5, 6, 7, 8); + let r = _mm_mask_broadcast_i32x2(b, 0b0110, a); + let e = _mm_set_epi32(5, 4, 3, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_broadcast_i32x2(0b0110, a); + let e = _mm_set_epi32(0, 4, 3, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm256_broadcast_i32x2(a); + let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12); + let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a); + 
let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm256_maskz_broadcast_i32x2(0b01101001, a); + let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm512_broadcast_i32x2(a); + let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); + let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a); + let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_broadcast_i32x2() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a); + let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_broadcast_i32x8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_broadcast_i32x8(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_broadcast_i32x8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi32( + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + ); + let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a); + let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24); + assert_eq_m512i(r, e); + } + + #[simd_test(enable 
= "avx512dq")] + unsafe fn test_mm512_maskz_broadcast_i32x8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a); + let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_broadcast_i64x2() { + let a = _mm_set_epi64x(1, 2); + let r = _mm256_broadcast_i64x2(a); + let e = _mm256_set_epi64x(1, 2, 1, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_broadcast_i64x2() { + let a = _mm_set_epi64x(1, 2); + let b = _mm256_set_epi64x(3, 4, 5, 6); + let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a); + let e = _mm256_set_epi64x(3, 2, 1, 6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_broadcast_i64x2() { + let a = _mm_set_epi64x(1, 2); + let r = _mm256_maskz_broadcast_i64x2(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_broadcast_i64x2() { + let a = _mm_set_epi64x(1, 2); + let r = _mm512_broadcast_i64x2(a); + let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_broadcast_i64x2() { + let a = _mm_set_epi64x(1, 2); + let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10); + let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a); + let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_broadcast_i64x2() { + let a = _mm_set_epi64x(1, 2); + let r = _mm512_maskz_broadcast_i64x2(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_extractf32x8_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 
11., 12., 13., 14., 15., 16., + ); + let r = _mm512_extractf32x8_ps::<1>(a); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_extractf32x8_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a); + let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_extractf32x8_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a); + let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_extractf64x2_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_extractf64x2_pd::<1>(a); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_extractf64x2_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm_set_pd(5., 6.); + let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a); + let e = _mm_set_pd(5., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_extractf64x2_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a); + let e = _mm_set_pd(0., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_extractf64x2_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_extractf64x2_pd::<2>(a); + let e = _mm_set_pd(3., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_extractf64x2_pd() { + let a = 
_mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_pd(9., 10.); + let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a); + let e = _mm_set_pd(9., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_extractf64x2_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a); + let e = _mm_set_pd(0., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_extracti32x8_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_extracti32x8_epi32::<1>(a); + let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_extracti32x8_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a); + let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_extracti32x8_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a); + let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_extracti64x2_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_extracti64x2_epi64::<1>(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_extracti64x2_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm_set_epi64x(5, 6); + let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a); + let e = _mm_set_epi64x(5, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_extracti64x2_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_extracti64x2_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_extracti64x2_epi64::<2>(a); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_extracti64x2_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi64x(9, 10); + let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a); + let e = _mm_set_epi64x(9, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_extracti64x2_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a); + let e = _mm_set_epi64x(0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_insertf32x8() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_insertf32x8::<1>(a, b); + let e = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_insertf32x8() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let src = _mm512_set_ps( + 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., + ); + let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b); + let e = _mm512_set_ps( + 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40., + 
); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_insertf32x8() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b); + let e = _mm512_set_ps( + 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_insertf64x2() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm_set_pd(5., 6.); + let r = _mm256_insertf64x2::<1>(a, b); + let e = _mm256_set_pd(5., 6., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_insertf64x2() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm_set_pd(5., 6.); + let src = _mm256_set_pd(7., 8., 9., 10.); + let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b); + let e = _mm256_set_pd(7., 6., 3., 10.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_insertf64x2() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm_set_pd(5., 6.); + let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b); + let e = _mm256_set_pd(0., 6., 3., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_insertf64x2() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_pd(9., 10.); + let r = _mm512_insertf64x2::<2>(a, b); + let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_insertf64x2() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_pd(9., 10.); + let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.); + let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b); + let e = _mm512_set_pd(11., 2., 
9., 14., 5., 16., 17., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_insertf64x2() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_pd(9., 10.); + let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b); + let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_inserti32x8() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_inserti32x8::<1>(a, b); + let e = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_inserti32x8() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let src = _mm512_set_epi32( + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + ); + let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b); + let e = _mm512_set_epi32( + 25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_inserti32x8() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b); + let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_inserti64x2() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm_set_epi64x(5, 6); + let r = _mm256_inserti64x2::<1>(a, b); + let e = _mm256_set_epi64x(5, 6, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512dq,avx512vl")] + unsafe fn test_mm256_mask_inserti64x2() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm_set_epi64x(5, 6); + let src = _mm256_set_epi64x(7, 8, 9, 10); + let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b); + let e = _mm256_set_epi64x(7, 6, 3, 10); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_inserti64x2() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm_set_epi64x(5, 6); + let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b); + let e = _mm256_set_epi64x(0, 6, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_inserti64x2() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi64x(9, 10); + let r = _mm512_inserti64x2::<2>(a, b); + let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_inserti64x2() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi64x(9, 10); + let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18); + let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b); + let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_inserti64x2() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi64x(9, 10); + let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b); + let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundepi64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundepi64_pd() { + let a = 
_mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundepi64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtepi64_pd() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_cvtepi64_pd(a); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_pd() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_cvtepi64_pd(b, 0b01, a); + let e = _mm_set_pd(3., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_pd() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_maskz_cvtepi64_pd(0b01, a); + let e = _mm_set_pd(0., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtepi64_pd() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepi64_pd(a); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_pd() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_pd(5., 6., 7., 8.); + let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a); + let e = _mm256_set_pd(5., 2., 3., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_pd() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r 
= _mm256_maskz_cvtepi64_pd(0b0110, a); + let e = _mm256_set_pd(0., 2., 3., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtepi64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvtepi64_pd(a); + let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtepi64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a); + let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtepi64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvtepi64_pd(0b01101001, a); + let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundepi64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundepi64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundepi64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 
0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtepi64_ps() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_cvtepi64_ps(a); + let e = _mm_set_ps(0., 0., 1., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_ps() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_ps(3., 4., 5., 6.); + let r = _mm_mask_cvtepi64_ps(b, 0b01, a); + let e = _mm_set_ps(0., 0., 5., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_ps() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_maskz_cvtepi64_ps(0b01, a); + let e = _mm_set_ps(0., 0., 0., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtepi64_ps() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepi64_ps(a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_ps() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm_set_ps(5., 6., 7., 8.); + let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a); + let e = _mm_set_ps(5., 2., 3., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_ps() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepi64_ps(0b0110, a); + let e = _mm_set_ps(0., 2., 3., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtepi64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvtepi64_ps(a); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtepi64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a); 
+ let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtepi64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvtepi64_ps(0b01101001, a); + let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundepu64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundepu64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundepu64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtepu64_pd() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_cvtepu64_pd(a); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtepu64_pd() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_cvtepu64_pd(b, 0b01, a); + let e = _mm_set_pd(3., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtepu64_pd() { + let a = 
_mm_set_epi64x(1, 2); + let r = _mm_maskz_cvtepu64_pd(0b01, a); + let e = _mm_set_pd(0., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtepu64_pd() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepu64_pd(a); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtepu64_pd() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_pd(5., 6., 7., 8.); + let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a); + let e = _mm256_set_pd(5., 2., 3., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu64_pd() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepu64_pd(0b0110, a); + let e = _mm256_set_pd(0., 2., 3., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtepu64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvtepu64_pd(a); + let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtepu64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a); + let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtepu64_pd() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvtepu64_pd(0b01101001, a); + let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundepu64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = 
_mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundepu64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundepu64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtepu64_ps() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_cvtepu64_ps(a); + let e = _mm_set_ps(0., 0., 1., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtepu64_ps() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_ps(3., 4., 5., 6.); + let r = _mm_mask_cvtepu64_ps(b, 0b01, a); + let e = _mm_set_ps(0., 0., 5., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtepu64_ps() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_maskz_cvtepu64_ps(0b01, a); + let e = _mm_set_ps(0., 0., 0., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtepu64_ps() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepu64_ps(a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtepu64_ps() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm_set_ps(5., 6., 7., 8.); + let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a); + let e = 
_mm_set_ps(5., 2., 3., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu64_ps() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepu64_ps(0b0110, a); + let e = _mm_set_ps(0., 2., 3., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtepu64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvtepu64_ps(a); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtepu64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a); + let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtepu64_ps() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvtepu64_ps(0b01101001, a); + let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundpd_epi64() { + let a = _mm512_set_pd(1., 2., 
3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtpd_epi64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_cvtpd_epi64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epi64() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_epi64x(3, 4); + let r = _mm_mask_cvtpd_epi64(b, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epi64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_maskz_cvtpd_epi64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtpd_epi64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_cvtpd_epi64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epi64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epi64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_maskz_cvtpd_epi64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtpd_epi64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn 
test_mm512_mask_cvtpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtpd_epi64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_cvtps_epi64(a); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_epi64x(5, 6); + let r 
= _mm_mask_cvtps_epi64(b, 0b01, a); + let e = _mm_set_epi64x(5, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_maskz_cvtps_epi64(0b01, a); + let e = _mm_set_epi64x(0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_cvtps_epi64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvtps_epi64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_maskz_cvtps_epi64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtps_epi64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtps_epi64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn 
test_mm512_cvt_roundpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtpd_epu64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_cvtpd_epu64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epu64() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_epi64x(3, 4); + let r = _mm_mask_cvtpd_epu64(b, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epu64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_maskz_cvtpd_epu64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtpd_epu64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_cvtpd_epu64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn 
test_mm256_mask_cvtpd_epu64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epu64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_maskz_cvtpd_epu64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtpd_epu64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtpd_epu64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvt_roundps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvt_roundps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + b, 0b01101001, a, + ); + let e = 
_mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvt_roundps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01101001, a, + ); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvtps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_cvtps_epu64(a); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvtps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_epi64x(5, 6); + let r = _mm_mask_cvtps_epu64(b, 0b01, a); + let e = _mm_set_epi64x(5, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvtps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_maskz_cvtps_epu64(0b01, a); + let e = _mm_set_epi64x(0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvtps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_cvtps_epu64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvtps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvtps_epu64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_maskz_cvtps_epu64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 
4., 5., 6., 7., 8.); + let r = _mm512_cvtps_epu64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtps_epu64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtt_roundpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtt_roundpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvttpd_epi64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_cvttpd_epi64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epi64() { + 
let a = _mm_set_pd(1., 2.); + let b = _mm_set_epi64x(3, 4); + let r = _mm_mask_cvttpd_epi64(b, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epi64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_maskz_cvttpd_epi64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvttpd_epi64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_cvttpd_epi64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epi64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epi64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_maskz_cvttpd_epi64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvttpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvttpd_epi64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvttpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvttpd_epi64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvttpd_epi64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); 
+ assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtt_roundps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtt_roundps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtt_roundps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvttps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_cvttps_epi64(a); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvttps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_epi64x(5, 6); + let r = _mm_mask_cvttps_epi64(b, 0b01, a); + let e = _mm_set_epi64x(5, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvttps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_maskz_cvttps_epi64(0b01, a); + let e = _mm_set_epi64x(0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvttps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_cvttps_epi64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn 
test_mm256_mask_cvttps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvttps_epi64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvttps_epi64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_maskz_cvttps_epi64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvttps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvttps_epi64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvttps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvttps_epi64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvttps_epi64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtt_roundpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtt_roundpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvttpd_epu64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_cvttpd_epu64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epu64() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_epi64x(3, 4); + let r = _mm_mask_cvttpd_epu64(b, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epu64() { + let a = _mm_set_pd(1., 2.); + let r = _mm_maskz_cvttpd_epu64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvttpd_epu64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_cvttpd_epu64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epu64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epu64() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_maskz_cvttpd_epu64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvttpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvttpd_epu64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + 
assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvttpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvttpd_epu64() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvttpd_epu64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvtt_roundps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvtt_roundps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvtt_roundps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_cvttps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_cvttps_epu64(a); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_cvttps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_epi64x(5, 6); + let r = 
_mm_mask_cvttps_epu64(b, 0b01, a); + let e = _mm_set_epi64x(5, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_cvttps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_maskz_cvttps_epu64(0b01, a); + let e = _mm_set_epi64x(0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_cvttps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_cvttps_epu64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_cvttps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mask_cvttps_epu64(b, 0b0110, a); + let e = _mm256_set_epi64x(5, 2, 3, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_cvttps_epu64() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm256_maskz_cvttps_epu64(0b0110, a); + let e = _mm256_set_epi64x(0, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_cvttps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_cvttps_epu64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_cvttps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a); + let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_cvttps_epu64() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_cvttps_epu64(0b01101001, a); + let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512dq,avx512vl")] + unsafe fn test_mm_mullo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(3, 4); + let r = _mm_mullo_epi64(a, b); + let e = _mm_set_epi64x(3, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_mullo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(3, 4); + let c = _mm_set_epi64x(5, 6); + let r = _mm_mask_mullo_epi64(c, 0b01, a, b); + let e = _mm_set_epi64x(5, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_mullo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(3, 4); + let r = _mm_maskz_mullo_epi64(0b01, a, b); + let e = _mm_set_epi64x(0, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mullo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_mullo_epi64(a, b); + let e = _mm256_set_epi64x(5, 12, 21, 32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_mullo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let c = _mm256_set_epi64x(9, 10, 11, 12); + let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b); + let e = _mm256_set_epi64x(9, 12, 21, 12); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_mullo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(5, 6, 7, 8); + let r = _mm256_maskz_mullo_epi64(0b0110, a, b); + let e = _mm256_set_epi64x(0, 12, 21, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mullo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mullo_epi64(a, b); + let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_mullo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b); + let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_mullo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_mullo_epi64(0b01101001, a, b); + let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_cvtmask8_u32() { + let a: __mmask8 = 0b01101001; + let r = _cvtmask8_u32(a); + let e: u32 = 0b01101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_cvtu32_mask8() { + let a: u32 = 0b01101001; + let r = _cvtu32_mask8(a); + let e: __mmask8 = 0b01101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kadd_mask16() { + let a: __mmask16 = 27549; + let b: __mmask16 = 23434; + let r = _kadd_mask16(a, b); + let e: __mmask16 = 50983; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kadd_mask8() { + let a: __mmask8 = 98; + let b: __mmask8 = 117; + let r = _kadd_mask8(a, b); + let e: __mmask8 = 215; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kand_mask8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110011; + let r = _kand_mask8(a, b); + let e: __mmask8 = 0b00100001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kandn_mask8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110011; + let r = _kandn_mask8(a, b); + let e: __mmask8 = 0b10010010; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn 
test_knot_mask8() { + let a: __mmask8 = 0b01101001; + let r = _knot_mask8(a); + let e: __mmask8 = 0b10010110; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kor_mask8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110011; + let r = _kor_mask8(a, b); + let e: __mmask8 = 0b11111011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kxnor_mask8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110011; + let r = _kxnor_mask8(a, b); + let e: __mmask8 = 0b00100101; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kxor_mask8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110011; + let r = _kxor_mask8(a, b); + let e: __mmask8 = 0b11011010; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kortest_mask8_u8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110110; + let mut all_ones: u8 = 0; + let r = _kortest_mask8_u8(a, b, &mut all_ones); + assert_eq!(r, 0); + assert_eq!(all_ones, 1); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kortestc_mask8_u8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110110; + let r = _kortestc_mask8_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kortestz_mask8_u8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10110110; + let r = _kortestz_mask8_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kshiftli_mask8() { + let a: __mmask8 = 0b01101001; + let r = _kshiftli_mask8::<3>(a); + let e: __mmask8 = 0b01001000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kshiftri_mask8() { + let a: __mmask8 = 0b01101001; + let r = _kshiftri_mask8::<3>(a); + let e: __mmask8 = 0b00001101; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_ktest_mask8_u8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10010110; + let 
mut and_not: u8 = 0; + let r = _ktest_mask8_u8(a, b, &mut and_not); + assert_eq!(r, 1); + assert_eq!(and_not, 0); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_ktestc_mask8_u8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10010110; + let r = _ktestc_mask8_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_ktestz_mask8_u8() { + let a: __mmask8 = 0b01101001; + let b: __mmask8 = 0b10010110; + let r = _ktestz_mask8_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_ktest_mask16_u8() { + let a: __mmask16 = 0b0110100100111100; + let b: __mmask16 = 0b1001011011000011; + let mut and_not: u8 = 0; + let r = _ktest_mask16_u8(a, b, &mut and_not); + assert_eq!(r, 1); + assert_eq!(and_not, 0); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_ktestc_mask16_u8() { + let a: __mmask16 = 0b0110100100111100; + let b: __mmask16 = 0b1001011011000011; + let r = _ktestc_mask16_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_ktestz_mask16_u8() { + let a: __mmask16 = 0b0110100100111100; + let b: __mmask16 = 0b1001011011000011; + let r = _ktestz_mask16_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_load_mask8() { + let a: __mmask8 = 0b01101001; + let r = _load_mask8(&a); + let e: __mmask8 = 0b01101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_store_mask8() { + let a: __mmask8 = 0b01101001; + let mut r = 0; + _store_mask8(&mut r, a); + let e: __mmask8 = 0b01101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_movepi32_mask() { + let a = _mm_set_epi32(0, -2, -3, 4); + let r = _mm_movepi32_mask(a); + let e = 0b0110; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_movepi32_mask() { + let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8); + let r = _mm256_movepi32_mask(a); + let 
e = 0b01101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_movepi32_mask() { + let a = _mm512_set_epi32( + 0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16, + ); + let r = _mm512_movepi32_mask(a); + let e = 0b0110100100111100; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_movepi64_mask() { + let a = _mm_set_epi64x(0, -2); + let r = _mm_movepi64_mask(a); + let e = 0b01; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_movepi64_mask() { + let a = _mm256_set_epi64x(0, -2, -3, 4); + let r = _mm256_movepi64_mask(a); + let e = 0b0110; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_movepi64_mask() { + let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8); + let r = _mm512_movepi64_mask(a); + let e = 0b01101001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_movm_epi32() { + let a = 0b0110; + let r = _mm_movm_epi32(a); + let e = _mm_set_epi32(0, -1, -1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_movm_epi32() { + let a = 0b01101001; + let r = _mm256_movm_epi32(a); + let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_movm_epi32() { + let a = 0b0110100100111100; + let r = _mm512_movm_epi32(a); + let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_movm_epi64() { + let a = 0b01; + let r = _mm_movm_epi64(a); + let e = _mm_set_epi64x(0, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_movm_epi64() { + let a = 0b0110; + let r = _mm256_movm_epi64(a); + let e = _mm256_set_epi64x(0, -1, -1, 0); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_movm_epi64() { + let a = 0b01101001; + let r = _mm512_movm_epi64(a); + let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_range_round_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); + let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b); + let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_range_round_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); + let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b); + let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_range_round_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); + let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b); + let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_range_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(2., 1.); + let r = _mm_range_pd::<0b0101>(a, b); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_range_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(2., 1.); + let c = _mm_set_pd(3., 4.); + let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b); + let e = _mm_set_pd(3., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_range_pd() { + let a = 
_mm_set_pd(1., 2.); + let b = _mm_set_pd(2., 1.); + let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b); + let e = _mm_set_pd(0., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_range_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(2., 1., 4., 3.); + let r = _mm256_range_pd::<0b0101>(a, b); + let e = _mm256_set_pd(2., 2., 4., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_range_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(2., 1., 4., 3.); + let c = _mm256_set_pd(5., 6., 7., 8.); + let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b); + let e = _mm256_set_pd(5., 2., 4., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_range_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(2., 1., 4., 3.); + let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b); + let e = _mm256_set_pd(0., 2., 4., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_range_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); + let r = _mm512_range_pd::<0b0101>(a, b); + let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_range_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); + let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b); + let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_range_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); + 
let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b); + let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_range_round_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., + ); + let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b); + let e = _mm512_set_ps( + 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_range_round_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., + ); + let c = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = + _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b); + let e = _mm512_set_ps( + 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_range_round_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., + ); + let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b); + let e = _mm512_set_ps( + 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_range_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(2., 1., 4., 3.); + let r = _mm_range_ps::<0b0101>(a, b); + let e = 
_mm_set_ps(2., 2., 4., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_range_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(2., 1., 4., 3.); + let c = _mm_set_ps(5., 6., 7., 8.); + let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b); + let e = _mm_set_ps(5., 2., 4., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_range_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(2., 1., 4., 3.); + let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b); + let e = _mm_set_ps(0., 2., 4., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_range_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); + let r = _mm256_range_ps::<0b0101>(a, b); + let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_range_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); + let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b); + let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_range_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); + let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b); + let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_range_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 
11., 14., 13., 16., 15., + ); + let r = _mm512_range_ps::<0b0101>(a, b); + let e = _mm512_set_ps( + 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_range_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., + ); + let c = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b); + let e = _mm512_set_ps( + 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_range_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., + ); + let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b); + let e = _mm512_set_ps( + 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_range_round_sd() { + let a = _mm_set_sd(1.); + let b = _mm_set_sd(2.); + let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b); + let e = _mm_set_sd(2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_range_round_sd() { + let a = _mm_set_sd(1.); + let b = _mm_set_sd(2.); + let c = _mm_set_sd(3.); + let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); + let e = _mm_set_sd(3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_range_round_sd() { + let a = _mm_set_sd(1.); + let b = _mm_set_sd(2.); + let r = 
_mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); + let e = _mm_set_sd(0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_range_sd() { + let a = _mm_set_sd(1.); + let b = _mm_set_sd(2.); + let c = _mm_set_sd(3.); + let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b); + let e = _mm_set_sd(3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_range_sd() { + let a = _mm_set_sd(1.); + let b = _mm_set_sd(2.); + let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b); + let e = _mm_set_sd(0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_range_round_ss() { + let a = _mm_set_ss(1.); + let b = _mm_set_ss(2.); + let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b); + let e = _mm_set_ss(2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_range_round_ss() { + let a = _mm_set_ss(1.); + let b = _mm_set_ss(2.); + let c = _mm_set_ss(3.); + let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); + let e = _mm_set_ss(3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_range_round_ss() { + let a = _mm_set_ss(1.); + let b = _mm_set_ss(2.); + let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); + let e = _mm_set_ss(0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_range_ss() { + let a = _mm_set_ss(1.); + let b = _mm_set_ss(2.); + let c = _mm_set_ss(3.); + let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b); + let e = _mm_set_ss(3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_range_ss() { + let a = _mm_set_ss(1.); + let b = _mm_set_ss(2.); + let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b); + let e = _mm_set_ss(0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_reduce_round_pd() { 
+ let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); + let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_reduce_round_pd() { + let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); + let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + src, 0b01101001, a, + ); + let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_reduce_round_pd() { + let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + 0b01101001, a, + ); + let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_reduce_pd() { + let a = _mm_set_pd(0.25, 0.50); + let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm_set_pd(0.25, 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_reduce_pd() { + let a = _mm_set_pd(0.25, 0.50); + let src = _mm_set_pd(3., 4.); + let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a); + let e = _mm_set_pd(3., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_reduce_pd() { + let a = _mm_set_pd(0.25, 0.50); + let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_reduce_pd() { + let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); + let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO 
}>(a); + let e = _mm256_set_pd(0.25, 0., 0.25, 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_reduce_pd() { + let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); + let src = _mm256_set_pd(3., 4., 5., 6.); + let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); + let e = _mm256_set_pd(3., 0., 0.25, 6.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_reduce_pd() { + let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); + let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); + let e = _mm256_set_pd(0., 0., 0.25, 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_reduce_pd() { + let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_reduce_pd() { + let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); + let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); + let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_reduce_pd() { + let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); + let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_reduce_round_ps() { + let a = _mm512_set_ps( + 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, + 4.0, + ); + let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, 
_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_ps( + 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_reduce_round_ps() { + let a = _mm512_set_ps( + 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, + 4.0, + ); + let src = _mm512_set_ps( + 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., + ); + let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + src, + 0b0110100100111100, + a, + ); + let e = _mm512_set_ps( + 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_reduce_round_ps() { + let a = _mm512_set_ps( + 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, + 4.0, + ); + let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + 0b0110100100111100, + a, + ); + let e = _mm512_set_ps( + 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_reduce_ps() { + let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); + let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm_set_ps(0.25, 0., 0.25, 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_reduce_ps() { + let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); + let src = _mm_set_ps(2., 3., 4., 5.); + let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); + let e = _mm_set_ps(2., 0., 0.25, 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_reduce_ps() { + let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); + let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); + let 
e = _mm_set_ps(0., 0., 0.25, 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_reduce_ps() { + let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_reduce_ps() { + let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.); + let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); + let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_reduce_ps() { + let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); + let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); + let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_reduce_ps() { + let a = _mm512_set_ps( + 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, + 4.0, + ); + let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm512_set_ps( + 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_reduce_ps() { + let a = _mm512_set_ps( + 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, + 4.0, + ); + let src = _mm512_set_ps( + 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., + ); + let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a); + let e = _mm512_set_ps( + 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 
19., 20., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_reduce_ps() { + let a = _mm512_set_ps( + 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, + 4.0, + ); + let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a); + let e = _mm512_set_ps( + 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_reduce_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_sd(0.25); + let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); + let e = _mm_set_pd(1., 0.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_reduce_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_sd(0.25); + let c = _mm_set_pd(3., 4.); + let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + c, 0b0, a, b, + ); + let e = _mm_set_pd(1., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_reduce_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_sd(0.25); + let r = + _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_reduce_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_sd(0.25); + let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); + let e = _mm_set_pd(1., 0.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_reduce_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_sd(0.25); + let c = _mm_set_pd(3., 4.); + let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); + let e = _mm_set_pd(1., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn 
test_mm_maskz_reduce_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_sd(0.25); + let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_reduce_round_ss() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ss(0.25); + let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); + let e = _mm_set_ps(1., 2., 3., 0.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_reduce_round_ss() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ss(0.25); + let c = _mm_set_ps(5., 6., 7., 8.); + let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + c, 0b0, a, b, + ); + let e = _mm_set_ps(1., 2., 3., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_reduce_round_ss() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ss(0.25); + let r = + _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); + let e = _mm_set_ps(1., 2., 3., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_reduce_ss() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ss(0.25); + let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); + let e = _mm_set_ps(1., 2., 3., 0.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_reduce_ss() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ss(0.25); + let c = _mm_set_ps(5., 6., 7., 8.); + let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); + let e = _mm_set_ps(1., 2., 3., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_maskz_reduce_ss() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ss(0.25); + let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); 
+ let e = _mm_set_ps(1., 2., 3., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_fpclass_pd_mask() { + let a = _mm_set_pd(1., f64::INFINITY); + let r = _mm_fpclass_pd_mask::<0x18>(a); + let e = 0b01; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_fpclass_pd_mask() { + let a = _mm_set_pd(1., f64::INFINITY); + let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a); + let e = 0b00; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_fpclass_pd_mask() { + let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); + let r = _mm256_fpclass_pd_mask::<0x18>(a); + let e = 0b0110; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_fpclass_pd_mask() { + let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); + let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a); + let e = 0b0010; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_fpclass_pd_mask() { + let a = _mm512_set_pd( + 1., + f64::INFINITY, + f64::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f64::NAN, + 1.0e-308, + ); + let r = _mm512_fpclass_pd_mask::<0x18>(a); + let e = 0b01100000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_fpclass_pd_mask() { + let a = _mm512_set_pd( + 1., + f64::INFINITY, + f64::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f64::NAN, + 1.0e-308, + ); + let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a); + let e = 0b00100000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_fpclass_ps_mask() { + let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); + let r = _mm_fpclass_ps_mask::<0x18>(a); + let e = 0b0110; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_fpclass_ps_mask() { + let a = _mm_set_ps(1., f32::INFINITY, 
f32::NEG_INFINITY, 0.0); + let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a); + let e = 0b0010; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_fpclass_ps_mask() { + let a = _mm256_set_ps( + 1., + f32::INFINITY, + f32::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f32::NAN, + 1.0e-38, + ); + let r = _mm256_fpclass_ps_mask::<0x18>(a); + let e = 0b01100000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_fpclass_ps_mask() { + let a = _mm256_set_ps( + 1., + f32::INFINITY, + f32::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f32::NAN, + 1.0e-38, + ); + let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a); + let e = 0b00100000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_fpclass_ps_mask() { + let a = _mm512_set_ps( + 1., + f32::INFINITY, + f32::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f32::NAN, + 1.0e-38, + -1., + f32::NEG_INFINITY, + f32::INFINITY, + -0.0, + 0.0, + 2.0, + f32::NAN, + -1.0e-38, + ); + let r = _mm512_fpclass_ps_mask::<0x18>(a); + let e = 0b0110000001100000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_fpclass_ps_mask() { + let a = _mm512_set_ps( + 1., + f32::INFINITY, + f32::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f32::NAN, + 1.0e-38, + -1., + f32::NEG_INFINITY, + f32::INFINITY, + -0.0, + 0.0, + 2.0, + f32::NAN, + -1.0e-38, + ); + let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a); + let e = 0b0010000000100000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_fpclass_sd_mask() { + let a = _mm_set_pd(1., f64::INFINITY); + let r = _mm_fpclass_sd_mask::<0x18>(a); + let e = 0b1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_fpclass_sd_mask() { + let a = _mm_set_sd(f64::INFINITY); + let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a); + let e = 0b0; + assert_eq!(r, e); + } + + #[simd_test(enable = 
"avx512dq")] + unsafe fn test_mm_fpclass_ss_mask() { + let a = _mm_set_ss(f32::INFINITY); + let r = _mm_fpclass_ss_mask::<0x18>(a); + let e = 0b1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm_mask_fpclass_ss_mask() { + let a = _mm_set_ss(f32::INFINITY); + let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a); + let e = 0b0; + assert_eq!(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs new file mode 100644 index 000000000000..dd224616764d --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -0,0 +1,60683 @@ +use crate::{ + arch::asm, + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, + intrinsics::{fmaf32, fmaf64}, + mem, ptr, +}; + +use core::hint::unreachable_unchecked; +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Computes the absolute values of packed 32-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm512_abs_epi32(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let r = simd_select::(simd_lt(a, i32x16::ZERO), simd_neg(a), a); + transmute(r) + } +} + +/// Computes the absolute value of packed 32-bit integers in `a`, and store the +/// unsigned results in `dst` using writemask `k` (elements are copied from +/// `src` when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, abs, src.as_i32x16())) + } +} + +/// Computes the absolute value of packed 32-bit integers in `a`, and store the +/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when +/// the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, abs, i32x16::ZERO)) + } +} + +/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, abs, src.as_i32x8())) + } +} + +/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, abs, i32x8::ZERO)) + } +} + +/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, abs, src.as_i32x4())) + } +} + +/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsd))] +pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, abs, i32x4::ZERO)) + } +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub fn _mm512_abs_epi64(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i64x8(); + let r = simd_select::(simd_lt(a, i64x8::ZERO), simd_neg(a), a); + transmute(r) + } +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, abs, src.as_i64x8())) + } +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, abs, i64x8::ZERO)) + } +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub fn _mm256_abs_epi64(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i64x4(); + let r = simd_select::(simd_lt(a, i64x4::ZERO), simd_neg(a), a); + transmute(r) + } +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+    unsafe {
+        let abs = _mm256_abs_epi64(a).as_i64x4();
+        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
+    }
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
+    unsafe {
+        let abs = _mm256_abs_epi64(a).as_i64x4();
+        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
+    }
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i64x2();
+        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
+        transmute(r)
+    }
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, abs, src.as_i64x2())) + } +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, abs, i64x2::ZERO)) + } +} + +/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm512_abs_ps(v2: __m512) -> __m512 { + unsafe { simd_fabs(v2) } +} + +/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, simd_fabs(v2), src) } +} + +/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm512_abs_pd(v2: __m512d) -> __m512d { + unsafe { simd_fabs(v2) } +} + +/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, simd_fabs(v2), src) } +} + +/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i32x16(); + transmute(simd_select_bitmask(k, mov, src.as_i32x16())) + } +} + +/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i32x16(); + transmute(simd_select_bitmask(k, mov, i32x16::ZERO)) + } +} + +/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i32x8(); + transmute(simd_select_bitmask(k, mov, src.as_i32x8())) + } +} + +/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i32x8(); + transmute(simd_select_bitmask(k, mov, i32x8::ZERO)) + } +} + +/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i32x4(); + transmute(simd_select_bitmask(k, mov, src.as_i32x4())) + } +} + +/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i32x4(); + transmute(simd_select_bitmask(k, mov, i32x4::ZERO)) + } +} + +/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i64x8(); + transmute(simd_select_bitmask(k, mov, src.as_i64x8())) + } +} + +/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i64x8(); + transmute(simd_select_bitmask(k, mov, i64x8::ZERO)) + } +} + +/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i64x4(); + transmute(simd_select_bitmask(k, mov, src.as_i64x4())) + } +} + +/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i64x4(); + transmute(simd_select_bitmask(k, mov, i64x4::ZERO)) + } +} + +/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i64x2(); + transmute(simd_select_bitmask(k, mov, src.as_i64x2())) + } +} + +/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i64x2(); + transmute(simd_select_bitmask(k, mov, i64x2::ZERO)) + } +} + +/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] +pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov = a.as_f32x16(); + transmute(simd_select_bitmask(k, mov, src.as_f32x16())) + } +} + +/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] +pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov = a.as_f32x16(); + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) + } +} + +/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] +pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = a.as_f32x8(); + transmute(simd_select_bitmask(k, mov, src.as_f32x8())) + } +} + +/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] +pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = a.as_f32x8(); + transmute(simd_select_bitmask(k, mov, f32x8::ZERO)) + } +} + +/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] +pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = a.as_f32x4(); + transmute(simd_select_bitmask(k, mov, src.as_f32x4())) + } +} + +/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] +pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = a.as_f32x4(); + transmute(simd_select_bitmask(k, mov, f32x4::ZERO)) + } +} + +/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] +pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov = a.as_f64x8(); + transmute(simd_select_bitmask(k, mov, src.as_f64x8())) + } +} + +/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] +pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov = a.as_f64x8(); + transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) + } +} + +/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] +pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = a.as_f64x4(); + transmute(simd_select_bitmask(k, mov, src.as_f64x4())) + } +} + +/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] +pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = a.as_f64x4(); + transmute(simd_select_bitmask(k, mov, f64x4::ZERO)) + } +} + +/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] +pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = a.as_f64x2(); + transmute(simd_select_bitmask(k, mov, src.as_f64x2())) + } +} + +/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] +pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = a.as_f64x2(); + transmute(simd_select_bitmask(k, mov, f64x2::ZERO)) + } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, add, src.as_i32x16())) + } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, add, i32x16::ZERO)) + } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, add, src.as_i32x8())) + } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, add, i32x8::ZERO)) + } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, add, src.as_i32x4())) + } +} + +/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddd))] +pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, add, i32x4::ZERO)) + } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, add, src.as_i64x8())) + } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, add, i64x8::ZERO)) + } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, add, src.as_i64x4())) + } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, add, i64x4::ZERO)) + } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, add, src.as_i64x2())) + } +} + +/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpaddq))] +pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, add, i64x2::ZERO)) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let add = _mm512_add_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, add, src.as_f32x16())) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let add = _mm512_add_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, add, f32x16::ZERO)) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let add = _mm256_add_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, add, src.as_f32x8())) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let add = _mm256_add_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, add, f32x8::ZERO)) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let add = _mm_add_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, add, src.as_f32x4())) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps))] +pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let add = _mm_add_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, add, f32x4::ZERO)) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let add = _mm512_add_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, add, src.as_f64x8())) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let add = _mm512_add_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, add, f64x8::ZERO)) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let add = _mm256_add_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, add, src.as_f64x4())) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let add = _mm256_add_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, add, f64x4::ZERO)) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let add = _mm_add_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, add, src.as_f64x2())) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd))] +pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let add = _mm_add_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, add, f64x2::ZERO)) + } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, sub, src.as_i32x16())) + } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, sub, i32x16::ZERO)) + } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, sub, src.as_i32x8())) + } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, sub, i32x8::ZERO)) + } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, sub, src.as_i32x4())) + } +} + +/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubd))] +pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, sub, i32x4::ZERO)) + } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, sub, src.as_i64x8())) + } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, sub, i64x8::ZERO)) + } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, sub, src.as_i64x4())) + } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, sub, i64x4::ZERO)) + } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, sub, src.as_i64x2())) + } +} + +/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsubq))] +pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, sub, i64x2::ZERO)) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let sub = _mm512_sub_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, sub, src.as_f32x16())) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let sub = _mm512_sub_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, sub, f32x16::ZERO)) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let sub = _mm256_sub_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, sub, src.as_f32x8())) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let sub = _mm256_sub_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, sub, f32x8::ZERO)) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let sub = _mm_sub_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, sub, src.as_f32x4())) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps))] +pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let sub = _mm_sub_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, sub, f32x4::ZERO)) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let sub = _mm512_sub_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, sub, src.as_f64x8())) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let sub = _mm512_sub_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, sub, f64x8::ZERO)) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let sub = _mm256_sub_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, sub, src.as_f64x4())) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let sub = _mm256_sub_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, sub, f64x4::ZERO)) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let sub = _mm_sub_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, sub, src.as_f64x2())) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd))] +pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let sub = _mm_sub_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, sub, f64x2::ZERO)) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8())); + let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8())); + transmute(simd_mul(a, b)) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epi32(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, mul, src.as_i64x8())) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epi32(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, mul, i64x8::ZERO)) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epi32(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, mul, src.as_i64x4())) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epi32(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, mul, i64x4::ZERO)) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epi32(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, mul, src.as_i64x2())) + } +} + +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuldq))] +pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epi32(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, mul, i64x2::ZERO)) + } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, mul, src.as_i32x16())) + } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, mul, i32x16::ZERO)) + } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, mul, src.as_i32x8())) + } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, mul, i32x8::ZERO)) + } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, mul, src.as_i32x4())) + } +} + +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmulld))] +pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, mul, i32x4::ZERO)) + } +} + +/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017) +/// +/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic. 
+#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) } +} + +/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016) +/// +/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic. +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullox_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, mul, src.as_i64x8())) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let mask = u64x8::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epu32(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, mul, src.as_u64x8())) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epu32(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, mul, u64x8::ZERO)) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epu32(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, mul, src.as_u64x4())) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epu32(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, mul, u64x4::ZERO)) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epu32(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, mul, src.as_u64x2())) + } +} + +/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmuludq))] +pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epu32(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, mul, u64x2::ZERO)) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let mul = _mm512_mul_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, mul, src.as_f32x16())) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let mul = _mm512_mul_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, mul, f32x16::ZERO)) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let mul = _mm256_mul_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, mul, src.as_f32x8())) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let mul = _mm256_mul_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, mul, f32x8::ZERO)) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mul = _mm_mul_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, mul, src.as_f32x4())) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps))] +pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mul = _mm_mul_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, mul, f32x4::ZERO)) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let mul = _mm512_mul_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, mul, src.as_f64x8())) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let mul = _mm512_mul_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, mul, f64x8::ZERO)) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let mul = _mm256_mul_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, mul, src.as_f64x4())) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let mul = _mm256_mul_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, mul, f64x4::ZERO)) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mul = _mm_mul_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, mul, src.as_f64x2())) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd))] +pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mul = _mm_mul_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, mul, f64x2::ZERO)) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let div = _mm512_div_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, div, src.as_f32x16())) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let div = _mm512_div_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, div, f32x16::ZERO)) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let div = _mm256_div_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, div, src.as_f32x8())) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let div = _mm256_div_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, div, f32x8::ZERO)) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let div = _mm_div_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, div, src.as_f32x4())) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps))] +pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let div = _mm_div_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, div, f32x4::ZERO)) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let div = _mm512_div_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, div, src.as_f64x8())) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let div = _mm512_div_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, div, f64x8::ZERO)) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let div = _mm256_div_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, div, src.as_f64x4())) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let div = _mm256_div_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, div, f64x4::ZERO)) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let div = _mm_div_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, div, src.as_f64x2())) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd))] +pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let div = _mm_div_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, div, f64x2::ZERO)) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsd))] +pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let b = b.as_i32x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsd))] +pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, max, src.as_i32x16())) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsd))] +pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, max, i32x16::ZERO)) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsd))] +pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, max, src.as_i32x8())) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsd))] +pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, max, i32x8::ZERO)) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsd))]
+pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = _mm_max_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
+    }
+}
+
+/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsd))]
+pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = _mm_max_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_i64x8();
+        let b = b.as_i64x8();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector.
+        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = _mm512_max_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = _mm512_max_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let a = a.as_i64x4();
+        let b = b.as_i64x4();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector.
+        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = _mm256_max_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = _mm256_max_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i64x2();
+        let b = b.as_i64x2();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector.
+        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsq))] +pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, max, src.as_i64x2())) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxsq))] +pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, max, i64x2::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vmaxps( + a.as_f32x16(), + b.as_f32x16(), + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let max = _mm512_max_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, max, src.as_f32x16())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let max = _mm512_max_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, max, f32x16::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let max = _mm256_max_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, max, src.as_f32x8())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let max = _mm256_max_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, max, f32x8::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let max = _mm_max_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, max, src.as_f32x4())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps))] +pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let max = _mm_max_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, max, f32x4::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd))] +pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd))] +pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let max = _mm512_max_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, max, src.as_f64x8())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd))] +pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let max = _mm512_max_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, max, f64x8::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd))] +pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let max = _mm256_max_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, max, src.as_f64x4())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd))] +pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let max = _mm256_max_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, max, f64x4::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vmaxpd))]
+pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        let max = _mm_max_pd(a, b).as_f64x2();
+        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
+    }
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vmaxpd))]
+pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        let max = _mm_max_pd(a, b).as_f64x2();
+        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxud))]
+pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_u32x16();
+        let b = b.as_u32x16();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector,
+        // so the comparison result is materialized as i32x16 even for unsigned lanes.
+        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxud))] +pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, max, src.as_u32x16())) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxud))] +pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, max, u32x16::ZERO)) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxud))] +pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, max, src.as_u32x8())) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmaxud))] +pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, max, u32x8::ZERO)) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxud))]
+pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = _mm_max_epu32(a, b).as_u32x4();
+        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxud))]
+pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = _mm_max_epu32(a, b).as_u32x4();
+        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_u64x8();
+        let b = b.as_u64x8();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector,
+        // so the comparison result is materialized as i64x8 even for unsigned lanes.
+        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = _mm512_max_epu64(a, b).as_u64x8();
+        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = _mm512_max_epu64(a, b).as_u64x8();
+        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let a = a.as_u64x4();
+        let b = b.as_u64x4();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector,
+        // so the comparison result is materialized as i64x4 even for unsigned lanes.
+        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = _mm256_max_epu64(a, b).as_u64x4();
+        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = _mm256_max_epu64(a, b).as_u64x4();
+        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_u64x2();
+        let b = b.as_u64x2();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector,
+        // so the comparison result is materialized as i64x2 even for unsigned lanes.
+        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = _mm_max_epu64(a, b).as_u64x2();
+        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
+    }
+}
+
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = _mm_max_epu64(a, b).as_u64x2();
+        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
+    }
+}
+
+/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpminsd))]
+pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_i32x16();
+        let b = b.as_i32x16();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector.
+        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
+    }
+}
+
+/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsd))] +pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, min, src.as_i32x16())) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsd))] +pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, min, i32x16::ZERO)) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsd))] +pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, min, src.as_i32x8())) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsd))] +pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, min, i32x8::ZERO)) + } +} + +/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpminsd))]
+pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let min = _mm_min_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
+    }
+}
+
+/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpminsd))]
+pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let min = _mm_min_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_i64x8();
+        let b = b.as_i64x8();
+        // Explicit mask type: `simd_select` requires a signed-integer mask vector.
+        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
+    }
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, min, src.as_i64x8())) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, min, i64x8::ZERO)) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i64x4(); + let b = b.as_i64x4(); + transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b)) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, min, src.as_i64x4())) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, min, i64x4::ZERO)) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i64x2(); + let b = b.as_i64x2(); + transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b)) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, min, src.as_i64x2())) + } +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, min, i64x2::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vminps( + a.as_f32x16(), + b.as_f32x16(), + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let min = _mm512_min_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, min, src.as_f32x16())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let min = _mm512_min_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, min, f32x16::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let min = _mm256_min_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, min, src.as_f32x8())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let min = _mm256_min_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, min, f32x8::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let min = _mm_min_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, min, src.as_f32x4())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps))] +pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let min = _mm_min_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, min, f32x4::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let min = _mm512_min_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, min, src.as_f64x8())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let min = _mm512_min_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, min, f64x8::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let min = _mm256_min_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, min, src.as_f64x4())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let min = _mm256_min_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, min, f64x4::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let min = _mm_min_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, min, src.as_f64x2())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd))] +pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let min = _mm_min_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, min, f64x2::ZERO)) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u32x16(); + let b = b.as_u32x16(); + transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b)) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, min, src.as_u32x16())) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, min, u32x16::ZERO)) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, min, src.as_u32x8())) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, min, u32x8::ZERO)) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu32(a, b).as_u32x4(); + transmute(simd_select_bitmask(k, min, src.as_u32x4())) + } +} + +/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminud))] +pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu32(a, b).as_u32x4(); + transmute(simd_select_bitmask(k, min, u32x4::ZERO)) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u64x8(); + let b = b.as_u64x8(); + transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b)) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, min, src.as_u64x8())) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, min, u64x8::ZERO)) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u64x4(); + let b = b.as_u64x4(); + transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b)) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu64(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, min, src.as_u64x4())) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu64(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, min, u64x4::ZERO)) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u64x2(); + let b = b.as_u64x2(); + transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b)) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu64(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, min, src.as_u64x2())) + } +} + +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpminuq))] +pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu64(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, min, u64x2::ZERO)) + } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm512_sqrt_ps(a: __m512) -> __m512 { + unsafe { simd_fsqrt(a) } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps))] +pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d { + unsafe { simd_fsqrt(a) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(a, b, c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(a, b, c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub +pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub +pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. 
clang fmadd, gcc fmsub +pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub +pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. 
clang fmadd, gcc fmsub +pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub +pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. 
clang fmadd, gcc fmsub +pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub +pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. 
clang fmadd, gcc fmsub +pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub +pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub +pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!( + add, + sub, + [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15] + ) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps +pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2601) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd +pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!( + add, + sub, + [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31] + ) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps +pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd +pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps +pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) } +} + +/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
+    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
+}
+
+// Review note: every unmasked intrinsic in this rcp14 family is lowered through
+// the masked LLVM builtin with an all-ones writemask sized to the lane count
+// (16/8/4 bits for f32 vectors, 8/4/2 bits for f64 vectors) and a zeroed merge
+// source that is never selected because all mask bits are set.
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
+    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
+    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
+    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
+    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
+}
+
+/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14ps))]
+pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
+    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
+    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
+    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
+    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
+    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
+    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
+    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
+}
+
+/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrcp14pd))]
+pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
+    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
+    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
+}
+
+// Review note: the rsqrt14 family follows the same lowering pattern as rcp14:
+// the unmasked forms call the masked builtin with an all-ones lane mask and a
+// zeroed (ignored) merge source. Some Intel doc links in this family carry no
+// `&expand=` fragment; that matches upstream and is intentional.
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
+    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
+    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
+    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
+    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
+    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
+    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
+    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
+    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
+    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
+    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
+    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
+}
+
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
+    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
+    unsafe {
+        transmute(vgetexpps(
+            a.as_f32x16(),
+            f32x16::ZERO,
+            0b11111111_11111111,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+// Review note: only the 512-bit getexp builtins take an extra rounding/SAE
+// argument, forwarded here as _MM_FROUND_CUR_DIRECTION; the 256-bit and
+// 128-bit builtins (vgetexpps256/128, vgetexppd256/128) have no such
+// parameter. The unmasked forms pass an all-ones lane mask and a zeroed,
+// ignored merge source, as in the rcp14/rsqrt14 families.
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+    unsafe {
+        transmute(vgetexpps(
+            a.as_f32x16(),
+            src.as_f32x16(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
+    unsafe {
+        transmute(vgetexpps(
+            a.as_f32x16(),
+            f32x16::ZERO,
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
+    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
+    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm_getexp_ps(a: __m128) -> __m128 {
+    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
+}
+
+/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexpps))]
+pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
+    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
+    unsafe {
+        transmute(vgetexppd(
+            a.as_f64x8(),
+            f64x8::ZERO,
+            0b11111111,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+    unsafe {
+        transmute(vgetexppd(
+            a.as_f64x8(),
+            src.as_f64x8(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
+    unsafe {
+        transmute(vgetexppd(
+            a.as_f64x8(),
+            f64x8::ZERO,
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
+    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
+    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgetexppd))]
+pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
+    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
+}
+
+/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexppd))] +pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) } +} + +/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexppd))] +pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature 
= "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_roundscale_ps(a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let r = vrndscaleps( + a, + IMM8, + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest 
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_roundscale_ps(a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits 
specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_roundscale_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let src = src.as_f32x8(); + let r = vrndscaleps256(a, IMM8, src, k); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, 
assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_roundscale_ps(a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_roundscale_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let src = src.as_f32x4(); + let r = vrndscaleps128(a, IMM8, src, k); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * 
[`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_roundscale_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_roundscale_pd( + src: __m512d, + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let 
r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] 
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_roundscale_pd(a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_roundscale_pd( + src: __m256d, + k: __mmask8, + a: __m256d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let src = src.as_f64x4(); + let r = vrndscalepd256(a, IMM8, src, k); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : 
round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_roundscale_pd(a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask 
k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_roundscale_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let src = src.as_f64x2(); + let r = vrndscalepd128(a, IMM8, src, k); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub 
fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vscalefps( + a.as_f32x16(), + b.as_f32x16(), + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vscalefps( + a.as_f32x16(), + b.as_f32x16(), + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vscalefps( + a.as_f32x16(), + b.as_f32x16(), + f32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + transmute(vscalefps256( + a.as_f32x8(), + b.as_f32x8(), + f32x8::ZERO, + 0b11111111, + )) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vscalefps128( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + 0b00001111, + )) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps))] +pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + transmute(vscalefpd( + a.as_f64x8(), + b.as_f64x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + transmute(vscalefpd( + a.as_f64x8(), + b.as_f64x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + transmute(vscalefpd( + a.as_f64x8(), + b.as_f64x8(), + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + transmute(vscalefpd256( + a.as_f64x4(), + b.as_f64x4(), + f64x4::ZERO, + 0b00001111, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefpd128( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + 0b00000011, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd))] +pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fixupimm_ps( + a: __m512, + k: __mmask16, + b: __m512, + c: __m512i, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fixupimm_ps( + k: __mmask16, + a: __m512, + b: __m512, + c: __m512i, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let c = c.as_i32x8(); + let r = vfixupimmps256(a, b, c, IMM8, 0b11111111); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_fixupimm_ps( + a: __m256, + k: __mmask8, + b: __m256, + c: __m256i, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let c = c.as_i32x8(); + let r = vfixupimmps256(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_maskz_fixupimm_ps( + k: __mmask8, + a: __m256, + b: __m256, + c: __m256i, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let c = c.as_i32x8(); + let r = vfixupimmpsz256(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmps128(a, b, c, IMM8, 0b00001111); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fixupimm_ps( + a: __m128, + k: __mmask8, + b: __m128, + c: __m128i, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmps128(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fixupimm_ps( + k: __mmask8, + a: __m128, + b: __m128, + c: __m128i, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmpsz128(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fixupimm_pd( + a: __m512d, + k: __mmask8, + b: __m512d, + c: __m512i, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fixupimm_pd( + k: __mmask8, + a: __m512d, + b: __m512d, + c: __m512i, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let c = c.as_i64x4(); + let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_fixupimm_pd( + a: __m256d, + k: __mmask8, + b: __m256d, + c: __m256i, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let c = c.as_i64x4(); + let r = vfixupimmpd256(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_maskz_fixupimm_pd( + k: __mmask8, + a: __m256d, + b: __m256d, + c: __m256i, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let c = c.as_i64x4(); + let r = vfixupimmpdz256(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fixupimm_pd( + a: __m128d, + k: __mmask8, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmpd128(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fixupimm_pd( + k: __mmask8, + a: __m128d, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmpdz128(a, b, c, IMM8, k); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let c = c.as_i32x16(); + let r = vpternlogd(a, b, c, IMM8); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_ternarylogic_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i32x16(); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let r = vpternlogd(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_ternarylogic_epi32( + k: __mmask16, + a: __m512i, + b: __m512i, + c: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let c = c.as_i32x16(); + let r = vpternlogd(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_ternarylogic_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let c = c.as_i32x8(); + let r = vpternlogd256(a, b, c, IMM8); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_ternarylogic_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i32x8(); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r = vpternlogd256(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_maskz_ternarylogic_epi32( + k: __mmask8, + a: __m256i, + b: __m256i, + c: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let c = c.as_i32x8(); + let r = vpternlogd256(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_ternarylogic_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let c = c.as_i32x4(); + let r = vpternlogd128(a, b, c, IMM8); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_ternarylogic_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i32x4(); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let r = vpternlogd128(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_ternarylogic_epi32( + k: __mmask8, + a: __m128i, + b: __m128i, + c: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let c = c.as_i32x4(); + let r = vpternlogd128(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let c = c.as_i64x8(); + let r = vpternlogq(a, b, c, IMM8); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_ternarylogic_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i64x8(); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r = vpternlogq(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_ternarylogic_epi64( + k: __mmask8, + a: __m512i, + b: __m512i, + c: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let c = c.as_i64x8(); + let r = vpternlogq(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_ternarylogic_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let c = c.as_i64x4(); + let r = vpternlogq256(a, b, c, IMM8); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_ternarylogic_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i64x4(); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let r = vpternlogq256(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_maskz_ternarylogic_epi64( + k: __mmask8, + a: __m256i, + b: __m256i, + c: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let c = c.as_i64x4(); + let r = vpternlogq256(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_ternarylogic_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let c = c.as_i64x2(); + let r = vpternlogq128(a, b, c, IMM8); + transmute(r) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_ternarylogic_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i64x2(); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let r = vpternlogq128(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } +} + +/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_ternarylogic_epi64( + k: __mmask8, + a: __m128i, + b: __m128i, + c: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let c = c.as_i64x2(); + let r = vpternlogq128(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// The sign is determined by sc which can take the following values: +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm512_getmant_ps( + a: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x16(); 
+ let zero = f32x16::ZERO; + let r = vgetmantps( + a, + SIGN << 2 | NORM, + zero, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_mask_getmant_ps< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m512, + k: __mmask16, + a: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the 
corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm512_maskz_getmant_ps< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask16, + a: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x16(); + let r = vgetmantps( + a, + SIGN << 2 | NORM, + f32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. 
+/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// The sign is determined by sc which can take the following values: +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm256_getmant_ps( + a: __m256, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x8(); + let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm256_mask_getmant_ps< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m256, + k: __mmask8, + a: __m256, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x8(); + let src = src.as_f32x8(); + let r = vgetmantps256(a, SIGN << 2 | NORM, src, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm256_maskz_getmant_ps< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m256, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x8(); + let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. 
+/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// The sign is determined by sc which can take the following values: +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm_getmant_ps( + a: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_mask_getmant_ps< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m128, + k: __mmask8, + a: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetmantps128(a, SIGN << 2 | NORM, src, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_maskz_getmant_ps< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm512_getmant_pd( + a: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x8(); + let zero = f64x8::ZERO; + let r = vgetmantpd( + a, + SIGN << 2 | NORM, + zero, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_mask_getmant_pd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m512d, + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm512_maskz_getmant_pd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x8(); + let r = vgetmantpd( + a, + SIGN << 2 | NORM, + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm256_getmant_pd( + a: __m256d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x4(); + let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm256_mask_getmant_pd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m256d, + k: __mmask8, + a: __m256d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x4(); + let src = src.as_f64x4(); + let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm256_maskz_getmant_pd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m256d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x4(); + let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm_getmant_pd( + a: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_mask_getmant_pd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m128d, + k: __mmask8, + a: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k); + transmute(r) + } +} + +/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_maskz_getmant_pd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k); + transmute(r) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * 
[`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_add_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vaddps(a, b, ROUNDING); + transmute(r) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_add_round_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vaddps(a, b, ROUNDING); + 
transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_add_round_ps( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vaddps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : 
truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_add_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vaddpd(a, b, ROUNDING); + transmute(r) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_add_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r 
= vaddpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_add_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vaddpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] 
: round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_sub_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vsubps(a, b, ROUNDING); + transmute(r) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_sub_round_ps( + src: __m512, + k: 
__mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vsubps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_sub_round_ps( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vsubps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * 
[`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_sub_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vsubpd(a, b, ROUNDING); + transmute(r) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734) 
+#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_sub_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vsubpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_sub_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vsubpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Multiply packed single-precision 
(32-bit) floating-point elements in a and b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_mul_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmulps(a, b, ROUNDING); + transmute(r) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_mul_round_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmulps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_mul_round_ps( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmulps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, 
r, f32x16::ZERO)) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_mul_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmulpd(a, b, ROUNDING); + transmute(r) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see 
[`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_mul_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmulpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_mul_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmulpd(a, b,
ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_div_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vdivps(a, b, ROUNDING); + transmute(r) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and 
suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_div_round_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vdivps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_div_round_ps( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + 
static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vdivps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_div_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vdivpd(a, b, ROUNDING); + transmute(r) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] |
[`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_div_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vdivpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))] 
+#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_div_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vdivpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_sqrt_round_ps(a: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vsqrtps(a, ROUNDING); + transmute(r) + } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | 
[`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_sqrt_round_ps( + src: __m512, + k: __mmask16, + a: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vsqrtps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, 
assert_instr(vsqrtps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vsqrtps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_sqrt_round_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vsqrtpd(a, ROUNDING); + transmute(r) + } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : 
round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_sqrt_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vsqrtpd(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 
8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vsqrtpd(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132psround(a, b, c, ROUNDING) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and 
suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fmadd_round_ps( + a: __m512, + k: __mmask16, + b: __m512, + c: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in a using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568) +#[inline] +#[target_feature(enable 
= "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fmadd_round_ps( + k: __mmask16, + a: __m512, + b: __m512, + c: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps()) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask3_fmadd_round_ps( + a: __m512, + b: __m512, + c: __m512, + k: __mmask16, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate 
result to packed elements in c, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(a, b, c, ROUNDING) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fmadd_round_pd( + a: __m512d, + k: __mmask8, + b: __m512d, + c: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fmadd_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, + c: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + 
simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd()) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask3_fmadd_round_pd( + a: __m512d, + b: __m512d, + c: __m512d, + k: __mmask8, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions 
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    k: __mmask16,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, a)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, _mm512_setzero_ps())
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    b: __m512,
+    c: __m512,
+    k: __mmask16,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, c)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    k: __mmask8,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, a)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, _mm512_setzero_pd())
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+    k: __mmask8,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, c)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmaddsubpsround(a, b, c, ROUNDING)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    k: __mmask16,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    b: __m512,
+    c: __m512,
+    k: __mmask16,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmaddsubpdround(a, b, c, ROUNDING)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    k: __mmask8,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+    k: __mmask8,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    k: __mmask16,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, a)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, _mm512_setzero_ps())
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    b: __m512,
+    c: __m512,
+    k: __mmask16,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, c)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    k: __mmask8,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, a)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, _mm512_setzero_pd())
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
+    a: __m512d,
+    b: __m512d,
+    c: __m512d,
+    k: __mmask8,
+) -> __m512d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
+        simd_select_bitmask(k, r, c)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    k: __mmask16,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
+        simd_select_bitmask(k, r, a)
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+    c: __m512,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
+        simd_select_bitmask(k, r, _mm512_setzero_ps())
+    }
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
+    a: __m512,
+    b: __m512,
+    c: __m512,
+    k: __mmask16,
+) -> __m512 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
+        simd_select_bitmask(k, r, c)
+    }
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(simd_neg(a), b, c, ROUNDING) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fnmadd_round_pd( + a: __m512d, + k: __mmask8, + b: __m512d, + c: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, a) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fnmadd_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, + c: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_pd()) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in 
a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask3_fnmadd_round_pd( + a: __m512d, + b: __m512d, + c: __m512d, + k: __mmask8, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, c) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] 
| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fnmsub_round_ps( + 
a: __m512, + k: __mmask16, + b: __m512, + c: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fnmsub_round_ps( + k: __mmask16, + a: __m512, + b: __m512, + c: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_ps()) + } +} + +/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not 
set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask3_fnmsub_round_ps( + a: __m512, + b: __m512, + c: __m512, + k: __mmask16, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +#[rustc_legacy_const_generics(3)] +pub fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_fnmsub_round_pd( + a: __m512d, + k: __mmask8, + b: __m512d, + c: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), 
b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_maskz_fnmsub_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, + c: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_pd()) + } +} + +/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | 
[`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask3_fnmsub_round_pd( + a: __m512d, + b: __m512d, + c: __m512d, + k: __mmask8, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_max_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmaxps(a, b, SAE); + transmute(r) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_max_round_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmaxps(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmaxps(a, b, SAE); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_max_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmaxpd(a, b, SAE); + transmute(r) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_max_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmaxpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmaxpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_min_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vminps(a, b, SAE); + transmute(r) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_min_round_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vminps(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminps, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vminps(a, b, SAE); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_min_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vminpd(a, b, SAE); + transmute(r) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_min_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vminpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminpd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vminpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } +} + +/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_getexp_round_ps(a: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } +} + +/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_getexp_round_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vgetexpps(a, src, k, SAE); + transmute(r) + } +} + +/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates floor(log2(x)) for each element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vgetexpps(a, f32x16::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_getexp_round_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } +} + +/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates floor(log2(x)) for each element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_getexp_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vgetexppd(a, src, k, SAE); + transmute(r) + } +} + +/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vgetexppd(a, f64x8::ZERO, k, SAE); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm512_roundscale_round_ps(a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_mask_roundscale_round_ps( + src: __m512, + k: __mmask16, + a: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vrndscaleps(a, IMM8, src, k, SAE); + transmute(r) + } +} + +/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm512_maskz_roundscale_round_ps( + k: __mmask16, + a: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(1, 2)] +pub fn _mm512_roundscale_round_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_mask_roundscale_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vrndscalepd(a, IMM8, src, k, SAE); + transmute(r) + } +} + +/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm512_maskz_roundscale_round_pd( + k: __mmask8, + a: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_scalef_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point 
elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_scalef_round_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let src = src.as_f32x16(); + let r = vscalefps(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * 
[`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_scalef_round_ps( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_scalef_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let 
r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_scalef_round_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let src = src.as_f64x8(); + let r = vscalefpd(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down 
and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_scalef_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\ +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_fixupimm_round_ps( + a: __m512, + b: __m512, + c: __m512i, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\ +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm512_mask_fixupimm_round_ps( + a: __m512, + k: __mmask16, + b: __m512, + c: __m512i, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, k, SAE); + transmute(r) + } +} + +/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\ +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm512_maskz_fixupimm_round_ps( + k: __mmask16, + a: __m512, + b: __m512, + c: __m512i, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmpsz(a, b, c, IMM8, k, SAE); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. 
imm8 is used to set the required flags reporting.\ +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_fixupimm_round_pd( + a: __m512d, + b: __m512d, + c: __m512i, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\ +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm512_mask_fixupimm_round_pd( + a: __m512d, + k: __mmask8, + b: __m512d, + c: __m512i, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, k, SAE); + transmute(r) + } +} + +/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\ +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm512_maskz_fixupimm_round_pd( + k: __mmask8, + a: __m512d, + b: __m512d, + c: __m512i, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpdz(a, b, c, IMM8, k, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. 
/// This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2     // interval [1, 2)\
/// _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src     // sign = sign(src)\
/// _MM_MANT_SIGN_zero    // sign = 0\
/// _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        // imm8 encoding: SIGN occupies bits 3:2, NORM bits 1:0.
        // All-ones writemask: the unmasked variant operates on every lane.
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2     // interval [1, 2)\
/// _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src     // sign = sign(src)\
/// _MM_MANT_SIGN_zero    // sign = 0\
/// _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        // imm8 encoding: SIGN occupies bits 3:2, NORM bits 1:0.
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2     // interval [1, 2)\
/// _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src     // sign = sign(src)\
/// _MM_MANT_SIGN_zero    // sign = 0\
/// _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        // imm8 encoding: SIGN occupies bits 3:2, NORM bits 1:0.
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
/// This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2     // interval [1, 2)\
/// _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src     // sign = sign(src)\
/// _MM_MANT_SIGN_zero    // sign = 0\
/// _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        // imm8 encoding: SIGN occupies bits 3:2, NORM bits 1:0.
        // All-ones writemask: the unmasked variant operates on every lane.
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2     // interval [1, 2)\
/// _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src     // sign = sign(src)\
/// _MM_MANT_SIGN_zero    // sign = 0\
/// _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        // imm8 encoding: SIGN occupies bits 3:2, NORM bits 1:0.
        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2     // interval [1, 2)\
/// _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src     // sign = sign(src)\
/// _MM_MANT_SIGN_zero    // sign = 0\
/// _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        // imm8 encoding: SIGN occupies bits 3:2, NORM bits 1:0.
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2dq( + a.as_f32x16(), + i32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2dq( + a.as_f32x16(), + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2dq( + a.as_f32x16(), + i32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { + let convert = _mm256_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i { + unsafe { + let convert = _mm256_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { + let convert = _mm_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i { + unsafe { + let convert = _mm_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2udq( + a.as_f32x16(), + u32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2udq( + a.as_f32x16(), + src.as_u32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2udq( + a.as_f32x16(), + u32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i { + unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm_cvtps_epu32(a: __m128) -> __m128i { + unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd))] +pub fn _mm512_cvtps_pd(a: __m256) -> __m512d { + unsafe { + transmute(vcvtps2pd( + a.as_f32x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd))] +pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d { + unsafe { + transmute(vcvtps2pd( + a.as_f32x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd))] +pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d { + unsafe { + transmute(vcvtps2pd( + a.as_f32x8(), + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd))] +pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d { + unsafe { + transmute(vcvtps2pd( + _mm512_castps512_ps256(v2).as_f32x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd))] +pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d { + unsafe { + transmute(vcvtps2pd( + _mm512_castps512_ps256(v2).as_f32x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 { + unsafe { + transmute(vcvtpd2ps( + a.as_f64x8(), + f32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 { + unsafe { + transmute(vcvtpd2ps( + a.as_f64x8(), + src.as_f32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 { + unsafe { + transmute(vcvtpd2ps( + a.as_f64x8(), + f32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 { + unsafe { + let convert = _mm256_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 { + unsafe { + let convert = _mm256_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 { + unsafe { + let convert = _mm_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { + unsafe { + let convert = _mm_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2dq( + a.as_f64x8(), + i32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2dq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2dq( + a.as_f64x8(), + i32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { + let convert = _mm256_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { + let convert = _mm256_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { + let convert = _mm_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { + let convert = _mm_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2udq( + a.as_f64x8(), + u32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2udq( + a.as_f64x8(), + src.as_u32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2udq( + a.as_f64x8(), + u32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) } +} + +/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 { + unsafe { + let r: f32x8 = vcvtpd2ps( + v2.as_f64x8(), + f32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + ); + simd_shuffle!( + r, + f32x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } +} + +/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 { + unsafe { + let r: f32x8 = vcvtpd2ps( + v2.as_f64x8(), + _mm512_castps512_ps256(src).as_f32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + ); + simd_shuffle!( + r, + f32x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i8x16(); + transmute::(simd_cast(a)) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } +} + +/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbd))] +pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i8x16(); + let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v64)) + } +} + +/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } +} + +/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } +} + +/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } +} + +/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } +} + +/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i { + unsafe { + let a = a.as_u8x16(); + transmute::(simd_cast(a)) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } +} + +/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } +} + +/// Zero extend packed unsigned 8-bit integers in th elow 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_u8x16(); + let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v64)) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } +} + +/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i16x16(); + transmute::(simd_cast(a)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } +} + +/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwd))] +pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i16x8(); + transmute::(simd_cast(a)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } +} + +/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxwq))] +pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i { + unsafe { + let a = a.as_u16x16(); + transmute::(simd_cast(a)) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_u16x8(); + transmute::(simd_cast(a)) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } +} + +/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } +} + +/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } +} + +/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } +} + +/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i32x8(); + transmute::(simd_cast(a)) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } +} + +/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsxdq))] +pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i { + unsafe { + let a = a.as_u32x8(); + transmute::(simd_cast(a)) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 { + unsafe { + let a = a.as_i32x16(); + transmute::(simd_cast(a)) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepi32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, src.as_f32x16())) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepi32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 { + unsafe { + let convert = _mm256_cvtepi32_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, convert, src.as_f32x8())) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 { + unsafe { + let convert = _mm256_cvtepi32_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, convert, f32x8::ZERO)) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtepi32_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, convert, src.as_f32x4())) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps))] +pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtepi32_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, convert, f32x4::ZERO)) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d { + unsafe { + let a = a.as_i32x8(); + transmute::(simd_cast(a)) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d { + unsafe { + let convert = _mm512_cvtepi32_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, convert, src.as_f64x8())) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d { + unsafe { + let convert = _mm512_cvtepi32_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d { + unsafe { + let convert = _mm256_cvtepi32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, src.as_f64x4())) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d { + unsafe { + let convert = _mm256_cvtepi32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let convert = _mm_cvtepi32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, src.as_f64x2())) + } +} + +/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2pd))] +pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let convert = _mm_cvtepi32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) + } +} + +/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtudq2ps))] +pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 { + unsafe { + let a = a.as_u32x16(); + transmute::(simd_cast(a)) + } +} + +/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtudq2ps))] +pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepu32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, src.as_f32x16())) + } +} + +/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2ps))]
+pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
+    unsafe {
+        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
+        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
+    unsafe {
+        let a = a.as_u32x8();
+        // Pin the lane type so `simd_cast` widens u32x8 -> f64x8.
+        transmute::<f64x8, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
+    unsafe {
+        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
+        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
+    unsafe {
+        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
+        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
+    unsafe {
+        let a = a.as_u32x4();
+        // Pin the lane type so `simd_cast` widens u32x4 -> f64x4.
+        transmute::<f64x4, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
+    unsafe {
+        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
+        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
+    unsafe {
+        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
+        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
+    unsafe {
+        let a = a.as_u32x4();
+        // Only the low two u32 lanes are converted.
+        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
+        // Pin the lane type so `simd_cast` widens u32x2 -> f64x2.
+        transmute::<f64x2, _>(simd_cast(u64))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
+    unsafe {
+        let convert = _mm_cvtepu32_pd(a).as_f64x2();
+        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
+    unsafe {
+        let convert = _mm_cvtepu32_pd(a).as_f64x2();
+        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
+    }
+}
+
+/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtdq2pd))]
+pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
+    unsafe {
+        let v2 = v2.as_i32x16();
+        // Only the low eight i32 lanes are converted.
+        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
+        // Pin the lane type so `simd_cast` widens i32x8 -> f64x8.
+        transmute::<f64x8, _>(simd_cast(v256))
+    }
+}
+
+/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtdq2pd))]
+pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
+    unsafe {
+        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
+        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
+    }
+}
+
+/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
+    unsafe {
+        let v2 = v2.as_u32x16();
+        // Only the low eight u32 lanes are converted.
+        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
+        // Pin the lane type so `simd_cast` widens u32x8 -> f64x8.
+        transmute::<f64x8, _>(simd_cast(v256))
+    }
+}
+
+/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
+    unsafe {
+        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
+        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
+    unsafe {
+        let a = a.as_i32x16();
+        // Pin the lane type so `simd_cast` truncates i32x16 -> i16x16.
+        transmute::<i16x16, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
+    unsafe {
+        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
+        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
+    unsafe {
+        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
+        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
+    unsafe {
+        let a = a.as_i32x8();
+        // Pin the lane type so `simd_cast` truncates i32x8 -> i16x8.
+        transmute::<i16x8, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+    unsafe {
+        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
+        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
+    unsafe {
+        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
+        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
+    // The result only fills the low 64 bits, so the intrinsic (not a plain
+    // `simd_cast`) is used with an all-ones mask.
+    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdw))]
+pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
+    unsafe {
+        let a = a.as_i32x16();
+        // Pin the lane type so `simd_cast` truncates i32x16 -> i8x16.
+        transmute::<i8x16, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
+    unsafe {
+        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
+        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
+    unsafe {
+        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
+        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
+    }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
+    // All eight lanes selected; the upper 64 bits of the result are zeroed
+    // by the intrinsic itself.
+    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
+    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
+}
+
+/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovdb))]
+pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
+    unsafe {
+        let a = a.as_i64x8();
+        // Pin the lane type so `simd_cast` truncates i64x8 -> i32x8.
+        transmute::<i32x8, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
+    unsafe {
+        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
+        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
+    unsafe {
+        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
+        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
+    unsafe {
+        let a = a.as_i64x4();
+        // Pin the lane type so `simd_cast` truncates i64x4 -> i32x4.
+        transmute::<i32x4, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+    unsafe {
+        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
+        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
+    unsafe {
+        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
+        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
+    // The result only fills the low 64 bits, so the intrinsic (not a plain
+    // `simd_cast`) is used with an all-ones mask.
+    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
+}
+
+/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqw))]
+pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
+    unsafe {
+        let a = a.as_i64x8();
+        // Pin the lane type so `simd_cast` truncates i64x8 -> i16x8.
+        transmute::<i16x8, _>(simd_cast(a))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqw))]
+pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
+    unsafe {
+        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
+        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
+    }
+}
+
+/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { + unsafe { + let convert = _mm512_cvtepi64_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) } +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) } +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) } +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) } +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) } +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) } +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqb))]
+pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
+    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
+    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
+    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
+    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
+    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i { + unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) } +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i { + unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i { + unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) } +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the 
rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundps_epi32( + src: __m512i, + k: __mmask16, + a: __m512, +) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let src = src.as_i32x16(); + let r = vcvtps2dq(a, src, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k 
(elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundps_epu32( + src: __m512i, + k: __mmask16, + a: __m512, +) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let src = src.as_u32x16(); + let r = vcvtps2udq(a, src, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : 
use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x8(); + let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x8(); + let src = src.as_f64x8(); + let r = vcvtps2pd(a, src, k, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x8(); + let r = vcvtps2pd(a, f64x8::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// 
* [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundpd_epi32( + src: __m256i, + k: __mmask8, + a: __m512d, +) -> __m256i { + unsafe { + 
static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let src = src.as_i32x8(); + let r = vcvtpd2dq(a, src, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | 
[`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundpd_epu32(a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundpd_epu32( + src: 
__m256i, + k: __mmask8, + a: __m512d, +) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let src = src.as_u32x8(); + let r = vcvtpd2udq(a, src, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | 
[`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundpd_ps(a: __m512d) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, 
assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundpd_ps( + src: __m256, + k: __mmask8, + a: __m512d, +) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let src = src.as_f32x8(); + let r = vcvtpd2ps(a, src, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | 
[`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundepi32_ps(a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_i32x16(); + let r = vcvtdq2ps(a, ROUNDING); + transmute(r) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundepi32_ps( + src: __m512, + k: __mmask16, + a: __m512i, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_i32x16(); + let r = vcvtdq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_i32x16(); + let r = vcvtdq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] 
parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundepu32_ps(a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_u32x16(); + let r = vcvtudq2ps(a, ROUNDING); + transmute(r) + } +} + +/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304) +#[inline] 
+#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundepu32_ps( + src: __m512, + k: __mmask16, + a: __m512i, +) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_u32x16(); + let r = vcvtudq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } +} + +/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_u32x16(); + let r = vcvtudq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store 
the results in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundps_ph(a: __m512) -> __m256i { + unsafe { + static_assert_extended_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * 
[`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundps_ph( + src: __m256i, + k: __mmask16, + a: __m512, +) -> __m256i { + unsafe { + static_assert_extended_rounding!(ROUNDING); + let a = a.as_f32x16(); + let src = src.as_i16x16(); + let r = vcvtps2ph(a, ROUNDING, src, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * 
[`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512) -> __m256i { + unsafe { + static_assert_extended_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352) +#[inline] +#[target_feature(enable = 
"avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_cvt_roundps_ph( + src: __m128i, + k: __mmask8, + a: __m256, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let src = src.as_i16x8(); + let r = vcvtps2ph256(a, IMM8, src, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements 
are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let src = src.as_i16x8(); + let r = vcvtps2ph128(a, IMM8, src, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see 
[`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn 
_mm512_cvtps_ph(a: __m512) -> __m256i { + unsafe { + static_assert_extended_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512) -> __m256i { + unsafe { + static_assert_extended_rounding!(ROUNDING); + let a = a.as_f32x16(); + let src = src.as_i16x16(); + let r = vcvtps2ph(a, ROUNDING, src, k); + transmute(r) + } +} + +/// Convert packed single-precision 
(32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> __m256i { + unsafe { + static_assert_extended_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be 
one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m256) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let src = src.as_i16x8(); + let r = vcvtps2ph256(a, IMM8, src, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = 
vcvtps2ph256(a, IMM8, i16x8::ZERO, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let src = src.as_i16x8(); + let r = vcvtps2ph128(a, IMM8, src, k); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); + transmute(r) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvt_roundph_ps(a: __m256i) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_i16x16(); + let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvt_roundph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_i16x16(); + let src = src.as_f32x16(); + let r = vcvtph2ps(a, src, k, SAE); + transmute(r) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_i16x16(); + let r = vcvtph2ps(a, f32x16::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 { + unsafe { + transmute(vcvtph2ps( + a.as_i16x16(), + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 { + unsafe { + transmute(vcvtph2ps( + a.as_i16x16(), + src.as_f32x16(), + k, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 { + unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 { + unsafe { + let convert = _mm256_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8())) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 { + unsafe { + let convert = _mm256_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO)) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvtt_roundps_epi32( + src: __m512i, + k: __mmask16, + a: __m512, +) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_i32x16(); + let r = vcvttps2dq(a, src, k, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2dq(a, i32x16::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvtt_roundps_epu32( + src: __m512i, + k: __mmask16, + a: __m512, +) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_u32x16(); + let r = vcvttps2udq(a, src, k, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2udq(a, u32x16::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvtt_roundpd_epi32( + src: __m256i, + k: __mmask8, + a: __m512d, +) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_i32x8(); + let r = vcvttpd2dq(a, src, k, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE); + transmute(r) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_cvtt_roundpd_epu32( + src: __m256i, + k: __mmask8, + a: __m512d, +) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_i32x8(); + let r = vcvttpd2udq(a, src, k, SAE); + transmute(r) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2dq( + a.as_f32x16(), + i32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2dq( + a.as_f32x16(), + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2dq( + a.as_f32x16(), + i32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2dq))]
+pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
+    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
+    unsafe {
+        transmute(vcvttps2udq(
+            a.as_f32x16(),
+            u32x16::ZERO,
+            0b11111111_11111111,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
+    unsafe {
+        transmute(vcvttps2udq(
+            a.as_f32x16(),
+            src.as_u32x16(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
+    unsafe {
+        transmute(vcvttps2udq(
+            a.as_f32x16(),
+            u32x16::ZERO,
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
+    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
+    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
+    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
+    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
+    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
+    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
+    unsafe {
+        static_assert_sae!(SAE);
+        let a = a.as_f64x8();
+        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
+        transmute(r)
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vcvttpd2dq))]
+pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
+    unsafe {
+        transmute(vcvttpd2dq(
+            a.as_f64x8(),
+            i32x8::ZERO,
+            0b11111111,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2dq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2dq( + a.as_f64x8(), + i32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2udq( + a.as_f64x8(), + i32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2udq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2udq( + a.as_f64x8(), + i32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) } +} + +/// Returns vector of type `__m512d` with all elements set to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxorps))] +pub fn _mm512_setzero_pd() -> __m512d { + // All-0 is a properly initialized __m512d + unsafe { const { mem::zeroed() } } +} + +/// Returns vector of type `__m512` with all elements set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxorps))] +pub fn _mm512_setzero_ps() -> __m512 { + // All-0 is a properly initialized __m512 + unsafe { const { mem::zeroed() } } +} + +/// Return vector of type `__m512` with all elements set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxorps))] +pub fn _mm512_setzero() -> __m512 { + // All-0 is a properly initialized __m512 + unsafe { const { mem::zeroed() } } +} + +/// Returns vector of type `__m512i` with all elements set to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxorps))] +pub fn _mm512_setzero_si512() -> __m512i { + // All-0 is a properly initialized __m512i + unsafe { const { mem::zeroed() } } +} + +/// Return vector of type `__m512i` with all elements set to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxorps))] +pub fn _mm512_setzero_epi32() -> __m512i { + // All-0 is a properly initialized __m512i + unsafe { const { mem::zeroed() } } +} + +/// Sets packed 32-bit integers in `dst` with the supplied values in reverse +/// order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_setr_epi32( + e15: i32, + e14: i32, + e13: i32, + e12: i32, + e11: i32, + e10: i32, + e9: i32, + e8: i32, + e7: i32, + e6: i32, + e5: i32, + e4: i32, + e3: i32, + e2: i32, + e1: i32, + e0: i32, +) -> __m512i { + unsafe { + let r = i32x16::new( + e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, + ); + transmute(r) + } +} + +/// Set packed 8-bit integers in dst with the supplied values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set_epi8( + e63: i8, + e62: i8, + e61: i8, + e60: i8, + e59: i8, + e58: i8, + e57: i8, + e56: i8, + e55: i8, + e54: i8, + e53: i8, + e52: i8, + e51: i8, + e50: i8, + e49: i8, + e48: i8, + e47: i8, + e46: i8, + e45: i8, + e44: i8, + e43: i8, + e42: i8, + e41: i8, + e40: i8, + e39: i8, + e38: i8, + e37: i8, + e36: i8, + e35: i8, + e34: i8, + e33: i8, + e32: i8, + e31: i8, + e30: i8, + e29: i8, + e28: i8, + e27: i8, + e26: i8, + e25: i8, + e24: i8, + e23: i8, + e22: i8, + e21: i8, + e20: i8, + e19: i8, + e18: i8, + e17: i8, + e16: i8, + e15: i8, + e14: i8, + e13: i8, + e12: i8, + e11: i8, + e10: i8, + e9: i8, + e8: i8, + e7: i8, + e6: i8, + e5: i8, + e4: i8, + e3: i8, + e2: i8, + e1: i8, + e0: i8, +) -> __m512i { + unsafe { + let r = i8x64::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, + e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, + e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, + e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63, + ); + transmute(r) + } +} + +/// Set packed 16-bit integers in dst with the supplied values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set_epi16( + e31: i16, + e30: i16, + e29: i16, + e28: i16, + e27: i16, + e26: i16, + e25: i16, + e24: i16, + e23: i16, + e22: i16, + e21: i16, + e20: i16, + e19: i16, + e18: i16, + e17: i16, + e16: i16, + e15: i16, + e14: i16, + e13: i16, + e12: i16, + e11: i16, + e10: i16, + e9: i16, + e8: i16, + e7: i16, + e6: i16, + e5: i16, + e4: i16, + e3: i16, + e2: i16, + e1: i16, + e0: i16, +) -> __m512i { + unsafe { + let r = i16x32::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, + e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, + ); + transmute(r) + } +} + +/// Set packed 32-bit integers in dst with the repeated 4 element sequence. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { + _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a) +} + +/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { + _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a) +} + +/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { + _mm512_set_pd(d, c, b, a, d, c, b, a) +} + +/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { + _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d) +} + +/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { + _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d) +} + +/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { + _mm512_set_pd(a, b, c, d, a, b, c, d) +} + +/// Set packed 64-bit integers in dst with the supplied values. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_set_epi64(
+    e0: i64,
+    e1: i64,
+    e2: i64,
+    e3: i64,
+    e4: i64,
+    e5: i64,
+    e6: i64,
+    e7: i64,
+) -> __m512i {
+    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
+}
+
+/// Set packed 64-bit integers in dst with the supplied values in reverse order.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_setr_epi64(
+    e0: i64,
+    e1: i64,
+    e2: i64,
+    e3: i64,
+    e4: i64,
+    e5: i64,
+    e6: i64,
+    e7: i64,
+) -> __m512i {
+    unsafe {
+        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+        transmute(r)
+    }
+}
+
+/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
+    offsets: __m256i,
+    slice: *const f64,
+) -> __m512d {
+    static_assert_imm8_scale!(SCALE);
+    let zero = f64x8::ZERO;
+    let neg_one = -1;
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i32x8();
+    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
+    transmute(r)
+}
+
+/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
+    src: __m512d,
+    mask: __mmask8,
+    offsets: __m256i,
+    slice: *const f64,
+) -> __m512d {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_f64x8();
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i32x8();
+    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
+    transmute(r)
+}
+
+/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
+    offsets: __m512i,
+    slice: *const f64,
+) -> __m512d {
+    static_assert_imm8_scale!(SCALE);
+    let zero = f64x8::ZERO;
+    let neg_one = -1;
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x8();
+    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
+    transmute(r)
+}
+
+/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
+    src: __m512d,
+    mask: __mmask8,
+    offsets: __m512i,
+    slice: *const f64,
+) -> __m512d {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_f64x8();
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x8();
+    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
+    transmute(r)
+}
+
+/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
+    static_assert_imm8_scale!(SCALE);
+    let zero = f32x8::ZERO;
+    let neg_one = -1;
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x8();
+    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
+    transmute(r)
+}
+
+/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
+    src: __m256,
+    mask: __mmask8,
+    offsets: __m512i,
+    slice: *const f32,
+) -> __m256 {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_f32x8();
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i64x8();
+    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
+    transmute(r)
+}
+
+/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
+    static_assert_imm8_scale!(SCALE);
+    let zero = f32x16::ZERO;
+    let neg_one = -1;
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i32x16();
+    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
+    transmute(r)
+}
+
+/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
+    src: __m512,
+    mask: __mmask16,
+    offsets: __m512i,
+    slice: *const f32,
+) -> __m512 {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_f32x16();
+    let slice = slice as *const i8;
+    let offsets = offsets.as_i32x16();
+    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
+    transmute(r)
+}
+
+/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_i32gather_epi32( + offsets: __m512i, + slice: *const i32, +) -> __m512i { + static_assert_imm8_scale!(SCALE); + let zero = i32x16::ZERO; + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i32x16(); + let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE); + transmute(r) +} + +/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i32gather_epi32( + src: __m512i, + mask: __mmask16, + offsets: __m512i, + slice: *const i32, +) -> __m512i { + static_assert_imm8_scale!(SCALE); + let src = src.as_i32x16(); + let mask = mask as i16; + let slice = slice as *const i8; + let offsets = offsets.as_i32x16(); + let r = vpgatherdd(src, slice, offsets, mask, SCALE); + transmute(r) +} + +/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). 
Gathered elements are merged into dst. scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_i32gather_epi64( + offsets: __m256i, + slice: *const i64, +) -> __m512i { + static_assert_imm8_scale!(SCALE); + let zero = i64x8::ZERO; + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i32x8(); + let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE); + transmute(r) +} + +/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i32gather_epi64( + src: __m512i, + mask: __mmask8, + offsets: __m256i, + slice: *const i64, +) -> __m512i { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *const i8; + let offsets = offsets.as_i32x8(); + let r = vpgatherdq(src, slice, offsets, mask, SCALE); + transmute(r) +} + +/// Gather 64-bit integers from memory using 64-bit indices. 
64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_i64gather_epi64( + offsets: __m512i, + slice: *const i64, +) -> __m512i { + static_assert_imm8_scale!(SCALE); + let zero = i64x8::ZERO; + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE); + transmute(r) +} + +/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i64gather_epi64( + src: __m512i, + mask: __mmask8, + offsets: __m512i, + slice: *const i64, +) -> __m512i { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + let r = vpgatherqq(src, slice, offsets, mask, SCALE); + transmute(r) +} + +/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_i64gather_epi32( + offsets: __m512i, + slice: *const i32, +) -> __m256i { + static_assert_imm8_scale!(SCALE); + let zeros = i32x8::ZERO; + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE); + transmute(r) +} + +/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). 
Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i64gather_epi32( + src: __m256i, + mask: __mmask8, + offsets: __m512i, + slice: *const i32, +) -> __m256i { + static_assert_imm8_scale!(SCALE); + let src = src.as_i32x8(); + let mask = mask as i8; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + let r = vpgatherqd(src, slice, offsets, mask, SCALE); + transmute(r) +} + +/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i32scatter_pd( + slice: *mut f64, + offsets: __m256i, + src: __m512d, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + vscatterdpd(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 
64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i32scatter_pd( + slice: *mut f64, + mask: __mmask8, + offsets: __m256i, + src: __m512d, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f64x8(); + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + vscatterdpd(slice, mask as i8, offsets, src, SCALE); +} + +/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i64scatter_pd( + slice: *mut f64, + offsets: __m512i, + src: __m512d, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vscatterqpd(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 
64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i64scatter_pd( + slice: *mut f64, + mask: __mmask8, + offsets: __m512i, + src: __m512d, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f64x8(); + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vscatterqpd(slice, mask as i8, offsets, src, SCALE); +} + +/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i32scatter_ps( + slice: *mut f32, + offsets: __m512i, + src: __m512, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f32x16(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + vscatterdps(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 
32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i32scatter_ps( + slice: *mut f32, + mask: __mmask16, + offsets: __m512i, + src: __m512, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f32x16(); + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + vscatterdps(slice, mask as i16, offsets, src, SCALE); +} + +/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i64scatter_ps( + slice: *mut f32, + offsets: __m512i, + src: __m256, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f32x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vscatterqps(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i64scatter_ps( + slice: *mut f32, + mask: __mmask8, + offsets: __m512i, + src: __m256, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_f32x8(); + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vscatterqps(slice, mask as i8, offsets, src, SCALE); +} + +/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i32scatter_epi64( + slice: *mut i64, + offsets: __m256i, + src: __m512i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + vpscatterdq(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i32scatter_epi64( + slice: *mut i64, + mask: __mmask8, + offsets: __m256i, + src: __m512i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + vpscatterdq(slice, mask, offsets, src, SCALE); +} + +/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i64scatter_epi64( + slice: *mut i64, + offsets: __m512i, + src: __m512i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vpscatterqq(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i64scatter_epi64( + slice: *mut i64, + mask: __mmask8, + offsets: __m512i, + src: __m512i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vpscatterqq(slice, mask, offsets, src, SCALE); +} + +/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i32scatter_epi32( + slice: *mut i32, + offsets: __m512i, + src: __m512i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i32x16(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + vpscatterdd(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i32scatter_epi32( + slice: *mut i32, + mask: __mmask16, + offsets: __m512i, + src: __m512i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i32x16(); + let mask = mask as i16; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + vpscatterdd(slice, mask, offsets, src, SCALE); +} + +/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_i64scatter_epi32( + slice: *mut i32, + offsets: __m512i, + src: __m256i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i32x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vpscatterqd(slice, neg_one, offsets, src, SCALE); +} + +/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_i64scatter_epi32( + slice: *mut i32, + mask: __mmask8, + offsets: __m512i, + src: __m256i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i32x8(); + let mask = mask as i8; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + vpscatterqd(slice, mask, offsets, src, SCALE); +} + +/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer +/// indices stored in the lower half of vindex scaled by scale and stores them in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_i32logather_epi64( + vindex: __m512i, + base_addr: *const i64, +) -> __m512i { + _mm512_i32gather_epi64::(_mm512_castsi512_si256(vindex), base_addr) +} + +/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer +/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_i32logather_epi64( + src: __m512i, + k: __mmask8, + vindex: __m512i, + base_addr: *const i64, +) -> __m512i { + _mm512_mask_i32gather_epi64::(src, k, _mm512_castsi512_si256(vindex), base_addr) +} + +/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr +/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_i32logather_pd( + vindex: __m512i, + base_addr: *const f64, +) -> __m512d { + _mm512_i32gather_pd::(_mm512_castsi512_si256(vindex), base_addr) +} + +/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr +/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst +/// using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_i32logather_pd( + src: __m512d, + k: __mmask8, + vindex: __m512i, + base_addr: *const f64, +) -> __m512d { + _mm512_mask_i32gather_pd::(src, k, _mm512_castsi512_si256(vindex), base_addr) +} + +/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in the lower half of vindex scaled by scale. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_i32loscatter_epi64( + base_addr: *mut i64, + vindex: __m512i, + a: __m512i, +) { + _mm512_i32scatter_epi64::(base_addr, _mm512_castsi512_si256(vindex), a) +} + +/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_i32loscatter_epi64( + base_addr: *mut i64, + k: __mmask8, + vindex: __m512i, + a: __m512i, +) { + _mm512_mask_i32scatter_epi64::(base_addr, k, _mm512_castsi512_si256(vindex), a) +} + +/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_i32loscatter_pd( + base_addr: *mut f64, + vindex: __m512i, + a: __m512d, +) { + _mm512_i32scatter_pd::(base_addr, _mm512_castsi512_si256(vindex), a) +} + +/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k +/// (elements whose corresponding mask bit is not set are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_i32loscatter_pd( + base_addr: *mut f64, + k: __mmask8, + vindex: __m512i, + a: __m512d, +) { + _mm512_mask_i32scatter_pd::(base_addr, k, _mm512_castsi512_si256(vindex), a) +} + +/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_i32scatter_epi32( + base_addr: *mut i32, + vindex: __m256i, + a: __m256i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdd_256(base_addr as 
_, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE) +} + +/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i32scatter_epi32( + base_addr: *mut i32, + k: __mmask8, + vindex: __m256i, + a: __m256i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE) +} + +/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_i32scatter_epi64( + slice: *mut i64, + offsets: __m128i, + src: __m256i, +) { + static_assert_imm8_scale!(SCALE); + let src = src.as_i64x4(); + let slice = slice as *mut i8; + let offsets = offsets.as_i32x4(); + vpscatterdq_256(slice, 0xff, offsets, src, SCALE); +} + +/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i32scatter_epi64( + base_addr: *mut i64, + k: __mmask8, + vindex: __m128i, + a: __m256i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE) +} + +/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = 
"1.89")] +pub unsafe fn _mm256_i32scatter_pd( + base_addr: *mut f64, + vindex: __m128i, + a: __m256d, +) { + static_assert_imm8_scale!(SCALE); + vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE) +} + +/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i32scatter_pd( + base_addr: *mut f64, + k: __mmask8, + vindex: __m128i, + a: __m256d, +) { + static_assert_imm8_scale!(SCALE); + vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE) +} + +/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_i32scatter_ps( + base_addr: *mut f32, + vindex: __m256i, + a: __m256, +) { + static_assert_imm8_scale!(SCALE); + vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE) +} + +/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by 
scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i32scatter_ps( + base_addr: *mut f32, + k: __mmask8, + vindex: __m256i, + a: __m256, +) { + static_assert_imm8_scale!(SCALE); + vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE) +} + +/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_i64scatter_epi32( + base_addr: *mut i32, + vindex: __m256i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE) +} + +/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i64scatter_epi32( + base_addr: *mut i32, + k: __mmask8, + vindex: __m256i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE) +} + +/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_i64scatter_epi64( + base_addr: *mut i64, + vindex: __m256i, + a: __m256i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE) +} + +/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i64scatter_epi64( + base_addr: *mut i64, + k: __mmask8, + vindex: __m256i, + a: __m256i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE) +} + +/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_i64scatter_pd( + base_addr: *mut f64, + vindex: __m256i, + a: __m256d, +) { + static_assert_imm8_scale!(SCALE); + vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE) +} + +/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i64scatter_pd( + base_addr: *mut f64, + k: __mmask8, + vindex: __m256i, + a: __m256d, +) { + static_assert_imm8_scale!(SCALE); + vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE) +} + +/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_i64scatter_ps( + base_addr: *mut f32, + vindex: __m256i, + a: __m128, +) { + static_assert_imm8_scale!(SCALE); + vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE) +} + +/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_i64scatter_ps( + base_addr: *mut f32, + k: __mmask8, + vindex: __m256i, + a: __m128, +) { + static_assert_imm8_scale!(SCALE); + vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE) +} + +/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i32gather_epi32( + src: __m256i, + k: __mmask8, + vindex: __m256i, + base_addr: *const i32, +) -> __m256i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherdd_256( + src.as_i32x8(), + base_addr as _, + vindex.as_i32x8(), + k, + SCALE, + )) +} + +/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i32gather_epi64( + src: __m256i, + k: __mmask8, + vindex: __m128i, + base_addr: *const i64, +) -> __m256i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherdq_256( + src.as_i64x4(), + base_addr as _, + vindex.as_i32x4(), + k, + SCALE, + )) +} + +/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i32gather_pd( + src: __m256d, + k: __mmask8, + vindex: __m128i, + base_addr: *const f64, +) -> __m256d { + static_assert_imm8_scale!(SCALE); + transmute(vgatherdpd_256( + src.as_f64x4(), + base_addr as _, + vindex.as_i32x4(), + k, + SCALE, + )) +} + +/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i32gather_ps( + src: __m256, + k: __mmask8, + vindex: __m256i, + base_addr: *const f32, +) -> __m256 { + static_assert_imm8_scale!(SCALE); + transmute(vgatherdps_256( + src.as_f32x8(), + base_addr as _, + vindex.as_i32x8(), + k, + SCALE, + )) +} + +/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i64gather_epi32( + src: __m128i, + k: __mmask8, + vindex: __m256i, + base_addr: *const i32, +) -> __m128i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherqd_256( + src.as_i32x4(), + base_addr as _, + vindex.as_i64x4(), + k, + SCALE, + )) +} + +/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i64gather_epi64( + src: __m256i, + k: __mmask8, + vindex: __m256i, + base_addr: *const i64, +) -> __m256i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherqq_256( + src.as_i64x4(), + base_addr as _, + vindex.as_i64x4(), + k, + SCALE, + )) +} + +/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i64gather_pd( + src: __m256d, + k: __mmask8, + vindex: __m256i, + base_addr: *const f64, +) -> __m256d { + static_assert_imm8_scale!(SCALE); + transmute(vgatherqpd_256( + src.as_f64x4(), + base_addr as _, + vindex.as_i64x4(), + k, + SCALE, + )) +} + +/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mmask_i64gather_ps( + src: __m128, + k: __mmask8, + vindex: __m256i, + base_addr: *const f32, +) -> __m128 { + static_assert_imm8_scale!(SCALE); + transmute(vgatherqps_256( + src.as_f32x4(), + base_addr as _, + vindex.as_i64x4(), + k, + SCALE, + )) +} + +/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i32scatter_epi32( + base_addr: *mut i32, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE) +} + +/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i32scatter_epi32( + base_addr: *mut i32, + k: __mmask8, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE) +} + +/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i32scatter_epi64( + base_addr: *mut i64, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE) +} + +/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i32scatter_epi64( + base_addr: *mut i64, + k: __mmask8, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE) +} + +/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i32scatter_pd( + base_addr: *mut f64, + vindex: __m128i, + a: __m128d, +) { + static_assert_imm8_scale!(SCALE); + vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE) +} + +/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i32scatter_pd( + base_addr: *mut f64, + k: __mmask8, + vindex: __m128i, + a: __m128d, +) { + static_assert_imm8_scale!(SCALE); + vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE) +} + +/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i32scatter_ps(base_addr: *mut f32, vindex: __m128i, a: __m128) { + static_assert_imm8_scale!(SCALE); + vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE) +} + +/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i32scatter_ps( + base_addr: *mut f32, + k: __mmask8, + vindex: __m128i, + a: __m128, +) { + static_assert_imm8_scale!(SCALE); + vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE) +} + +/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i64scatter_epi32( + base_addr: *mut i32, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE) +} + +/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i64scatter_epi32( + base_addr: *mut i32, + k: __mmask8, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE) +} + +/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i64scatter_epi64( + base_addr: *mut i64, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE) +} + +/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set +/// are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i64scatter_epi64( + base_addr: *mut i64, + k: __mmask8, + vindex: __m128i, + a: __m128i, +) { + static_assert_imm8_scale!(SCALE); + vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE) +} + +/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i64scatter_pd( + base_addr: *mut f64, + vindex: __m128i, + a: __m128d, +) { + static_assert_imm8_scale!(SCALE); + vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE) +} + +/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i64scatter_pd( + base_addr: *mut f64, + k: __mmask8, + vindex: __m128i, + a: __m128d, +) { + static_assert_imm8_scale!(SCALE); + vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE) +} + +/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_i64scatter_ps(base_addr: *mut f32, vindex: __m128i, a: __m128) { + static_assert_imm8_scale!(SCALE); + vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE) +} + +/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding +/// mask bit is not set are not written to memory). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_i64scatter_ps( + base_addr: *mut f32, + k: __mmask8, + vindex: __m128i, + a: __m128, +) { + 
static_assert_imm8_scale!(SCALE); + vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE) +} + +/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i32gather_epi32( + src: __m128i, + k: __mmask8, + vindex: __m128i, + base_addr: *const i32, +) -> __m128i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherdd_128( + src.as_i32x4(), + base_addr as _, + vindex.as_i32x4(), + k, + SCALE, + )) +} + +/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i32gather_epi64( + src: __m128i, + k: __mmask8, + vindex: __m128i, + base_addr: *const i64, +) -> __m128i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherdq_128( + src.as_i64x2(), + base_addr as _, + vindex.as_i32x4(), + k, + SCALE, + )) +} + +/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i32gather_pd( + src: __m128d, + k: __mmask8, + vindex: __m128i, + base_addr: *const f64, +) -> __m128d { + static_assert_imm8_scale!(SCALE); + transmute(vgatherdpd_128( + src.as_f64x2(), + base_addr as _, + vindex.as_i32x4(), + k, + SCALE, + )) +} + +/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr +/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i32gather_ps( + src: __m128, + k: __mmask8, + vindex: __m128i, + base_addr: *const f32, +) -> __m128 { + static_assert_imm8_scale!(SCALE); + transmute(vgatherdps_128( + src.as_f32x4(), + base_addr as _, + vindex.as_i32x4(), + k, + SCALE, + )) +} + +/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i64gather_epi32( + src: __m128i, + k: __mmask8, + vindex: __m128i, + base_addr: *const i32, +) -> __m128i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherqd_128( + src.as_i32x4(), + base_addr as _, + vindex.as_i64x2(), + k, + SCALE, + )) +} + +/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer +/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i64gather_epi64( + src: __m128i, + k: __mmask8, + vindex: __m128i, + base_addr: *const i64, +) -> __m128i { + static_assert_imm8_scale!(SCALE); + transmute(vpgatherqq_128( + src.as_i64x2(), + base_addr as _, + vindex.as_i64x2(), + k, + SCALE, + )) +} + +/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i64gather_pd( + src: __m128d, + k: __mmask8, + vindex: __m128i, + base_addr: *const f64, +) -> __m128d { + static_assert_imm8_scale!(SCALE); + transmute(vgatherqpd_128( + src.as_f64x2(), + base_addr as _, + vindex.as_i64x2(), + k, + SCALE, + )) +} + +/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr +/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied +/// from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mmask_i64gather_ps( + src: __m128, + k: __mmask8, + vindex: __m128i, + base_addr: *const f32, +) -> __m128 { + static_assert_imm8_scale!(SCALE); + transmute(vgatherqps_128( + src.as_f32x4(), + base_addr as _, + vindex.as_i64x2(), + k, + SCALE, + )) +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) } +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) } +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) } +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) } +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) } +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) } +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) } +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) } +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) } +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) } +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) } +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) } +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) } +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) } +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) } +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) } +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) } +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) } +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) } +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) } +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) } +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) } +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) } +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) } +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) { + vcompressstored(base_addr as *mut _, a.as_i32x16(), k) +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) { + vcompressstored256(base_addr as *mut _, a.as_i32x8(), k) +} + +/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressd))] +pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) { + vcompressstored128(base_addr as *mut _, a.as_i32x4(), k) +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) { + vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k) +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) { + vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k) +} + +/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressq))] +pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) { + vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k) +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) { + vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k) +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) { + vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k) +} + +/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompressps))] +pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) { + vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k) +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) { + vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k) +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) { + vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k) +} + +/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcompresspd))] +pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) { + vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k) +} + +/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandd))] +pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) } +} + +/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandd))] +pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) } +} + +/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandd))] +pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) } +} + +/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandd))] +pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) } +} + +/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandd))] +pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) } +} + +/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandd))] +pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) } +} + +/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandq))] +pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) } +} + +/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandq))] +pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) } +} + +/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandq))] +pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) } +} + +/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandq))] +pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) } +} + +/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandq))] +pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) } +} + +/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandq))] +pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) } +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandps))] +pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) } +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandps))] +pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) } +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandps))] +pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) } +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandps))] +pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) } +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandps))] +pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) } +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandps))] +pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) } +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandpd))] +pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) } +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandpd))] +pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) } +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandpd))] +pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) } +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandpd))] +pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) } +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandpd))] +pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) } +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vexpandpd))] +pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_rol_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprold(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprold(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprold(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_rol_epi32(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprold256(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprold256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprold256(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_rol_epi32(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprold128(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprold128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprold128(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_ror_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprord(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprord(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprord(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_ror_epi32(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprord256(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprord256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprord256(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_ror_epi32(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprord128(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprord128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprord128(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_rol_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprolq(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprolq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprolq(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_rol_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprolq256(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprolq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprolq256(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_rol_epi64(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprolq128(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprolq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprolq128(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_ror_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprorq(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprorq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprorq(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_ror_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprorq256(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprorq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprorq256(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_ror_epi64(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprorq128(a, IMM8); + transmute(r) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprorq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) + } +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprorq128(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_slli_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8))) + } + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 32 { + u32x16::ZERO + } else { + simd_shl(a.as_u32x16(), u32x16::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u32x16())) + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8)); + transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) + } + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_slli_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x8::ZERO + } else { + simd_shl(a.as_u32x8(), u32x8::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x8())) + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x8::ZERO)) + } + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_slli_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x4::ZERO + } else { + simd_shl(a.as_u32x4(), u32x4::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x4())) + } +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm_setzero_si128() + } else { + let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x4::ZERO)) + } + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_srli_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8))) + } + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 32 { + u32x16::ZERO + } else { + simd_shr(a.as_u32x16(), u32x16::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u32x16())) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8)); + transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) + } + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_srli_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x8::ZERO + } else { + simd_shr(a.as_u32x8(), u32x8::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x8())) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x8::ZERO)) + } + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_srli_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x4::ZERO + } else { + simd_shr(a.as_u32x4(), u32x4::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x4())) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm_setzero_si128() + } else { + let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x4::ZERO)) + } + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_slli_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))) + } + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 64 { + u64x8::ZERO + } else { + simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u64x8())) + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) + } + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_slli_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x4::ZERO + } else { + simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x4())) + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x4::ZERO)) + } + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_slli_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x2::ZERO + } else { + simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x2())) + } +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm_setzero_si128() + } else { + let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x2::ZERO)) + } + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_srli_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))) + } + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 64 { + u64x8::ZERO + } else { + simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u64x8())) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) + } + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_srli_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x4::ZERO + } else { + simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x4())) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x4::ZERO)) + } + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_srli_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x2::ZERO + } else { + simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x2())) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm_setzero_si128() + } else { + let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x2::ZERO)) + } + } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpslld))] +pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_srai_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32))) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_srai_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64))) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_srai_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64))) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_srai_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_srai_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_srai_epi64(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64))) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_srai_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, i32x16::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let rol = _mm256_rolv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, rol, src.as_i32x8())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let rol = _mm256_rolv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, rol, i32x8::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let rol = _mm_rolv_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, rol, src.as_i32x4())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let rol = _mm_rolv_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, rol, i32x4::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, i32x16::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let ror = _mm256_rorv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, ror, src.as_i32x8())) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let ror = _mm256_rorv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, ror, i32x8::ZERO)) + } +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let ror = _mm_rorv_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
+    }
+}
+
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let ror = _mm_rorv_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let rol = _mm_rolv_epi64(a, b).as_i64x2();
+        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let rol = _mm_rolv_epi64(a, b).as_i64x2();
+        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let ror = _mm_rorv_epi64(a, b).as_i64x2();
+        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
+    }
+}
+
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let ror = _mm_rorv_epi64(a, b).as_i64x2();
+        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
+    }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
+    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
+        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+    }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
+        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
+    }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
+        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
+    }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
+        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
+    }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_sllv_epi32(a, count).as_i32x4();
+        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
+    }
+}
+
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_sllv_epi32(a, count).as_i32x4();
+        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
+    }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
+    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
+        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+    }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
+        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
+    }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
+        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
+    }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
+        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
+    }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_srlv_epi32(a, count).as_i32x4();
+        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
+    }
+}
+
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_srlv_epi32(a, count).as_i32x4();
+        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
+    }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+    }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
+    }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
+        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
+    }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
+        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
+    }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_sllv_epi64(a, count).as_i64x2();
+        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
+    }
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_sllv_epi64(a, count).as_i64x2();
+        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
+    }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+    }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
+    }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
+        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
+    }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
+        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
+    }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let shf = _mm_srlv_epi64(a, count).as_i64x2();
+        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
+    }
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_permute_ps(a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_permute_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_permute_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let r = _mm256_permute_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_permute_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let r = _mm256_permute_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_permute_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let r = _mm_permute_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_permute_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let r = _mm_permute_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_permute_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1), + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 2, + ((MASK as u32 >> 4) & 0b1) + 4, + ((MASK as u32 >> 5) & 0b1) + 4, + ((MASK as u32 >> 6) & 0b1) + 6, + ((MASK as u32 >> 7) & 0b1) + 6, + ], + ) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_permute_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_permute_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 4); + let r = _mm256_permute_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_permute_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 4); + let r = _mm256_permute_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_permute_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm_permute_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm_permute_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) + } +} + +/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq +#[rustc_legacy_const_generics(1)] +pub fn _mm512_permutex_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ) + } +} + +/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_permutex_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permutex_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + } +} + +/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permutex_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) + } +} + +/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq +#[rustc_legacy_const_generics(1)] +pub fn _mm256_permutex_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + ], + ) + } +} + +/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_permutex_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } +} + +/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_permutex_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd +#[rustc_legacy_const_generics(1)] +pub fn _mm512_permutex_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_permutex_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let r = _mm512_permutex_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let r = _mm512_permutex_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd +#[rustc_legacy_const_generics(1)] +pub fn _mm256_permutex_pd(a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + ], + ) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_permutex_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_permutex_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermd +pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermd))] +pub fn _mm512_mask_permutevar_epi32( + src: __m512i, + k: __mmask16, + idx: __m512i, + a: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutevar_epi32(idx, a).as_i32x16(); + transmute(simd_select_bitmask(k, permute, src.as_i32x16())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 { + unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 { + unsafe { + let permute = _mm512_permutevar_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, src.as_f32x16())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 { + unsafe { + let permute = _mm512_permutevar_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 { + unsafe { + let permute = _mm256_permutevar_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, src.as_f32x8())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 { + unsafe { + let permute = _mm256_permutevar_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 { + unsafe { + let permute = _mm_permutevar_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, src.as_f32x4())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilps))] +pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 { + unsafe { + let permute = _mm_permutevar_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d { + unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d { + unsafe { + let permute = _mm512_permutevar_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, src.as_f64x8())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d { + unsafe { + let permute = _mm512_permutevar_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d { + unsafe { + let permute = _mm256_permutevar_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, src.as_f64x4())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d { + unsafe { + let permute = _mm256_permutevar_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d { + unsafe { + let permute = _mm_permutevar_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, src.as_f64x2())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermilpd))] +pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d { + unsafe { + let permute = _mm_permutevar_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) + } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermd +pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermd))] +pub fn _mm512_mask_permutexvar_epi32( + src: __m512i, + k: __mmask16, + idx: __m512i, + a: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); + transmute(simd_select_bitmask(k, permute, src.as_i32x16())) + } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermd))] +pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); + transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) + } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermd +pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i { + _mm256_permutevar8x32_epi32(a, idx) // llvm use llvm.x86.avx2.permd +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermd))] +pub fn _mm256_mask_permutexvar_epi32( + src: __m256i, + k: __mmask8, + idx: __m256i, + a: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); + transmute(simd_select_bitmask(k, permute, src.as_i32x8())) + } +} + +/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermd))] +pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); + transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) + } +} + +/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermq +pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) } +} + +/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermq))] +pub fn _mm512_mask_permutexvar_epi64( + src: __m512i, + k: __mmask8, + idx: __m512i, + a: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); + transmute(simd_select_bitmask(k, permute, src.as_i64x8())) + } +} + +/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermq))] +pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); + transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) + } +} + +/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermq +pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i { + unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) } +} + +/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermq))] +pub fn _mm256_mask_permutexvar_epi64( + src: __m256i, + k: __mmask8, + idx: __m256i, + a: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); + transmute(simd_select_bitmask(k, permute, src.as_i64x4())) + } +} + +/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermq))] +pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); + transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermps))] +pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 { + unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermps))] +pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); + transmute(simd_select_bitmask(k, permute, src.as_f32x16())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermps))] +pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermps))] +pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 { + _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps +} + +/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermps))] +pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); + transmute(simd_select_bitmask(k, permute, src.as_f32x8())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermps))] +pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermpd))] +pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d { + unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermpd))] +pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); + transmute(simd_select_bitmask(k, permute, src.as_f64x8())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermpd))] +pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermpd))] +pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d { + unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermpd))] +pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); + transmute(simd_select_bitmask(k, permute, src.as_f64x4())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermpd))] +pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d +pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2d))] +pub fn _mm512_mask_permutex2var_epi32( + a: __m512i, + k: __mmask16, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); + transmute(simd_select_bitmask(k, permute, a.as_i32x16())) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d +pub fn _mm512_maskz_permutex2var_epi32( + k: __mmask16, + a: __m512i, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); + transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2d))] +pub fn _mm512_mask2_permutex2var_epi32( + a: __m512i, + idx: __m512i, + k: __mmask16, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); + transmute(simd_select_bitmask(k, permute, idx.as_i32x16())) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d +pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2d))] +pub fn _mm256_mask_permutex2var_epi32( + a: __m256i, + k: __mmask8, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); + transmute(simd_select_bitmask(k, permute, a.as_i32x8())) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d +pub fn _mm256_maskz_permutex2var_epi32( + k: __mmask8, + a: __m256i, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); + transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2d))] +pub fn _mm256_mask2_permutex2var_epi32( + a: __m256i, + idx: __m256i, + k: __mmask8, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); + transmute(simd_select_bitmask(k, permute, idx.as_i32x8())) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d +pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2d))] +pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); + transmute(simd_select_bitmask(k, permute, a.as_i32x4())) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d +pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); + transmute(simd_select_bitmask(k, permute, i32x4::ZERO)) + } +} + +/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2d))] +pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); + transmute(simd_select_bitmask(k, permute, idx.as_i32x4())) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q +pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2q))] +pub fn _mm512_mask_permutex2var_epi64( + a: __m512i, + k: __mmask8, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); + transmute(simd_select_bitmask(k, permute, a.as_i64x8())) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q +pub fn _mm512_maskz_permutex2var_epi64( + k: __mmask8, + a: __m512i, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); + transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2q))] +pub fn _mm512_mask2_permutex2var_epi64( + a: __m512i, + idx: __m512i, + k: __mmask8, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); + transmute(simd_select_bitmask(k, permute, idx.as_i64x8())) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q +pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2q))] +pub fn _mm256_mask_permutex2var_epi64( + a: __m256i, + k: __mmask8, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); + transmute(simd_select_bitmask(k, permute, a.as_i64x4())) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q +pub fn _mm256_maskz_permutex2var_epi64( + k: __mmask8, + a: __m256i, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); + transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2q))] +pub fn _mm256_mask2_permutex2var_epi64( + a: __m256i, + idx: __m256i, + k: __mmask8, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); + transmute(simd_select_bitmask(k, permute, idx.as_i64x4())) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q +pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2q))] +pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); + transmute(simd_select_bitmask(k, permute, a.as_i64x2())) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q +pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); + transmute(simd_select_bitmask(k, permute, i64x2::ZERO)) + } +} + +/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2q))] +pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); + transmute(simd_select_bitmask(k, permute, idx.as_i64x2())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps +pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 { + unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2ps))] +pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, a.as_f32x16())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps +pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps +pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); + let idx = _mm512_castsi512_ps(idx).as_f32x16(); + transmute(simd_select_bitmask(k, permute, idx)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps +pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 { + unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2ps))] +pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, a.as_f32x8())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps +pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps +pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); + let idx = _mm256_castsi256_ps(idx).as_f32x8(); + transmute(simd_select_bitmask(k, permute, idx)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps +pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 { + unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2ps))] +pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 { + unsafe { + let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, a.as_f32x4())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps +pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 { + unsafe { + let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps +pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 { + unsafe { + let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); + let idx = _mm_castsi128_ps(idx).as_f32x4(); + transmute(simd_select_bitmask(k, permute, idx)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd +pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d { + unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2pd))] +pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, a.as_f64x8())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd +pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd +pub fn 
_mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); + let idx = _mm512_castsi512_pd(idx).as_f64x8(); + transmute(simd_select_bitmask(k, permute, idx)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd +pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d { + unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2pd))] +pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, a.as_f64x4())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd +pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows 
vpermt2pd +pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); + let idx = _mm256_castsi256_pd(idx).as_f64x4(); + transmute(simd_select_bitmask(k, permute, idx)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd +pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d { + unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2pd))] +pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d { + unsafe { + let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, a.as_f64x2())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd +pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d { + unsafe { + let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd +pub fn 
_mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d { + unsafe { + let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); + let idx = _mm_castsi128_pd(idx).as_f64x2(); + transmute(simd_select_bitmask(k, permute, idx)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd +#[rustc_legacy_const_generics(1)] +pub fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r: i32x16 = simd_shuffle!( + a.as_i32x16(), + a.as_i32x16(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ); + transmute(r) + } +} + +/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_shuffle_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } +} + +/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } +} + +/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_shuffle_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + } +} + +/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_shuffle_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) + } +} + +/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_mask_shuffle_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } +} + +/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shuffle_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 16, + ((MASK as u32 >> 6) & 0b11) + 16, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 20, + ((MASK as u32 >> 6) & 0b11) + 20, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 24, + ((MASK as u32 >> 6) & 0b11) + 24, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 28, + ((MASK as u32 >> 6) & 0b11) + 28, + ], + ) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shuffle_ps( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shuffle_ps( + src: __m256, + k: __mmask8, + a: __m256, + b: __m256, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shuffle_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shuffle_ps( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } +} + +/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shuffle_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1) + 8, + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 10, + ((MASK as u32 >> 4) & 0b1) + 4, + ((MASK as u32 >> 5) & 0b1) + 12, + ((MASK as u32 >> 6) & 0b1) + 6, + ((MASK as u32 >> 7) & 0b1) + 14, + ], + ) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shuffle_pd( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shuffle_pd( + src: __m256d, + k: __mmask8, + a: __m256d, + b: __m256d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shuffle_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shuffle_pd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) + } +} + +/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shuffle_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4 +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let r: i32x16 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 4 
32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shuffle_i32x4( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } +} + +/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_i32x4( + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4 +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r: i32x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shuffle_i32x4( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + } +} + +/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shuffle_i32x4(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r: i64x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shuffle_i64x2( + src: __m512i, + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + } +} + +/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_i64x2(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2 +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let r: i64x4 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shuffle_i64x2( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } +} + +/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shuffle_i64x2(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2 +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r: f32x16 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, 
+ ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shuffle_f32x4( + src: __m512, + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } +} + +/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4 +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let r: f32x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shuffle_f32x4( + src: __m256, + k: __mmask8, + a: __m256, + b: __m256, +) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } +} + +/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shuffle_f32x4(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r: f64x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shuffle_f64x2( + src: __m512d, + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } +} + +/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_f64x2(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } +} + +/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2 +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let r: f64x4 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); + transmute(r) + } +} + +/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shuffle_f64x2( + src: __m256d, + k: __mmask8, + a: __m256d, + b: __m256d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } +} + +/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shuffle_f64x2(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } +} + +/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_extractf32x4_ps(a: __m512) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + match IMM8 & 0x3 { + 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]), + 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]), + 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]), + _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]), + } + } +} + +/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_extractf32x4_ps(src: __m128, k: __mmask8, a: __m512) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } +} + +/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } +} + +/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + test, + assert_instr(vextract, IMM8 = 1) //should be vextractf32x4 +)] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_extractf32x4_ps(a: __m256) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + match IMM8 & 0x1 { + 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]), + } + } +} + +/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_extractf32x4_ps(src: __m128, k: __mmask8, a: __m256) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } +} + +/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m256) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } +} + +/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + test, + assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4 +)] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_extracti64x4_epi64(a: __m512i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + match IMM1 { + 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]), + } + } +} + +/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_extracti64x4_epi64( + src: __m256i, + k: __mmask8, + a: __m512i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm512_extracti64x4_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } +} + +/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm512_extracti64x4_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } +} + +/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_extractf64x4_pd(a: __m512d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + match IMM8 & 0x1 { + 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]), + } + } +} + +/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_extractf64x4_pd( + src: __m256d, + k: __mmask8, + a: __m512d, +) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_extractf64x4_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } +} + +/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_extractf64x4_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } +} + +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + test, + assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4 +)] +#[rustc_legacy_const_generics(1)] +pub fn _mm512_extracti32x4_epi32(a: __m512i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let a = a.as_i32x16(); + let zero = i32x16::ZERO; + let extract: i32x4 = match IMM2 { + 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]), + 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]), + _ => simd_shuffle!(a, zero, [12, 13, 14, 15]), + }; + transmute(extract) + } +} + +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_mask_extracti32x4_epi32( + src: __m128i, + k: __mmask8, + a: __m512i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm512_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } +} + +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm512_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } +} + +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + test, + assert_instr(vextract, IMM1 = 1) //should be vextracti32x4 +)] +#[rustc_legacy_const_generics(1)] +pub fn _mm256_extracti32x4_epi32(a: __m256i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let a = a.as_i32x8(); + let zero = i32x8::ZERO; + let extract: i32x4 = match IMM1 { + 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), + _ => simd_shuffle!(a, zero, [4, 5, 6, 7]), + }; + transmute(extract) + } +} + +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_mask_extracti32x4_epi32( + src: __m128i, + k: __mmask8, + a: __m256i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm256_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } +} + +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm256_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm512_moveldup_ps(a: __m512) -> __m512 { + unsafe { + let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + transmute(r) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + transmute(simd_select_bitmask(k, mov, src.as_f32x16())) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8())) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4())) + } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovsldup))] +pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm512_movehdup_ps(a: __m512) -> __m512 { + unsafe { + let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + transmute(r) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + transmute(simd_select_bitmask(k, mov, src.as_f32x16())) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8())) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4())) + } +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovshdup))] +pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm512_movedup_pd(a: __m512d) -> __m512d { + unsafe { + let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + transmute(r) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + transmute(simd_select_bitmask(k, mov, src.as_f64x8())) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = _mm256_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4())) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = _mm256_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO)) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = _mm_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2())) + } +} + +/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovddup))] +pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = _mm_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO)) + } +} + +/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4 +#[rustc_legacy_const_generics(2)] +pub fn _mm512_inserti32x4(a: __m512i, b: __m128i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let a = a.as_i32x16(); + let b = _mm512_castsi128_si512(b).as_i32x16(); + let ret: i32x16 = match IMM8 & 0b11 { + 0 => { + simd_shuffle!( + a, + b, + [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ) + } + 1 => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], + ) + } + 2 => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], + ) + } + _ => { + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]) + } + }; + transmute(ret) + } +} + +/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_inserti32x4( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m128i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_inserti32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } +} + +/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_inserti32x4(k: __mmask16, a: __m512i, b: __m128i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_inserti32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } +} + +/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(
+    test,
+    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
+)]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let a = a.as_i32x8();
+        let b = _mm256_castsi128_si256(b).as_i32x8();
+        let ret: i32x8 = match IMM8 & 0b1 {
+            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
+            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
+        };
+        transmute(ret)
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m128i,
+) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm256_inserti32x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm256_inserti32x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_castsi256_si512(b);
+        match IMM8 & 0b1 {
+            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
+            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m256i,
+) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm512_inserti64x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm512_inserti64x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let b = _mm512_castps128_ps512(b);
+        match IMM8 & 0b11 {
+            0 => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                )
+            }
+            1 => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
+                )
+            }
+            2 => {
+                simd_shuffle!(
+                    a,
+                    b,
+                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
+                )
+            }
+            _ => {
+                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
+            }
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
+    src: __m512,
+    k: __mmask16,
+    a: __m512,
+    b: __m128,
+) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let r = _mm512_insertf32x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 2);
+        let r = _mm512_insertf32x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(
+    test,
+    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
+)]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm256_castps128_ps256(b);
+        match IMM8 & 0b1 {
+            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
+            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
+    src: __m256,
+    k: __mmask8,
+    a: __m256,
+    b: __m128,
+) -> __m256 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm256_insertf32x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
+    }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm256_insertf32x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
+    }
+}
+
+/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let b = _mm512_castpd256_pd512(b);
+        match IMM8 & 0b1 {
+            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
+            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
+        }
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+    b: __m256d,
+) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm512_insertf64x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
+    }
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 1);
+        let r = _mm512_insertf64x4::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
+pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_i32x16();
+        let b = b.as_i32x16();
+        #[rustfmt::skip]
+        let r: i32x16 = simd_shuffle!(
+            a, b,
+            [ 2, 18, 3, 19,
+              2 + 4, 18 + 4, 3 + 4, 19 + 4,
+              2 + 8, 18 + 8, 3 + 8, 19 + 8,
+              2 + 12, 18 + 12, 3 + 12, 19 + 12],
+        );
+        transmute(r)
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhdq))]
+pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhdq))]
+pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
+        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhdq))]
+pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhdq))]
+pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
+        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhdq))]
+pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhdq))]
+pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
+pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhqdq))]
+pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhqdq))]
+pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhqdq))]
+pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhqdq))]
+pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhqdq))]
+pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckhqdq))]
+pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
+        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
+    unsafe {
+        #[rustfmt::skip]
+        simd_shuffle!(
+            a, b,
+            [ 2, 18, 3, 19,
+              2 + 4, 18 + 4, 3 + 4, 19 + 4,
+              2 + 8, 18 + 8, 3 + 8, 19 + 8,
+              2 + 12, 18 + 12, 3 + 12, 19 + 12],
+        )
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
+        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
+        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    unsafe {
+        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
+    }
+}
+
+/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhps))]
+pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    unsafe {
+        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
+        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
+    }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
+    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
+    }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    unsafe {
+        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
+        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
+    }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
+    }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    unsafe {
+        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
+        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
+    }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
+        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
+    }
+}
+
+/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpckhpd))]
+pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
+        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
+pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let a = a.as_i32x16();
+        let b = b.as_i32x16();
+        #[rustfmt::skip]
+        let r: i32x16 = simd_shuffle!(
+            a, b,
+            [ 0, 16, 1, 17,
+              0 + 4, 16 + 4, 1 + 4, 17 + 4,
+              0 + 8, 16 + 8, 1 + 8, 17 + 8,
+              0 + 12, 16 + 12, 1 + 12, 17 + 12],
+        );
+        transmute(r)
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckldq))]
+pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
+        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckldq))]
+pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
+        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckldq))]
+pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
+        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckldq))]
+pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
+        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckldq))]
+pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
+    }
+}
+
+/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpunpckldq))]
+pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
+    }
+}
+
+/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq +pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) } +} + +/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8())) + } +} + +/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO)) + } +} + +/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4())) + } +} + +/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO)) + } +} + +/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2())) + } +} + +/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpunpcklqdq))] +pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO)) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + #[rustfmt::skip] + simd_shuffle!(a, b, + [ 0, 16, 1, 17, + 0 + 4, 16 + 4, 1 + 4, 17 + 4, + 0 + 8, 16 + 8, 1 + 8, 17 + 8, + 0 + 12, 16 + 12, 1 + 12, 17 + 12], + ) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16())) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO)) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8())) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO)) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4())) + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklps))] +pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO)) + } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8())) + } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO)) + } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4())) + } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO)) + } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2())) + } +} + +/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vunpcklpd))] +pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO)) + } +} + +/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castps128_ps512(a: __m128) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm_undefined_ps(), + [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], + ) + } +} + +/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castps256_ps512(a: __m256) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ps(), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } +} + +/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm_set1_ps(0.), + [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], + ) + } +} + +/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm256_set1_ps(0.), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } +} + +/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castps512_ps128(a: __m512) -> __m128 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} + +/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castps512_ps256(a: __m512) -> __m256 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +} + +/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castps_pd(a: __m512) -> __m512d { + unsafe { transmute(a) } +} + +/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castps_si512(a: __m512) -> __m512i { + unsafe { transmute(a) } +} + +/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { + unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) } +} + +/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { + unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) } +} + +/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d { + unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) } +} + +/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d { + unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) } +} + +/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d { + unsafe { simd_shuffle!(a, a, [0, 1]) } +} + +/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} + +/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castpd_ps(a: __m512d) -> __m512 { + unsafe { transmute(a) } +} + +/// Cast vector of type __m512d to type __m512i. 
This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castpd_si512(a: __m512d) -> __m512i { + unsafe { transmute(a) } +} + +/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i { + unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } +} + +/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i { + unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } +} + +/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { + unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } +} + +/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { + unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } +} + +/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i { + unsafe { simd_shuffle!(a, a, [0, 1]) } +} + +/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +} + +/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 { + unsafe { transmute(a) } +} + +/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d { + unsafe { transmute(a) } +} + +/// Copy the lower 32-bit integer in a to dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovd))] +pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { + unsafe { simd_extract!(a.as_i32x16(), 0) } +} + +/// Copy the lower single-precision (32-bit) floating-point element of a to dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtss_f32(a: __m512) -> f32 { + unsafe { simd_extract!(a, 0) } +} + +/// Copy the lower double-precision (64-bit) floating-point element of a to dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 { + unsafe { simd_extract!(a, 0) } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd +pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i { + unsafe { + let a = _mm512_castsi128_si512(a).as_i32x16(); + let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + transmute(ret) + } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd +pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x16())) + } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd +pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) + } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd +pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x8())) + } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd +pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) + } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd +pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x4())) + } +} + +/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd +pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO)) + } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq +pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { + unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq +pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x8())) + } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq +pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) + } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq +pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x4())) + } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq +pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO)) + } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq +pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x2())) + } +} + +/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq +pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO)) + } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 { + unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x16())) + } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) + } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x8())) + } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) + } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let broadcast = _mm_broadcastss_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x4())) + } +} + +/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastss))] +pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let broadcast = _mm_broadcastss_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO)) + } +} + +/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d { + unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } +} + +/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f64x8())) + } +} + +/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) + } +} + +/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_f64x4())) + } +} + +/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vbroadcastsd))] +pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO)) + } +} + +/// Broadcast the 4 packed 32-bit integers from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i32x4(); + let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + transmute(ret) + } +} + +/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x16())) + } +} + +/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) + } +} + +/// Broadcast the 4 packed 32-bit integers from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i32x4(); + let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]); + transmute(ret) + } +} + +/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x8())) + } +} + +/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) + } +} + +/// Broadcast the 4 packed 64-bit integers from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } +} + +/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x8())) + } +} + +/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) + } +} + +/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) } +} + +/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x16())) + } +} + +/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) + } +} + +/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } +} + +/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x8())) + } +} + +/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) + } +} + +/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } +} + +/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f64x8())) + } +} + +/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497) +#[inline] +#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) + } +} + +/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd +pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) } +} + +/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd +pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) } +} + +/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd +pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) } +} + +/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq +pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) } +} + +/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq +pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) } +} + +/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq +pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) } +} + +/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps +pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) } +} + +/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps +pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) } +} + +/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps +pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) } +} + +/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd +pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) } +} + +/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd +pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) } +} + +/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd +pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) } +} + +/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst. +/// +///
+/// <b>Only lowest 4 bits are used from the mask (shift at maximum by 60 bytes)!</b>
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_alignr_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let imm8: i32 = IMM8 % 16; + let r: i32x16 = match imm8 { + 0 => simd_shuffle!( + a, + b, + [ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ], + ), + 1 => simd_shuffle!( + a, + b, + [ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, + ], + ), + 2 => simd_shuffle!( + a, + b, + [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1], + ), + 3 => simd_shuffle!( + a, + b, + [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2], + ), + 4 => simd_shuffle!( + a, + b, + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3], + ), + 5 => simd_shuffle!( + a, + b, + [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4], + ), + 6 => simd_shuffle!( + a, + b, + [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5], + ), + 7 => simd_shuffle!( + a, + b, + [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6], + ), + 8 => simd_shuffle!( + a, + b, + [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7], + ), + 9 => simd_shuffle!( + a, + b, + [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8], + ), + 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), + 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13]), + 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + }; + transmute(r) + } +} + +/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_alignr_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi32::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } +} + +/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_alignr_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi32::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } +} + +/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst. +/// +///
+/// <b>Only lowest 3 bits are used from the mask (shift at maximum by 28 bytes)!</b>
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_i32x8();
+        let b = b.as_i32x8();
+        // Only the low 3 bits of IMM8 select the element shift (0..=7).
+        let imm8: i32 = IMM8 % 8;
+        let r: i32x8 = match imm8 {
+            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
+            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
+            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
+            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
+            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
+            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
+            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
+            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
+            // imm8 is IMM8 % 8, so 0..=7 is exhaustive.
+            _ => unreachable_unchecked(),
+        };
+        transmute(r)
+    }
+}
+
+/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm256_alignr_epi32::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
+    }
+}
+
+/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm256_alignr_epi32::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
+    }
+}
+
+/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
+///
+///
+/// <b>Only lowest 2 bits are used from the mask (shift at maximum by 12 bytes)!</b>
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
+#[rustc_legacy_const_generics(2)]
+pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_i32x4();
+        let b = b.as_i32x4();
+        // Only the low 2 bits of IMM8 select the element shift (0..=3).
+        let imm8: i32 = IMM8 % 4;
+        let r: i32x4 = match imm8 {
+            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
+            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
+            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
+            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
+            // imm8 is IMM8 % 4, so 0..=3 is exhaustive.
+            _ => unreachable_unchecked(),
+        };
+        transmute(r)
+    }
+}
+
+/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm_alignr_epi32::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
+    }
+}
+
+/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm_alignr_epi32::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
+    }
+}
+
+/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
+///
+///
+/// <b>Only lowest 3 bits are used from the mask (shift at maximum by 56 bytes)!</b>
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        // Only the low 3 bits of IMM8 select the element shift (0..=7).
+        let imm8: i32 = IMM8 % 8;
+        let r: i64x8 = match imm8 {
+            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
+            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
+            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
+            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
+            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
+            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
+            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
+            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
+            // imm8 is IMM8 % 8, so 0..=7 is exhaustive.
+            _ => unreachable_unchecked(),
+        };
+        transmute(r)
+    }
+}
+
+/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm512_alignr_epi64::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
+    }
+}
+
+/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm512_alignr_epi64::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
+    }
+}
+
+/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
+///
+///
+/// <b>Only lowest 2 bits are used from the mask (shift at maximum by 24 bytes)!</b>
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        // Only the low 2 bits of IMM8 select the element shift (0..=3).
+        let imm8: i32 = IMM8 % 4;
+        let r: i64x4 = match imm8 {
+            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
+            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
+            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
+            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
+            // imm8 is IMM8 % 4, so 0..=3 is exhaustive.
+            _ => unreachable_unchecked(),
+        };
+        transmute(r)
+    }
+}
+
+/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm256_alignr_epi64::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
+    }
+}
+
+/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm256_alignr_epi64::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
+    }
+}
+
+/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
+///
+///
+/// <b>Only lowest bit is used from the mask (shift at maximum by 8 bytes)!</b>
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
+#[rustc_legacy_const_generics(2)]
+pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        // Only the low bit of IMM8 selects the element shift (0 or 1).
+        let imm8: i32 = IMM8 % 2;
+        let r: i64x2 = match imm8 {
+            0 => simd_shuffle!(a, b, [2, 3]),
+            1 => simd_shuffle!(a, b, [3, 0]),
+            // imm8 is IMM8 % 2, so 0..=1 is exhaustive.
+            _ => unreachable_unchecked(),
+        };
+        transmute(r)
+    }
+}
+
+/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm_alignr_epi64::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
+    }
+}
+
+/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let r = _mm_alignr_epi64::<IMM8>(a, b);
+        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
+    }
+}
+
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
+pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
+}
+
+/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, and, src.as_i32x16())) + } +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, and, i32x16::ZERO)) + } +} + +/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, and, src.as_i32x8())) + } +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, and, i32x8::ZERO)) + } +} + +/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, and, src.as_i32x4())) + } +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandd))] +pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, and, i32x4::ZERO)) + } +} + +/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, and, src.as_i64x8())) + } +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, and, i64x8::ZERO)) + } +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i64x4(), b.as_i64x4()); + transmute(simd_select_bitmask(k, and, src.as_i64x4())) + } +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i64x4(), b.as_i64x4()); + transmute(simd_select_bitmask(k, and, i64x4::ZERO)) + } +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i64x2(), b.as_i64x2()); + transmute(simd_select_bitmask(k, and, src.as_i64x2())) + } +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i64x2(), b.as_i64x2()); + transmute(simd_select_bitmask(k, and, i64x2::ZERO)) + } +} + +/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandq))] +pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vporq))] +pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpord))] +pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let or = _mm512_or_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, or, src.as_i32x16())) + } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpord))] +pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let or = _mm512_or_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, or, i32x16::ZERO)) + } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vor))] //should be vpord +pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpord))] +pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let or = _mm256_or_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, or, src.as_i32x8())) + } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpord))] +pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let or = _mm256_or_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, or, i32x8::ZERO)) + } +} + +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vor))] //should be vpord
+pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpord))]
+pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let or = _mm_or_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
+    }
+}
+
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpord))]
+pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let or = _mm_or_epi32(a, b).as_i32x4();
+        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
+    }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let or = _mm512_or_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
+    }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let or = _mm512_or_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
+    }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vor))] //should be vporq
+pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let or = _mm256_or_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
+    }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let or = _mm256_or_epi64(a, b).as_i64x4();
+        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
+    }
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vor))] //should be vporq +pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vporq))] +pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let or = _mm_or_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, or, src.as_i64x2())) + } +} + +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vporq))] +pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let or = _mm_or_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, or, i64x2::ZERO)) + } +} + +/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vporq))] +pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord +pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxord))] +pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, xor, src.as_i32x16())) + } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxord))] +pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, xor, i32x16::ZERO)) + } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxor))] //should be vpxord +pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxord))] +pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, xor, src.as_i32x8())) + } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxord))] +pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, xor, i32x8::ZERO)) + } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxor))] //should be vpxord +pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxord))] +pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, xor, src.as_i32x4())) + } +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxord))] +pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, xor, i32x4::ZERO)) + } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, xor, src.as_i64x8())) + } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, xor, i64x8::ZERO)) + } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq +pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, xor, src.as_i64x4())) + } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, xor, i64x4::ZERO)) + } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq +pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, xor, src.as_i64x2())) + } +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, xor, i64x2::ZERO)) + } +} + +/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd +pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i { + _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b) +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnd))] +pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, andnot, src.as_i32x16())) + } +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnd))] +pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, andnot, i32x16::ZERO)) + } +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnd))] +pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, andnot, src.as_i32x8())) + } +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnd))] +pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, andnot, i32x8::ZERO)) + } +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnd))] +pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, andnot, src.as_i32x4())) + } +} + +/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpandnd))] +pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, andnot, i32x4::ZERO)) + } +} + +/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpandnq))]
+pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
+    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
+}
+
+/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpandnq))]
+pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
+        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
+    }
+}
+
+/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width AND-NOT first, then zero the lanes whose mask bit is clear.
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // XOR with the all-ones pattern (u64::MAX as i64 == -1) computes bitwise NOT.
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // XOR with the all-ones pattern (u64::MAX as i64 == -1) computes bitwise NOT.
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // XOR with the all-ones pattern (u64::MAX as i64 == -1) computes bitwise NOT.
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // XOR with the all-ones pattern (u64::MAX as i64 == -1) computes bitwise NOT.
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
    }
}

/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    // Bitwise operation: element width is irrelevant, so reuse the epi64 helpers.
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
    // Plain zero-extending integer cast; no instruction is asserted.
    a as u32
}

/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `a` survive.
    a as __mmask16
}

/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw +pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + a & b +} + +/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw +pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { + a & b +} + +/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw +pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + a | b +} + +/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw +pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { + a | b +} + +/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw +pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + a ^ b +} + +/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw +pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { + a ^ b +} + +/// Compute the bitwise NOT of 16-bit mask a, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _knot_mask16(a: __mmask16) -> __mmask16 { + a ^ 0b11111111_11111111 +} + +/// Compute the bitwise NOT of 16-bit mask a, and store the result in k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_knot(a: __mmask16) -> __mmask16 { + a ^ 0b11111111_11111111 +} + +/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k. 
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // dst = (NOT a) AND b, built from the knot/kand helpers.
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    // dst = (NOT a) AND b, built from the knot/kand helpers.
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // XNOR == NOT(XOR), built from the knot/kxor helpers.
    _mm512_knot(_mm512_kxor(a, b))
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // XNOR == NOT(XOR), built from the knot/kxor helpers.
    _mm512_knot(_mm512_kxor(a, b))
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
    // Caller must pass a valid, writable pointer for the all-ones flag.
    let tmp = _kor_mask16(a, b);
    *all_ones = (tmp == 0xffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kor_mask16(a, b) == 0xffff) as u8
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { + (_kor_mask16(a, b) == 0) as u8 +} + +/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kshiftli_mask16(a: __mmask16) -> __mmask16 { + a << COUNT +} + +/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _kshiftri_mask16(a: __mmask16) -> __mmask16 { + a >> COUNT +} + +/// Load 16-bit mask from memory +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 { + *mem_addr +} + +/// Store 16-bit mask to memory +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) { + *mem_addr = a; +} + +/// Copy 16-bit mask a to k. 
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
    a
}

/// Converts integer mask into bitmask, storing the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
#[inline]
#[target_feature(enable = "avx512f")] // plain truncating cast; no assert_instr (no kmovw expected)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
    mask as u16
}

/// Converts bit mask k1 into an integer value, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
    // Zero-extending cast: __mmask16 is unsigned, so no sign bits leak in.
    k1 as i32
}

/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal shift/or code instead of kunpckbw
pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Low byte of `a` becomes the high byte of the result; low byte of `b` the low byte.
    ((a & 0xff) << 8) | (b & 0xff)
}

/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
    // Equivalent of the hardware CF result: 1 iff (a OR b) is all ones.
    let r = (a | b) == 0b11111111_11111111;
    r as i32
}

/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
    // Equivalent of the hardware ZF result: 1 iff (a OR b) is zero.
    let r = (a | b) == 0;
    r as i32
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // test == (a AND b) != 0 per 32-bit lane.
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmd))] +pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + let and = _mm512_and_epi32(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpneq_epi32_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmd))] +pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpneq_epi32_mask(and, zero) +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmd))] +pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpneq_epi32_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmd))] +pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpneq_epi32_mask(and, zero) +} + +/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmd))] +pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpneq_epi32_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmq))] +pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + let and = _mm512_and_epi64(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpneq_epi64_mask(and, zero) +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmq))] +pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + let and = _mm512_and_epi64(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpneq_epi64_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmq))] +pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpneq_epi64_mask(and, zero) +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmq))] +pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpneq_epi64_mask(k, and, zero) +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmq))] +pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpneq_epi64_mask(and, zero) +} + +/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestmq))] +pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpneq_epi64_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmd))] +pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + let and = _mm512_and_epi32(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpeq_epi32_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmd))] +pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + let and = _mm512_and_epi32(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpeq_epi32_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmd))] +pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpeq_epi32_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmd))] +pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpeq_epi32_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmd))] +pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpeq_epi32_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmd))] +pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpeq_epi32_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmq))] +pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + let and = _mm512_and_epi64(a, b); + let zero = _mm512_setzero_si512(); + _mm512_cmpeq_epi64_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmq))] +pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + let and = _mm512_and_epi64(a, b); + let zero = _mm512_setzero_si512(); + _mm512_mask_cmpeq_epi64_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmq))] +pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_cmpeq_epi64_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmq))] +pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + let and = _mm256_and_si256(a, b); + let zero = _mm256_setzero_si256(); + _mm256_mask_cmpeq_epi64_mask(k, and, zero) +} + +/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmq))] +pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_cmpeq_epi64_mask(and, zero) +} + +/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vptestnmq))] +pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + let and = _mm_and_si128(a, b); + let zero = _mm_setzero_si128(); + _mm_mask_cmpeq_epi64_mask(k, and, zero) +} + +/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovntps))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) { + crate::arch::asm!( + vps!("vmovntps", ",{a}"), + p = in(reg) mem_addr, + a = in(zmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovntpd))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) { + crate::arch::asm!( + vps!("vmovntpd", ",{a}"), + p = in(reg) mem_addr, + a = in(zmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. 
+#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovntdq))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) { + crate::arch::asm!( + vps!("vmovntdq", ",{a}"), + p = in(reg) mem_addr, + a = in(zmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr +/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To +/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i { + let dst: __m512i; + crate::arch::asm!( + vpl!("vmovntdqa {a}"), + a = out(zmm_reg) dst, + p = in(reg) mem_addr, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +/// Sets packed 32-bit integers in `dst` with the supplied values. +/// +/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set_ps( + e0: f32, + e1: f32, + e2: f32, + e3: f32, + e4: f32, + e5: f32, + e6: f32, + e7: f32, + e8: f32, + e9: f32, + e10: f32, + e11: f32, + e12: f32, + e13: f32, + e14: f32, + e15: f32, +) -> __m512 { + _mm512_setr_ps( + e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, + ) +} + +/// Sets packed 32-bit integers in `dst` with the supplied values in +/// reverse order. 
+/// +/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_setr_ps( + e0: f32, + e1: f32, + e2: f32, + e3: f32, + e4: f32, + e5: f32, + e6: f32, + e7: f32, + e8: f32, + e9: f32, + e10: f32, + e11: f32, + e12: f32, + e13: f32, + e14: f32, + e15: f32, +) -> __m512 { + unsafe { + let r = f32x16::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ); + transmute(r) + } +} + +/// Broadcast 64-bit float `a` to all elements of `dst`. +/// +/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set1_pd(a: f64) -> __m512d { + unsafe { transmute(f64x8::splat(a)) } +} + +/// Broadcast 32-bit float `a` to all elements of `dst`. +/// +/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set1_ps(a: f32) -> __m512 { + unsafe { transmute(f32x16::splat(a)) } +} + +/// Sets packed 32-bit integers in `dst` with the supplied values. 
+/// +/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set_epi32( + e15: i32, + e14: i32, + e13: i32, + e12: i32, + e11: i32, + e10: i32, + e9: i32, + e8: i32, + e7: i32, + e6: i32, + e5: i32, + e4: i32, + e3: i32, + e2: i32, + e1: i32, + e0: i32, +) -> __m512i { + _mm512_setr_epi32( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ) +} + +/// Broadcast 8-bit integer a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set1_epi8(a: i8) -> __m512i { + unsafe { transmute(i8x64::splat(a)) } +} + +/// Broadcast the low packed 16-bit integer from a to all elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set1_epi16(a: i16) -> __m512i { + unsafe { transmute(i16x32::splat(a)) } +} + +/// Broadcast 32-bit integer `a` to all elements of `dst`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set1_epi32(a: i32) -> __m512i { + unsafe { transmute(i32x16::splat(a)) } +} + +/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastd))] +pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i { + unsafe { + let r = _mm512_set1_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastd))] +pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i { + unsafe { + let r = _mm512_set1_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastd))] +pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i { + unsafe { + let r = _mm256_set1_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastd))] +pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i { + unsafe { + let r = _mm256_set1_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastd))] +pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i { + unsafe { + let r = _mm_set1_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastd))] +pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i { + unsafe { + let r = _mm_set1_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Broadcast 64-bit integer `a` to all elements of `dst`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set1_epi64(a: i64) -> __m512i { + unsafe { transmute(i64x8::splat(a)) } +} + +/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastq))] +pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i { + unsafe { + let r = _mm512_set1_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) + } +} + +/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastq))] +pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i { + unsafe { + let r = _mm512_set1_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } +} + +/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastq))] +pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i { + unsafe { + let r = _mm256_set1_epi64x(a).as_i64x4(); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) + } +} + +/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastq))] +pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i { + unsafe { + let r = _mm256_set1_epi64x(a).as_i64x4(); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } +} + +/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastq))] +pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i { + unsafe { + let r = _mm_set1_epi64x(a).as_i64x2(); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) + } +} + +/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpbroadcastq))] +pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i { + unsafe { + let r = _mm_set1_epi64x(a).as_i64x2(); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } +} + +/// Set packed 64-bit integers in dst with the repeated 4 element sequence. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { + _mm512_set_epi64(d, c, b, a, d, c, b, a) +} + +/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { + _mm512_set_epi64(a, b, c, d, a, b, c, d) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm512_cmp_ps_mask(a: __m512, b: __m512) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm512_mask_cmp_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm256_cmp_ps_mask(a: __m256, b: __m256) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let r = vcmpps256(a, b, IMM8, neg_one); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm256_mask_cmp_ps_mask(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let r = vcmpps256(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_cmp_ps_mask(a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vcmpps128(a, b, IMM8, neg_one); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_ps_mask(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vcmpps128(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm512_cmp_round_ps_mask( + a: __m512, + b: __m512, +) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_mask_cmp_round_ps_mask( + m: __mmask16, + a: __m512, + b: __m512, +) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM5, m as i16, SAE); + r.cast_unsigned() + } +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmps +pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps +pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm512_mask_cmp_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm256_cmp_pd_mask(a: __m256d, b: __m256d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let r = vcmppd256(a, b, IMM8, neg_one); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm256_mask_cmp_pd_mask(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let r = vcmppd256(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_cmp_pd_mask(a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vcmppd128(a, b, IMM8, neg_one); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_pd_mask(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vcmppd128(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm512_cmp_round_pd_mask( + a: __m512d, + b: __m512d, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm512_mask_cmp_round_pd_mask( + k1: __mmask8, + a: __m512d, + b: __m512d, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM5, k1 as i8, SAE); + r.cast_unsigned() + } +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd +pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b) +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_cmp_ss_mask(a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_ss_mask(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_cmp_round_ss_mask(a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let r = vcmpss(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not seti).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_mask_cmp_round_ss_mask( + k1: __mmask8, + a: __m128, + b: __m128, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let r = vcmpss(a, b, IMM5, k1 as i8, SAE); + r.cast_unsigned() + } +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_cmp_sd_mask(a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] +pub fn _mm_mask_cmp_sd_mask(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let r = vcmpsd(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_mask_cmp_round_sd_mask( + k1: __mmask8, + a: __m128d, + b: __m128d, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let r = vcmpsd(a, b, IMM5, k1 as i8, SAE); + r.cast_unsigned() + } +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_u32x16(), b.as_u32x16())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_u32x8(), b.as_u32x8())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_u32x4(), b.as_u32x4())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_u32x16(), b.as_u32x16())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_u32x8(), b.as_u32x8())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_u32x4(), b.as_u32x4())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_u32x16(), b.as_u32x16())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud +pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_u32x8(), b.as_u32x8())) } +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<u32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<u32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<u32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<u32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<u32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<u32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<u32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<u32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<u32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<u32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
+pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
+pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
+    // `<const IMM3: _MM_CMPINT_ENUM>` restored: the body references IMM3 and
+    // `rustc_legacy_const_generics(2)` requires a const generic parameter.
+    unsafe {
+        static_assert_uimm_bits!(IMM3, 3);
+        let a = a.as_u32x16();
+        let b = b.as_u32x16();
+        let r = match IMM3 {
+            0 => simd_eq(a, b),
+            1 => simd_lt(a, b),
+            2 => simd_le(a, b),
+            3 => i32x16::ZERO,
+            4 => simd_ne(a, b),
+            5 => simd_ge(a, b),
+            6 => simd_gt(a, b),
+            _ => i32x16::splat(-1),
+        };
+        simd_bitmask(r)
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
+pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    k1: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __mmask16 {
+    unsafe {
+        static_assert_uimm_bits!(IMM3, 3);
+        let a = a.as_u32x16();
+        let b = b.as_u32x16();
+        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
+        let r = match IMM3 {
+            0 => simd_and(k1, simd_eq(a, b)),
+            1 => simd_and(k1, simd_lt(a, b)),
+            2 => simd_and(k1, simd_le(a, b)),
+            3 => i32x16::ZERO,
+            4 => simd_and(k1, simd_ne(a, b)),
+            5 => simd_and(k1, simd_ge(a, b)),
+            6 => simd_and(k1, simd_gt(a, b)),
+            _ => k1,
+        };
+        simd_bitmask(r)
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
+pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
+    unsafe {
+        static_assert_uimm_bits!(IMM3, 3);
+        let a = a.as_u32x8();
+        let b = b.as_u32x8();
+        let r = match IMM3 {
+            0 => simd_eq(a, b),
+            1 => simd_lt(a, b),
+            2 => simd_le(a, b),
+            3 => i32x8::ZERO,
+            4 => simd_ne(a, b),
+            5 => simd_ge(a, b),
+            6 => simd_gt(a, b),
+            _ => i32x8::splat(-1),
+        };
+        simd_bitmask(r)
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
+pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    k1: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __mmask8 {
+    // `<const IMM3: _MM_CMPINT_ENUM>` restored: the body references IMM3 and
+    // `rustc_legacy_const_generics(3)` requires a const generic parameter.
+    unsafe {
+        static_assert_uimm_bits!(IMM3, 3);
+        let a = a.as_u32x8();
+        let b = b.as_u32x8();
+        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
+        let r = match IMM3 {
+            0 => simd_and(k1, simd_eq(a, b)),
+            1 => simd_and(k1, simd_lt(a, b)),
+            2 => simd_and(k1, simd_le(a, b)),
+            3 => i32x8::ZERO,
+            4 => simd_and(k1, simd_ne(a, b)),
+            5 => simd_and(k1, simd_ge(a, b)),
+            6 => simd_and(k1, simd_gt(a, b)),
+            _ => k1,
+        };
+        simd_bitmask(r)
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
+pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
+    // `<const IMM3: _MM_CMPINT_ENUM>` restored: the body references IMM3 and
+    // `rustc_legacy_const_generics(2)` requires a const generic parameter.
+    unsafe {
+        static_assert_uimm_bits!(IMM3, 3);
+        let a = a.as_u32x4();
+        let b = b.as_u32x4();
+        let r = match IMM3 {
+            0 => simd_eq(a, b),
+            1 => simd_lt(a, b),
+            2 => simd_le(a, b),
+            3 => i32x4::ZERO,
+            4 => simd_ne(a, b),
+            5 => simd_ge(a, b),
+            6 => simd_gt(a, b),
+            _ => i32x4::splat(-1),
+        };
+        simd_bitmask(r)
+    }
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
+pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    k1: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __mmask8 {
+    unsafe {
+        static_assert_uimm_bits!(IMM3, 3);
+        let a = a.as_u32x4();
+        let b = b.as_u32x4();
+        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
+        let r = match IMM3 {
+            0 => simd_and(k1, simd_eq(a, b)),
+            1 => simd_and(k1, simd_lt(a, b)),
+            2 => simd_and(k1, simd_le(a, b)),
+            3 => i32x4::ZERO,
+            4 => simd_and(k1, simd_ne(a, b)),
+            5 => simd_and(k1, simd_ge(a, b)),
+            6 => simd_and(k1, simd_gt(a, b)),
+            _ => k1,
+        };
+        simd_bitmask(r)
+    }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<i32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<i32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<i32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<i32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<i32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<i32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<i32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<i32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    // Restored `::<i32x4, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    // Restored `::<i32x16, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
+pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    // Restored `::<i32x8, _>` turbofish stripped by extraction.
+    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_i32x4(), b.as_i32x4())) } +} + +/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_i32x8(), b.as_i32x8())) } +} + +/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_i32x4(), b.as_i32x4())) } +} + +/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_i32x16(), b.as_i32x16())) } +} + +/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_i32x8(), b.as_i32x8())) } +} + +/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_i32x4(), b.as_i32x4())) } +} + +/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd +pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x16::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm512_mask_cmp_epi32_mask( + k1: __mmask16, + a: __m512i, + b: __m512i, +) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and 
store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_cmp_epi32_mask&expand=695) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm256_cmp_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x8::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm256_mask_cmp_epi32_mask( + k1: __mmask8, + a: __m256i, + b: __m256i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm_cmp_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x4::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm_mask_cmp_epi32_mask( + k1: __mmask8, + a: __m128i, + b: __m128i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k. 
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq +pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) } +} + +/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq +pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq +pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) } +} + +/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq +pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq +pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) } +} + +/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq +pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x8::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm512_mask_cmp_epu64_mask( + k1: __mmask8, + a: __m512i, + b: __m512i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the 
results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm256_cmp_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x4(); + let b = b.as_u64x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x4::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm256_mask_cmp_epu64_mask( + k1: __mmask8, + a: __m256i, + b: __m256i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x4(); + let b = b.as_u64x4(); + let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm_cmp_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x2(); + let b = b.as_u64x2(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x2::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x2::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm_mask_cmp_epu64_mask( + k1: __mmask8, + a: __m128i, + b: __m128i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x2(); + let b = b.as_u64x2(); + let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x2::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b) +} + +/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) } +} + +/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) } +} + +/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { + _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) } +} + +/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq +pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b) +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x8::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm512_mask_cmp_epi64_mask( + k1: __mmask8, + a: __m512i, + b: __m512i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the 
results in mask vector k. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm256_cmp_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x4::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm256_mask_cmp_epi64_mask( + k1: __mmask8, + a: __m256i, + b: __m256i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm_cmp_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x2::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x2::splat(-1), + }; + simd_bitmask(r) + } +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] +pub fn _mm_mask_cmp_epi64_mask( + k1: __mmask8, + a: __m128i, + b: __m128i, +) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x2::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } +} + +/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_add_unordered(a.as_i32x16()) } +} + +/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } +} + +/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_add_unordered(a.as_i64x8()) } +} + +/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_add_ps(a: __m512) -> f32 { + unsafe { + // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ + let a = _mm256_add_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1) + } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 { + unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) } +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. 
Returns the sum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_add_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1) + } +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 { + unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) } +} + +/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_mul_unordered(a.as_i32x16()) } +} + +/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { + simd_reduce_mul_unordered(simd_select_bitmask( + k, + a.as_i32x16(), + _mm512_set1_epi32(1).as_i32x16(), + )) + } +} + +/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_mul_unordered(a.as_i64x8()) } +} + +/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { + simd_reduce_mul_unordered(simd_select_bitmask( + k, + a.as_i64x8(), + _mm512_set1_epi64(1).as_i64x8(), + )) + } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 { + unsafe { + // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ + let a = _mm256_mul_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1) + } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 { + unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) } +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_mul_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1) + } +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 { + unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) } +} + +/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_max(a.as_i32x16()) } +} + +/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { + simd_reduce_max(simd_select_bitmask( + k, + a.as_i32x16(), + i32x16::splat(i32::MIN), + )) + } +} + +/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_max(a.as_i64x8()) } +} + +/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) } +} + +/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 { + unsafe { simd_reduce_max(a.as_u32x16()) } +} + +/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) } +} + +/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 { + unsafe { simd_reduce_max(a.as_u64x8()) } +} + +/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_max_ps(a: __m512) -> f32 { + unsafe { + let a = _mm256_max_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a))) + } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 { + _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a)) +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_max_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0]))) + } +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 { + _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a)) +} + +/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_min(a.as_i32x16()) } +} + +/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { + simd_reduce_min(simd_select_bitmask( + k, + a.as_i32x16(), + i32x16::splat(i32::MAX), + )) + } +} + +/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_min(a.as_i64x8()) } +} + +/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) } +} + +/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 { + unsafe { simd_reduce_min(a.as_u32x16()) } +} + +/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 { + unsafe { + simd_reduce_min(simd_select_bitmask( + k, + a.as_u32x16(), + u32x16::splat(u32::MAX), + )) + } +} + +/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 { + unsafe { simd_reduce_min(a.as_u64x8()) } +} + +/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_min_ps(a: __m512) -> f32 { + unsafe { + let a = _mm256_min_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a))) + } +} + +/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 { + _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a)) +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_min_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0]))) + } +} + +/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 { + _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a)) +} + +/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_and(a.as_i32x16()) } +} + +/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) } +} + +/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_and(a.as_i64x8()) } +} + +/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) } +} + +/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_or(a.as_i32x16()) } +} + +/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. 
Returns the bitwise OR of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } +} + +/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_or(a.as_i64x8()) } +} + +/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } +} + +/// Returns vector of type `__m512d` with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +// This intrinsic has no corresponding instruction. +pub fn _mm512_undefined_pd() -> __m512d { + unsafe { const { mem::zeroed() } } +} + +/// Returns vector of type `__m512` with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +// This intrinsic has no corresponding instruction. +pub fn _mm512_undefined_ps() -> __m512 { + unsafe { const { mem::zeroed() } } +} + +/// Return vector of type __m512i with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +// This intrinsic has no corresponding instruction. +pub fn _mm512_undefined_epi32() -> __m512i { + unsafe { const { mem::zeroed() } } +} + +/// Return vector of type __m512 with indeterminate elements. 
+/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +// This intrinsic has no corresponding instruction. +pub fn _mm512_undefined() -> __m512 { + unsafe { const { mem::zeroed() } } +} + +/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i { + ptr::read_unaligned(mem_addr as *const __m512i) +} + +/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i { + ptr::read_unaligned(mem_addr as *const __m256i) +} + +/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i { + ptr::read_unaligned(mem_addr as *const __m128i) +} + +/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovdw))] +pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) { + vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k); +} + +/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovdw))] +pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k); +} + +/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovdw))] +pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) { + vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) { + vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k); +} + +/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovdb))] +pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovdbmem(mem_addr, a.as_i32x16(), k); +} + +/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovdb))] +pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovdbmem256(mem_addr, a.as_i32x8(), k); +} + +/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovdb))] +pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovdbmem128(mem_addr, a.as_i32x4(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovsdbmem(mem_addr, a.as_i32x16(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsdbmem256(mem_addr, a.as_i32x8(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsdbmem128(mem_addr, a.as_i32x4(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovusdbmem(mem_addr, a.as_i32x16(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusdbmem256(mem_addr, a.as_i32x8(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovusdbmem128(mem_addr, a.as_i32x4(), k); +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) { + vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k); +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k); +} + +/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqw))] +pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) { + vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) { + vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k); +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovqbmem(mem_addr, a.as_i64x8(), k); +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovqbmem256(mem_addr, a.as_i64x4(), k); +} + +/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqb))] +pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovqbmem128(mem_addr, a.as_i64x2(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovsqbmem(mem_addr, a.as_i64x8(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsqbmem256(mem_addr, a.as_i64x4(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsqbmem128(mem_addr, a.as_i64x2(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovusqbmem(mem_addr, a.as_i64x8(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusqbmem256(mem_addr, a.as_i64x4(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
+    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpmovqd))]
+pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
+    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
+}
+
+/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovqd))] +pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) { + vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) { + vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) { + vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) { + vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) { + vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) { + vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) { + vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k); +} + +/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) { + ptr::write_unaligned(mem_addr as *mut __m512i, a); +} + +/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) { + ptr::write_unaligned(mem_addr as *mut __m256i, a); +} + +/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut __m128i, a); +} + +/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64 +pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i { + ptr::read_unaligned(mem_addr as *const __m512i) +} + +/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64 +pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i { + ptr::read_unaligned(mem_addr as *const __m256i) +} + +/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64 +pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i { + ptr::read_unaligned(mem_addr as *const __m128i) +} + +/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64 +pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) { + ptr::write_unaligned(mem_addr as *mut __m512i, a); +} + +/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64 +pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) { + ptr::write_unaligned(mem_addr as *mut __m256i, a); +} + +/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64 +pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut __m128i, a); +} + +/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i { + ptr::read_unaligned(mem_addr) +} + +/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 +pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) { + ptr::write_unaligned(mem_addr, a); +} + +/// Loads 512-bits (composed of 8 packed double-precision (64-bit) +/// floating-point elements) from memory into result. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] +pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d { + ptr::read_unaligned(mem_addr as *const __m512d) +} + +/// Stores 512-bits (composed of 8 packed double-precision (64-bit) +/// floating-point elements) from `a` into memory. +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] +pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) { + ptr::write_unaligned(mem_addr as *mut __m512d, a); +} + +/// Loads 512-bits (composed of 16 packed single-precision (32-bit) +/// floating-point elements) from memory into result. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] +pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 { + ptr::read_unaligned(mem_addr as *const __m512) +} + +/// Stores 512-bits (composed of 16 packed single-precision (32-bit) +/// floating-point elements) from `a` into memory. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmovups))] +pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) { + ptr::write_unaligned(mem_addr as *mut __m512, a); +} + +/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i { + ptr::read(mem_addr) +} + +/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) { + ptr::write(mem_addr, a); +} + +/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i { + ptr::read(mem_addr as *const __m512i) +} + +/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i { + ptr::read(mem_addr as *const __m256i) +} + +/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i { + ptr::read(mem_addr as *const __m128i) +} + +/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) { + ptr::write(mem_addr as *mut __m512i, a); +} + +/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. 
mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) { + ptr::write(mem_addr as *mut __m256i, a); +} + +/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa32 +pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) { + ptr::write(mem_addr as *mut __m128i, a); +} + +/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa64 +pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i { + ptr::read(mem_addr as *const __m512i) +} + +/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa64 +pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i { + ptr::read(mem_addr as *const __m256i) +} + +/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa64 +pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i { + ptr::read(mem_addr as *const __m128i) +} + +/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. 
mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa64 +pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) { + ptr::write(mem_addr as *mut __m512i, a); +} + +/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa64 +pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) { + ptr::write(mem_addr as *mut __m256i, a); +} + +/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovdqa64 +pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) { + ptr::write(mem_addr as *mut __m128i, a); +} + +/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] +pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 { + ptr::read(mem_addr as *const __m512) +} + +/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] +pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) { + ptr::write(mem_addr as *mut __m512, a); +} + +/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. 
mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovapd +pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d { + ptr::read(mem_addr as *const __m512d) +} + +/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(vmovaps) +)] //should be vmovapd +pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) { + ptr::write(mem_addr as *mut __m512d, a); +} + +/// Load packed 32-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i { + transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k)) +} + +/// Load packed 32-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i { + _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load packed 64-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i { + transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k)) +} + +/// Load packed 64-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). 
+/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i { + _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 { + transmute(loadups_512(mem_addr, src.as_f32x16(), k)) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 { + _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d { + transmute(loadupd_512(mem_addr, src.as_f64x8(), k)) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d { + _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr) +} + +/// Load packed 32-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). 
+/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i { + transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k)) +} + +/// Load packed 32-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i { + _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load packed 64-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i { + transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k)) +} + +/// Load packed 64-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i { + _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 { + transmute(loadups_256(mem_addr, src.as_f32x8(), k)) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 { + _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d { + transmute(loadupd_256(mem_addr, src.as_f64x4(), k)) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d { + _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr) +} + +/// Load packed 32-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i { + transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k)) +} + +/// Load packed 32-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). 
+/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i { + _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr) +} + +/// Load packed 64-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i { + transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k)) +} + +/// Load packed 64-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i { + _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 { + transmute(loadups_128(mem_addr, src.as_f32x4(), k)) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { + _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d { + transmute(loadupd_128(mem_addr, src.as_f64x2(), k)) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d { + _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr) +} + +/// Load packed 32-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). 
+/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i { + transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k)) +} + +/// Load packed 32-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i { + _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load packed 64-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i { + transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k)) +} + +/// Load packed 64-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i { + _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 { + transmute(loadaps_512(mem_addr, src.as_f32x16(), k)) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 { + _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d { + transmute(loadapd_512(mem_addr, src.as_f64x8(), k)) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d { + _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr) +} + +/// Load packed 32-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i { + transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k)) +} + +/// Load packed 32-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i { + _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load packed 64-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i { + transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k)) +} + +/// Load packed 64-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i { + _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 { + transmute(loadaps_256(mem_addr, src.as_f32x8(), k)) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 { + _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d { + transmute(loadapd_256(mem_addr, src.as_f64x4(), k)) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d { + _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr) +} + +/// Load packed 32-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i { + transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k)) +} + +/// Load packed 32-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i { + _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr) +} + +/// Load packed 64-bit integers from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i { + transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k)) +} + +/// Load packed 64-bit integers from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). 
+/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i { + _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 { + transmute(loadaps_128(mem_addr, src.as_f32x4(), k)) +} + +/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { + _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d { + transmute(loadapd_128(mem_addr, src.as_f64x2(), k)) +} + +/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k +/// (elements are zeroed out when the corresponding mask bit is not set). +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d { + _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr) +} + +/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst +/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper +/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection +/// exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss) +#[inline] +#[cfg_attr(test, assert_instr(vmovss))] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 { + let mut dst: __m128 = src; + asm!( + vpl!("vmovss {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst +/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed +/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection +/// exception may be generated. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss) +#[inline] +#[cfg_attr(test, assert_instr(vmovss))] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 { + let mut dst: __m128; + asm!( + vpl!("vmovss {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst +/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper +/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection +/// exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd) +#[inline] +#[cfg_attr(test, assert_instr(vmovsd))] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d { + let mut dst: __m128d = src; + asm!( + vpl!("vmovsd {dst}{{{k}}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = inout(xmm_reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst +/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element +/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception +/// may be generated. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd) +#[inline] +#[cfg_attr(test, assert_instr(vmovsd))] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d { + let mut dst: __m128d; + asm!( + vpl!("vmovsd {dst}{{{k}}} {{z}}"), + p = in(reg) mem_addr, + k = in(kreg) k, + dst = out(xmm_reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +/// Store packed 32-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) { + storedqu32_512(mem_addr, a.as_i32x16(), mask) +} + +/// Store packed 64-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) { + storedqu64_512(mem_addr, a.as_i64x8(), mask) +} + +/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) { + storeups_512(mem_addr, a.as_f32x16(), mask) +} + +/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) { + storeupd_512(mem_addr, a.as_f64x8(), mask) +} + +/// Store packed 32-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) { + storedqu32_256(mem_addr, a.as_i32x8(), mask) +} + +/// Store packed 64-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) { + storedqu64_256(mem_addr, a.as_i64x4(), mask) +} + +/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) { + storeups_256(mem_addr, a.as_f32x8(), mask) +} + +/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) { + storeupd_256(mem_addr, a.as_f64x4(), mask) +} + +/// Store packed 32-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) { + storedqu32_128(mem_addr, a.as_i32x4(), mask) +} + +/// Store packed 64-bit integers from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqu64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) { + storedqu64_128(mem_addr, a.as_i64x2(), mask) +} + +/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovups))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) { + storeups_128(mem_addr, a.as_f32x4(), mask) +} + +/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k. +/// mem_addr does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovupd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) { + storeupd_128(mem_addr, a.as_f64x2(), mask) +} + +/// Store packed 32-bit integers from a into memory using writemask k. +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) { + storedqa32_512(mem_addr, a.as_i32x16(), mask) +} + +/// Store packed 64-bit integers from a into memory using writemask k. +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) { + storedqa64_512(mem_addr, a.as_i64x8(), mask) +} + +/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k. +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) { + storeaps_512(mem_addr, a.as_f32x16(), mask) +} + +/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k. +/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) { + storeapd_512(mem_addr, a.as_f64x8(), mask) +} + +/// Store packed 32-bit integers from a into memory using writemask k. +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) { + storedqa32_256(mem_addr, a.as_i32x8(), mask) +} + +/// Store packed 64-bit integers from a into memory using writemask k. +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) { + storedqa64_256(mem_addr, a.as_i64x4(), mask) +} + +/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k. +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) { + storeaps_256(mem_addr, a.as_f32x8(), mask) +} + +/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k. +/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) { + storeapd_256(mem_addr, a.as_f64x4(), mask) +} + +/// Store packed 32-bit integers from a into memory using writemask k. +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa32))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) { + storedqa32_128(mem_addr, a.as_i32x4(), mask) +} + +/// Store packed 64-bit integers from a into memory using writemask k. +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovdqa64))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) { + storedqa64_128(mem_addr, a.as_i64x2(), mask) +} + +/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k. +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovaps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) { + storeaps_128(mem_addr, a.as_f32x4(), mask) +} + +/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k. +/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vmovapd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) { + storeapd_128(mem_addr, a.as_f64x2(), mask) +} + +/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr +/// must be aligned on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss) +#[inline] +#[cfg_attr(test, assert_instr(vmovss))] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) { + asm!( + vps!("vmovss", "{{{k}}}, {a}"), + p = in(reg) mem_addr, + k = in(kreg) k, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr +/// must be aligned on a 16-byte boundary or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd) +#[inline] +#[cfg_attr(test, assert_instr(vmovsd))] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) { + asm!( + vps!("vmovsd", "{{{k}}}, {a}"), + p = in(reg) mem_addr, + k = in(kreg) k, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpexpandd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_expandloadu_epi32( + src: __m512i, + k: __mmask16, + mem_addr: *const i32, +) -> __m512i { + transmute(expandloadd_512(mem_addr, src.as_i32x16(), k)) +} + +/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpexpandd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i { + _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_expandloadu_epi32( + src: __m256i, + k: __mmask8, + mem_addr: *const i32, +) -> __m256i { + transmute(expandloadd_256(mem_addr, src.as_i32x8(), k)) +} + +/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i { + _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_expandloadu_epi32( + src: __m128i, + k: __mmask8, + mem_addr: *const i32, +) -> __m128i { + transmute(expandloadd_128(mem_addr, src.as_i32x4(), k)) +} + +/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i { + _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr) +} + +/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpexpandq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_expandloadu_epi64( + src: __m512i, + k: __mmask8, + mem_addr: *const i64, +) -> __m512i { + transmute(expandloadq_512(mem_addr, src.as_i64x8(), k)) +} + +/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpexpandq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i { + _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_expandloadu_epi64( + src: __m256i, + k: __mmask8, + mem_addr: *const i64, +) -> __m256i { + transmute(expandloadq_256(mem_addr, src.as_i64x4(), k)) +} + +/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i { + _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_expandloadu_epi64( + src: __m128i, + k: __mmask8, + mem_addr: *const i64, +) -> __m128i { + transmute(expandloadq_128(mem_addr, src.as_i64x2(), k)) +} + +/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandq))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i { + _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr) +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vexpandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_expandloadu_ps( + src: __m512, + k: __mmask16, + mem_addr: *const f32, +) -> __m512 { + transmute(expandloadps_512(mem_addr, src.as_f32x16(), k)) +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vexpandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 { + _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr) +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 { + transmute(expandloadps_256(mem_addr, src.as_f32x8(), k)) +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 { + _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr) +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 { + transmute(expandloadps_128(mem_addr, src.as_f32x4(), k)) +} + +/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 { + _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr) +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vexpandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_expandloadu_pd( + src: __m512d, + k: __mmask8, + mem_addr: *const f64, +) -> __m512d { + transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k)) +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vexpandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d { + _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr) +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_expandloadu_pd( + src: __m256d, + k: __mmask8, + mem_addr: *const f64, +) -> __m256d { + transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k)) +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d { + _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr) +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vexpandpd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d { + transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k)) +} + +/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vexpandpd))]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
+    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
+}
+
+/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_setr_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    unsafe {
+        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+        transmute(r)
+    }
+}
+
+/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+pub fn _mm512_set_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+}
+
+/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_ss&expand=3832)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vmovss))]
+pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    unsafe {
+        let extractsrc: f32 = simd_extract!(src, 0);
+        let mut mov: f32 = extractsrc;
+        if (k & 0b00000001) != 0 {
+            mov = simd_extract!(b, 0);
+        }
+        simd_insert!(a, 0, mov)
+    }
+}
+
+/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_ss&expand=3833)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vmovss))]
+pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    unsafe {
+        let mut mov: f32 = 0.;
+        if (k & 0b00000001) != 0 {
+            mov = simd_extract!(b, 0);
+        }
+        simd_insert!(a, 0, mov)
+    }
+}
+
+/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_sd&expand=3829)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vmovsd))]
+pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        let extractsrc: f64 = simd_extract!(src, 0);
+        let mut mov: f64 = extractsrc;
+        if (k & 0b00000001) != 0 {
+            mov = simd_extract!(b, 0);
+        }
+        simd_insert!(a, 0, mov)
+    }
+}
+
+/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_sd&expand=3830)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vmovsd))]
+pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    unsafe {
+        let mut mov: f64 = 0.;
+        if (k & 0b00000001) != 0 {
+            mov = simd_extract!(b, 0);
+        }
+        simd_insert!(a, 0, mov)
+    }
+}
+
+/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddss))] +pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddss))] +pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddsd))] +pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddsd))] +pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubss))] +pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubss))] +pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubsd))] +pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubsd))] +pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulss))] +pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulss))] +pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulsd))] +pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulsd))] +pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivss))] +pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivss))] +pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivsd))] +pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivsd))] +pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxss))] +pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vmaxss( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxss))] +pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vmaxss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxsd))] +pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vmaxsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxsd))] +pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vmaxsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminss))] +pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vminss( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminss))] +pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vminss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminsd))] +pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vminsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminsd))] +pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vminsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtss))] +pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtss))] +pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtsd))] +pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtsd))] +pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrsqrt14ss))] +pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) } +} + +/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrsqrt14ss))] +pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) } +} + +/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrsqrt14ss))] +pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } +} + +/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrsqrt14sd))] +pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) } +} + +/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrsqrt14sd))] +pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) } +} + +/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrsqrt14sd))] +pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } +} + +/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrcp14ss))] +pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) } +} + +/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrcp14ss))] +pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) } +} + +/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrcp14ss))] +pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } +} + +/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrcp14sd))] +pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) } +} + +/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrcp14sd))] +pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) } +} + +/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrcp14sd))] +pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } +} + +/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpss))] +pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vgetexpss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + 0b1, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpss))] +pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vgetexpss( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpss))] +pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vgetexpss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpsd))] +pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vgetexpsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + 0b1, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpsd))] +pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vgetexpsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpsd))] +pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vgetexpsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_NO_EXC, + )) + } +} + +/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_getmant_ss< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss( + a, + b, + SIGN << 2 | NORM, + f32x4::ZERO, + 0b1, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_mask_getmant_ss< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_maskz_getmant_ss< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss( + a, + b, + SIGN << 2 | NORM, + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_getmant_sd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd( + a, + b, + SIGN << 2 | NORM, + f64x2::ZERO, + 0b1, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_mask_getmant_sd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_maskz_getmant_sd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd( + a, + b, + SIGN << 2 | NORM, + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * 
[`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless( + a, + b, + f32x4::ZERO, + 0b11111111, + IMM8, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_roundscale_ss<const IMM8: i32>( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b
= b.as_f32x4(); + let src = src.as_f32x4(); + let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see
[`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd( + a, + b, + f64x2::ZERO, + 0b11111111, + IMM8, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } +} + +/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_roundscale_sd<const IMM8: i32>( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + 
+/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefss))] +pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + let a = a.as_f32x4(); + let b = b.as_f32x4(); + transmute(vscalefss( + a, + b, + f32x4::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefss))] +pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefss))] +pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vscalefss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefsd))] +pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefsd))] +pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefsd))] +pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] +pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = fmaf32(fmadd, extractb, extractc); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] +pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] +pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + fmadd = fmaf32(extracta, extractb, fmadd); + } + simd_insert!(c, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] +pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = fmaf64(fmadd, extractb, extractc); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] +pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd))] +pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + fmadd = fmaf64(extracta, extractb, fmadd); + } + simd_insert!(c, 0, fmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] +pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf32(fmsub, extractb, extractc); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] +pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] +pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(c, 0, fmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] +pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf64(fmsub, extractb, extractc); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] +pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmsub: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub))] +pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fmsub: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(c, 0, fmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] +pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmadd; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fnmadd = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] +pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fnmadd = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] +pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fnmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + fnmadd = fmaf32(extracta, extractb, fnmadd); + } + simd_insert!(c, 0, fnmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] +pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmadd; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fnmadd = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] +pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fnmadd = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmadd))] +pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fnmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + fnmadd = fmaf64(extracta, extractb, fnmadd); + } + simd_insert!(c, 0, fnmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] +pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmsub; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] +pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] +pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fnmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fnmsub; + fnmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(c, 0, fnmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] +pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmsub; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] +pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmsub: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfnmsub))] +pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fnmsub: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc = -fnmsub; + fnmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(c, 0, fnmsub) + } +} + +/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn 
_mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_add_round_ss<const ROUNDING: i32>( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vaddss(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done
according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_add_round_sd<const ROUNDING: i32>( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vaddsd(a, b, src, k, ROUNDING); +
transmute(r) + } +} + +/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vsubss(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit)
floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * 
[`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_sub_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vsubsd(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING 
= 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_sub_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one 
of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_mul_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vmulss(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's 
documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_mul_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the 
result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_mul_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vmulsd(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round 
up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_mul_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_div_round_ss(a: __m128, b: 
__m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_div_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vdivss(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy 
the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - 
see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_div_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } +} + +/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_div_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a 
= a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vdivsd(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_div_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_max_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vmaxss(a, b, src, k, SAE); + transmute(r) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmaxss(a, b, f32x4::ZERO, k, SAE); + transmute(r) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_max_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vmaxsd(a, b, src, k, SAE); + transmute(r) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmaxsd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_min_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vminss(a, b, f32x4::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_min_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vminss(a, b, src, k, SAE); + transmute(r) + } +} + +/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vminss(a, b, f32x4::ZERO, k, SAE); + transmute(r) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_min_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_min_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vminsd(a, b, src, k, SAE); + transmute(r) + } +} + +/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vminsd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } +} + +/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * 
[`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_sqrt_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING) + } +} + +/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, 
assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_sqrt_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtss(a, b, src, k, ROUNDING) + } +} + +/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING) + } +} + +/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | 
[`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_sqrt_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING) + } +} + +/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_sqrt_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsd(a, b, src, k, ROUNDING) + } +} + +/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_sqrt_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING) + } +} + +/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper 
elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_getexp_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetexpss(a, b, src, k, SAE); + transmute(r) + } +} + +/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetexpss(a, b, f32x4::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
This intrinsic essentially calculates floor(log2(x)) for the lower element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_getexp_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetexpsd(a, b, src, k, SAE); + transmute(r) + } +} + +/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))] +#[rustc_legacy_const_generics(2, 3, 4)] +pub fn _mm_getmant_round_ss< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))] +#[rustc_legacy_const_generics(4, 5, 6)] +pub fn _mm_mask_getmant_round_ss< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))] +#[rustc_legacy_const_generics(3, 4, 5)] +pub fn _mm_maskz_getmant_round_ss< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))] +#[rustc_legacy_const_generics(2, 3, 4)] +pub fn _mm_getmant_round_sd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))] +#[rustc_legacy_const_generics(4, 5, 6)] +pub fn _mm_mask_getmant_round_sd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE); + transmute(r) + } +} + +/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ +/// The mantissa is normalized to the interval specified by interv, which can take the following values:\ +/// _MM_MANT_NORM_1_2 // interval [1, 2)\ +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\ +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\ +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\ +/// The sign is determined by sc which can take the following values:\ +/// _MM_MANT_SIGN_src // sign = sign(src)\ +/// _MM_MANT_SIGN_zero // sign = 0\ +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))] +#[rustc_legacy_const_generics(3, 4, 5)] +pub fn _mm_maskz_getmant_round_sd< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE); + transmute(r) + } +} + +/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_roundscale_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE); + transmute(r) + } +} + +/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_mask_roundscale_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vrndscaless(a, b, src, k, IMM8, SAE); + transmute(r) + } +} + +/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_maskz_roundscale_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE); + transmute(r) + } +} + +/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_roundscale_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE); + transmute(r) + } +} + +/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_mask_roundscale_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vrndscalesd(a, b, src, k, IMM8, SAE); + transmute(r) + } +} + +/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_maskz_roundscale_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_scalef_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vscalefss(a, b, f32x4::ZERO, 
0b11111111, ROUNDING); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_scalef_round_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vscalefss(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : 
round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_scalef_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_scalef_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING); + transmute(r) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_scalef_round_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vscalefsd(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k 
(the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_scalef_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let r = vfmaddssround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, r) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fmadd_round_ss( + a: __m128, + k: __mmask8, + b: __m128, + c: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fmadd_round_ss( + k: __mmask8, + a: __m128, + b: __m128, + c: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask3_fmadd_round_ss( + a: __m128, + b: __m128, + c: __m128, + k: __mmask8, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING); + } + simd_insert!(c, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fmadd_round_sd( + a: __m128d, + k: __mmask8, + b: __m128d, + c: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fmadd_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask3_fmadd_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, + k: __mmask8, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING); + } + simd_insert!(c, 0, fmadd) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fmsub_round_ss( + a: __m128, + k: __mmask8, + b: __m128, + c: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fmsub_round_ss( + k: __mmask8, + a: __m128, + b: __m128, + c: __m128, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask3_fmsub_round_ss( + a: __m128, + b: __m128, + c: __m128, + k: __mmask8, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(c, 0, fmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fmsub_round_sd( + a: __m128d, + k: __mmask8, + b: __m128d, + c: __m128d, +) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+    c: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane is zeroed.
+        let mut fmsub: f64 = 0.;
+        if (k & 0b00000001) != 0 {
+            let extracta: f64 = simd_extract!(a, 0);
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc: f64 = simd_extract!(c, 0);
+            let extractc = -extractc;
+            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fmsub)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
+    a: __m128d,
+    b: __m128d,
+    c: __m128d,
+    k: __mmask8,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps c's lower element; result merges into c.
+        let mut fmsub: f64 = simd_extract!(c, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta: f64 = simd_extract!(a, 0);
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc = -fmsub;
+            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(c, 0, fmsub)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fnmadd(a, b, c) = fma(-a, b, c)
+        let extracta: f32 = simd_extract!(a, 0);
+        let extracta = -extracta;
+        let extractb: f32 = simd_extract!(b, 0);
+        let extractc: f32 = simd_extract!(c, 0);
+        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
+    a: __m128,
+    k: __mmask8,
+    b: __m128,
+    c: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps a's lower element.
+        let mut fnmadd: f32 = simd_extract!(a, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta = -fnmadd;
+            let extractb: f32 = simd_extract!(b, 0);
+            let extractc: f32 = simd_extract!(c, 0);
+            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128,
+    b: __m128,
+    c: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane is zeroed.
+        let mut fnmadd: f32 = 0.;
+        if (k & 0b00000001) != 0 {
+            let extracta: f32 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f32 = simd_extract!(b, 0);
+            let extractc: f32 = simd_extract!(c, 0);
+            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
+    a: __m128,
+    b: __m128,
+    c: __m128,
+    k: __mmask8,
+) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps c's lower element; result merges into c.
+        let mut fnmadd: f32 = simd_extract!(c, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta: f32 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f32 = simd_extract!(b, 0);
+            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
+        }
+        simd_insert!(c, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fnmadd(a, b, c) = fma(-a, b, c)
+        let extracta: f64 = simd_extract!(a, 0);
+        let extracta = -extracta;
+        let extractb: f64 = simd_extract!(b, 0);
+        let extractc: f64 = simd_extract!(c, 0);
+        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
+    a: __m128d,
+    k: __mmask8,
+    b: __m128d,
+    c: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps a's lower element.
+        let mut fnmadd: f64 = simd_extract!(a, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta = -fnmadd;
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc: f64 = simd_extract!(c, 0);
+            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+    c: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane is zeroed.
+        let mut fnmadd: f64 = 0.;
+        if (k & 0b00000001) != 0 {
+            let extracta: f64 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc: f64 = simd_extract!(c, 0);
+            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
+    a: __m128d,
+    b: __m128d,
+    c: __m128d,
+    k: __mmask8,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps c's lower element; result merges into c.
+        let mut fnmadd: f64 = simd_extract!(c, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta: f64 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f64 = simd_extract!(b, 0);
+            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
+        }
+        simd_insert!(c, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and
suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fnmsub(a, b, c) = fma(-a, b, -c)
+        let extracta: f32 = simd_extract!(a, 0);
+        let extracta = -extracta;
+        let extractb: f32 = simd_extract!(b, 0);
+        let extractc: f32 = simd_extract!(c, 0);
+        let extractc = -extractc;
+        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
+    a: __m128,
+    k: __mmask8,
+    b: __m128,
+    c: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps a's lower element.
+        let mut fnmsub: f32 = simd_extract!(a, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta = -fnmsub;
+            let extractb: f32 = simd_extract!(b, 0);
+            let extractc: f32 = simd_extract!(c, 0);
+            let extractc = -extractc;
+            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128,
+    b: __m128,
+    c: __m128,
+) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane is zeroed.
+        let mut fnmsub: f32 = 0.;
+        if (k & 0b00000001) != 0 {
+            let extracta: f32 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f32 = simd_extract!(b, 0);
+            let extractc: f32 = simd_extract!(c, 0);
+            let extractc = -extractc;
+            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
+    a: __m128,
+    b: __m128,
+    c: __m128,
+    k: __mmask8,
+) -> __m128 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps c's lower element; result merges into c.
+        let mut fnmsub: f32 = simd_extract!(c, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta: f32 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f32 = simd_extract!(b, 0);
+            let extractc = -fnmsub;
+            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(c, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fnmsub(a, b, c) = fma(-a, b, -c)
+        let extracta: f64 = simd_extract!(a, 0);
+        let extracta = -extracta;
+        let extractb: f64 = simd_extract!(b, 0);
+        let extractc: f64 = simd_extract!(c, 0);
+        let extractc = -extractc;
+        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
+    a: __m128d,
+    k: __mmask8,
+    b: __m128d,
+    c: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps a's lower element.
+        let mut fnmsub: f64 = simd_extract!(a, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta = -fnmsub;
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc: f64 = simd_extract!(c, 0);
+            let extractc = -extractc;
+            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128d,
+    b: __m128d,
+    c: __m128d,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane is zeroed.
+        let mut fnmsub: f64 = 0.;
+        if (k & 0b00000001) != 0 {
+            let extracta: f64 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc: f64 = simd_extract!(c, 0);
+            let extractc = -extractc;
+            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result.
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
+///
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
+    a: __m128d,
+    b: __m128d,
+    c: __m128d,
+    k: __mmask8,
+) -> __m128d {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Masked-out lane keeps c's lower element; result merges into c.
+        let mut fnmsub: f64 = simd_extract!(c, 0);
+        if (k & 0b00000001) != 0 {
+            let extracta: f64 = simd_extract!(a, 0);
+            let extracta = -extracta;
+            let extractb: f64 = simd_extract!(b, 0);
+            let extractc = -fnmsub;
+            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(c, 0, fnmsub)
+    }
+}
+
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_f32x4();
+        let b = b.as_f32x4();
+        let c = c.as_i32x4();
+        // Unmasked form: all-ones writemask.
+        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
+        let fixupimm: f32 = simd_extract!(r, 0);
+        let r = simd_insert!(a, 0, fixupimm);
+        transmute(r)
+    }
+}
+
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
+    a: __m128,
+    k: __mmask8,
+    b: __m128,
+    c: __m128i,
+) -> __m128 {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let a = a.as_f32x4();
+        let b = b.as_f32x4();
+        let c = c.as_i32x4();
+        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
+        let fixupimm: f32 = simd_extract!(fixupimm, 0);
+        let r = simd_insert!(a, 0, fixupimm);
+        transmute(r)
+    }
+}
+
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fixupimm_ss( + k: __mmask8, + a: __m128, + b: __m128, + c: __m128i, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f32 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_fixupimm_sd( + a: __m128d, + k: __mmask8, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_maskz_fixupimm_sd( + k: __mmask8, + a: __m128d, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_fixupimm_round_ss( + a: __m128, + b: __m128, + c: __m128i, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_mask_fixupimm_round_ss( + a: __m128, + k: __mmask8, + b: __m128, + c: __m128i, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmss(a, b, c, IMM8, k, SAE); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_maskz_fixupimm_round_ss( + k: __mmask8, + a: __m128, + b: __m128, + c: __m128i, +) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmssz(a, b, c, IMM8, k, SAE); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +pub fn _mm_fixupimm_round_sd( + a: __m128d, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_mask_fixupimm_round_sd( + a: __m128d, + k: __mmask8, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmsd(a, b, c, IMM8, k, SAE); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +pub fn _mm_maskz_fixupimm_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, + c: __m128i, +) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmsdz(a, b, c, IMM8, k, SAE); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2sd))] +pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d { + unsafe { + transmute(vcvtss2sd( + a.as_f64x2(), + b.as_f32x4(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2sd))] +pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d { + unsafe { + transmute(vcvtss2sd( + a.as_f64x2(), + b.as_f32x4(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2ss))] +pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 { + unsafe { + transmute(vcvtsd2ss( + a.as_f32x4(), + b.as_f64x2(), + src.as_f32x4(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2ss))] +pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { + unsafe { + transmute(vcvtsd2ss( + a.as_f32x4(), + b.as_f64x2(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundss_sd(a: __m128d, b: __m128) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE); + transmute(r) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_cvt_roundss_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128, +) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let src = src.as_f64x2(); + let r = vcvtss2sd(a, b, src, k, SAE); + transmute(r) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f64x2(); + let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); + transmute(r) + } +} + +/// Convert the 
lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_cvt_roundsd_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128d, +) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f64x2(); + let src = src.as_f32x4(); + let r = vcvtsd2ss(a, b, src, k, ROUNDING); + transmute(r) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to 
nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_cvt_roundsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f64x2(); + let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] 
+#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundss_si32(a: __m128) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundss_i32(a: __m128) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * 
[`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundss_u32(a: __m128) -> u32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2usi(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2si))] +pub fn _mm_cvtss_i32(a: __m128) -> i32 { + unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2usi))] +pub fn _mm_cvtss_u32(a: __m128) -> u32 { + unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundsd_si32(a: __m128d) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions 
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundsd_i32(a: __m128d) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si(a, ROUNDING) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundsd_u32(a: __m128d) -> u32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2usi(a, ROUNDING) + } +} + +/// Convert the lower 
double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2si))] +pub fn _mm_cvtsd_i32(a: __m128d) -> i32 { + unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2usi))] +pub fn _mm_cvtsd_u32(a: __m128d) -> u32 { + unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312) +#[inline] 
+#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundi32_ss(a: __m128, b: i32) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundsi32_ss(a: __m128, b: i32) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one 
of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtusi2ss(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2ss))] +pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } +} + +/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2sd))] +pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si(a, SAE) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si(a, SAE) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2usi(a, SAE) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2si))] +pub fn _mm_cvttss_i32(a: __m128) -> i32 { + unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2usi))] +pub fn _mm_cvttss_u32(a: __m128) -> u32 { + unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si(a, SAE) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si(a, SAE) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2usi(a, SAE) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2si))] +pub fn _mm_cvttsd_i32(a: __m128d) -> i32 { + unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2usi))] +pub fn _mm_cvttsd_u32(a: __m128d) -> u32 { + unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2ss))] +pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } +} + +/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2sd))] +pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_comi_round_ss(a: __m128, b: __m128) -> i32 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + vcomiss(a, b, IMM5, SAE) + } +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd +#[rustc_legacy_const_generics(2, 3)] +pub fn _mm_comi_round_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + vcomisd(a, b, IMM5, SAE) + } +} + +/// Equal +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00; +/// Less-than +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01; +/// Less-than-or-equal +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02; +/// False +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03; +/// Not-equal +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04; +/// Not less-than +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05; +/// Not less-than-or-equal +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06; +/// True +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07; + +/// interval [1, 2) +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00; +/// interval [0.5, 2) +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01; +/// interval [0.5, 1) +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02; +/// interval [0.75, 1.5) +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03; + +/// sign = sign(SRC) +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00; +/// sign = 0 +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01; +/// DEST = NaN if sign(SRC) = 1 +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02; + +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const 
_MM_PERM_AABA: _MM_PERM_ENUM = 0x04; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16; +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACCB: 
_MM_PERM_ENUM = 0x29; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B; +#[stable(feature = "stdarch_x86_avx512", since = 
"1.89")] +pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E; 
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const 
_MM_PERM_BCAB: _MM_PERM_ENUM = 0x61; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73; +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CABC: 
_MM_PERM_ENUM = 0x86; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98; +#[stable(feature = "stdarch_x86_avx512", since = 
"1.89")] +pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB; 
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const 
_MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0; +#[stable(feature = 
"stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCAD: 
_MM_PERM_ENUM = 0xE3; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5; +#[stable(feature = "stdarch_x86_avx512", since = 
"1.89")] +pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.sqrt.ps.512"] + fn vsqrtps(a: f32x16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.sqrt.pd.512"] + fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.vfmadd.ps.512"] + fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; + #[link_name = "llvm.x86.avx512.vfmadd.pd.512"] + fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; + + #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"] + fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang + #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"] + fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang + + #[link_name = "llvm.x86.avx512.add.ps.512"] + fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.add.pd.512"] + fn vaddpd(a: 
f64x8, b: f64x8, rounding: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.sub.ps.512"] + fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.sub.pd.512"] + fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mul.ps.512"] + fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.mul.pd.512"] + fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.div.ps.512"] + fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.div.pd.512"] + fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.max.ps.512"] + fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.max.pd.512"] + fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.min.ps.512"] + fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.min.pd.512"] + fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"] + fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16; + + #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"] + fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"] + fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"] + fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"] + fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"] + fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"] + fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"] + fn vrndscaleps256(a: f32x8, imm8: i32, 
src: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"] + fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"] + fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"] + fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"] + fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"] + fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"] + fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"] + fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"] + fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"] + fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"] + fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"] + fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"] + fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"] + fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"] + fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"] + fn vfixupimmpd256(a: f64x4, b: 
f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"] + fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"] + fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"] + fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"] + fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"] + fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"] + fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"] + fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.pternlog.d.512"] + fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.pternlog.d.256"] + fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.pternlog.d.128"] + fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4; + + #[link_name = "llvm.x86.avx512.pternlog.q.512"] + fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8; + #[link_name = "llvm.x86.avx512.pternlog.q.256"] + fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4; + #[link_name = "llvm.x86.avx512.pternlog.q.128"] + fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2; + + #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"] + fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"] + fn vgetmantps256(a: f32x8, 
mantissas: i32, src: f32x8, m: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"] + fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"] + fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"] + fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"] + fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.rcp14.ps.512"] + fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16; + #[link_name = "llvm.x86.avx512.rcp14.ps.256"] + fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.rcp14.ps.128"] + fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.rcp14.pd.512"] + fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8; + #[link_name = "llvm.x86.avx512.rcp14.pd.256"] + fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.rcp14.pd.128"] + fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"] + fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16; + #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"] + fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"] + fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"] + fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8; + #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"] + fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"] + fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"] + fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16; + + #[link_name = 
"llvm.x86.avx512.mask.cvtps2udq.512"] + fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16; + #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"] + fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"] + fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4; + + #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"] + fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"] + fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8; + + #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"] + fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8; + + #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"] + fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"] + fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"] + fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4; + + #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"] + fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"] + fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16; + + #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"] + fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"] + fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"] + fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8; + + #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"] + fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16; + + #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"] + fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16; + #[link_name = 
"llvm.x86.avx512.mask.cvttps2dq.256"] + fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"] + fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"] + fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16; + #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"] + fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"] + fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4; + + #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"] + fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"] + fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"] + fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"] + fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"] + fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"] + fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4; + + #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"] + fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmov.db.256"] + fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmov.db.128"] + fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16; + + #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"] + fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"] + fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"] + fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16; + #[link_name = 
"llvm.x86.avx512.mask.pmov.qb.128"] + fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"] + fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"] + fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"] + fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"] + fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"] + fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"] + fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"] + fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"] + fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"] + fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"] + fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"] + fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"] + fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"] + fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"] + fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"] + fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"] + fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = 
"llvm.x86.avx512.mask.pmovus.db.mem.512"] + fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"] + fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"] + fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"] + fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"] + fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"] + fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"] + fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"] + fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"] + fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"] + fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"] + fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"] + fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"] + fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"] + fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"] + fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"] + fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"] + fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = 
"llvm.x86.avx512.mask.pmovs.qb.mem.128"] + fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"] + fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"] + fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"] + fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"] + fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"] + fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"] + fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"] + fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"] + fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"] + fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"] + fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"] + fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"] + fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"] + fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16; + + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"] + fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"] + fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"] + fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8; + + #[link_name = 
"llvm.x86.avx512.mask.pmovs.db.512"] + fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"] + fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"] + fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16; + + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"] + fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"] + fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"] + fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"] + fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"] + fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"] + fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8; + + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"] + fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"] + fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"] + fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16; + + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"] + fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"] + fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"] + fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8; + + #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"] + fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"] + fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"] + fn vpmovusdb128(a: u32x4, src: u8x16, mask: 
u8) -> u8x16; + + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"] + fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"] + fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"] + fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4; + + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"] + fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"] + fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"] + fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8; + + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"] + fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"] + fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"] + fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16; + + #[link_name = "llvm.x86.avx512.gather.dpd.512"] + fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.gather.dps.512"] + fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16; + #[link_name = "llvm.x86.avx512.gather.qpd.512"] + fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8; + #[link_name = "llvm.x86.avx512.gather.qps.512"] + fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8; + #[link_name = "llvm.x86.avx512.gather.dpq.512"] + fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8; + #[link_name = "llvm.x86.avx512.gather.dpi.512"] + fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.gather.qpq.512"] + fn vpgatherqq(src: i64x8, slice: 
*const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8; + #[link_name = "llvm.x86.avx512.gather.qpi.512"] + fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8; + + #[link_name = "llvm.x86.avx512.scatter.dpd.512"] + fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32); + #[link_name = "llvm.x86.avx512.scatter.dps.512"] + fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32); + #[link_name = "llvm.x86.avx512.scatter.qpd.512"] + fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32); + #[link_name = "llvm.x86.avx512.scatter.qps.512"] + fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32); + #[link_name = "llvm.x86.avx512.scatter.dpq.512"] + fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32); + + #[link_name = "llvm.x86.avx512.scatter.dpi.512"] + fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32); + #[link_name = "llvm.x86.avx512.scatter.qpq.512"] + fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32); + #[link_name = "llvm.x86.avx512.scatter.qpi.512"] + fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32); + + #[link_name = "llvm.x86.avx512.scattersiv4.si"] + fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32); + #[link_name = "llvm.x86.avx512.scattersiv2.di"] + fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32); + #[link_name = "llvm.x86.avx512.scattersiv2.df"] + fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32); + #[link_name = "llvm.x86.avx512.scattersiv4.sf"] + fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv4.si"] + fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32); + #[link_name = 
"llvm.x86.avx512.scatterdiv2.di"] + fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv2.df"] + fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv4.sf"] + fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32); + + #[link_name = "llvm.x86.avx512.scattersiv8.si"] + fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32); + #[link_name = "llvm.x86.avx512.scattersiv4.di"] + fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32); + #[link_name = "llvm.x86.avx512.scattersiv4.df"] + fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32); + #[link_name = "llvm.x86.avx512.scattersiv8.sf"] + fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv8.si"] + fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv4.di"] + fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv4.df"] + fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32); + #[link_name = "llvm.x86.avx512.scatterdiv8.sf"] + fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32); + + #[link_name = "llvm.x86.avx512.gather3siv4.si"] + fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4; + #[link_name = "llvm.x86.avx512.gather3siv2.di"] + fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2; + #[link_name = "llvm.x86.avx512.gather3siv2.df"] + fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.gather3siv4.sf"] + fn vgatherdps_128(src: f32x4, slice: 
*const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.gather3div4.si"] + fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4; + #[link_name = "llvm.x86.avx512.gather3div2.di"] + fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2; + #[link_name = "llvm.x86.avx512.gather3div2.df"] + fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.gather3div4.sf"] + fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.gather3siv8.si"] + fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.gather3siv4.di"] + fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4; + #[link_name = "llvm.x86.avx512.gather3siv4.df"] + fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4; + #[link_name = "llvm.x86.avx512.gather3siv8.sf"] + fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8; + #[link_name = "llvm.x86.avx512.gather3div8.si"] + fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4; + #[link_name = "llvm.x86.avx512.gather3div4.di"] + fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4; + #[link_name = "llvm.x86.avx512.gather3div4.df"] + fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4; + #[link_name = "llvm.x86.avx512.gather3div8.sf"] + fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.cmp.ss"] + fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.sd"] + fn vcmpsd(a: __m128d, b: __m128d, 
op: i32, m: i8, sae: i32) -> i8; + + #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"] + fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16; + #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"] + fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"] + fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8; + + #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"] + fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"] + fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"] + fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8; + + #[link_name = "llvm.x86.avx512.mask.prol.d.512"] + fn vprold(a: i32x16, i8: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prol.d.256"] + fn vprold256(a: i32x8, i8: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.prol.d.128"] + fn vprold128(a: i32x4, i8: i32) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.pror.d.512"] + fn vprord(a: i32x16, i8: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.pror.d.256"] + fn vprord256(a: i32x8, i8: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.pror.d.128"] + fn vprord128(a: i32x4, i8: i32) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.prol.q.512"] + fn vprolq(a: i64x8, i8: i32) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prol.q.256"] + fn vprolq256(a: i64x4, i8: i32) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.prol.q.128"] + fn vprolq128(a: i64x2, i8: i32) -> i64x2; + + #[link_name = "llvm.x86.avx512.mask.pror.q.512"] + fn vprorq(a: i64x8, i8: i32) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.pror.q.256"] + fn vprorq256(a: i64x4, i8: i32) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.pror.q.128"] + fn vprorq128(a: i64x2, i8: i32) -> i64x2; + + #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] + fn vprolvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = 
"llvm.x86.avx512.mask.prolv.d.256"] + fn vprolvd256(a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.prolv.d.128"] + fn vprolvd128(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] + fn vprorvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prorv.d.256"] + fn vprorvd256(a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.prorv.d.128"] + fn vprorvd128(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] + fn vprolvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prolv.q.256"] + fn vprolvq256(a: i64x4, b: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.prolv.q.128"] + fn vprolvq128(a: i64x2, b: i64x2) -> i64x2; + + #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] + fn vprorvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prorv.q.256"] + fn vprorvq256(a: i64x4, b: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.prorv.q.128"] + fn vprorvq128(a: i64x2, b: i64x2) -> i64x2; + + #[link_name = "llvm.x86.avx512.psllv.d.512"] + fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrlv.d.512"] + fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psllv.q.512"] + fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrlv.q.512"] + fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.psll.d.512"] + fn vpslld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psrl.d.512"] + fn vpsrld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psll.q.512"] + fn vpsllq(a: i64x8, count: i64x2) -> i64x8; + #[link_name = "llvm.x86.avx512.psrl.q.512"] + fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psra.d.512"] + fn vpsrad(a: i32x16, count: i32x4) -> i32x16; + + #[link_name = "llvm.x86.avx512.psra.q.512"] + fn vpsraq(a: i64x8, count: i64x2) -> i64x8; + 
#[link_name = "llvm.x86.avx512.psra.q.256"] + fn vpsraq256(a: i64x4, count: i64x2) -> i64x4; + #[link_name = "llvm.x86.avx512.psra.q.128"] + fn vpsraq128(a: i64x2, count: i64x2) -> i64x2; + + #[link_name = "llvm.x86.avx512.psrav.d.512"] + fn vpsravd(a: i32x16, count: i32x16) -> i32x16; + + #[link_name = "llvm.x86.avx512.psrav.q.512"] + fn vpsravq(a: i64x8, count: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrav.q.256"] + fn vpsravq256(a: i64x4, count: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx512.psrav.q.128"] + fn vpsravq128(a: i64x2, count: i64x2) -> i64x2; + + #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"] + fn vpermilps(a: f32x16, b: i32x16) -> f32x16; + #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"] + fn vpermilpd(a: f64x8, b: i64x8) -> f64x8; + + #[link_name = "llvm.x86.avx512.permvar.si.512"] + fn vpermd(a: i32x16, idx: i32x16) -> i32x16; + + #[link_name = "llvm.x86.avx512.permvar.di.512"] + fn vpermq(a: i64x8, idx: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.permvar.di.256"] + fn vpermq256(a: i64x4, idx: i64x4) -> i64x4; + + #[link_name = "llvm.x86.avx512.permvar.sf.512"] + fn vpermps(a: f32x16, idx: i32x16) -> f32x16; + + #[link_name = "llvm.x86.avx512.permvar.df.512"] + fn vpermpd(a: f64x8, idx: i64x8) -> f64x8; + #[link_name = "llvm.x86.avx512.permvar.df.256"] + fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4; + + #[link_name = "llvm.x86.avx512.vpermi2var.d.512"] + fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.vpermi2var.d.256"] + fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.vpermi2var.d.128"] + fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx512.vpermi2var.q.512"] + fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.vpermi2var.q.256"] + fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx512.vpermi2var.q.128"] + fn vpermi2q128(a: 
i64x2, idx: i64x2, b: i64x2) -> i64x2; + + #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"] + fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16; + #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"] + fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8; + #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"] + fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4; + + #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"] + fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8; + #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"] + fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4; + #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"] + fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.compress.d.512"] + fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.compress.d.256"] + fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.compress.d.128"] + fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4; + + #[link_name = "llvm.x86.avx512.mask.compress.q.512"] + fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.compress.q.256"] + fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.compress.q.128"] + fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2; + + #[link_name = "llvm.x86.avx512.mask.compress.ps.512"] + fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.compress.ps.256"] + fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.compress.ps.128"] + fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.compress.pd.512"] + fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.compress.pd.256"] + fn vcompresspd256(a: f64x4, src: f64x4, 
mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.compress.pd.128"] + fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"] + fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"] + fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"] + fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"] + fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"] + fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"] + fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"] + fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"] + fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"] + fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"] + fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"] + fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"] + fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.expand.d.512"] + fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.expand.d.256"] + fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.expand.d.128"] + fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4; + + #[link_name = 
"llvm.x86.avx512.mask.expand.q.512"] + fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.expand.q.256"] + fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.expand.q.128"] + fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2; + + #[link_name = "llvm.x86.avx512.mask.expand.ps.512"] + fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.expand.ps.256"] + fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.expand.ps.128"] + fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4; + + #[link_name = "llvm.x86.avx512.mask.expand.pd.512"] + fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8; + #[link_name = "llvm.x86.avx512.mask.expand.pd.256"] + fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.expand.pd.128"] + fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.add.ss.round"] + fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.add.sd.round"] + fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.sub.ss.round"] + fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.sub.sd.round"] + fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.mul.ss.round"] + fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.mul.sd.round"] + fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.div.ss.round"] + fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.div.sd.round"] + fn vdivsd(a: 
f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.max.ss.round"] + fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.max.sd.round"] + fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.min.ss.round"] + fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.min.sd.round"] + fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.sqrt.ss"] + fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128; + #[link_name = "llvm.x86.avx512.mask.sqrt.sd"] + fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d; + #[link_name = "llvm.x86.avx512.mask.getexp.ss"] + fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.getexp.sd"] + fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.getmant.ss"] + fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.getmant.sd"] + fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2; + + #[link_name = "llvm.x86.avx512.rsqrt14.ss"] + fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4; + #[link_name = "llvm.x86.avx512.rsqrt14.sd"] + fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2; + #[link_name = "llvm.x86.avx512.rcp14.ss"] + fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4; + #[link_name = "llvm.x86.avx512.rcp14.sd"] + fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.rndscale.ss"] + fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4; + #[link_name = 
"llvm.x86.avx512.mask.rndscale.sd"] + fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.scalef.ss"] + fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.scalef.sd"] + fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2; + + #[link_name = "llvm.x86.avx512.vfmadd.f32"] + fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32; + #[link_name = "llvm.x86.avx512.vfmadd.f64"] + fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64; + + #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"] + fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"] + fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"] + fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"] + fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2; + + #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"] + fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"] + fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.vcvtss2si32"] + fn vcvtss2si(a: f32x4, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.vcvtss2usi32"] + fn vcvtss2usi(a: f32x4, rounding: i32) -> u32; + + #[link_name = "llvm.x86.avx512.vcvtsd2si32"] + fn vcvtsd2si(a: f64x2, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.vcvtsd2usi32"] + fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32; + + #[link_name = "llvm.x86.avx512.cvtsi2ss32"] + fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.cvtusi2ss"] + fn 
vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4; + + #[link_name = "llvm.x86.avx512.cvttss2si"] + fn vcvttss2si(a: f32x4, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.cvttss2usi"] + fn vcvttss2usi(a: f32x4, rounding: i32) -> u32; + + #[link_name = "llvm.x86.avx512.cvttsd2si"] + fn vcvttsd2si(a: f64x2, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.cvttsd2usi"] + fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32; + + #[link_name = "llvm.x86.avx512.vcomi.ss"] + fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32; + #[link_name = "llvm.x86.avx512.vcomi.sd"] + fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32; + + #[link_name = "llvm.x86.avx512.mask.loadu.d.128"] + fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.loadu.q.128"] + fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"] + fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"] + fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.loadu.d.256"] + fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.loadu.q.256"] + fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"] + fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"] + fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.loadu.d.512"] + fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.loadu.q.512"] + fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"] + fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> 
f32x16; + #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"] + fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.load.d.128"] + fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.load.q.128"] + fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.load.ps.128"] + fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.load.pd.128"] + fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.load.d.256"] + fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.load.q.256"] + fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.load.ps.256"] + fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.load.pd.256"] + fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.load.d.512"] + fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.load.q.512"] + fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.load.ps.512"] + fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.load.pd.512"] + fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8; + + #[link_name = "llvm.x86.avx512.mask.storeu.d.128"] + fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.q.128"] + fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"] + fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"] + fn 
storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.d.256"] + fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.q.256"] + fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"] + fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"] + fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.d.512"] + fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.storeu.q.512"] + fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"] + fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"] + fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.store.d.128"] + fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.q.128"] + fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.ps.128"] + fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.pd.128"] + fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.d.256"] + fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.q.256"] + fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.ps.256"] + fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.pd.256"] + fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.d.512"] + fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16); + #[link_name = 
"llvm.x86.avx512.mask.store.q.512"] + fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.store.ps.512"] + fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.store.pd.512"] + fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"] + fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"] + fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2; + #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"] + fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4; + #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"] + fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2; + #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"] + fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"] + fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4; + #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"] + fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"] + fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4; + #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"] + fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"] + fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"] + fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16; + #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"] + fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8; + +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use 
crate::core_arch::x86::*; + use crate::hint::black_box; + use crate::mem::{self}; + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_abs_epi32(a); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + 0, 1, 1, i32::MAX, + i32::MAX.wrapping_add(1), 100, 100, 32, + 0, 1, 1, i32::MAX, + i32::MAX.wrapping_add(1), 100, 100, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_mask_abs_epi32(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + 0, 1, 1, i32::MAX, + i32::MAX.wrapping_add(1), 100, 100, 32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_maskz_abs_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + 0, 1, 1, i32::MAX, + i32::MAX.wrapping_add(1), 100, 100, 32, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_abs_epi32() { + #[rustfmt::skip] + let a = _mm256_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm256_mask_abs_epi32(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_abs_epi32(a, 0b00001111, a); + #[rustfmt::skip] + let e = 
_mm256_setr_epi32( + 0, 1, 1, i32::MAX, + i32::MAX.wrapping_add(1), 100, -100, -32, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_abs_epi32() { + #[rustfmt::skip] + let a = _mm256_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm256_maskz_abs_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_abs_epi32(0b00001111, a); + #[rustfmt::skip] + let e = _mm256_setr_epi32( + 0, 1, 1, i32::MAX, + 0, 0, 0, 0, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_abs_epi32() { + let a = _mm_setr_epi32(i32::MIN, 100, -100, -32); + let r = _mm_mask_abs_epi32(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_abs_epi32(a, 0b00001111, a); + let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_abs_epi32() { + let a = _mm_setr_epi32(i32::MIN, 100, -100, -32); + let r = _mm_maskz_abs_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_abs_epi32(0b00001111, a); + let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let r = _mm512_abs_ps(a); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 1., 1., f32::MAX, + f32::MAX, 100., 100., 32., + 0., 1., 1., f32::MAX, + f32::MAX, 100., 100., 32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let r = _mm512_mask_abs_ps(a, 0, a); + 
assert_eq_m512(r, a); + let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 1., 1., f32::MAX, + f32::MAX, 100., 100., 32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mov_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(2); + let r = _mm512_mask_mov_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mov_epi32() { + let a = _mm512_set1_epi32(2); + let r = _mm512_maskz_mov_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mov_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(2); + let r = _mm256_mask_mov_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_mov_epi32(src, 0b11111111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mov_epi32() { + let a = _mm256_set1_epi32(2); + let r = _mm256_maskz_mov_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mov_epi32(0b11111111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mov_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(2); + let r = _mm_mask_mov_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_mov_epi32(src, 0b00001111, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mov_epi32() { + let a = _mm_set1_epi32(2); + let r = _mm_maskz_mov_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mov_epi32(0b00001111, a); + 
assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mov_ps() { + let src = _mm512_set1_ps(1.); + let a = _mm512_set1_ps(2.); + let r = _mm512_mask_mov_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a); + assert_eq_m512(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mov_ps() { + let a = _mm512_set1_ps(2.); + let r = _mm512_maskz_mov_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_mov_ps(0b11111111_11111111, a); + assert_eq_m512(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mov_ps() { + let src = _mm256_set1_ps(1.); + let a = _mm256_set1_ps(2.); + let r = _mm256_mask_mov_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_mov_ps(src, 0b11111111, a); + assert_eq_m256(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mov_ps() { + let a = _mm256_set1_ps(2.); + let r = _mm256_maskz_mov_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_mov_ps(0b11111111, a); + assert_eq_m256(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mov_ps() { + let src = _mm_set1_ps(1.); + let a = _mm_set1_ps(2.); + let r = _mm_mask_mov_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_mov_ps(src, 0b00001111, a); + assert_eq_m128(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mov_ps() { + let a = _mm_set1_ps(2.); + let r = _mm_maskz_mov_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_mov_ps(0b00001111, a); + assert_eq_m128(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_add_epi32(a, b); + #[rustfmt::skip] + let e 
= _mm512_setr_epi32( + 1, 2, 0, i32::MIN, + i32::MIN + 1, 101, -99, -31, + 1, 2, 0, i32::MIN, + i32::MIN + 1, 101, -99, -31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_mask_add_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + 1, 2, 0, i32::MIN, + i32::MIN + 1, 101, -99, -31, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_add_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + 1, 2, 0, i32::MIN, + i32::MIN + 1, 101, -99, -31, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_add_epi32() { + let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32); + let b = _mm256_set1_epi32(1); + let r = _mm256_mask_add_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_add_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_add_epi32() { + let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32); + let b = _mm256_set1_epi32(1); + let r = _mm256_maskz_add_epi32(0, a, b); + assert_eq_m256i(r, 
_mm256_setzero_si256()); + let r = _mm256_maskz_add_epi32(0b11111111, a, b); + let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_add_epi32() { + let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN); + let b = _mm_set1_epi32(1); + let r = _mm_mask_add_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_add_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_add_epi32() { + let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN); + let b = _mm_set1_epi32(1); + let r = _mm_maskz_add_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_add_epi32(0b00001111, a, b); + let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_add_ps(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 1., 2., 0., f32::MAX, + f32::MIN + 1., 101., -99., -31., + 1., 2., 0., f32::MAX, + f32::MIN + 1., 101., -99., -31., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_mask_add_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 1., 2., 0., f32::MAX, + f32::MIN + 1., 101., -99., -31., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + 
assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_maskz_add_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 1., 2., 0., f32::MAX, + f32::MIN + 1., 101., -99., -31., + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_add_ps() { + let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.); + let b = _mm256_set1_ps(1.); + let r = _mm256_mask_add_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_add_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_add_ps() { + let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.); + let b = _mm256_set1_ps(1.); + let r = _mm256_maskz_add_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_add_ps(0b11111111, a, b); + let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_add_ps() { + let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN); + let b = _mm_set1_ps(1.); + let r = _mm_mask_add_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_add_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_add_ps() { + let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN); + let b = _mm_set1_ps(1.); + let r = 
_mm_maskz_add_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_add_ps(0b00001111, a, b); + let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_sub_epi32(a, b); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + -1, 0, -2, i32::MAX - 1, + i32::MAX, 99, -101, -33, + -1, 0, -2, i32::MAX - 1, + i32::MAX, 99, -101, -33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_mask_sub_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + -1, 0, -2, i32::MAX - 1, + i32::MAX, 99, -101, -33, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_sub_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + -1, 0, -2, i32::MAX - 1, + i32::MAX, 99, -101, -33, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sub_epi32() { + let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, 
-32); + let b = _mm256_set1_epi32(1); + let r = _mm256_mask_sub_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sub_epi32() { + let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32); + let b = _mm256_set1_epi32(1); + let r = _mm256_maskz_sub_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sub_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sub_epi32() { + let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN); + let b = _mm_set1_epi32(1); + let r = _mm_mask_sub_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_sub_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sub_epi32() { + let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN); + let b = _mm_set1_epi32(1); + let r = _mm_maskz_sub_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sub_epi32(0b00001111, a, b); + let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_sub_ps(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -1., 0., -2., f32::MAX - 1., + f32::MIN, 99., -101., -33., + -1., 0., -2., f32::MAX - 1., + f32::MIN, 99., -101., -33., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm512_mask_sub_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_mask_sub_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -1., 0., -2., f32::MAX - 1., + f32::MIN, 99., -101., -33., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_maskz_sub_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -1., 0., -2., f32::MAX - 1., + f32::MIN, 99., -101., -33., + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sub_ps() { + let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.); + let b = _mm256_set1_ps(1.); + let r = _mm256_mask_sub_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_sub_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sub_ps() { + let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.); + let b = _mm256_set1_ps(1.); + let r = _mm256_maskz_sub_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_sub_ps(0b11111111, a, b); + let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.); + assert_eq_m256(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sub_ps() { + let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN); + let b = _mm_set1_ps(1.); + let r = _mm_mask_sub_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_sub_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sub_ps() { + let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN); + let b = _mm_set1_ps(1.); + let r = _mm_maskz_sub_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_sub_ps(0b00001111, a, b); + let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mullo_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(2); + let r = _mm512_mullo_epi32(a, b); + let e = _mm512_setr_epi32( + 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mullo_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(2); + let r = _mm512_mask_mullo_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_epi32( + 0, 2, -2, -2, + 0, 200, -200, -64, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mullo_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(2); + let r = 
_mm512_maskz_mullo_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mullo_epi32() { + let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32); + let b = _mm256_set1_epi32(2); + let r = _mm256_mask_mullo_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mullo_epi32() { + let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32); + let b = _mm256_set1_epi32(2); + let r = _mm256_maskz_mullo_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mullo_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mullo_epi32() { + let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN); + let b = _mm_set1_epi32(2); + let r = _mm_mask_mullo_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(2, -2, -2, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mullo_epi32() { + let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN); + let b = _mm_set1_epi32(2); + let r = _mm_maskz_mullo_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mullo_epi32(0b00001111, a, b); + let e = _mm_set_epi32(2, -2, -2, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., 
-100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(2.); + let r = _mm512_mul_ps(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 2., -2., f32::INFINITY, + f32::NEG_INFINITY, 200., -200., -64., + 0., 2., -2., f32::INFINITY, + f32::NEG_INFINITY, 200., -200., + -64., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(2.); + let r = _mm512_mask_mul_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 2., -2., f32::INFINITY, + f32::NEG_INFINITY, 200., -200., -64., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let b = _mm512_set1_ps(2.); + let r = _mm512_maskz_mul_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 2., -2., f32::INFINITY, + f32::NEG_INFINITY, 200., -200., -64., + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_ps() { + let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.); + let b = _mm256_set1_ps(2.); + let r = _mm256_mask_mul_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_mul_ps(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_ps( + 0., 2., -2., f32::INFINITY, + f32::NEG_INFINITY, 200., -200., -64., + ); + assert_eq_m256(r, e); + 
} + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_ps() { + let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.); + let b = _mm256_set1_ps(2.); + let r = _mm256_maskz_mul_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_mul_ps(0b11111111, a, b); + #[rustfmt::skip] + let e = _mm256_set_ps( + 0., 2., -2., f32::INFINITY, + f32::NEG_INFINITY, 200., -200., -64., + ); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_ps() { + let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN); + let b = _mm_set1_ps(2.); + let r = _mm_mask_mul_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_mul_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_ps() { + let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN); + let b = _mm_set1_ps(2.); + let r = _mm_maskz_mul_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_mul_ps(0b00001111, a, b); + let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_ps() { + let a = _mm512_setr_ps( + 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32., + ); + let b = _mm512_setr_ps( + 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2., + ); + let r = _mm512_div_ps(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0.5, -0.5, -1., + 50., f32::INFINITY, -50., -16., + 0., 0.5, -0.5, 500., + f32::NEG_INFINITY, 50., -50., -16., + ); + assert_eq_m512(r, e); // 0/0 = NAN + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_ps() { + let a = _mm512_setr_ps( + 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32., + ); + let b = _mm512_setr_ps( + 2., 2., 2., 2., 2., 0., 2., 2., 
2., 2., 2., 2., 0., 2., 2., 2., + ); + let r = _mm512_mask_div_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0.5, -0.5, -1., + 50., f32::INFINITY, -50., -16., + 0., 1., -1., 1000., + -131., 100., -100., -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_ps() { + let a = _mm512_setr_ps( + 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32., + ); + let b = _mm512_setr_ps( + 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2., + ); + let r = _mm512_maskz_div_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0.5, -0.5, -1., + 50., f32::INFINITY, -50., -16., + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_div_ps() { + let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.); + let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.); + let r = _mm256_mask_div_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_div_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_div_ps() { + let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.); + let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.); + let r = _mm256_maskz_div_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_div_ps(0b11111111, a, b); + let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_div_ps() { + let a = _mm_set_ps(100., 100., -100., -32.); + let b = _mm_set_ps(2., 0., 2., 2.); 
+ let r = _mm_mask_div_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_div_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(50., f32::INFINITY, -50., -16.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_div_ps() { + let a = _mm_set_ps(100., 100., -100., -32.); + let b = _mm_set_ps(2., 0., 2., 2.); + let r = _mm_maskz_div_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_div_ps(0b00001111, a, b); + let e = _mm_set_ps(50., f32::INFINITY, -50., -16.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epi32(a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_max_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_max_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_mask_max_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(3, 2, 2, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_maskz_max_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epi32(0b00001111, a, b); + let e = _mm_set_epi32(3, 2, 2, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_max_ps(a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_max_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_mask_max_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_maskz_max_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm256_mask_max_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_max_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm256_maskz_max_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_max_ps(0b11111111, a, b); + let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = 
_mm_set_ps(3., 2., 1., 0.); + let r = _mm_mask_max_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_max_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(3., 2., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(3., 2., 1., 0.); + let r = _mm_maskz_max_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_mask_max_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(3., 2., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epu32(a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epu32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epu32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + 
unsafe fn test_mm256_mask_max_epu32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_max_epu32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epu32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epu32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_max_epu32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epu32(0b11111111, a, b); + let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epu32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_mask_max_epu32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epu32(a, 0b00001111, a, b); + let e = _mm_set_epi32(3, 2, 2, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epu32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_maskz_max_epu32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epu32(0b00001111, a, b); + let e = _mm_set_epi32(3, 2, 2, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epi32(a, b); + let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_min_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_min_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_mask_min_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(0, 1, 1, 0); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_maskz_min_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epi32(0b00001111, a, b); + let e = _mm_set_epi32(0, 1, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_min_ps(a, b); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_mask_min_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_maskz_min_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 
4., 5., 6., 7.); + let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm256_mask_min_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_min_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm256_maskz_min_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_min_ps(0b11111111, a, b); + let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(3., 2., 1., 0.); + let r = _mm_mask_min_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_min_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(0., 1., 1., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(3., 2., 1., 0.); + let r = _mm_maskz_min_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_min_ps(0b00001111, a, b); + let e = _mm_set_ps(0., 1., 1., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epu32(a, b); + let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 
+ let r = _mm512_mask_min_epu32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epu32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epu32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_mask_min_epu32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epu32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epu32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_maskz_min_epu32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epu32(0b11111111, a, b); + let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epu32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_mask_min_epu32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epu32(a, 0b00001111, a, b); + let e = _mm_set_epi32(0, 1, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epu32() { + let a = 
_mm_set_epi32(0, 1, 2, 3); + let b = _mm_set_epi32(3, 2, 1, 0); + let r = _mm_maskz_min_epu32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epu32(0b00001111, a, b); + let e = _mm_set_epi32(0, 1, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sqrt_ps() { + let a = _mm512_setr_ps( + 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225., + ); + let r = _mm512_sqrt_ps(a); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sqrt_ps() { + let a = _mm512_setr_ps( + 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225., + ); + let r = _mm512_mask_sqrt_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sqrt_ps() { + let a = _mm512_setr_ps( + 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225., + ); + let r = _mm512_maskz_sqrt_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sqrt_ps() { + let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm256_mask_sqrt_ps(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_sqrt_ps(a, 0b11111111, a); + let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sqrt_ps() { + let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 
36., 49.); + let r = _mm256_maskz_sqrt_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_sqrt_ps(0b11111111, a); + let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sqrt_ps() { + let a = _mm_set_ps(0., 1., 4., 9.); + let r = _mm_mask_sqrt_ps(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_sqrt_ps(a, 0b00001111, a); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sqrt_ps() { + let a = _mm_set_ps(0., 1., 4., 9.); + let r = _mm_maskz_sqrt_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_sqrt_ps(0b00001111, a); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_fmadd_ps(a, b, c); + let e = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fmadd_ps(a, 0, b, c); + assert_eq_m512(r, a); + let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c); + let e = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fmadd_ps(0, 
a, b, c); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c); + let e = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(2.); + let r = _mm512_mask3_fmadd_ps(a, b, c, 0); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111); + let e = _mm512_setr_ps( + 2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask_fmadd_ps(a, 0, b, c); + assert_eq_m256(r, a); + let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_maskz_fmadd_ps(0, a, b, c); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask3_fmadd_ps(a, b, c, 0); + assert_eq_m256(r, c); + let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask_fmadd_ps(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_maskz_fmadd_ps(0, a, b, c); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask3_fmadd_ps(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsub_ps() { + let a = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ); + let r = _mm512_fmsub_ps(a, b, c); + let e = _mm512_setr_ps( + -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fmsub_ps(a, 0, b, c); + assert_eq_m512(r, a); + let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c); + 
let e = _mm512_setr_ps( + -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fmsub_ps(0, a, b, c); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c); + let e = _mm512_setr_ps( + -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., + ); + let r = _mm512_mask3_fmsub_ps(a, b, c, 0); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111); + let e = _mm512_setr_ps( + -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask_fmsub_ps(a, 0, b, c); + assert_eq_m256(r, a); + let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c); + let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_maskz_fmsub_ps(0, a, b, c); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c); + let e = _mm256_set_ps(-1., 0., 
1., 2., 3., 4., 5., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask3_fmsub_ps(a, b, c, 0); + assert_eq_m256(r, c); + let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111); + let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask_fmsub_ps(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c); + let e = _mm_set_ps(-1., 0., 1., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_maskz_fmsub_ps(0, a, b, c); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c); + let e = _mm_set_ps(-1., 0., 1., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask3_fmsub_ps(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111); + let e = _mm_set_ps(-1., 0., 1., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmaddsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_fmaddsub_ps(a, b, c); + let e = _mm512_setr_ps( + -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_mask_fmaddsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fmaddsub_ps(a, 0, b, c); + assert_eq_m512(r, a); + let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c); + let e = _mm512_setr_ps( + -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmaddsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fmaddsub_ps(0, a, b, c); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c); + let e = _mm512_setr_ps( + -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmaddsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., + ); + let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111); + let e = _mm512_setr_ps( + -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmaddsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask_fmaddsub_ps(a, 0, b, c); + assert_eq_m256(r, a); + let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c); + let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.); + assert_eq_m256(r, e); 
+ } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmaddsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_maskz_fmaddsub_ps(0, a, b, c); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c); + let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmaddsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0); + assert_eq_m256(r, c); + let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111); + let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmaddsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask_fmaddsub_ps(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c); + let e = _mm_set_ps(1., 0., 3., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmaddsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_maskz_fmaddsub_ps(0, a, b, c); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c); + let e = _mm_set_ps(1., 0., 3., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmaddsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask3_fmaddsub_ps(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111); + let e = _mm_set_ps(1., 0., 3., 2.); + assert_eq_m128(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsubadd_ps() { + let a = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ); + let r = _mm512_fmsubadd_ps(a, b, c); + let e = _mm512_setr_ps( + 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsubadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fmsubadd_ps(a, 0, b, c); + assert_eq_m512(r, a); + let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c); + let e = _mm512_setr_ps( + 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsubadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fmsubadd_ps(0, a, b, c); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c); + let e = _mm512_setr_ps( + 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsubadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., + ); + let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmsubadd_ps(a, b, c, 
0b00000000_11111111); + let e = _mm512_setr_ps( + 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmsubadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask_fmsubadd_ps(a, 0, b, c); + assert_eq_m256(r, a); + let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c); + let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmsubadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_maskz_fmsubadd_ps(0, a, b, c); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c); + let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmsubadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0); + assert_eq_m256(r, c); + let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111); + let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmsubadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask_fmsubadd_ps(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c); + let e = _mm_set_ps(-1., 2., 1., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmsubadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = 
_mm_maskz_fmsubadd_ps(0, a, b, c); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c); + let e = _mm_set_ps(-1., 2., 1., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmsubadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask3_fmsubadd_ps(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111); + let e = _mm_set_ps(-1., 2., 1., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_fnmadd_ps(a, b, c); + let e = _mm512_setr_ps( + 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fnmadd_ps(a, 0, b, c); + assert_eq_m512(r, a); + let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c); + let e = _mm512_setr_ps( + 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fnmadd_ps(0, a, b, c); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c); + let e = _mm512_setr_ps( + 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0., + ); + 
assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmadd_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., + ); + let r = _mm512_mask3_fnmadd_ps(a, b, c, 0); + assert_eq_m512(r, c); + let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111); + let e = _mm512_setr_ps( + 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fnmadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask_fnmadd_ps(a, 0, b, c); + assert_eq_m256(r, a); + let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c); + let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fnmadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_maskz_fnmadd_ps(0, a, b, c); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c); + let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fnmadd_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask3_fnmadd_ps(a, b, c, 0); + assert_eq_m256(r, c); + let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111); + let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fnmadd_ps() { + let a = _mm_set1_ps(1.); + let 
b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask_fnmadd_ps(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c); + let e = _mm_set_ps(1., 0., -1., -2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fnmadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_maskz_fnmadd_ps(0, a, b, c); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c); + let e = _mm_set_ps(1., 0., -1., -2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fnmadd_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask3_fnmadd_ps(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111); + let e = _mm_set_ps(1., 0., -1., -2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_fnmsub_ps(a, b, c); + let e = _mm512_setr_ps( + -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fnmsub_ps(a, 0, b, c); + assert_eq_m512(r, a); + let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c); + let e = _mm512_setr_ps( + -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmsub_ps() { + 
let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fnmsub_ps(0, a, b, c); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c); + let e = _mm512_setr_ps( + -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmsub_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let c = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., + ); + let r = _mm512_mask3_fnmsub_ps(a, b, c, 0); + assert_eq_m512(r, c); + let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111); + let e = _mm512_setr_ps( + -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fnmsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask_fnmsub_ps(a, 0, b, c); + assert_eq_m256(r, a); + let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c); + let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fnmsub_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_maskz_fnmsub_ps(0, a, b, c); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c); + let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fnmsub_ps() { + let a = 
_mm256_set1_ps(1.); + let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm256_set1_ps(1.); + let r = _mm256_mask3_fnmsub_ps(a, b, c, 0); + assert_eq_m256(r, c); + let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111); + let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fnmsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask_fnmsub_ps(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c); + let e = _mm_set_ps(-1., -2., -3., -4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fnmsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_maskz_fnmsub_ps(0, a, b, c); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c); + let e = _mm_set_ps(-1., -2., -3., -4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fnmsub_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set_ps(0., 1., 2., 3.); + let c = _mm_set1_ps(1.); + let r = _mm_mask3_fnmsub_ps(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111); + let e = _mm_set_ps(-1., -2., -3., -4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rcp14_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_rcp14_ps(a); + let e = _mm512_set1_ps(0.33333206); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rcp14_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_mask_rcp14_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206, + 0.33333206, 
0.33333206, 0.33333206, 0.33333206, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rcp14_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_maskz_rcp14_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206, + 0.33333206, 0.33333206, 0.33333206, 0.33333206, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rcp14_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_rcp14_ps(a); + let e = _mm256_set1_ps(0.33333206); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rcp14_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_mask_rcp14_ps(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_rcp14_ps(a, 0b11111111, a); + let e = _mm256_set1_ps(0.33333206); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rcp14_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_maskz_rcp14_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_rcp14_ps(0b11111111, a); + let e = _mm256_set1_ps(0.33333206); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rcp14_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_rcp14_ps(a); + let e = _mm_set1_ps(0.33333206); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rcp14_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_mask_rcp14_ps(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_rcp14_ps(a, 0b00001111, a); + let e = _mm_set1_ps(0.33333206); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rcp14_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_maskz_rcp14_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r 
= _mm_maskz_rcp14_ps(0b00001111, a); + let e = _mm_set1_ps(0.33333206); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rsqrt14_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_rsqrt14_ps(a); + let e = _mm512_set1_ps(0.5773392); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rsqrt14_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_mask_rsqrt14_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392, + 0.5773392, 0.5773392, 0.5773392, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rsqrt14_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_maskz_rsqrt14_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392, + 0.5773392, 0.5773392, 0.5773392, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rsqrt14_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_rsqrt14_ps(a); + let e = _mm256_set1_ps(0.5773392); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rsqrt14_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_mask_rsqrt14_ps(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a); + let e = _mm256_set1_ps(0.5773392); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rsqrt14_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_maskz_rsqrt14_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_rsqrt14_ps(0b11111111, a); + let e = _mm256_set1_ps(0.5773392); + assert_eq_m256(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rsqrt14_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_rsqrt14_ps(a); + let e = _mm_set1_ps(0.5773392); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rsqrt14_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_mask_rsqrt14_ps(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a); + let e = _mm_set1_ps(0.5773392); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rsqrt14_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_maskz_rsqrt14_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_rsqrt14_ps(0b00001111, a); + let e = _mm_set1_ps(0.5773392); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getexp_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_getexp_ps(a); + let e = _mm512_set1_ps(1.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getexp_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_mask_getexp_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getexp_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_maskz_getexp_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_getexp_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_getexp_ps(a); + let e = _mm256_set1_ps(1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_getexp_ps() 
{ + let a = _mm256_set1_ps(3.); + let r = _mm256_mask_getexp_ps(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_getexp_ps(a, 0b11111111, a); + let e = _mm256_set1_ps(1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_getexp_ps() { + let a = _mm256_set1_ps(3.); + let r = _mm256_maskz_getexp_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_getexp_ps(0b11111111, a); + let e = _mm256_set1_ps(1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_getexp_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_getexp_ps(a); + let e = _mm_set1_ps(1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_getexp_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_mask_getexp_ps(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_getexp_ps(a, 0b00001111, a); + let e = _mm_set1_ps(1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_getexp_ps() { + let a = _mm_set1_ps(3.); + let r = _mm_maskz_getexp_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_getexp_ps(0b00001111, a); + let e = _mm_set1_ps(1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_roundscale_ps() { + let a = _mm512_set1_ps(1.1); + let r = _mm512_roundscale_ps::<0b00_00_00_00>(a); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_roundscale_ps() { + let a = _mm512_set1_ps(1.1); + let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a); + let e = _mm512_set1_ps(1.1); + assert_eq_m512(r, e); + let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_roundscale_ps() { + let a = _mm512_set1_ps(1.1); + let r = 
_mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_roundscale_ps() { + let a = _mm256_set1_ps(1.1); + let r = _mm256_roundscale_ps::<0b00_00_00_00>(a); + let e = _mm256_set1_ps(1.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_roundscale_ps() { + let a = _mm256_set1_ps(1.1); + let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a); + let e = _mm256_set1_ps(1.1); + assert_eq_m256(r, e); + let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a); + let e = _mm256_set1_ps(1.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_roundscale_ps() { + let a = _mm256_set1_ps(1.1); + let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a); + let e = _mm256_set1_ps(1.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_roundscale_ps() { + let a = _mm_set1_ps(1.1); + let r = _mm_roundscale_ps::<0b00_00_00_00>(a); + let e = _mm_set1_ps(1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_roundscale_ps() { + let a = _mm_set1_ps(1.1); + let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a); + let e = _mm_set1_ps(1.1); + assert_eq_m128(r, e); + let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a); + let e = _mm_set1_ps(1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_roundscale_ps() { + let a = _mm_set1_ps(1.1); + let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = 
_mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a); + let e = _mm_set1_ps(1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_scalef_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_scalef_ps(a, b); + let e = _mm512_set1_ps(8.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_scalef_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_mask_scalef_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b); + let e = _mm512_set_ps( + 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_scalef_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_maskz_scalef_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b); + let e = _mm512_set_ps( + 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_scalef_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(3.); + let r = _mm256_scalef_ps(a, b); + let e = _mm256_set1_ps(8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_scalef_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(3.); + let r = _mm256_mask_scalef_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b); + let e = _mm256_set1_ps(8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_scalef_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(3.); + let r = _mm256_maskz_scalef_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_scalef_ps(0b11111111, a, b); + let 
e = _mm256_set1_ps(8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_scalef_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_scalef_ps(a, b); + let e = _mm_set1_ps(8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_scalef_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_mask_scalef_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_scalef_ps(a, 0b00001111, a, b); + let e = _mm_set1_ps(8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_scalef_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_maskz_scalef_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_scalef_ps(0b00001111, a, b); + let e = _mm_set1_ps(8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fixupimm_ps() { + let a = _mm512_set1_ps(f32::NAN); + let b = _mm512_set1_ps(f32::MAX); + let c = _mm512_set1_epi32(i32::MAX); + //let r = _mm512_fixupimm_ps(a, b, c, 5); + let r = _mm512_fixupimm_ps::<5>(a, b, c); + let e = _mm512_set1_ps(0.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fixupimm_ps() { + #[rustfmt::skip] + let a = _mm512_set_ps( + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + 1., 1., 1., 1., + 1., 1., 1., 1., + ); + let b = _mm512_set1_ps(f32::MAX); + let c = _mm512_set1_epi32(i32::MAX); + let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fixupimm_ps() { + #[rustfmt::skip] + let a = _mm512_set_ps( + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + 1., 1., 1., 1., + 1., 1., 
1., 1., + ); + let b = _mm512_set1_ps(f32::MAX); + let c = _mm512_set1_epi32(i32::MAX); + let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_fixupimm_ps() { + let a = _mm256_set1_ps(f32::NAN); + let b = _mm256_set1_ps(f32::MAX); + let c = _mm256_set1_epi32(i32::MAX); + let r = _mm256_fixupimm_ps::<5>(a, b, c); + let e = _mm256_set1_ps(0.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fixupimm_ps() { + let a = _mm256_set1_ps(f32::NAN); + let b = _mm256_set1_ps(f32::MAX); + let c = _mm256_set1_epi32(i32::MAX); + let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c); + let e = _mm256_set1_ps(0.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fixupimm_ps() { + let a = _mm256_set1_ps(f32::NAN); + let b = _mm256_set1_ps(f32::MAX); + let c = _mm256_set1_epi32(i32::MAX); + let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c); + let e = _mm256_set1_ps(0.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_fixupimm_ps() { + let a = _mm_set1_ps(f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_fixupimm_ps::<5>(a, b, c); + let e = _mm_set1_ps(0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fixupimm_ps() { + let a = _mm_set1_ps(f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c); + let e = _mm_set1_ps(0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fixupimm_ps() { + let a = _mm_set1_ps(f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); 
+ let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c); + let e = _mm_set1_ps(0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ternarylogic_epi32() { + let a = _mm512_set1_epi32(1 << 2); + let b = _mm512_set1_epi32(1 << 1); + let c = _mm512_set1_epi32(1 << 0); + let r = _mm512_ternarylogic_epi32::<8>(a, b, c); + let e = _mm512_set1_epi32(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ternarylogic_epi32() { + let src = _mm512_set1_epi32(1 << 2); + let a = _mm512_set1_epi32(1 << 1); + let b = _mm512_set1_epi32(1 << 0); + let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ternarylogic_epi32() { + let a = _mm512_set1_epi32(1 << 2); + let b = _mm512_set1_epi32(1 << 1); + let c = _mm512_set1_epi32(1 << 0); + let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c); + let e = _mm512_set1_epi32(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ternarylogic_epi32() { + let a = _mm256_set1_epi32(1 << 2); + let b = _mm256_set1_epi32(1 << 1); + let c = _mm256_set1_epi32(1 << 0); + let r = _mm256_ternarylogic_epi32::<8>(a, b, c); + let e = _mm256_set1_epi32(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_ternarylogic_epi32() { + let src = _mm256_set1_epi32(1 << 2); + let a = _mm256_set1_epi32(1 << 1); + let b = _mm256_set1_epi32(1 << 0); + let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b); + let e = 
_mm256_set1_epi32(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_ternarylogic_epi32() { + let a = _mm256_set1_epi32(1 << 2); + let b = _mm256_set1_epi32(1 << 1); + let c = _mm256_set1_epi32(1 << 0); + let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c); + let e = _mm256_set1_epi32(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ternarylogic_epi32() { + let a = _mm_set1_epi32(1 << 2); + let b = _mm_set1_epi32(1 << 1); + let c = _mm_set1_epi32(1 << 0); + let r = _mm_ternarylogic_epi32::<8>(a, b, c); + let e = _mm_set1_epi32(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_ternarylogic_epi32() { + let src = _mm_set1_epi32(1 << 2); + let a = _mm_set1_epi32(1 << 1); + let b = _mm_set1_epi32(1 << 0); + let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b); + let e = _mm_set1_epi32(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_ternarylogic_epi32() { + let a = _mm_set1_epi32(1 << 2); + let b = _mm_set1_epi32(1 << 1); + let c = _mm_set1_epi32(1 << 0); + let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c); + let e = _mm_set1_epi32(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getmant_ps() { + let a = _mm512_set1_ps(10.); + let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); + let e = _mm512_set1_ps(1.25); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getmant_ps() { + let a = _mm512_set1_ps(10.); + let r = 
_mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>( + a, + 0b11111111_00000000, + a, + ); + let e = _mm512_setr_ps( + 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getmant_ps() { + let a = _mm512_set1_ps(10.); + let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = + _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_getmant_ps() { + let a = _mm256_set1_ps(10.); + let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); + let e = _mm256_set1_ps(1.25); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_getmant_ps() { + let a = _mm256_set1_ps(10.); + let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a); + let e = _mm256_set1_ps(1.25); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_getmant_ps() { + let a = _mm256_set1_ps(10.); + let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a); + let e = _mm256_set1_ps(1.25); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_getmant_ps() { + let a = _mm_set1_ps(10.); + let r = 
_mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); + let e = _mm_set1_ps(1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_getmant_ps() { + let a = _mm_set1_ps(10.); + let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a); + let e = _mm_set1_ps(1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_getmant_ps() { + let a = _mm_set1_ps(10.); + let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a); + let e = _mm_set1_ps(1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_round_ps() { + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, + ); + let b = _mm512_set1_ps(-1.); + let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -1., 0.5, 1., 2.5, + 3., 4.5, 5., 6.5, + 7., 8.5, 9., 10.5, + 11., 12.5, 13., -0.99999994, + ); + assert_eq_m512(r, e); + let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_ps( + -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_round_ps() { + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, + ); + let b = _mm512_set1_ps(-1.); + let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + a, + 0b11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 1.5, 2., 3.5, + 4., 5.5, 6., 7.5, + 7., 8.5, 9., 10.5, + 11., 12.5, 13., -0.99999994, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_round_ps() { + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, + ); + let b = _mm512_set1_ps(-1.); + let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0., 0., 0., + 0., 0., 0., 0., + 7., 8.5, 9., 10.5, + 11., 12.5, 13., -0.99999994, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_round_ps() { + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -1., 0.5, 1., 2.5, + 3., 4.5, 5., 6.5, + 7., 8.5, 9., 10.5, + 11., 12.5, 13., -0.99999994, + ); + assert_eq_m512(r, e); + let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_ps( + -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_round_ps() { + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512(r, a); + let r = 
_mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 1.5, 2., 3.5, + 4., 5.5, 6., 7.5, + 7., 8.5, 9., 10.5, + 11., 12.5, 13., -0.99999994, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_round_ps() { + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, + ); + let b = _mm512_set1_ps(1.); + let r = + _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0., 0., 0., + 0., 0., 0., 0., + 7., 8.5, 9., 10.5, + 11., 12.5, 13., -0.99999994, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_round_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, + 4., 5.5, 6., 7.5, + 8., 9.5, 10., 11.5, + 12., 13.5, 14., 0.00000000000000000000007, + ); + let b = _mm512_set1_ps(0.1); + let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0.15, 0.2, 0.35, + 0.4, 0.55, 0.6, 0.75, + 0.8, 0.95, 1.0, 1.15, + 1.2, 1.35, 1.4, 0.000000000000000000000007000001, + ); + assert_eq_m512(r, e); + let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0.14999999, 0.2, 0.35, + 0.4, 0.54999995, 0.59999996, 0.75, + 0.8, 0.95, 1.0, 1.15, + 1.1999999, 1.3499999, 1.4, 0.000000000000000000000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_round_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, + 4., 5.5, 6., 7.5, + 8., 9.5, 
10., 11.5, + 12., 13.5, 14., 0.00000000000000000000007, + ); + let b = _mm512_set1_ps(0.1); + let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 1.5, 2., 3.5, + 4., 5.5, 6., 7.5, + 0.8, 0.95, 1.0, 1.15, + 1.2, 1.35, 1.4, 0.000000000000000000000007000001, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_round_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1.5, 2., 3.5, + 4., 5.5, 6., 7.5, + 8., 9.5, 10., 11.5, + 12., 13.5, 14., 0.00000000000000000000007, + ); + let b = _mm512_set1_ps(0.1); + let r = + _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111_00000000, + a, + b, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 0., 0., 0., + 0., 0., 0., 0., + 0.8, 0.95, 1.0, 1.15, + 1.2, 1.35, 1.4, 0.000000000000000000000007000001, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_round_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ps(0.33333334); + assert_eq_m512(r, e); + let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ps(0.3333333); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_round_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512(r, a); + let 
r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b11111111_00000000, + a, + b, + ); + let e = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334, + 0.33333334, 0.33333334, 0.33333334, 0.33333334, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_round_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = + _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111_00000000, + a, + b, + ); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334, + 0.33333334, 0.33333334, 0.33333334, 0.33333334, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sqrt_round_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_ps(1.7320508); + assert_eq_m512(r, e); + let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_ps(1.7320509); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sqrt_round_ps() { + let a = _mm512_set1_ps(3.); + let r = + _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b11111111_00000000, + a, + ); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508, + 1.7320508, 1.7320508, 1.7320508, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sqrt_round_ps() { + let a = _mm512_set1_ps(3.); + let r 
= + _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111_00000000, + a, + ); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508, + 1.7320508, 1.7320508, 1.7320508, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(-0.99999994); + assert_eq_m512(r, e); + let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(-0.9999999); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00000000_11111111, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512(r, 
_mm512_setzero_ps()); + #[rustfmt::skip] + let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00000000_11111111, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -1., -1., -1., -1., + -1., -1., -1., -1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(-0.99999994); + assert_eq_m512(r, e); + let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(-0.9999999); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 
0b00000000_11111111, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00000000_11111111, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 1., 1., 1., 1., + 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmaddsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = + _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + #[rustfmt::skip] + let e = 
_mm512_setr_ps( + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + ); + assert_eq_m512(r, e); + let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_ps( + 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., + -0.9999999, 1., -0.9999999, 1., -0.9999999, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmaddsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00000000_11111111, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmaddsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm512_mask3_fmaddsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00000000_11111111, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + -1., -1., -1., -1., + -1., -1., -1., -1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsubadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = + _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + ); + assert_eq_m512(r, e); + let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_ps( + -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., + -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsubadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00000000_11111111, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 
-0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsubadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + b, + c, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsubadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512(r, c); + let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00000000_11111111, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -1., -1., -1., -1., + -1., -1., -1., -1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = + _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(0.99999994); + assert_eq_m512(r, e); + let r = _mm512_fnmadd_round_ps::<{ 
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(0.9999999); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00000000_11111111, + b, + c, + ); + let e = _mm512_setr_ps( + 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, + 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + b, + c, + ); + let e = _mm512_setr_ps( + 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, + 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmadd_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(1.); + let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512(r, c); + let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00000000_11111111, + ); + let e = _mm512_setr_ps( + 0.99999994, 0.99999994, 
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, + 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = + _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(0.99999994); + assert_eq_m512(r, e); + let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ps(0.9999999); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00000000_11111111, + b, + c, + ); + let e = _mm512_setr_ps( + 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, + 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + b, + c, + ); + let e = _mm512_setr_ps( + 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, + 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0., + ); + 
assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmsub_round_ps() { + let a = _mm512_set1_ps(0.00000007); + let b = _mm512_set1_ps(1.); + let c = _mm512_set1_ps(-1.); + let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512(r, c); + let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00000000_11111111, + ); + let e = _mm512_setr_ps( + 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, + 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_round_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_round_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_round_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_round_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_round_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_round_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b); + 
let e = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getexp_round_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_set1_ps(1.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getexp_round_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getexp_round_ps() { + let a = _mm512_set1_ps(3.); + let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_roundscale_round_ps() { + let a = _mm512_set1_ps(1.1); + let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_roundscale_round_ps() { + let a = _mm512_set1_ps(1.1); + let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a); + let e = _mm512_set1_ps(1.1); + assert_eq_m512(r, e); + let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>( + a, + 0b11111111_11111111, + a, + ); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_maskz_roundscale_round_ps() { + let a = _mm512_set1_ps(1.1); + let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = + _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_scalef_round_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ps(8.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_scalef_round_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512(r, a); + let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b11111111_00000000, + a, + b, + ); + let e = _mm512_set_ps( + 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_scalef_round_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(3.); + let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111_00000000, + a, + b, + ); + let e = _mm512_set_ps( + 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fixupimm_round_ps() { + let a = _mm512_set1_ps(f32::NAN); + let b = _mm512_set1_ps(f32::MAX); + let c = _mm512_set1_epi32(i32::MAX); + let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, 
b, c); + let e = _mm512_set1_ps(0.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fixupimm_round_ps() { + #[rustfmt::skip] + let a = _mm512_set_ps( + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + 1., 1., 1., 1., + 1., 1., 1., 1., + ); + let b = _mm512_set1_ps(f32::MAX); + let c = _mm512_set1_epi32(i32::MAX); + let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>( + a, + 0b11111111_00000000, + b, + c, + ); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fixupimm_round_ps() { + #[rustfmt::skip] + let a = _mm512_set_ps( + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + 1., 1., 1., 1., + 1., 1., 1., 1., + ); + let b = _mm512_set1_ps(f32::MAX); + let c = _mm512_set1_epi32(i32::MAX); + let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>( + 0b11111111_00000000, + a, + b, + c, + ); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getmant_round_ps() { + let a = _mm512_set1_ps(10.); + let r = _mm512_getmant_round_ps::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a); + let e = _mm512_set1_ps(1.25); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getmant_round_ps() { + let a = _mm512_set1_ps(10.); + let r = _mm512_mask_getmant_round_ps::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_getmant_round_ps::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0b11111111_00000000, a); + let e = _mm512_setr_ps( + 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 
1.25, 1.25, 1.25, 1.25, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getmant_round_ps() { + let a = _mm512_set1_ps(10.); + let r = _mm512_maskz_getmant_round_ps::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_getmant_round_ps::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0b11111111_00000000, a); + let e = _mm512_setr_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtps_epi32() { + let a = _mm512_setr_ps( + 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvtps_epi32(a); + let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtps_epi32() { + let a = _mm512_setr_ps( + 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvtps_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtps_epi32() { + let a = _mm512_setr_ps( + 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvtps_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtps_epi32() { + let a = 
_mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvtps_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvtps_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtps_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtps_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtps_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvtps_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtps_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtps_epu32() { + let a = _mm512_setr_ps( + 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvtps_epu32(a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtps_epu32() { + let a = _mm512_setr_ps( + 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvtps_epu32(src, 0, a); + 
assert_eq_m512i(r, src); + let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtps_epu32() { + let a = _mm512_setr_ps( + 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvtps_epu32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_cvtps_epu32(a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvtps_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvtps_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtps_epu32(0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_cvtps_epu32(a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe 
fn test_mm_mask_cvtps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtps_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtps_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvtps_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtps_epu32(0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi8_epi32(a); + let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi32(-1); + let r = _mm512_mask_cvtepi8_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a); + let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi8_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = 
_mm256_set1_epi32(-1); + let r = _mm256_mask_cvtepi8_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepi8_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi32(-1); + let r = _mm_mask_cvtepi8_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepi8_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi8_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu8_epi32(a); + let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi32(-1); + let r = _mm512_mask_cvtepu8_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = 
_mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a); + let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu8_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi32(-1); + let r = _mm256_mask_cvtepu8_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu8_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi32(-1); + let r = _mm_mask_cvtepu8_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = 
_mm_maskz_cvtepu8_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu8_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi16_epi32() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi16_epi32(a); + let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi16_epi32() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi32(-1); + let r = _mm512_mask_cvtepi16_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a); + let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi16_epi32() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi16_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi16_epi32() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let src = _mm256_set1_epi32(-1); + let r = _mm256_mask_cvtepi16_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi16_epi32() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_cvtepi16_epi32(0, a); + assert_eq_m256i(r, 
_mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a); + let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi16_epi32() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let src = _mm_set1_epi32(-1); + let r = _mm_mask_cvtepi16_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi16_epi32() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_maskz_cvtepi16_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi16_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu16_epi32() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu16_epi32(a); + let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu16_epi32() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi32(-1); + let r = _mm512_mask_cvtepu16_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a); + let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu16_epi32() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu16_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 
0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi32(-1); + let r = _mm256_mask_cvtepu16_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu16_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi32(-1); + let r = _mm_mask_cvtepu16_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu16_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu16_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_ps() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_ps(a); + let e = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_ps() { + let a = 
_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_ps(-1.); + let r = _mm512_mask_cvtepi32_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a); + let e = _mm512_set_ps( + -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_ps() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_ps() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm256_set1_ps(-1.); + let r = _mm256_mask_cvtepi32_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_ps() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_cvtepi32_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_cvtepi32_ps(0b11111111, a); + let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_ps() { + let a = _mm_set_epi32(1, 2, 3, 4); + let src = _mm_set1_ps(-1.); + let r = _mm_mask_cvtepi32_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_ps() { + 
let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_cvtepi32_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_cvtepi32_ps(0b00001111, a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32_ps() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32_ps(a); + let e = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32_ps() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_ps(-1.); + let r = _mm512_mask_cvtepu32_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a); + let e = _mm512_set_ps( + -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu32_ps() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu32_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_epi16() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_epi16(a); + let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_epi16() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi16(-1); + let r = 
_mm512_mask_cvtepi32_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a); + let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_epi16() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_cvtepi32_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let src = _mm_set1_epi16(-1); + let r = _mm256_mask_cvtepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_cvtepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_cvtepi32_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_cvtepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_maskz_cvtepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi32_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_epi8() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_epi8(a); + let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_epi8() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi8(-1); + let r = _mm512_mask_cvtepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a); + let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_epi8() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi32_epi8() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_cvtepi32_epi8(a); + 
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_epi8() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_epi8() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_cvtepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi32_epi8() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_cvtepi32_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_epi8() { + let a = _mm_set_epi32(4, 5, 6, 7); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_epi8() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_maskz_cvtepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi32_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi32_epi16() { + #[rustfmt::skip] + let a = 
_mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, + ); + let r = _mm512_cvtsepi32_epi16(a); + #[rustfmt::skip] + let e = _mm256_set_epi16( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i16::MIN, i16::MAX, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi32_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, + ); + let src = _mm256_set1_epi16(-1); + let r = _mm512_mask_cvtsepi32_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm256_set_epi16( + -1, -1, -1, -1, + -1, -1, -1, -1, + 8, 9, 10, 11, + 12, 13, i16::MIN, i16::MAX, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi32_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, + ); + let r = _mm512_maskz_cvtsepi32_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm256_set_epi16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 8, 9, 10, 11, + 12, 13, i16::MIN, i16::MAX, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_cvtsepi32_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let src = _mm_set1_epi16(-1); + let r = _mm256_mask_cvtsepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_cvtsepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_cvtsepi32_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtsepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_maskz_cvtsepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi32_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, + ); + let r = _mm512_cvtsepi32_epi8(a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i8::MIN, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi32_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, + ); + let src = _mm_set1_epi8(-1); + let r = 
_mm512_mask_cvtsepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + -1, -1, -1, -1, + -1, -1, -1, -1, + 8, 9, 10, 11, + 12, 13, i8::MIN, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi32_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, + ); + let r = _mm512_maskz_cvtsepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 8, 9, 10, 11, + 12, 13, i8::MIN, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi32_epi8() { + let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_cvtsepi32_epi8(a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_epi8() { + let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtsepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi32_epi8() { + let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_maskz_cvtsepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi32_epi8() { + let a = _mm_set_epi32(13, 14, 15, 16); + let r = _mm_cvtsepi32_epi8(a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_epi8() { + let a = _mm_set_epi32(13, 14, 15, 16); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtsepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi32_epi8() { + let a = _mm_set_epi32(13, 14, 15, 16); + let r = _mm_maskz_cvtsepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi32_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, + ); + let r = _mm512_cvtusepi32_epi16(a); + let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi32_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, + ); + let src = _mm256_set1_epi16(-1); + let r = _mm512_mask_cvtusepi32_epi16(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a); + let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1); + 
assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi32_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, + ); + let r = _mm512_maskz_cvtusepi32_epi16(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi32_epi16() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtusepi32_epi16(a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_epi16() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtusepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi32_epi16() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_cvtusepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi32_epi16() { + let a = _mm_set_epi32(5, 6, 7, 8); + let r = _mm_cvtusepi32_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_epi16() { + let a = _mm_set_epi32(5, 6, 7, 8); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtusepi32_epi16(src, 0, a); + assert_eq_m128i(r, 
src); + let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi32_epi16() { + let a = _mm_set_epi32(5, 6, 7, 8); + let r = _mm_maskz_cvtusepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi32_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, + ); + let r = _mm512_cvtusepi32_epi8(a); + let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi32_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, + ); + let src = _mm_set1_epi8(-1); + let r = _mm512_mask_cvtusepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a); + let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi32_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, + ); + let r = _mm512_maskz_cvtusepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi32_epi8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX); + let r = _mm256_cvtusepi32_epi8(a); + 
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_epi8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtusepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi32_epi8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX); + let r = _mm256_maskz_cvtusepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi32_epi8() { + let a = _mm_set_epi32(5, 6, 7, i32::MAX); + let r = _mm_cvtusepi32_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_epi8() { + let a = _mm_set_epi32(5, 6, 7, i32::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtusepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi32_epi8() { + let a = _mm_set_epi32(5, 6, 7, i32::MAX); + let r = _mm_maskz_cvtusepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m512i(r, e); + let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b00000000_11111111, + a, + ); + let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + ); + let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 
8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m512i(r, e); + let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b00000000_11111111, + a, + ); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + ); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundepi32_ps() { + let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_setr_ps( + 0., -2., 2., -4., 4., -6., 6., -8., 8., 
10., 10., 12., 12., 14., 14., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundepi32_ps() { + let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + let src = _mm512_set1_ps(0.); + let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); + assert_eq_m512(r, src); + let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b00000000_11111111, + a, + ); + let e = _mm512_setr_ps( + 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundepi32_ps() { + let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + ); + let e = _mm512_setr_ps( + 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundepu32_ps() { + let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 4294967300., 2., 4294967300., + 4., 4294967300., 6., 4294967300., + 8., 10., 10., 12., + 12., 14., 14., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundepu32_ps() { + let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + let src = _mm512_set1_ps(0.); + let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + src, 0, a, + ); + assert_eq_m512(r, src); + let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b00000000_11111111, + a, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 4294967300., 2., 4294967300., + 4., 4294967300., 6., 4294967300., + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundepu32_ps() { + let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); + let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00000000_11111111, + a, + ); + #[rustfmt::skip] + let e = _mm512_setr_ps( + 0., 4294967300., 2., 4294967300., + 4., 4294967300., 6., 4294967300., + 0., 0., 0., 0., + 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundps_ph() { + let a = _mm512_set1_ps(1.); + let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a); + let e = _mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundps_ph() { + let a = _mm512_set1_ps(1.); + let src = _mm256_set1_epi16(0); + let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); + let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundps_ph() { + let a = _mm512_set1_ps(1.); + let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m256i(r, 
_mm256_setzero_si256()); + let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); + let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvt_roundps_ph() { + let a = _mm256_set1_ps(1.); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a); + let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvt_roundps_ph() { + let a = _mm256_set1_ps(1.); + let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a); + let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvt_roundps_ph() { + let a = _mm_set1_ps(1.); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); + let e = _mm_setr_epi64x(4323521613979991040, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvt_roundps_ph() { + let a = _mm_set1_ps(1.); + let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a); + let e = _mm_setr_epi64x(4323521613979991040, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtps_ph() { + let a = _mm512_set1_ps(1.); + let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a); + let e = 
_mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtps_ph() { + let a = _mm512_set1_ps(1.); + let src = _mm256_set1_epi16(0); + let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); + let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtps_ph() { + let a = _mm512_set1_ps(1.); + let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); + let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtps_ph() { + let a = _mm256_set1_ps(1.); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a); + let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_ph() { + let a = _mm256_set1_ps(1.); + let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a); + let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtps_ph() { + let a = _mm_set1_ps(1.); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m128i(r, src); + 
let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); + let e = _mm_setr_epi64x(4323521613979991040, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtps_ph() { + let a = _mm_set1_ps(1.); + let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a); + let e = _mm_setr_epi64x(4323521613979991040, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundph_ps() { + let a = _mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set1_ps(1.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundph_ps() { + let a = _mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + let src = _mm512_set1_ps(0.); + let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); + let e = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundph_ps() { + let a = _mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); + let e = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtph_ps() { + let a = 
_mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + let r = _mm512_cvtph_ps(a); + let e = _mm512_set1_ps(1.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtph_ps() { + let a = _mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + let src = _mm512_set1_ps(0.); + let r = _mm512_mask_cvtph_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a); + let e = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtph_ps() { + let a = _mm256_setr_epi64x( + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + 4323521613979991040, + ); + let r = _mm512_maskz_cvtph_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a); + let e = _mm512_setr_ps( + 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let src = _mm256_set1_ps(0.); + let r = _mm256_mask_cvtph_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_cvtph_ps(src, 0b11111111, a); + let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let r = _mm256_maskz_cvtph_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_cvtph_ps(0b11111111, a); + let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let src = _mm_set1_ps(0.); + let r = _mm_mask_cvtph_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_cvtph_ps(src, 0b00001111, a); + let e = _mm_setr_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let r = _mm_maskz_cvtph_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_cvtph_ps(0b00001111, a); + let e = _mm_setr_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtt_roundps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtt_roundps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtt_roundps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); + let e = 
_mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtt_roundps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtt_roundps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtt_roundps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvttps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvttps_epi32(a); + let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvttps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., 
-3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvttps_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttps_epi32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvttps_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvttps_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvttps_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvttps_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttps_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttps_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvttps_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttps_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvttps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_cvttps_epu32(a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvttps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let src = _mm512_set1_epi32(0); + let r = _mm512_mask_cvttps_epu32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttps_epu32() { + let a = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, + ); + let r = _mm512_maskz_cvttps_epu32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a); + let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvttps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_cvttps_epu32(a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 
12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvttps_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvttps_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvttps_epu32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvttps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_cvttps_epu32(a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttps_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttps_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvttps_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttps_epu32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_ps() { + let arr: [f32; 256] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr()); + #[rustfmt::skip] + assert_eq_m512(r, 
_mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112., + 120., 128., 136., 144., 152., 160., 168., 176.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_ps() { + let arr: [f32; 256] = core::array::from_fn(|i| i as f32); + let src = _mm512_set1_ps(2.); + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr()); + #[rustfmt::skip] + assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112., + 2., 128., 2., 144., 2., 160., 2., 176.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_epi32() { + let arr: [i32; 256] = core::array::from_fn(|i| i as i32); + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr()); + #[rustfmt::skip] + assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_epi32() { + let arr: [i32; 256] = core::array::from_fn(|i| i as i32); + let src = _mm512_set1_epi32(2); + let mask = 0b10101010_10101010; + let index = _mm512_setr_epi32( + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, + ); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr()); + assert_eq_m512i( + r, + _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240), + ); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_ps() { + let mut arr = [0f32; 256]; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + 
let src = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + // A multiplier of 4 is word-addressing + _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src); + let mut expected = [0f32; 256]; + for i in 0..16 { + expected[i * 16] = (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_ps() { + let mut arr = [0f32; 256]; + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + // A multiplier of 4 is word-addressing + _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0f32; 256]; + for i in 0..8 { + expected[i * 32 + 16] = 2. * (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_epi32() { + let mut arr = [0i32; 256]; + #[rustfmt::skip] + + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + // A multiplier of 4 is word-addressing + _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src); + let mut expected = [0i32; 256]; + for i in 0..16 { + expected[i * 16] = (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_epi32() { + let mut arr = [0i32; 256]; + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + // A multiplier of 4 is word-addressing + 
_mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0i32; 256]; + for i in 0..8 { + expected[i * 32 + 16] = 2 * (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmplt_ps_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_ps_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnlt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnlt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnle_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmpnle_ps_mask(b, a); + 
assert_eq!(m, 0b00001101_00001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnle_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmpnle_ps_mask(mask, b, a); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let m = _mm512_cmpeq_ps_mask(b, a); + assert_eq!(m, 0b11001101_11001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, 
f32::MIN, f32::NAN, -100.); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_ps_mask(mask, b, a); + assert_eq!(r, 0b01001000_01001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let m = _mm512_cmpneq_ps_mask(b, a); + assert_eq!(m, 0b00110010_00110010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_ps_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_ps_mask() { + let a = 
_mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm256_set1_ps(-1.); + let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_ps_mask() { + let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm256_set1_ps(-1.); + let mask = 0b01100110; + let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_ps_mask() { + let a = _mm_set_ps(0., 1., -1., 13.); + let b = _mm_set1_ps(1.); + let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_ps_mask() { + let a = _mm_set_ps(0., 1., -1., 13.); + let b = _mm_set1_ps(1.); + let mask = 0b11111111; + let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_round_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_round_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., 
f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let m = _mm512_cmpord_ps_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let mask = 0b11000011_11000011; + let m = _mm512_mask_cmpord_ps_mask(mask, a, b); + assert_eq!(m, 0b00000001_00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpunord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let m = _mm512_cmpunord_ps_mask(a, b); + + assert_eq!(m, 0b11111010_11111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpunord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let mask = 0b00001111_00001111; + let m = _mm512_mask_cmpunord_ps_mask(mask, a, b); 
+ assert_eq!(m, 0b000001010_00001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b); + assert_eq!(m, 0); + let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_round_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_round_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b); + assert_eq!(m, 0); + let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b); + assert_eq!(m, 0); + let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_round_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm_mask_cmp_round_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b); + assert_eq!(m, 0); + let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epu32_mask(a, b); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmplt_epu32_mask(mask, a, b); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmplt_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99); + let b = _mm256_set1_epi32(1); + let r = _mm256_cmplt_epu32_mask(a, b); + assert_eq!(r, 0b10000000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99); + let b = _mm256_set1_epi32(1); + let mask = 0b11111111; + let r = _mm256_mask_cmplt_epu32_mask(mask, a, b); + assert_eq!(r, 0b10000000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmplt_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let r = _mm_cmplt_epu32_mask(a, b); + assert_eq!(r, 0b00001000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe 
fn test_mm_mask_cmplt_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let mask = 0b11111111; + let r = _mm_mask_cmplt_epu32_mask(mask, a, b); + assert_eq!(r, 0b00001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmpgt_epu32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpgt_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101); + let b = _mm256_set1_epi32(1); + let r = _mm256_cmpgt_epu32_mask(a, b); + assert_eq!(r, 0b00111111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101); + let b = _mm256_set1_epi32(1); + let mask = 0b11111111; + let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b); + assert_eq!(r, 0b00111111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpgt_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let r = _mm_cmpgt_epu32_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + 
let mask = 0b11111111; + let r = _mm_mask_cmpgt_epu32_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmple_epu32_mask(a, b), + !_mm512_cmpgt_epu32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!( + _mm512_mask_cmple_epu32_mask(mask, a, b), + 0b01111010_01111010 + ); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmple_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101); + let b = _mm256_set1_epi32(1); + let r = _mm256_cmple_epu32_mask(a, b); + assert_eq!(r, 0b11000000) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmple_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101); + let b = _mm256_set1_epi32(1); + let mask = 0b11111111; + let r = _mm256_mask_cmple_epu32_mask(mask, a, b); + assert_eq!(r, 0b11000000) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmple_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let r = _mm_cmple_epu32_mask(a, b); + assert_eq!(r, 0b00001100) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmple_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let mask = 0b11111111; + let r = _mm_mask_cmple_epu32_mask(mask, a, b); + assert_eq!(r, 0b00001100) + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmpge_epu32_mask(a, b), + !_mm512_cmplt_epu32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpge_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200); + let b = _mm256_set1_epi32(1); + let r = _mm256_cmpge_epu32_mask(a, b); + assert_eq!(r, 0b01111111) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epu32_mask() { + let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200); + let b = _mm256_set1_epi32(1); + let mask = 0b11111111; + let r = _mm256_mask_cmpge_epu32_mask(mask, a, b); + assert_eq!(r, 0b01111111) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpge_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let r = _mm_cmpge_epu32_mask(a, b); + assert_eq!(r, 0b00000111) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpge_epu32_mask() { + let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32); + let b = _mm_set1_epi32(1); + let mask = 0b11111111; + let r = _mm_mask_cmpge_epu32_mask(mask, a, b); + assert_eq!(r, 0b00000111) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, 
-1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpeq_epu32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpeq_epu32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm256_cmpeq_epu32_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epu32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpeq_epu32_mask() { + let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32); + let b = _mm_set_epi32(0, 1, 13, 42); + let m = _mm_cmpeq_epu32_mask(b, a); + assert_eq!(m, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epu32_mask() { + let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32); + let b = _mm_set_epi32(0, 1, 13, 42); + let mask = 0b11111111; + 
let r = _mm_mask_cmpeq_epu32_mask(mask, b, a); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpneq_epu32_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpneq_epu32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100); + let r = _mm256_cmpneq_epu32_mask(b, a); + assert_eq!(r, 0b00110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epu32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100); + let mask = 0b11111111; + let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a); + assert_eq!(r, 0b00110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpneq_epu32_mask() { + let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32); + let b = _mm_set_epi32(0, 1, 13, 42); + let r = _mm_cmpneq_epu32_mask(b, a); + 
assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epu32_mask() { + let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32); + let b = _mm_set_epi32(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm_mask_cmpneq_epu32_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_epu32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_epu32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b11001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_epu32_mask() { + let a = _mm_set_epi32(0, 1, -1, i32::MAX); + let b = _mm_set1_epi32(1); + let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00001000); + 
} + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_epu32_mask() { + let a = _mm_set_epi32(0, 1, -1, i32::MAX); + let b = _mm_set1_epi32(1); + let mask = 0b11111111; + let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epi32_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmplt_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let r = _mm256_cmplt_epi32_mask(a, b); + assert_eq!(r, 0b00000101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmplt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmplt_epi32_mask() { + let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100); + let b = _mm_set1_epi32(-1); + let r = _mm_cmplt_epi32_mask(a, b); + assert_eq!(r, 0b00000101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmplt_epi32_mask() { + let a = 
_mm_set_epi32(i32::MAX, i32::MIN, 100, -100); + let b = _mm_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm_mask_cmplt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmpgt_epi32_mask(b, a); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpgt_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let r = _mm256_cmpgt_epi32_mask(a, b); + assert_eq!(r, 0b11011010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b); + assert_eq!(r, 0b11011010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpgt_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set1_epi32(-1); + let r = _mm_cmpgt_epi32_mask(a, b); + assert_eq!(r, 0b00001101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm_mask_cmpgt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00001101); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmple_epi32_mask(a, b), + !_mm512_cmpgt_epi32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmple_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let r = _mm256_cmple_epi32_mask(a, b); + assert_eq!(r, 0b00100101) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmple_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmple_epi32_mask(mask, a, b); + assert_eq!(r, 0b00100101) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmple_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 200); + let b = _mm_set1_epi32(-1); + let r = _mm_cmple_epi32_mask(a, b); + assert_eq!(r, 0b00000010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmple_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 200); + let b = _mm_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm_mask_cmple_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000010) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, 
i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmpge_epi32_mask(a, b), + !_mm512_cmplt_epi32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!( + _mm512_mask_cmpge_epi32_mask(mask, a, b), + 0b01111010_01111010 + ); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpge_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let r = _mm256_cmpge_epi32_mask(a, b); + assert_eq!(r, 0b11111010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmpge_epi32_mask(mask, a, b); + assert_eq!(r, 0b11111010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpge_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32); + let b = _mm_set1_epi32(-1); + let r = _mm_cmpge_epi32_mask(a, b); + assert_eq!(r, 0b00001111) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpge_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32); + let b = _mm_set1_epi32(-1); + let mask = 0b11111111; + let r = _mm_mask_cmpge_epi32_mask(mask, a, b); + assert_eq!(r, 0b00001111) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = 
_mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpeq_epi32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpeq_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm256_cmpeq_epi32_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpeq_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set_epi32(0, 1, 13, 42); + let m = _mm_cmpeq_epi32_mask(b, a); + assert_eq!(m, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set_epi32(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm_mask_cmpeq_epi32_mask(mask, b, a); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 
13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpneq_epi32_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100, + 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpneq_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm256_cmpneq_epi32_mask(b, a); + assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a)); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100); + let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b11111111; + let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a); + assert_eq!(r, 0b00110011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpneq_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set_epi32(0, 1, 13, 42); + let r = _mm_cmpneq_epi32_mask(b, a); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set_epi32(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm_mask_cmpneq_epi32_mask(mask, b, a); + assert_eq!(r, 
0b00000011) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_epi32_mask() { + let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm256_set1_epi32(-1); + let mask = 0b01100110; + let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set1_epi32(1); + let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_epi32_mask() { + let a = _mm_set_epi32(0, 1, -1, 13); + let b = _mm_set1_epi32(1); + let mask = 0b11111111; + let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_epi8() { + let r = _mm512_set1_epi8(2); + 
assert_eq_m512i( + r, + _mm512_set_epi8( + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + ), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_epi16() { + let r = _mm512_set1_epi16(2); + assert_eq_m512i( + r, + _mm512_set_epi16( + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, + ), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_epi32() { + let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr_epi32() { + let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_epi8() { + let r = _mm512_set_epi8( + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, + ); + assert_eq_m512i(r, _mm512_set1_epi8(2)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_epi16() { + let r = _mm512_set_epi16( + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, + ); + assert_eq_m512i(r, _mm512_set1_epi16(2)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_epi32() { + let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, _mm512_set1_epi32(2)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setzero_si512() { + assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512()); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setzero_epi32() { + assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32()); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_ps() { + let r = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512( + r, + _mm512_set_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr_ps() { + let r = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512( + r, + _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_ps() { + #[rustfmt::skip] + let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., 2., 2., 2.); + assert_eq_m512(expected, _mm512_set1_ps(2.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set4_epi32() { + let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1); + assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set4_ps() { + let r = _mm512_set_ps( + 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., + ); + assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr4_epi32() { + let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1); + assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr4_ps() { + let r = _mm512_set_ps( + 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., + ); + assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setzero_ps() { + assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.)); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setzero() { + assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_loadu_pd() { + let a = &[4., 3., 2., 5., 8., 9., 64., 50.]; + let p = a.as_ptr(); + let r = _mm512_loadu_pd(black_box(p)); + let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_storeu_pd() { + let a = _mm512_set1_pd(9.); + let mut r = _mm512_undefined_pd(); + _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_loadu_ps() { + let a = &[ + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ]; + let p = a.as_ptr(); + let r = _mm512_loadu_ps(black_box(p)); + let e = _mm512_setr_ps( + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_storeu_ps() { + let a = _mm512_set1_ps(9.); + let mut r = _mm512_undefined_ps(); + _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a); + assert_eq_m512(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_loadu_epi32() { + let src = _mm512_set1_epi32(42); + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_mask_loadu_epi32(src, m, black_box(p)); + let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_loadu_epi32() { + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_maskz_loadu_epi32(m, black_box(p)); + let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16); + assert_eq_m512i(r, e); + } 
+ + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_load_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 16], // 64 bytes + } + let src = _mm512_set1_epi32(42); + let a = Align { + data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + }; + let p = a.data.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_mask_load_epi32(src, m, black_box(p)); + let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_load_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 16], // 64 bytes + } + let a = Align { + data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + }; + let p = a.data.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_maskz_load_epi32(m, black_box(p)); + let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_storeu_epi32() { + let mut r = [42_i32; 16]; + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let m = 0b11101000_11001010; + _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a); + let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16); + assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_store_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 16], + } + let mut r = Align { data: [42; 16] }; + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let m = 0b11101000_11001010; + _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a); + let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16); + assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_loadu_epi64() { + let src = 
_mm512_set1_epi64(42); + let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm512_mask_loadu_epi64(src, m, black_box(p)); + let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_loadu_epi64() { + let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm512_maskz_loadu_epi64(m, black_box(p)); + let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 8], // 64 bytes + } + let src = _mm512_set1_epi64(42); + let a = Align { + data: [1_i64, 2, 3, 4, 5, 6, 7, 8], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm512_mask_load_epi64(src, m, black_box(p)); + let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 8], // 64 bytes + } + let a = Align { + data: [1_i64, 2, 3, 4, 5, 6, 7, 8], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm512_maskz_load_epi64(m, black_box(p)); + let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_storeu_epi64() { + let mut r = [42_i64; 8]; + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let m = 0b11001010; + _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a); + let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_store_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 8], + } + let mut r = Align { data: [42; 8] }; + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let m = 0b11001010; + let 
p = r.data.as_mut_ptr(); + _mm512_mask_store_epi64(p, m, a); + let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_loadu_ps() { + let src = _mm512_set1_ps(42.0); + let a = &[ + 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_mask_loadu_ps(src, m, black_box(p)); + let e = _mm512_setr_ps( + 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0, + 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_loadu_ps() { + let a = &[ + 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_maskz_loadu_ps(m, black_box(p)); + let e = _mm512_setr_ps( + 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_load_ps() { + #[repr(align(64))] + struct Align { + data: [f32; 16], // 64 bytes + } + let src = _mm512_set1_ps(42.0); + let a = Align { + data: [ + 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, + 15.0, 16.0, + ], + }; + let p = a.data.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_mask_load_ps(src, m, black_box(p)); + let e = _mm512_setr_ps( + 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0, + 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_load_ps() { + #[repr(align(64))] + struct Align { + data: [f32; 16], // 64 bytes + } + let a = Align { + data: [ + 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, + 15.0, 16.0, + ], + }; + let p = 
a.data.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_maskz_load_ps(m, black_box(p)); + let e = _mm512_setr_ps( + 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_storeu_ps() { + let mut r = [42_f32; 16]; + let a = _mm512_setr_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let m = 0b11101000_11001010; + _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a); + let e = _mm512_setr_ps( + 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0, + 16.0, + ); + assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_store_ps() { + #[repr(align(64))] + struct Align { + data: [f32; 16], + } + let mut r = Align { data: [42.0; 16] }; + let a = _mm512_setr_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let m = 0b11101000_11001010; + _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a); + let e = _mm512_setr_ps( + 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0, + 16.0, + ); + assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_loadu_pd() { + let src = _mm512_set1_pd(42.0); + let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm512_mask_loadu_pd(src, m, black_box(p)); + let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_loadu_pd() { + let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm512_maskz_loadu_pd(m, black_box(p)); + let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0); + assert_eq_m512d(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_load_pd() { + #[repr(align(64))] + struct Align { + data: [f64; 8], // 64 bytes + } + let src = _mm512_set1_pd(42.0); + let a = Align { + data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm512_mask_load_pd(src, m, black_box(p)); + let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_load_pd() { + #[repr(align(64))] + struct Align { + data: [f64; 8], // 64 bytes + } + let a = Align { + data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm512_maskz_load_pd(m, black_box(p)); + let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_storeu_pd() { + let mut r = [42_f64; 8]; + let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let m = 0b11001010; + _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a); + let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_store_pd() { + #[repr(align(64))] + struct Align { + data: [f64; 8], + } + let mut r = Align { data: [42.0; 8] }; + let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let m = 0b11001010; + _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a); + let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_loadu_epi32() { + let src = _mm256_set1_epi32(42); + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm256_mask_loadu_epi32(src, m, black_box(p)); + let e = _mm256_setr_epi32(42, 2, 42, 4, 
42, 42, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_loadu_epi32() { + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm256_maskz_loadu_epi32(m, black_box(p)); + let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_load_epi32() { + #[repr(align(32))] + struct Align { + data: [i32; 8], // 32 bytes + } + let src = _mm256_set1_epi32(42); + let a = Align { + data: [1_i32, 2, 3, 4, 5, 6, 7, 8], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm256_mask_load_epi32(src, m, black_box(p)); + let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_load_epi32() { + #[repr(align(32))] + struct Align { + data: [i32; 8], // 32 bytes + } + let a = Align { + data: [1_i32, 2, 3, 4, 5, 6, 7, 8], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm256_maskz_load_epi32(m, black_box(p)); + let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_storeu_epi32() { + let mut r = [42_i32; 8]; + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let m = 0b11001010; + _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a); + let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_store_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 8], + } + let mut r = Align { data: [42; 8] }; + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let m = 0b11001010; + _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a); + let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8); + assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e); 
+ } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_loadu_epi64() { + let src = _mm256_set1_epi64x(42); + let a = &[1_i64, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm256_mask_loadu_epi64(src, m, black_box(p)); + let e = _mm256_setr_epi64x(42, 2, 42, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_loadu_epi64() { + let a = &[1_i64, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm256_maskz_loadu_epi64(m, black_box(p)); + let e = _mm256_setr_epi64x(0, 2, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_load_epi64() { + #[repr(align(32))] + struct Align { + data: [i64; 4], // 32 bytes + } + let src = _mm256_set1_epi64x(42); + let a = Align { + data: [1_i64, 2, 3, 4], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm256_mask_load_epi64(src, m, black_box(p)); + let e = _mm256_setr_epi64x(42, 2, 42, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_load_epi64() { + #[repr(align(32))] + struct Align { + data: [i64; 4], // 32 bytes + } + let a = Align { + data: [1_i64, 2, 3, 4], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm256_maskz_load_epi64(m, black_box(p)); + let e = _mm256_setr_epi64x(0, 2, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_storeu_epi64() { + let mut r = [42_i64; 4]; + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let m = 0b1010; + _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a); + let e = _mm256_setr_epi64x(42, 2, 42, 4); + assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_store_epi64() { + #[repr(align(32))] + struct Align { + data: [i64; 4], + } + let mut r = Align { data: [42; 4] }; + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let m = 0b1010; + 
_mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a); + let e = _mm256_setr_epi64x(42, 2, 42, 4); + assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_loadu_ps() { + let src = _mm256_set1_ps(42.0); + let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm256_mask_loadu_ps(src, m, black_box(p)); + let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_loadu_ps() { + let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let p = a.as_ptr(); + let m = 0b11001010; + let r = _mm256_maskz_loadu_ps(m, black_box(p)); + let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_load_ps() { + #[repr(align(32))] + struct Align { + data: [f32; 8], // 32 bytes + } + let src = _mm256_set1_ps(42.0); + let a = Align { + data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm256_mask_load_ps(src, m, black_box(p)); + let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_load_ps() { + #[repr(align(32))] + struct Align { + data: [f32; 8], // 32 bytes + } + let a = Align { + data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], + }; + let p = a.data.as_ptr(); + let m = 0b11001010; + let r = _mm256_maskz_load_ps(m, black_box(p)); + let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_storeu_ps() { + let mut r = [42_f32; 8]; + let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let m = 0b11001010; + 
_mm256_mask_storeu_ps(r.as_mut_ptr(), m, a); + let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_store_ps() { + #[repr(align(32))] + struct Align { + data: [f32; 8], + } + let mut r = Align { data: [42.0; 8] }; + let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let m = 0b11001010; + _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a); + let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0); + assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_loadu_pd() { + let src = _mm256_set1_pd(42.0); + let a = &[1.0_f64, 2.0, 3.0, 4.0]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm256_mask_loadu_pd(src, m, black_box(p)); + let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_loadu_pd() { + let a = &[1.0_f64, 2.0, 3.0, 4.0]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm256_maskz_loadu_pd(m, black_box(p)); + let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_load_pd() { + #[repr(align(32))] + struct Align { + data: [f64; 4], // 32 bytes + } + let src = _mm256_set1_pd(42.0); + let a = Align { + data: [1.0_f64, 2.0, 3.0, 4.0], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm256_mask_load_pd(src, m, black_box(p)); + let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_load_pd() { + #[repr(align(32))] + struct Align { + data: [f64; 4], // 32 bytes + } + let a = Align { + data: [1.0_f64, 2.0, 3.0, 4.0], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm256_maskz_load_pd(m, black_box(p)); + let e = 
_mm256_setr_pd(0.0, 2.0, 0.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_storeu_pd() { + let mut r = [42_f64; 4]; + let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); + let m = 0b1010; + _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a); + let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0); + assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_store_pd() { + #[repr(align(32))] + struct Align { + data: [f64; 4], + } + let mut r = Align { data: [42.0; 4] }; + let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); + let m = 0b1010; + _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a); + let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0); + assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_loadu_epi32() { + let src = _mm_set1_epi32(42); + let a = &[1_i32, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm_mask_loadu_epi32(src, m, black_box(p)); + let e = _mm_setr_epi32(42, 2, 42, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_loadu_epi32() { + let a = &[1_i32, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm_maskz_loadu_epi32(m, black_box(p)); + let e = _mm_setr_epi32(0, 2, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_load_epi32() { + #[repr(align(16))] + struct Align { + data: [i32; 4], // 16 bytes + } + let src = _mm_set1_epi32(42); + let a = Align { + data: [1_i32, 2, 3, 4], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm_mask_load_epi32(src, m, black_box(p)); + let e = _mm_setr_epi32(42, 2, 42, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_load_epi32() { + #[repr(align(16))] + struct Align { + data: [i32; 4], // 16 bytes + } + let a = Align { + data: [1_i32, 2, 3, 4], + 
}; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm_maskz_load_epi32(m, black_box(p)); + let e = _mm_setr_epi32(0, 2, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_storeu_epi32() { + let mut r = [42_i32; 4]; + let a = _mm_setr_epi32(1, 2, 3, 4); + let m = 0b1010; + _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a); + let e = _mm_setr_epi32(42, 2, 42, 4); + assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_store_epi32() { + #[repr(align(16))] + struct Align { + data: [i32; 4], // 16 bytes + } + let mut r = Align { data: [42; 4] }; + let a = _mm_setr_epi32(1, 2, 3, 4); + let m = 0b1010; + _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a); + let e = _mm_setr_epi32(42, 2, 42, 4); + assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_loadu_epi64() { + let src = _mm_set1_epi64x(42); + let a = &[1_i64, 2]; + let p = a.as_ptr(); + let m = 0b10; + let r = _mm_mask_loadu_epi64(src, m, black_box(p)); + let e = _mm_setr_epi64x(42, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_loadu_epi64() { + let a = &[1_i64, 2]; + let p = a.as_ptr(); + let m = 0b10; + let r = _mm_maskz_loadu_epi64(m, black_box(p)); + let e = _mm_setr_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_load_epi64() { + #[repr(align(16))] + struct Align { + data: [i64; 2], // 16 bytes + } + let src = _mm_set1_epi64x(42); + let a = Align { data: [1_i64, 2] }; + let p = a.data.as_ptr(); + let m = 0b10; + let r = _mm_mask_load_epi64(src, m, black_box(p)); + let e = _mm_setr_epi64x(42, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_load_epi64() { + #[repr(align(16))] + struct Align { + data: [i64; 2], // 16 bytes + } + let a = 
Align { data: [1_i64, 2] }; + let p = a.data.as_ptr(); + let m = 0b10; + let r = _mm_maskz_load_epi64(m, black_box(p)); + let e = _mm_setr_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_storeu_epi64() { + let mut r = [42_i64; 2]; + let a = _mm_setr_epi64x(1, 2); + let m = 0b10; + _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a); + let e = _mm_setr_epi64x(42, 2); + assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_store_epi64() { + #[repr(align(16))] + struct Align { + data: [i64; 2], // 16 bytes + } + let mut r = Align { data: [42; 2] }; + let a = _mm_setr_epi64x(1, 2); + let m = 0b10; + _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a); + let e = _mm_setr_epi64x(42, 2); + assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_loadu_ps() { + let src = _mm_set1_ps(42.0); + let a = &[1.0_f32, 2.0, 3.0, 4.0]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm_mask_loadu_ps(src, m, black_box(p)); + let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_loadu_ps() { + let a = &[1.0_f32, 2.0, 3.0, 4.0]; + let p = a.as_ptr(); + let m = 0b1010; + let r = _mm_maskz_loadu_ps(m, black_box(p)); + let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_load_ps() { + #[repr(align(16))] + struct Align { + data: [f32; 4], // 16 bytes + } + let src = _mm_set1_ps(42.0); + let a = Align { + data: [1.0_f32, 2.0, 3.0, 4.0], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm_mask_load_ps(src, m, black_box(p)); + let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_load_ps() { + #[repr(align(16))] + struct Align 
{ + data: [f32; 4], // 16 bytes + } + let a = Align { + data: [1.0_f32, 2.0, 3.0, 4.0], + }; + let p = a.data.as_ptr(); + let m = 0b1010; + let r = _mm_maskz_load_ps(m, black_box(p)); + let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_storeu_ps() { + let mut r = [42_f32; 4]; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let m = 0b1010; + _mm_mask_storeu_ps(r.as_mut_ptr(), m, a); + let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0); + assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_store_ps() { + #[repr(align(16))] + struct Align { + data: [f32; 4], // 16 bytes + } + let mut r = Align { data: [42.0; 4] }; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let m = 0b1010; + _mm_mask_store_ps(r.data.as_mut_ptr(), m, a); + let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0); + assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_loadu_pd() { + let src = _mm_set1_pd(42.0); + let a = &[1.0_f64, 2.0]; + let p = a.as_ptr(); + let m = 0b10; + let r = _mm_mask_loadu_pd(src, m, black_box(p)); + let e = _mm_setr_pd(42.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_loadu_pd() { + let a = &[1.0_f64, 2.0]; + let p = a.as_ptr(); + let m = 0b10; + let r = _mm_maskz_loadu_pd(m, black_box(p)); + let e = _mm_setr_pd(0.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_load_pd() { + #[repr(align(16))] + struct Align { + data: [f64; 2], // 16 bytes + } + let src = _mm_set1_pd(42.0); + let a = Align { + data: [1.0_f64, 2.0], + }; + let p = a.data.as_ptr(); + let m = 0b10; + let r = _mm_mask_load_pd(src, m, black_box(p)); + let e = _mm_setr_pd(42.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_load_pd() { 
+ #[repr(align(16))] + struct Align { + data: [f64; 2], // 16 bytes + } + let a = Align { + data: [1.0_f64, 2.0], + }; + let p = a.data.as_ptr(); + let m = 0b10; + let r = _mm_maskz_load_pd(m, black_box(p)); + let e = _mm_setr_pd(0.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_load_ss() { + #[repr(align(16))] + struct Align { + data: f32, + } + let src = _mm_set_ss(2.0); + let mem = Align { data: 1.0 }; + let r = _mm_mask_load_ss(src, 0b1, &mem.data); + assert_eq_m128(r, _mm_set_ss(1.0)); + let r = _mm_mask_load_ss(src, 0b0, &mem.data); + assert_eq_m128(r, _mm_set_ss(2.0)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_load_ss() { + #[repr(align(16))] + struct Align { + data: f32, + } + let mem = Align { data: 1.0 }; + let r = _mm_maskz_load_ss(0b1, &mem.data); + assert_eq_m128(r, _mm_set_ss(1.0)); + let r = _mm_maskz_load_ss(0b0, &mem.data); + assert_eq_m128(r, _mm_set_ss(0.0)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_load_sd() { + #[repr(align(16))] + struct Align { + data: f64, + } + let src = _mm_set_sd(2.0); + let mem = Align { data: 1.0 }; + let r = _mm_mask_load_sd(src, 0b1, &mem.data); + assert_eq_m128d(r, _mm_set_sd(1.0)); + let r = _mm_mask_load_sd(src, 0b0, &mem.data); + assert_eq_m128d(r, _mm_set_sd(2.0)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_load_sd() { + #[repr(align(16))] + struct Align { + data: f64, + } + let mem = Align { data: 1.0 }; + let r = _mm_maskz_load_sd(0b1, &mem.data); + assert_eq_m128d(r, _mm_set_sd(1.0)); + let r = _mm_maskz_load_sd(0b0, &mem.data); + assert_eq_m128d(r, _mm_set_sd(0.0)); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_storeu_pd() { + let mut r = [42_f64; 2]; + let a = _mm_setr_pd(1.0, 2.0); + let m = 0b10; + _mm_mask_storeu_pd(r.as_mut_ptr(), m, a); + let e = _mm_setr_pd(42.0, 2.0); + assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_mask_store_pd() { + #[repr(align(16))] + struct Align { + data: [f64; 2], // 16 bytes + } + let mut r = Align { data: [42.0; 2] }; + let a = _mm_setr_pd(1.0, 2.0); + let m = 0b10; + _mm_mask_store_pd(r.data.as_mut_ptr(), m, a); + let e = _mm_setr_pd(42.0, 2.0); + assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_store_ss() { + #[repr(align(16))] + struct Align { + data: f32, + } + let a = _mm_set_ss(2.0); + let mut mem = Align { data: 1.0 }; + _mm_mask_store_ss(&mut mem.data, 0b1, a); + assert_eq!(mem.data, 2.0); + _mm_mask_store_ss(&mut mem.data, 0b0, a); + assert_eq!(mem.data, 2.0); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_store_sd() { + #[repr(align(16))] + struct Align { + data: f64, + } + let a = _mm_set_sd(2.0); + let mut mem = Align { data: 1.0 }; + _mm_mask_store_sd(&mut mem.data, 0b1, a); + assert_eq!(mem.data, 2.0); + _mm_mask_store_sd(&mut mem.data, 0b0, a); + assert_eq!(mem.data, 2.0); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr_pd() { + let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_pd() { + let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_rol_epi32::<1>(a); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rol_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_rol_epi32::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = 
_mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let r = _mm512_maskz_rol_epi32::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rol_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let r = _mm256_rol_epi32::<1>(a); + let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rol_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let r = _mm256_mask_rol_epi32::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a); + let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rol_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let r = _mm256_maskz_rol_epi32::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a); + let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rol_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let r = _mm_rol_epi32::<1>(a); + let e = _mm_set_epi32(1 << 0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rol_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let r = _mm_mask_rol_epi32::<1>(a, 0, a); + 
assert_eq_m128i(r, a); + let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a); + let e = _mm_set_epi32(1 << 0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rol_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let r = _mm_maskz_rol_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rol_epi32::<1>(0b00001111, a); + let e = _mm_set_epi32(1 << 0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_ror_epi32::<1>(a); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_ror_epi32::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + let r = _mm512_maskz_ror_epi32::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ror_epi32() { + let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + let r = _mm256_ror_epi32::<1>(a); + let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_ror_epi32() { + let a = _mm256_set_epi32(1 
<< 0, 2, 2, 2, 2, 2, 2, 2); + let r = _mm256_mask_ror_epi32::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a); + let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_ror_epi32() { + let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + let r = _mm256_maskz_ror_epi32::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a); + let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ror_epi32() { + let a = _mm_set_epi32(1 << 0, 2, 2, 2); + let r = _mm_ror_epi32::<1>(a); + let e = _mm_set_epi32(1 << 31, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_ror_epi32() { + let a = _mm_set_epi32(1 << 0, 2, 2, 2); + let r = _mm_mask_ror_epi32::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a); + let e = _mm_set_epi32(1 << 31, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_ror_epi32() { + let a = _mm_set_epi32(1 << 0, 2, 2, 2); + let r = _mm_maskz_ror_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_ror_epi32::<1>(0b00001111, a); + let e = _mm_set_epi32(1 << 31, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_slli_epi32::<1>(a); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_slli_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = 
_mm512_mask_slli_epi32::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_slli_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let r = _mm512_maskz_slli_epi32::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_slli_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let r = _mm256_mask_slli_epi32::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a); + let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_slli_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let r = _mm256_maskz_slli_epi32::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a); + let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_slli_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let r = _mm_mask_slli_epi32::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a); + let e = _mm_set_epi32(0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_slli_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let r = _mm_maskz_slli_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_slli_epi32::<1>(0b00001111, a); + let e = 
_mm_set_epi32(0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_srli_epi32::<1>(a); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_srli_epi32::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let r = _mm512_maskz_srli_epi32::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srli_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let r = _mm256_mask_srli_epi32::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srli_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let r = _mm256_maskz_srli_epi32::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_srli_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let r = _mm_mask_srli_epi32::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srli_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let r = _mm_maskz_srli_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srli_epi32::<1>(0b00001111, a); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set1_epi32(1); + let r = _mm512_rolv_epi32(a, b); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set1_epi32(1); + let r = _mm512_mask_rolv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_rolv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rolv_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let b = 
_mm256_set1_epi32(1); + let r = _mm256_rolv_epi32(a, b); + let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rolv_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let b = _mm256_set1_epi32(1); + let r = _mm256_mask_rolv_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rolv_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let b = _mm256_set1_epi32(1); + let r = _mm256_maskz_rolv_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rolv_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rolv_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let b = _mm_set1_epi32(1); + let r = _mm_rolv_epi32(a, b); + let e = _mm_set_epi32(1 << 0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rolv_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let b = _mm_set1_epi32(1); + let r = _mm_mask_rolv_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(1 << 0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rolv_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let b = _mm_set1_epi32(1); + let r = _mm_maskz_rolv_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rolv_epi32(0b00001111, a, b); + let e = _mm_set_epi32(1 << 0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi32() { + let a = 
_mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set1_epi32(1); + let r = _mm512_rorv_epi32(a, b); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set1_epi32(1); + let r = _mm512_mask_rorv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi32() { + let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_rorv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rorv_epi32() { + let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + let b = _mm256_set1_epi32(1); + let r = _mm256_rorv_epi32(a, b); + let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rorv_epi32() { + let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + let b = _mm256_set1_epi32(1); + let r = _mm256_mask_rorv_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rorv_epi32() { + let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); + let b = 
_mm256_set1_epi32(1); + let r = _mm256_maskz_rorv_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rorv_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rorv_epi32() { + let a = _mm_set_epi32(1 << 0, 2, 2, 2); + let b = _mm_set1_epi32(1); + let r = _mm_rorv_epi32(a, b); + let e = _mm_set_epi32(1 << 31, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rorv_epi32() { + let a = _mm_set_epi32(1 << 0, 2, 2, 2); + let b = _mm_set1_epi32(1); + let r = _mm_mask_rorv_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(1 << 31, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rorv_epi32() { + let a = _mm_set_epi32(1 << 0, 2, 2, 2); + let b = _mm_set1_epi32(1); + let r = _mm_maskz_rorv_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rorv_epi32(0b00001111, a, b); + let e = _mm_set_epi32(1 << 31, 1, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set1_epi32(1); + let r = _mm512_sllv_epi32(a, count); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set1_epi32(1); + let r = _mm512_mask_sllv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); 
+ } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_maskz_sllv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sllv_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let count = _mm256_set1_epi32(1); + let r = _mm256_mask_sllv_epi32(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count); + let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sllv_epi32() { + let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); + let count = _mm256_set1_epi32(1); + let r = _mm256_maskz_sllv_epi32(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sllv_epi32(0b11111111, a, count); + let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sllv_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let count = _mm_set1_epi32(1); + let r = _mm_mask_sllv_epi32(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count); + let e = _mm_set_epi32(0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sllv_epi32() { + let a = _mm_set_epi32(1 << 31, 1, 1, 1); + let count = _mm_set1_epi32(1); + let r = _mm_maskz_sllv_epi32(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sllv_epi32(0b00001111, a, count); + let 
e = _mm_set_epi32(0, 2, 2, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set1_epi32(1); + let r = _mm512_srlv_epi32(a, count); + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set1_epi32(1); + let r = _mm512_mask_srlv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srlv_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_maskz_srlv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srlv_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm256_set1_epi32(1); + let r = _mm256_mask_srlv_epi32(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srlv_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm256_set1_epi32(1); + let r = _mm256_maskz_srlv_epi32(0, a, count); + assert_eq_m256i(r, 
_mm256_setzero_si256()); + let r = _mm256_maskz_srlv_epi32(0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srlv_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set1_epi32(1); + let r = _mm_mask_srlv_epi32(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srlv_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set1_epi32(1); + let r = _mm_maskz_srlv_epi32(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srlv_epi32(0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi32() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 1 << 31, 1 << 0, 1 << 1, 1 << 2, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_sll_epi32(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi32( + 0, 1 << 2, 1 << 3, 1 << 4, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi32() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 1 << 31, 1 << 0, 1 << 1, 1 << 2, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sll_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi32( + 0, 1 << 2, 1 << 3, 1 << 4, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi32() { + #[rustfmt::skip] + let a = 
_mm512_set_epi32( + 1 << 31, 1 << 0, 1 << 1, 1 << 2, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 1 << 31, + ); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_sll_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sll_epi32() { + let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm256_mask_sll_epi32(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count); + let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sll_epi32() { + let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm256_maskz_sll_epi32(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sll_epi32(0b11111111, a, count); + let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sll_epi32() { + let a = _mm_set_epi32(1 << 13, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm_mask_sll_epi32(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sll_epi32(a, 0b00001111, a, count); + let e = _mm_set_epi32(1 << 14, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sll_epi32() { + let a = _mm_set_epi32(1 << 13, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm_maskz_sll_epi32(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sll_epi32(0b00001111, a, count); + let e = _mm_set_epi32(1 << 14, 0, 0, 0); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi32() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 1 << 31, 1 << 0, 1 << 1, 1 << 2, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_srl_epi32(a, count); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi32() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 1 << 31, 1 << 0, 1 << 1, 1 << 2, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_srl_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi32() { + #[rustfmt::skip] + let a = _mm512_set_epi32( + 1 << 31, 1 << 0, 1 << 1, 1 << 2, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 1 << 31, + ); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_srl_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srl_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm256_mask_srl_epi32(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srl_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = 
_mm_set_epi32(0, 0, 0, 1); + let r = _mm256_maskz_srl_epi32(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srl_epi32(0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srl_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm_mask_srl_epi32(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srl_epi32(a, 0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srl_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm_maskz_srl_epi32(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srl_epi32(0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm_set_epi32(1, 0, 0, 2); + let r = _mm512_sra_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sra_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm_set_epi32(2, 0, 0, 
2); + let r = _mm512_maskz_sra_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sra_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm256_mask_sra_epi32(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sra_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm256_maskz_sra_epi32(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sra_epi32(0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sra_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm_mask_sra_epi32(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sra_epi32(a, 0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sra_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 1); + let r = _mm_maskz_sra_epi32(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sra_epi32(0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + 
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm512_srav_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let r = _mm512_mask_srav_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2); + let r = _mm512_maskz_srav_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srav_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm256_set1_epi32(1); + let r = _mm256_mask_srav_epi32(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srav_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let count = _mm256_set1_epi32(1); + let r = _mm256_maskz_srav_epi32(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srav_epi32(0b11111111, a, count); + let e = _mm256_set_epi32(1 << 4, 
0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srav_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set1_epi32(1); + let r = _mm_mask_srav_epi32(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srav_epi32(a, 0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srav_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let count = _mm_set1_epi32(1); + let r = _mm_maskz_srav_epi32(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srav_epi32(0b00001111, a, count); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); + let r = _mm512_srai_epi32::<2>(a); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_mask_srai_epi32::<2>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_maskz_srai_epi32::<2>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_mask_srai_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let r = _mm256_mask_srai_epi32::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srai_epi32() { + let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); + let r = _mm256_maskz_srai_epi32::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a); + let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srai_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let r = _mm_mask_srai_epi32::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srai_epi32() { + let a = _mm_set_epi32(1 << 5, 0, 0, 0); + let r = _mm_maskz_srai_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srai_epi32::<1>(0b00001111, a); + let e = _mm_set_epi32(1 << 4, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permute_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_permute_ps::<0b11_11_11_11>(a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permute_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a); + assert_eq_m512(r, a); + let r = 
_mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permute_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a); + let e = _mm512_setr_ps( + 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permute_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a); + let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permute_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a); + let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permute_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a); + let e = _mm_set_ps(0., 0., 0., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permute_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a); + 
assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a); + let e = _mm_set_ps(0., 0., 0., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutevar_epi32() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_permutevar_epi32(idx, a); + let e = _mm512_set1_epi32(14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutevar_epi32() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_mask_permutevar_epi32(a, 0, idx, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a); + let e = _mm512_set1_epi32(14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutevar_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_set1_epi32(0b01); + let r = _mm512_permutevar_ps(a, b); + let e = _mm512_set_ps( + 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutevar_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_set1_epi32(0b01); + let r = _mm512_mask_permutevar_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b); + let e = _mm512_set_ps( + 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutevar_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_set1_epi32(0b01); + let r = 
_mm512_maskz_permutevar_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutevar_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm256_set1_epi32(0b01); + let r = _mm256_mask_permutevar_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutevar_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm256_set1_epi32(0b01); + let r = _mm256_maskz_permutevar_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_permutevar_ps(0b11111111, a, b); + let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutevar_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set1_epi32(0b01); + let r = _mm_mask_permutevar_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(2., 2., 2., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutevar_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set1_epi32(0b01); + let r = _mm_maskz_permutevar_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_permutevar_ps(0b00001111, a, b); + let e = _mm_set_ps(2., 2., 2., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_epi32() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15); + let r = _mm512_permutexvar_epi32(idx, a); + let e = _mm512_set1_epi32(14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutexvar_epi32() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a); + let e = _mm512_set1_epi32(14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutexvar_epi32() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_permutexvar_epi32(0, idx, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutexvar_epi32() { + let idx = _mm256_set1_epi32(1); + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_permutexvar_epi32(idx, a); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_epi32() { + let idx = _mm256_set1_epi32(1); + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_epi32() { + let idx = _mm256_set1_epi32(1); + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_permutexvar_epi32(0, idx, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = 
_mm256_maskz_permutexvar_epi32(0b11111111, idx, a); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_ps() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_permutexvar_ps(idx, a); + let e = _mm512_set1_ps(14.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutexvar_ps() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_mask_permutexvar_ps(a, 0, idx, a); + assert_eq_m512(r, a); + let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a); + let e = _mm512_set1_ps(14.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutexvar_ps() { + let idx = _mm512_set1_epi32(1); + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_maskz_permutexvar_ps(0, idx, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutexvar_ps() { + let idx = _mm256_set1_epi32(1); + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_permutexvar_ps(idx, a); + let e = _mm256_set1_ps(6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_ps() { + let idx = _mm256_set1_epi32(1); + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_mask_permutexvar_ps(a, 0, idx, a); + assert_eq_m256(r, a); + let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a); + let e = _mm256_set1_ps(6.); + 
assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_ps() { + let idx = _mm256_set1_epi32(1); + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_maskz_permutexvar_ps(0, idx, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a); + let e = _mm256_set1_ps(6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex2var_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_epi32(100); + let r = _mm512_permutex2var_epi32(a, idx, b); + let e = _mm512_set_epi32( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex2var_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_epi32(100); + let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b); + let e = _mm512_set_epi32( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex2var_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_epi32(100); + let r = _mm512_maskz_permutex2var_epi32(0, a, idx, 
b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2_permutex2var_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1000, 1 << 4, 2000, 1 << 4, + 3000, 1 << 4, 4000, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_epi32(100); + let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b); + assert_eq_m512i(r, idx); + let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b); + #[rustfmt::skip] + let e = _mm512_set_epi32( + 1000, 1 << 4, 2000, 1 << 4, + 3000, 1 << 4, 4000, 1 << 4, + 10, 100, 9, 100, + 8, 100, 7, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex2var_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_epi32(100); + let r = _mm256_permutex2var_epi32(a, idx, b); + let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_epi32(100); + let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b); + let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let idx = 
_mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_epi32(100); + let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b); + let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_epi32(100); + let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b); + assert_eq_m256i(r, idx); + let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b); + let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_permutex2var_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_epi32(100); + let r = _mm_permutex2var_epi32(a, idx, b); + let e = _mm_set_epi32(2, 100, 1, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutex2var_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_epi32(100); + let r = _mm_mask_permutex2var_epi32(a, 0, idx, b); + assert_eq_m128i(r, a); + let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b); + let e = _mm_set_epi32(2, 100, 1, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_epi32(100); + let r = _mm_maskz_permutex2var_epi32(0, a, idx, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b); + let e = _mm_set_epi32(2, 100, 1, 
100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_epi32(100); + let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b); + assert_eq_m128i(r, idx); + let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b); + let e = _mm_set_epi32(2, 100, 1, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex2var_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_ps(100.); + let r = _mm512_permutex2var_ps(a, idx, b); + let e = _mm512_set_ps( + 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex2var_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_ps(100.); + let r = _mm512_mask_permutex2var_ps(a, 0, idx, b); + assert_eq_m512(r, a); + let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b); + let e = _mm512_set_ps( + 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex2var_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 
8, 1 << 4, + ); + let b = _mm512_set1_ps(100.); + let r = _mm512_maskz_permutex2var_ps(0, a, idx, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2_permutex2var_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + #[rustfmt::skip] + let idx = _mm512_set_epi32( + 1, 1 << 4, 2, 1 << 4, + 3, 1 << 4, 4, 1 << 4, + 5, 1 << 4, 6, 1 << 4, + 7, 1 << 4, 8, 1 << 4, + ); + let b = _mm512_set1_ps(100.); + let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b); + assert_eq_m512(r, _mm512_castsi512_ps(idx)); + let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b); + let e = _mm512_set_ps( + 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex2var_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_ps(100.); + let r = _mm256_permutex2var_ps(a, idx, b); + let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_ps(100.); + let r = _mm256_mask_permutex2var_ps(a, 0, idx, b); + assert_eq_m256(r, a); + let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b); + let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_maskz_permutex2var_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_ps(100.); + let r = _mm256_maskz_permutex2var_ps(0, a, idx, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b); + let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm256_set1_ps(100.); + let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b); + assert_eq_m256(r, _mm256_castsi256_ps(idx)); + let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b); + let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_permutex2var_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_ps(100.); + let r = _mm_permutex2var_ps(a, idx, b); + let e = _mm_set_ps(2., 100., 1., 100.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutex2var_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_ps(100.); + let r = _mm_mask_permutex2var_ps(a, 0, idx, b); + assert_eq_m128(r, a); + let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b); + let e = _mm_set_ps(2., 100., 1., 100.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_ps(100.); + let r = _mm_maskz_permutex2var_ps(0, a, idx, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = 
_mm_maskz_permutex2var_ps(0b00001111, a, idx, b); + let e = _mm_set_ps(2., 100., 1., 100.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2); + let b = _mm_set1_ps(100.); + let r = _mm_mask2_permutex2var_ps(a, idx, 0, b); + assert_eq_m128(r, _mm_castsi128_ps(idx)); + let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b); + let e = _mm_set_ps(2., 100., 1., 100.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_epi32() { + let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); + let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a); + let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_epi32() { + let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); + let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a); + let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_epi32() { + let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); + let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a); + let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_epi32() { + let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); + let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); + 
assert_eq_m256i(r, a); + let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a); + let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_epi32() { + let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); + let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a); + let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_shuffle_epi32() { + let a = _mm_set_epi32(1, 4, 5, 8); + let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a); + let e = _mm_set_epi32(8, 8, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_shuffle_epi32() { + let a = _mm_set_epi32(1, 4, 5, 8); + let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a); + let e = _mm_set_epi32(8, 8, 1, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_ps() { + let a = _mm512_setr_ps( + 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16., + ); + let b = _mm512_setr_ps( + 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., + ); + let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b); + let e = _mm512_setr_ps( + 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_ps() { + let a = _mm512_setr_ps( + 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16., + ); + let b = _mm512_setr_ps( + 2., 3., 6., 7., 10., 11., 
14., 15., 2., 3., 6., 7., 10., 11., 14., 15., + ); + let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b); + let e = _mm512_setr_ps( + 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_ps() { + let a = _mm512_setr_ps( + 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16., + ); + let b = _mm512_setr_ps( + 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., + ); + let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_ps() { + let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b); + let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_ps() { + let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b); + let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_shuffle_ps() { + let a = _mm_set_ps(1., 4., 5., 
8.); + let b = _mm_set_ps(2., 3., 6., 7.); + let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b); + let e = _mm_set_ps(7., 7., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_shuffle_ps() { + let a = _mm_set_ps(1., 4., 5., 8.); + let b = _mm_set_ps(2., 3., 6., 7.); + let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b); + let e = _mm_set_ps(7., 7., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_i32x4() { + let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b); + let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_i32x4() { + let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b); + let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_i32x4() { + let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_shuffle_i32x4() { + let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm256_shuffle_i32x4::<0b00>(a, b); + let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_i32x4() { + let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b); + let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_i32x4() { + let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b); + let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_f32x4() { + let a = _mm512_setr_ps( + 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16., + ); + let b = _mm512_setr_ps( + 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., + ); + let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b); + let e = _mm512_setr_ps( + 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_f32x4() { + let a = _mm512_setr_ps( + 1., 4., 5., 8., 
9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16., + ); + let b = _mm512_setr_ps( + 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., + ); + let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b); + let e = _mm512_setr_ps( + 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_f32x4() { + let a = _mm512_setr_ps( + 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16., + ); + let b = _mm512_setr_ps( + 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., + ); + let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_shuffle_f32x4() { + let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_shuffle_f32x4::<0b00>(a, b); + let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_f32x4() { + let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b); + let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_f32x4() { + let a = _mm256_set_ps(1., 4., 5., 8., 
9., 12., 13., 16.); + let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b); + let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_extractf32x4_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_extractf32x4_ps::<1>(a); + let e = _mm_setr_ps(5., 6., 7., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_extractf32x4_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let src = _mm_set1_ps(100.); + let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a); + assert_eq_m128(r, src); + let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a); + let e = _mm_setr_ps(5., 6., 7., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_extractf32x4_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_maskz_extractf32x4_ps::<1>(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a); + let e = _mm_setr_ps(5., 0., 0., 0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_extractf32x4_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_extractf32x4_ps::<1>(a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_extractf32x4_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let src = _mm_set1_ps(100.); + let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a); + assert_eq_m128(r, src); + let r = 
_mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_extractf32x4_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_maskz_extractf32x4_ps::<1>(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a); + let e = _mm_set_ps(1., 2., 3., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_extracti32x4_epi32() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_extracti32x4_epi32::<1>(a); + let e = _mm_setr_epi32(5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_extracti32x4_epi32() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm_set1_epi32(100); + let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a); + let e = _mm_setr_epi32(5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm512_maskz_extracti32x4_epi32() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a); + let e = _mm_setr_epi32(5, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_extracti32x4_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_extracti32x4_epi32::<1>(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_extracti32x4_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 
8); + let src = _mm_set1_epi32(100); + let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_extracti32x4_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_moveldup_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_moveldup_ps(a); + let e = _mm512_setr_ps( + 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_moveldup_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_mask_moveldup_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a); + let e = _mm512_setr_ps( + 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_moveldup_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_maskz_moveldup_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a); + let e = _mm512_setr_ps( + 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_moveldup_ps() { + let a = _mm256_set_ps(1., 2., 
3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_moveldup_ps(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_moveldup_ps(a, 0b11111111, a); + let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_moveldup_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_maskz_moveldup_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_moveldup_ps(0b11111111, a); + let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_moveldup_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_moveldup_ps(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_moveldup_ps(a, 0b00001111, a); + let e = _mm_set_ps(2., 2., 4., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_moveldup_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_maskz_moveldup_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_moveldup_ps(0b00001111, a); + let e = _mm_set_ps(2., 2., 4., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_movehdup_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_movehdup_ps(a); + let e = _mm512_setr_ps( + 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_movehdup_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_mask_movehdup_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a); + let e = _mm512_setr_ps( + 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., + ); + 
assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_movehdup_ps() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_maskz_movehdup_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a); + let e = _mm512_setr_ps( + 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_movehdup_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_movehdup_ps(a, 0, a); + assert_eq_m256(r, a); + let r = _mm256_mask_movehdup_ps(a, 0b11111111, a); + let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_movehdup_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_maskz_movehdup_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_movehdup_ps(0b11111111, a); + let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_movehdup_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_movehdup_ps(a, 0, a); + assert_eq_m128(r, a); + let r = _mm_mask_movehdup_ps(a, 0b00001111, a); + let e = _mm_set_ps(1., 1., 3., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_movehdup_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_maskz_movehdup_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_movehdup_ps(0b00001111, a); + let e = _mm_set_ps(1., 1., 3., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_inserti32x4() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = 
_mm_setr_epi32(17, 18, 19, 20); + let r = _mm512_inserti32x4::<0>(a, b); + let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_inserti32x4() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_setr_epi32(17, 18, 19, 20); + let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b); + let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_inserti32x4() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_setr_epi32(17, 18, 19, 20); + let r = _mm512_maskz_inserti32x4::<0>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_inserti32x4() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_inserti32x4::<1>(a, b); + let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_inserti32x4() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b); + let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_inserti32x4() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + 
let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_maskz_inserti32x4::<0>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b); + let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_insertf32x4() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm_setr_ps(17., 18., 19., 20.); + let r = _mm512_insertf32x4::<0>(a, b); + let e = _mm512_setr_ps( + 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_insertf32x4() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm_setr_ps(17., 18., 19., 20.); + let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b); + let e = _mm512_setr_ps( + 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_insertf32x4() { + let a = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm_setr_ps(17., 18., 19., 20.); + let r = _mm512_maskz_insertf32x4::<0>(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_insertf32x4() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_insertf32x4::<1>(a, b); + let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.); + 
assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_insertf32x4() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b); + let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_insertf32x4() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_maskz_insertf32x4::<0>(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b); + let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castps128_ps512() { + let a = _mm_setr_ps(17., 18., 19., 20.); + let r = _mm512_castps128_ps512(a); + assert_eq_m128(_mm512_castps512_ps128(r), a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castps256_ps512() { + let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_castps256_ps512(a); + assert_eq_m256(_mm512_castps512_ps256(r), a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextps128_ps512() { + let a = _mm_setr_ps(17., 18., 19., 20.); + let r = _mm512_zextps128_ps512(a); + let e = _mm512_setr_ps( + 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextps256_ps512() { + let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_zextps256_ps512(a); + let e = _mm512_setr_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_castps512_ps128() { + let a = _mm512_setr_ps( + 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + ); + let r = _mm512_castps512_ps128(a); + let e = _mm_setr_ps(17., 18., 19., 20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castps512_ps256() { + let a = _mm512_setr_ps( + 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1., + ); + let r = _mm512_castps512_ps256(a); + let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castps_pd() { + let a = _mm512_set1_ps(1.); + let r = _mm512_castps_pd(a); + let e = _mm512_set1_pd(0.007812501848093234); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castps_si512() { + let a = _mm512_set1_ps(1.); + let r = _mm512_castps_si512(a); + let e = _mm512_set1_epi32(1065353216); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcastd_epi32() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm512_broadcastd_epi32(a); + let e = _mm512_set1_epi32(20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcastd_epi32() { + let src = _mm512_set1_epi32(20); + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm512_mask_broadcastd_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a); + let e = _mm512_set1_epi32(20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcastd_epi32() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm512_maskz_broadcastd_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a); + let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcastd_epi32() { + let src = _mm256_set1_epi32(20); + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_mask_broadcastd_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a); + let e = _mm256_set1_epi32(20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcastd_epi32() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_maskz_broadcastd_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_broadcastd_epi32(0b11111111, a); + let e = _mm256_set1_epi32(20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_broadcastd_epi32() { + let src = _mm_set1_epi32(20); + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm_mask_broadcastd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a); + let e = _mm_set1_epi32(20); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_broadcastd_epi32() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm_maskz_broadcastd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_broadcastd_epi32(0b00001111, a); + let e = _mm_set1_epi32(20); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcastss_ps() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm512_broadcastss_ps(a); + let e = _mm512_set1_ps(20.); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcastss_ps() { + let src = _mm512_set1_ps(20.); + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm512_mask_broadcastss_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a); + let e = _mm512_set1_ps(20.); + assert_eq_m512(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcastss_ps() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm512_maskz_broadcastss_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a); + let e = _mm512_setr_ps( + 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcastss_ps() { + let src = _mm256_set1_ps(20.); + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_mask_broadcastss_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a); + let e = _mm256_set1_ps(20.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcastss_ps() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_maskz_broadcastss_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_broadcastss_ps(0b11111111, a); + let e = _mm256_set1_ps(20.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_broadcastss_ps() { + let src = _mm_set1_ps(20.); + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm_mask_broadcastss_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_broadcastss_ps(src, 0b00001111, a); + let e = _mm_set1_ps(20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_broadcastss_ps() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm_maskz_broadcastss_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_broadcastss_ps(0b00001111, a); + let e = _mm_set1_ps(20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcast_i32x4() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm512_broadcast_i32x4(a); + let e = _mm512_set_epi32( + 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 
20, 17, 18, 19, 20, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcast_i32x4() { + let src = _mm512_set1_epi32(20); + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm512_mask_broadcast_i32x4(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a); + let e = _mm512_set_epi32( + 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcast_i32x4() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm512_maskz_broadcast_i32x4(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_broadcast_i32x4() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_broadcast_i32x4(a); + let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcast_i32x4() { + let src = _mm256_set1_epi32(20); + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_mask_broadcast_i32x4(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a); + let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcast_i32x4() { + let a = _mm_set_epi32(17, 18, 19, 20); + let r = _mm256_maskz_broadcast_i32x4(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_broadcast_i32x4(0b11111111, a); + let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcast_f32x4() { + let a = 
_mm_set_ps(17., 18., 19., 20.); + let r = _mm512_broadcast_f32x4(a); + let e = _mm512_set_ps( + 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcast_f32x4() { + let src = _mm512_set1_ps(20.); + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm512_mask_broadcast_f32x4(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a); + let e = _mm512_set_ps( + 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcast_f32x4() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm512_maskz_broadcast_f32x4(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_broadcast_f32x4() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_broadcast_f32x4(a); + let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcast_f32x4() { + let src = _mm256_set1_ps(20.); + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_mask_broadcast_f32x4(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a); + let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcast_f32x4() { + let a = _mm_set_ps(17., 18., 19., 20.); + let r = _mm256_maskz_broadcast_f32x4(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_broadcast_f32x4(0b11111111, 
a); + let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_blend_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(2); + let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b); + let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_blend_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(2); + let r = _mm256_mask_blend_epi32(0b11111111, a, b); + let e = _mm256_set1_epi32(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_blend_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(2); + let r = _mm_mask_blend_epi32(0b00001111, a, b); + let e = _mm_set1_epi32(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_blend_ps() { + let a = _mm512_set1_ps(1.); + let b = _mm512_set1_ps(2.); + let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b); + let e = _mm512_set_ps( + 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_blend_ps() { + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(2.); + let r = _mm256_mask_blend_ps(0b11111111, a, b); + let e = _mm256_set1_ps(2.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_blend_ps() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let r = _mm_mask_blend_ps(0b00001111, a, b); + let e = _mm_set1_ps(2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpackhi_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, + ); + let r = _mm512_unpackhi_epi32(a, b); + let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpackhi_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_mask_unpackhi_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpackhi_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_maskz_unpackhi_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm256_mask_unpackhi_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm256_maskz_unpackhi_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = 
_mm256_maskz_unpackhi_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpackhi_epi32() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm_mask_unpackhi_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(17, 1, 18, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_epi32() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm_maskz_unpackhi_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b); + let e = _mm_set_epi32(17, 1, 18, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpackhi_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_unpackhi_ps(a, b); + let e = _mm512_set_ps( + 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpackhi_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_mask_unpackhi_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b); + let e = _mm512_set_ps( + 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpackhi_ps() { + let a = 
_mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_maskz_unpackhi_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm256_mask_unpackhi_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm256_maskz_unpackhi_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b); + let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpackhi_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm_mask_unpackhi_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b); + let e = _mm_set_ps(17., 1., 18., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm_maskz_unpackhi_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = 
_mm_maskz_unpackhi_ps(0b00001111, a, b); + let e = _mm_set_ps(17., 1., 18., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpacklo_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_unpacklo_epi32(a, b); + let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpacklo_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_mask_unpacklo_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpacklo_epi32() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm512_set_epi32( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_maskz_unpacklo_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm256_mask_unpacklo_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b); + let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8); + 
assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_epi32() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm256_maskz_unpacklo_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b); + let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpacklo_epi32() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm_mask_unpacklo_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b); + let e = _mm_set_epi32(19, 3, 20, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_epi32() { + let a = _mm_set_epi32(1, 2, 3, 4); + let b = _mm_set_epi32(17, 18, 19, 20); + let r = _mm_maskz_unpacklo_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b); + let e = _mm_set_epi32(19, 3, 20, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpacklo_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_unpacklo_ps(a, b); + let e = _mm512_set_ps( + 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpacklo_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = 
_mm512_mask_unpacklo_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b); + let e = _mm512_set_ps( + 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpacklo_ps() { + let a = _mm512_set_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let b = _mm512_set_ps( + 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_maskz_unpacklo_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm256_mask_unpacklo_ps(a, 0, a, b); + assert_eq_m256(r, a); + let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b); + let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_ps() { + let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm256_maskz_unpacklo_ps(0, a, b); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b); + let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpacklo_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm_mask_unpacklo_ps(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b); 
+ let e = _mm_set_ps(19., 3., 20., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_ps() { + let a = _mm_set_ps(1., 2., 3., 4.); + let b = _mm_set_ps(17., 18., 19., 20.); + let r = _mm_maskz_unpacklo_ps(0, a, b); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_unpacklo_ps(0b00001111, a, b); + let e = _mm_set_ps(19., 3., 20., 4.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_alignr_epi32() { + let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi32( + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + ); + let r = _mm512_alignr_epi32::<0>(a, b); + assert_eq_m512i(r, b); + let r = _mm512_alignr_epi32::<16>(a, b); + assert_eq_m512i(r, b); + let r = _mm512_alignr_epi32::<1>(a, b); + let e = _mm512_set_epi32( + 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_alignr_epi32() { + let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi32( + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + ); + let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32( + 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_alignr_epi32() { + let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi32( + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + ); + let r = _mm512_maskz_alignr_epi32::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 
0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        // An offset of 0 returns `b` unchanged.
        let r = _mm256_alignr_epi32::<0>(a, b);
        assert_eq_m256i(r, b);
        let r = _mm256_alignr_epi32::<1>(a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        // Zero mask: result is the `src` operand.
        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_alignr_epi32::<0>(a, b);
        assert_eq_m128i(r, b);
        let r = _mm_alignr_epi32::<1>(a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_and_epi32() {
        // `_mm512_set_epi32` takes lanes high-to-low; the first argument is
        // element 15 (mask bit 15), the last is element 0 (mask bit 0).
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Bit 15 is clear, so the top element keeps the `src` value.
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_and_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_and_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_and_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_and_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_and_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_and_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        // FIX: exercise `_mm512_and_si512` itself; this test previously called
        // `_mm512_and_epi32`, leaving the `si512` alias untested. Both compute
        // a bitwise AND, so the expected value is unchanged.
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_or_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_or_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_or_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_or_epi32(a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_or_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_or_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_or_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_or_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        // FIX: exercise `_mm512_or_si512` itself; this test previously called
        // `_mm512_or_epi32`, leaving the `si512` alias untested.
        let r = _mm512_or_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_xor_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Bit 15 is clear, so the top element keeps the `src` value.
        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_xor_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_xor_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_xor_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_xor_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_xor_epi32(a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_xor_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_xor_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_xor_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        // FIX: exercise `_mm512_xor_si512` itself; this test previously called
        // `_mm512_xor_epi32`, leaving the `si512` alias untested.
        let r = _mm512_xor_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_andnot_epi32() {
        // andnot computes `!a & b`; with `a == 0` the result is `b`.
        let a = _mm512_set1_epi32(0);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_andnot_epi32(a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_maskz_andnot_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_andnot_epi32() { + let a = _mm256_set1_epi32(1 << 1 | 1 << 2); + let b = _mm256_set1_epi32(1 << 3 | 1 << 4); + let r = _mm256_mask_andnot_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b); + let e = _mm256_set1_epi32(1 << 3 | 1 << 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_andnot_epi32() { + let a = _mm256_set1_epi32(1 << 1 | 1 << 2); + let b = _mm256_set1_epi32(1 << 3 | 1 << 4); + let r = _mm256_maskz_andnot_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_andnot_epi32(0b11111111, a, b); + let e = _mm256_set1_epi32(1 << 3 | 1 << 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_andnot_epi32() { + let a = _mm_set1_epi32(1 << 1 | 1 << 2); + let b = _mm_set1_epi32(1 << 3 | 1 << 4); + let r = _mm_mask_andnot_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b); + let e = _mm_set1_epi32(1 << 3 | 1 << 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_andnot_epi32() { + let a = _mm_set1_epi32(1 << 1 | 1 << 2); + let b = _mm_set1_epi32(1 << 3 | 1 << 4); + let r = _mm_maskz_andnot_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_andnot_epi32(0b00001111, a, b); + let e = _mm_set1_epi32(1 << 3 | 1 << 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_cvtmask16_u32() { + let a: __mmask16 = 0b11001100_00110011; + let r = _cvtmask16_u32(a); + let e: u32 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_cvtu32_mask16() { + let a: u32 = 0b11001100_00110011; + let r = _cvtu32_mask16(a); + let e: __mmask16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_kand() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b11001100_00110011; + let r = _mm512_kand(a, b); + let e: u16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kand_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b11001100_00110011; + let r = _kand_mask16(a, b); + let e: u16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kor(a, b); + let e: u16 = 0b11101110_00111011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kor_mask16(a, b); + let e: u16 = 0b11101110_00111011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kxor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kxor(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kxor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kxor_mask16(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_knot() { + let a: u16 = 0b11001100_00110011; + let r = _mm512_knot(a); + let e: u16 = 0b00110011_11001100; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_knot_mask16() { + let a: u16 = 0b11001100_00110011; + let r = _knot_mask16(a); + let e: u16 = 0b00110011_11001100; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kandn() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kandn(a, b); + let e: u16 = 0b00100010_00001000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_kandn_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kandn_mask16(a, b); + let e: u16 = 0b00100010_00001000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kxnor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kxnor(a, b); + let e: u16 = 0b00011101_11000111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kxnor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kxnor_mask16(a, b); + let e: u16 = 0b00011101_11000111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kortest_mask16_u8() { + let a: __mmask16 = 0b0110100101101001; + let b: __mmask16 = 0b1011011010110110; + let mut all_ones: u8 = 0; + let r = _kortest_mask16_u8(a, b, &mut all_ones); + assert_eq!(r, 0); + assert_eq!(all_ones, 1); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kortestc_mask16_u8() { + let a: __mmask16 = 0b0110100101101001; + let b: __mmask16 = 0b1011011010110110; + let r = _kortestc_mask16_u8(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kortestz_mask16_u8() { + let a: __mmask16 = 0b0110100101101001; + let b: __mmask16 = 0b1011011010110110; + let r = _kortestz_mask16_u8(a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kshiftli_mask16() { + let a: __mmask16 = 0b1001011011000011; + let r = _kshiftli_mask16::<3>(a); + let e: __mmask16 = 0b1011011000011000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_kshiftri_mask16() { + let a: __mmask16 = 0b0110100100111100; + let r = _kshiftri_mask16::<3>(a); + let e: __mmask16 = 0b0000110100100111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_load_mask16() { + let a: __mmask16 = 0b1001011011000011; + let r = _load_mask16(&a); + let e: __mmask16 = 0b1001011011000011; + 
assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_store_mask16() { + let a: __mmask16 = 0b0110100100111100; + let mut r = 0; + _store_mask16(&mut r, a); + let e: __mmask16 = 0b0110100100111100; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kmov() { + let a: u16 = 0b11001100_00110011; + let r = _mm512_kmov(a); + let e: u16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_int2mask() { + let a: i32 = 0b11001100_00110011; + let r = _mm512_int2mask(a); + let e: u16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2int() { + let k1: __mmask16 = 0b11001100_00110011; + let r = _mm512_mask2int(k1); + let e: i32 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kunpackb() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kunpackb(a, b); + let e: u16 = 0b00110011_00001011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kortestc() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kortestc(a, b); + assert_eq!(r, 0); + let b: u16 = 0b11111111_11111111; + let r = _mm512_kortestc(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kortestz() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kortestz(a, b); + assert_eq!(r, 0); + let r = _mm512_kortestz(0, 0); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_test_epi32_mask() { + let a = _mm512_set1_epi32(1 << 0); + let b = _mm512_set1_epi32(1 << 0 | 1 << 1); + let r = _mm512_test_epi32_mask(a, b); + let e: __mmask16 = 0b11111111_11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_test_epi32_mask() { + let a = 
_mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_test_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_test_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_testn_epi32_mask() {
        // a & b is non-zero in every lane, so `testn` yields an all-zero mask.
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi32_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 1);
        // FIX: the zero-mask case previously called `_mm512_mask_test_epi32_mask`
        // (copy-paste from the `test` tests), so the `testn` intrinsic under
        // test was never exercised with a zero mask. Both intrinsics return 0
        // for a zero mask, so the expected value is unchanged.
        let r = _mm512_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        // FIX: was `_mm256_mask_test_epi32_mask` — copy-paste bug; the zero-mask
        // path of the `testn` variant was never exercised.
        let r = _mm256_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        // FIX: was `_mm_mask_test_epi32_mask` — copy-paste bug; the zero-mask
        // path of the `testn` variant was never exercised.
        let r = _mm_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_ps() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f32; 16], // 64 bytes
        }
        let a = _mm512_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 16] };

        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
        for i in 0..16 {
            assert_eq!(mem.data[i], get_m512(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_pd() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f64; 8],
        }
        let a = _mm512_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 8] };

        _mm512_stream_pd(&mut mem.data[0] as *mut
f64, a); + for i in 0..8 { + assert_eq!(mem.data[i], get_m512d(a, i)); + } + } + + #[simd_test(enable = "avx512f")] + #[cfg_attr(miri, ignore)] + unsafe fn test_mm512_stream_si512() { + #[repr(align(64))] + struct Memory { + pub data: [i64; 8], + } + let a = _mm512_set1_epi32(7); + let mut mem = Memory { data: [-1; 8] }; + + _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a); + for i in 0..8 { + assert_eq!(mem.data[i], get_m512i(a, i)); + } + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_stream_load_si512() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _); + assert_eq_m512i(a, r); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_add_epi32() { + let a = _mm512_set1_epi32(1); + let e: i32 = _mm512_reduce_add_epi32(a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_add_epi32() { + let a = _mm512_set1_epi32(1); + let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_add_ps() { + let a = _mm512_set1_ps(1.); + let e: f32 = _mm512_reduce_add_ps(a); + assert_eq!(16., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_add_ps() { + let a = _mm512_set1_ps(1.); + let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a); + assert_eq!(8., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_mul_epi32() { + let a = _mm512_set1_epi32(2); + let e: i32 = _mm512_reduce_mul_epi32(a); + assert_eq!(65536, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_mul_epi32() { + let a = _mm512_set1_epi32(2); + let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a); + assert_eq!(256, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_mul_ps() { + let a = _mm512_set1_ps(2.); + let e: f32 = 
_mm512_reduce_mul_ps(a); + assert_eq!(65536., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_mul_ps() { + let a = _mm512_set1_ps(2.); + let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a); + assert_eq!(256., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i32 = _mm512_reduce_max_epi32(a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_epu32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u32 = _mm512_reduce_max_epu32(a); + assert_eq!(15, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_epu32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let e: f32 = _mm512_reduce_max_ps(a); + assert_eq!(15., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a); + assert_eq!(7., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i32 = _mm512_reduce_min_epi32(a); + assert_eq!(0, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_epu32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u32 = _mm512_reduce_min_epu32(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_epu32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let e: f32 = _mm512_reduce_min_ps(a); + assert_eq!(0., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a); + assert_eq!(0., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_and_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i32 = _mm512_reduce_and_epi32(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_and_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_or_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i32 = _mm512_reduce_or_epi32(a); + assert_eq!(3, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_mask_reduce_or_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compress_epi32() { + let src = _mm512_set1_epi32(200); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_mask_compress_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a); + let e = _mm512_set_epi32( + 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_compress_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_compress_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compress_epi32() { + let src = _mm256_set1_epi32(200); + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_mask_compress_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_compress_epi32(src, 0b01010101, a); + let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_compress_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_compress_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_compress_epi32(0b01010101, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compress_epi32() { 
+ let src = _mm_set1_epi32(200); + let a = _mm_set_epi32(0, 1, 2, 3); + let r = _mm_mask_compress_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_compress_epi32(src, 0b00000101, a); + let e = _mm_set_epi32(200, 200, 1, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_compress_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let r = _mm_maskz_compress_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_compress_epi32(0b00000101, a); + let e = _mm_set_epi32(0, 0, 1, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compress_ps() { + let src = _mm512_set1_ps(200.); + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_mask_compress_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a); + let e = _mm512_set_ps( + 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_compress_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_maskz_compress_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_compress_ps(0b01010101_01010101, a); + let e = _mm512_set_ps( + 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compress_ps() { + let src = _mm256_set1_ps(200.); + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_mask_compress_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_compress_ps(src, 0b01010101, a); + let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + 
unsafe fn test_mm256_maskz_compress_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_maskz_compress_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_compress_ps(0b01010101, a); + let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compress_ps() { + let src = _mm_set1_ps(200.); + let a = _mm_set_ps(0., 1., 2., 3.); + let r = _mm_mask_compress_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_compress_ps(src, 0b00000101, a); + let e = _mm_set_ps(200., 200., 1., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_compress_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let r = _mm_maskz_compress_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_compress_ps(0b00000101, a); + let e = _mm_set_ps(0., 0., 1., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compressstoreu_epi32() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let mut r = [0_i32; 16]; + _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i32; 16]); + _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a); + assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compressstoreu_epi32() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let mut r = [0_i32; 8]; + _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i32; 8]); + _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a); + assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compressstoreu_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let mut r = [0_i32; 4]; + 
_mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i32; 4]); + _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a); + assert_eq!(&r, &[1, 2, 4, 0]); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compressstoreu_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let mut r = [0_i64; 8]; + _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i64; 8]); + _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a); + assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compressstoreu_epi64() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let mut r = [0_i64; 4]; + _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i64; 4]); + _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a); + assert_eq!(&r, &[1, 2, 4, 0]); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compressstoreu_epi64() { + let a = _mm_setr_epi64x(1, 2); + let mut r = [0_i64; 2]; + _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i64; 2]); + _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a); + assert_eq!(&r, &[2, 0]); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compressstoreu_ps() { + let a = _mm512_setr_ps( + 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32, + 13_f32, 14_f32, 15_f32, 16_f32, + ); + let mut r = [0_f32; 16]; + _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_f32; 16]); + _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a); + assert_eq!( + &r, + &[ + 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32, + 0_f32, 0_f32, 0_f32, 0_f32, 0_f32 + ] + ); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compressstoreu_ps() { + let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 
8_f32); + let mut r = [0_f32; 8]; + _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_f32; 8]); + _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a); + assert_eq!( + &r, + &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32] + ); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compressstoreu_ps() { + let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32); + let mut r = [0.; 4]; + _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0.; 4]); + _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a); + assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compressstoreu_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let mut r = [0.; 8]; + _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0.; 8]); + _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a); + assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compressstoreu_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let mut r = [0.; 4]; + _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0.; 4]); + _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a); + assert_eq!(&r, &[1., 2., 4., 0.]); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compressstoreu_pd() { + let a = _mm_setr_pd(1., 2.); + let mut r = [0.; 2]; + _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0.; 2]); + _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a); + assert_eq!(&r, &[2., 0.]); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expand_epi32() { + let src = _mm512_set1_epi32(200); + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_mask_expand_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, 
a); + let e = _mm512_set_epi32( + 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expand_epi32() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_expand_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a); + let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expand_epi32() { + let src = _mm256_set1_epi32(200); + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_mask_expand_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_expand_epi32(src, 0b01010101, a); + let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expand_epi32() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_expand_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_expand_epi32(0b01010101, a); + let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expand_epi32() { + let src = _mm_set1_epi32(200); + let a = _mm_set_epi32(0, 1, 2, 3); + let r = _mm_mask_expand_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_expand_epi32(src, 0b00000101, a); + let e = _mm_set_epi32(200, 2, 200, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expand_epi32() { + let a = _mm_set_epi32(0, 1, 2, 3); + let r = _mm_maskz_expand_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_expand_epi32(0b00000101, a); + let e = _mm_set_epi32(0, 2, 0, 3); + assert_eq_m128i(r, e); 
+ } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expand_ps() { + let src = _mm512_set1_ps(200.); + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_mask_expand_ps(src, 0, a); + assert_eq_m512(r, src); + let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a); + let e = _mm512_set_ps( + 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expand_ps() { + let a = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let r = _mm512_maskz_expand_ps(0, a); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_expand_ps(0b01010101_01010101, a); + let e = _mm512_set_ps( + 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expand_ps() { + let src = _mm256_set1_ps(200.); + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_mask_expand_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_expand_ps(src, 0b01010101, a); + let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expand_ps() { + let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm256_maskz_expand_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_expand_ps(0b01010101, a); + let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expand_ps() { + let src = _mm_set1_ps(200.); + let a = _mm_set_ps(0., 1., 2., 3.); + let r = _mm_mask_expand_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_expand_ps(src, 0b00000101, a); + let e = 
_mm_set_ps(200., 2., 200., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expand_ps() { + let a = _mm_set_ps(0., 1., 2., 3.); + let r = _mm_maskz_expand_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_expand_ps(0b00000101, a); + let e = _mm_set_ps(0., 2., 0., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_loadu_epi32() { + let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50]; + let p = a.as_ptr(); + let r = _mm512_loadu_epi32(black_box(p)); + let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_loadu_epi32() { + let a = &[4, 3, 2, 5, 8, 9, 64, 50]; + let p = a.as_ptr(); + let r = _mm256_loadu_epi32(black_box(p)); + let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_loadu_epi32() { + let a = &[4, 3, 2, 5]; + let p = a.as_ptr(); + let r = _mm_loadu_epi32(black_box(p)); + let e = _mm_setr_epi32(4, 3, 2, 5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() { + let a = _mm512_set1_epi32(9); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(9); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() { + let a = _mm256_set1_epi32(9); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set1_epi16(9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_storeu_epi16() { + let a = _mm_set1_epi32(9); + let mut r = _mm_set1_epi8(0); + 
_mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(i16::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set1_epi16(i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(u16::MAX as i16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set1_epi16(u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = 
_mm_set1_epi8(0); + _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16( + 0, + 0, + 0, + 0, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() { + let a = _mm512_set1_epi32(9); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() { + let a = _mm256_set1_epi32(9); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_storeu_epi8() { + let a = _mm_set1_epi32(9); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_storeu_epi32() { + let a = _mm512_set1_epi32(9); + let mut r = _mm512_undefined_epi32(); + _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_storeu_epi32() { + let a = _mm256_set1_epi32(9); + let mut r = 
_mm256_undefined_si256(); + _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_storeu_epi32() { + let a = _mm_set1_epi32(9); + let mut r = _mm_undefined_si128(); + _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_loadu_si512() { + let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50]; + let p = a.as_ptr().cast(); + let r = _mm512_loadu_si512(black_box(p)); + let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_storeu_si512() { + let a = _mm512_set1_epi32(9); + let mut r = _mm512_undefined_epi32(); + _mm512_storeu_si512(&mut r as *mut _, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_si512() { + #[repr(align(64))] + struct Align { + data: [i32; 16], // 64 bytes + } + let a = Align { + data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50], + }; + let p = (a.data).as_ptr().cast(); + let r = _mm512_load_si512(black_box(p)); + let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_si512() { + let a = _mm512_set1_epi32(9); + let mut r = _mm512_undefined_epi32(); + _mm512_store_si512(&mut r as *mut _, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 16], // 64 bytes + } + let a = Align { + data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50], + }; + let p = (a.data).as_ptr(); + let r = _mm512_load_epi32(black_box(p)); + let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_load_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 8], + } + let a = Align { + data: [4, 3, 2, 5, 8, 9, 64, 50], + }; + let p = (a.data).as_ptr(); + let r = _mm256_load_epi32(black_box(p)); + let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_load_epi32() { + #[repr(align(64))] + struct Align { + data: [i32; 4], + } + let a = Align { data: [4, 3, 2, 5] }; + let p = (a.data).as_ptr(); + let r = _mm_load_epi32(black_box(p)); + let e = _mm_setr_epi32(4, 3, 2, 5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_epi32() { + let a = _mm512_set1_epi32(9); + let mut r = _mm512_undefined_epi32(); + _mm512_store_epi32(&mut r as *mut _ as *mut i32, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_store_epi32() { + let a = _mm256_set1_epi32(9); + let mut r = _mm256_undefined_si256(); + _mm256_store_epi32(&mut r as *mut _ as *mut i32, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_store_epi32() { + let a = _mm_set1_epi32(9); + let mut r = _mm_undefined_si128(); + _mm_store_epi32(&mut r as *mut _ as *mut i32, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_ps() { + #[repr(align(64))] + struct Align { + data: [f32; 16], // 64 bytes + } + let a = Align { + data: [ + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ], + }; + let p = (a.data).as_ptr(); + let r = _mm512_load_ps(black_box(p)); + let e = _mm512_setr_ps( + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_ps() { + let a = _mm512_set1_ps(9.); + let mut r = _mm512_undefined_ps(); + 
_mm512_store_ps(&mut r as *mut _ as *mut f32, a); + assert_eq_m512(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_set1_epi32() { + let src = _mm512_set1_epi32(2); + let a: i32 = 11; + let r = _mm512_mask_set1_epi32(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a); + let e = _mm512_set1_epi32(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_set1_epi32() { + let a: i32 = 11; + let r = _mm512_maskz_set1_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a); + let e = _mm512_set1_epi32(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_set1_epi32() { + let src = _mm256_set1_epi32(2); + let a: i32 = 11; + let r = _mm256_mask_set1_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_set1_epi32(src, 0b11111111, a); + let e = _mm256_set1_epi32(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm256_maskz_set1_epi32() { + let a: i32 = 11; + let r = _mm256_maskz_set1_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_set1_epi32(0b11111111, a); + let e = _mm256_set1_epi32(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_set1_epi32() { + let src = _mm_set1_epi32(2); + let a: i32 = 11; + let r = _mm_mask_set1_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_set1_epi32(src, 0b00001111, a); + let e = _mm_set1_epi32(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_set1_epi32() { + let a: i32 = 11; + let r = _mm_maskz_set1_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_set1_epi32(0b00001111, a); + let e = _mm_set1_epi32(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm_mask_move_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_move_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_move_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 40.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_move_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_move_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_move_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 40.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_move_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_move_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_move_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_move_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_move_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_move_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 4.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_add_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_add_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_add_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 60.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_add_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + 
let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_add_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_add_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 60.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_add_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_add_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_add_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 6.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_add_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_add_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_add_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 6.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sub_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_sub_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_sub_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., -20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sub_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_sub_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_sub_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., -20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sub_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_sub_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + 
assert_eq_m128d(r, e); + let r = _mm_mask_sub_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sub_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_sub_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_sub_sd(0b11111111, a, b); + let e = _mm_set_pd(1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_mul_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_mul_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_mul_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 800.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_mul_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_mul_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_mul_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 800.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_mul_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_mul_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_mul_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_mul_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_mul_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_mul_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] 
+ unsafe fn test_mm_mask_div_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_div_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_div_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_div_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_div_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_div_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_div_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_div_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_div_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_div_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_div_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_div_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_max_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_mask_max_ss(a, 0, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + let r = _mm_mask_max_ss(a, 0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_max_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = 
_mm_maskz_max_ss(0, a, b); + let e = _mm_set_ps(0., 1., 2., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_max_ss(0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_max_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_mask_max_sd(a, 0, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + let r = _mm_mask_max_sd(a, 0b11111111, a, b); + let e = _mm_set_pd(0., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_max_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_maskz_max_sd(0, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_max_sd(0b11111111, a, b); + let e = _mm_set_pd(0., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_min_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_mask_min_ss(a, 0, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + let r = _mm_mask_min_ss(a, 0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_min_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_maskz_min_ss(0, a, b); + let e = _mm_set_ps(0., 1., 2., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_min_ss(0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_min_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_mask_min_sd(a, 0, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + let r = _mm_mask_min_sd(a, 0b11111111, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_min_sd() { 
+ let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_maskz_min_sd(0, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_min_sd(0b11111111, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sqrt_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_mask_sqrt_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sqrt_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_maskz_sqrt_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_sqrt_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sqrt_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_sqrt_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sqrt_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_sqrt_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_sqrt_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_rsqrt14_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_rsqrt14_ss(a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + 
assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_rsqrt14_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_mask_rsqrt14_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_rsqrt14_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_maskz_rsqrt14_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_rsqrt14_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_rsqrt14_sd(a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_rsqrt14_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_rsqrt14_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_rsqrt14_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_rsqrt14_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_rcp14_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_rcp14_ss(a, b); + let e = _mm_set_ps(1., 
2., 10., 0.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_rcp14_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_mask_rcp14_ss(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_rcp14_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_maskz_rcp14_ss(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_rcp14_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_rcp14_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_rcp14_sd(a, b); + let e = _mm_set_pd(1., 0.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_rcp14_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_rcp14_sd(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b); + let e = _mm_set_pd(1., 0.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_rcp14_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_rcp14_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_rcp14_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 0.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getexp_ss() { + let a = _mm_set1_ps(2.); + let b = _mm_set1_ps(3.); + let r = _mm_getexp_ss(a, b); + let e = _mm_set_ps(2., 2., 2., 1.); + 
assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getexp_ss() { + let a = _mm_set1_ps(2.); + let b = _mm_set1_ps(3.); + let r = _mm_mask_getexp_ss(a, 0, a, b); + let e = _mm_set_ps(2., 2., 2., 2.); + assert_eq_m128(r, e); + let r = _mm_mask_getexp_ss(a, 0b11111111, a, b); + let e = _mm_set_ps(2., 2., 2., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getexp_ss() { + let a = _mm_set1_ps(2.); + let b = _mm_set1_ps(3.); + let r = _mm_maskz_getexp_ss(0, a, b); + let e = _mm_set_ps(2., 2., 2., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_getexp_ss(0b11111111, a, b); + let e = _mm_set_ps(2., 2., 2., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getexp_sd() { + let a = _mm_set1_pd(2.); + let b = _mm_set1_pd(3.); + let r = _mm_getexp_sd(a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getexp_sd() { + let a = _mm_set1_pd(2.); + let b = _mm_set1_pd(3.); + let r = _mm_mask_getexp_sd(a, 0, a, b); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + let r = _mm_mask_getexp_sd(a, 0b11111111, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getexp_sd() { + let a = _mm_set1_pd(2.); + let b = _mm_set1_pd(3.); + let r = _mm_maskz_getexp_sd(0, a, b); + let e = _mm_set_pd(2., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_getexp_sd(0b11111111, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getmant_ss() { + let a = _mm_set1_ps(20.); + let b = _mm_set1_ps(10.); + let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b); + let e = _mm_set_ps(20., 20., 20., 1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getmant_ss() { + let a = _mm_set1_ps(20.); + let b = 
_mm_set1_ps(10.); + let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b); + let e = _mm_set_ps(20., 20., 20., 20.); + assert_eq_m128(r, e); + let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b); + let e = _mm_set_ps(20., 20., 20., 1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getmant_ss() { + let a = _mm_set1_ps(20.); + let b = _mm_set1_ps(10.); + let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b); + let e = _mm_set_ps(20., 20., 20., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b); + let e = _mm_set_ps(20., 20., 20., 1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getmant_sd() { + let a = _mm_set1_pd(20.); + let b = _mm_set1_pd(10.); + let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b); + let e = _mm_set_pd(20., 1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getmant_sd() { + let a = _mm_set1_pd(20.); + let b = _mm_set1_pd(10.); + let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b); + let e = _mm_set_pd(20., 20.); + assert_eq_m128d(r, e); + let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b); + let e = _mm_set_pd(20., 1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getmant_sd() { + let a = _mm_set1_pd(20.); + let b = _mm_set1_pd(10.); + let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b); + let e = _mm_set_pd(20., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b); + let e = _mm_set_pd(20., 1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_roundscale_ss() { + let a = _mm_set1_ps(2.2); + let b = 
_mm_set1_ps(1.1); + let r = _mm_roundscale_ss::<0>(a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_roundscale_ss() { + let a = _mm_set1_ps(2.2); + let b = _mm_set1_ps(1.1); + let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2); + assert_eq_m128(r, e); + let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_roundscale_ss() { + let a = _mm_set1_ps(2.2); + let b = _mm_set1_ps(1.1); + let r = _mm_maskz_roundscale_ss::<0>(0, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0); + assert_eq_m128(r, e); + let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_roundscale_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_roundscale_sd::<0>(a, b); + let e = _mm_set_pd(2.2, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_roundscale_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b); + let e = _mm_set_pd(2.2, 2.2); + assert_eq_m128d(r, e); + let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b); + let e = _mm_set_pd(2.2, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_roundscale_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_maskz_roundscale_sd::<0>(0, a, b); + let e = _mm_set_pd(2.2, 0.0); + assert_eq_m128d(r, e); + let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b); + let e = _mm_set_pd(2.2, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_scalef_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = 
_mm_scalef_ss(a, b); + let e = _mm_set_ps(1., 1., 1., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_scalef_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_mask_scalef_ss(a, 0, a, b); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + let r = _mm_mask_scalef_ss(a, 0b11111111, a, b); + let e = _mm_set_ps(1., 1., 1., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_scalef_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_maskz_scalef_ss(0, a, b); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_scalef_ss(0b11111111, a, b); + let e = _mm_set_ps(1., 1., 1., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_scalef_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_scalef_sd(a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_scalef_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_mask_scalef_sd(a, 0, a, b); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + let r = _mm_mask_scalef_sd(a, 0b11111111, a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_scalef_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_maskz_scalef_sd(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_scalef_sd(0b11111111, a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmadd_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fmadd_ss(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c); + let e = _mm_set_ps(1., 1., 1., 5.); + 
assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmadd_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fmadd_ss(0, a, b, c); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c); + let e = _mm_set_ps(1., 1., 1., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmadd_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fmadd_ss(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111); + let e = _mm_set_ps(3., 3., 3., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmadd_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fmadd_sd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c); + let e = _mm_set_pd(1., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmadd_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fmadd_sd(0, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c); + let e = _mm_set_pd(1., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmadd_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fmadd_sd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111); + let e = _mm_set_pd(3., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmsub_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fmsub_ss(a, 0, b, c); + assert_eq_m128(r, a); + let 
r = _mm_mask_fmsub_ss(a, 0b11111111, b, c); + let e = _mm_set_ps(1., 1., 1., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmsub_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fmsub_ss(0, a, b, c); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c); + let e = _mm_set_ps(1., 1., 1., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmsub_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fmsub_ss(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111); + let e = _mm_set_ps(3., 3., 3., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmsub_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fmsub_sd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c); + let e = _mm_set_pd(1., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmsub_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fmsub_sd(0, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c); + let e = _mm_set_pd(1., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmsub_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fmsub_sd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111); + let e = _mm_set_pd(3., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmadd_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let 
c = _mm_set1_ps(3.); + let r = _mm_mask_fnmadd_ss(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmadd_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fnmadd_ss(0, a, b, c); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmadd_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fnmadd_ss(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111); + let e = _mm_set_ps(3., 3., 3., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmadd_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fnmadd_sd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmadd_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fnmadd_sd(0, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmadd_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fnmadd_sd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111); + let e = _mm_set_pd(3., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm_mask_fnmsub_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fnmsub_ss(a, 0, b, c); + assert_eq_m128(r, a); + let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c); + let e = _mm_set_ps(1., 1., 1., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmsub_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fnmsub_ss(0, a, b, c); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c); + let e = _mm_set_ps(1., 1., 1., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmsub_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fnmsub_ss(a, b, c, 0); + assert_eq_m128(r, c); + let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111); + let e = _mm_set_ps(3., 3., 3., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmsub_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fnmsub_sd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c); + let e = _mm_set_pd(1., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmsub_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fnmsub_sd(0, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c); + let e = _mm_set_pd(1., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmsub_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fnmsub_sd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = 
_mm_mask3_fnmsub_sd(a, b, c, 0b11111111); + let e = _mm_set_pd(3., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_add_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(1., 2., 10., 60.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_add_round_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 2., 10., 60.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_add_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = + _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 60.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_add_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 6.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_add_round_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + 
assert_eq_m128d(r, e); + let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_pd(1., 6.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_add_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = + _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_pd(1., 6.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_sub_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(1., 2., 10., -20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sub_round_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 2., 10., -20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sub_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = + _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., -20.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm_sub_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sub_round_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_pd(1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sub_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = + _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_pd(1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mul_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(1., 2., 10., 800.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_mul_round_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 2., 10., 800.); + 
assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_mul_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = + _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 800.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mul_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_mul_round_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_mul_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = + _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_div_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + 
assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_div_round_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 2., 10., 0.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_div_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 40.); + let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = + _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 0.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_div_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_div_round_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_div_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_div_round_sd::<{ 
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = + _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_pd(1., 0.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_max_round_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_ps(0., 1., 2., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_max_round_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_max_round_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_ps(0., 1., 2., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_max_round_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_pd(0., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_max_round_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + let r = 
_mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_pd(0., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_max_round_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_pd(0., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_min_round_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_min_round_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_min_round_ss() { + let a = _mm_set_ps(0., 1., 2., 3.); + let b = _mm_set_ps(4., 5., 6., 7.); + let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_ps(0., 1., 2., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_ps(0., 1., 2., 3.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_min_round_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm_mask_min_round_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_min_round_sd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(2., 3.); + let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_sqrt_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(1., 2., 10., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sqrt_round_ss() { + let src = _mm_set_ps(10., 11., 100., 110.); + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_ps(1., 2., 10., 110.); + assert_eq_m128(r, e); + let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 2., 10., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sqrt_round_ss() { + let a = _mm_set_ps(1., 2., 10., 20.); + let b = _mm_set_ps(3., 4., 30., 4.); + let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(1., 2., 10., 0.); + assert_eq_m128(r, e); + let r = + _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC 
}>(0b11111111, a, b); + let e = _mm_set_ps(1., 2., 10., 2.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_sqrt_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_sqrt_round_sd() { + let src = _mm_set_pd(10., 11.); + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); + let e = _mm_set_pd(1., 11.); + assert_eq_m128d(r, e); + let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, + ); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_sqrt_round_sd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(3., 4.); + let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = + _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getexp_round_ss() { + let a = _mm_set1_ps(2.); + let b = _mm_set1_ps(3.); + let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_ps(2., 2., 2., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getexp_round_ss() { + let a = _mm_set1_ps(2.); + let b = _mm_set1_ps(3.); + let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_ps(2., 2., 2., 2.); + assert_eq_m128(r, e); + let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_ps(2., 2., 2., 1.); + assert_eq_m128(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getexp_round_ss() { + let a = _mm_set1_ps(2.); + let b = _mm_set1_ps(3.); + let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_ps(2., 2., 2., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_ps(2., 2., 2., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getexp_round_sd() { + let a = _mm_set1_pd(2.); + let b = _mm_set1_pd(3.); + let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getexp_round_sd() { + let a = _mm_set1_pd(2.); + let b = _mm_set1_pd(3.); + let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getexp_round_sd() { + let a = _mm_set1_pd(2.); + let b = _mm_set1_pd(3.); + let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_pd(2., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getmant_round_ss() { + let a = _mm_set1_ps(20.); + let b = _mm_set1_ps(10.); + let r = + _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>( + a, b, + ); + let e = _mm_set_ps(20., 20., 20., 1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getmant_round_ss() { + let a = _mm_set1_ps(20.); + let b = _mm_set1_ps(10.); + let r = _mm_mask_getmant_round_ss::< + 
_MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0, a, b); + let e = _mm_set_ps(20., 20., 20., 20.); + assert_eq_m128(r, e); + let r = _mm_mask_getmant_round_ss::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0b11111111, a, b); + let e = _mm_set_ps(20., 20., 20., 1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getmant_round_ss() { + let a = _mm_set1_ps(20.); + let b = _mm_set1_ps(10.); + let r = _mm_maskz_getmant_round_ss::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0, a, b); + let e = _mm_set_ps(20., 20., 20., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_getmant_round_ss::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0b11111111, a, b); + let e = _mm_set_ps(20., 20., 20., 1.25); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_getmant_round_sd() { + let a = _mm_set1_pd(20.); + let b = _mm_set1_pd(10.); + let r = + _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>( + a, b, + ); + let e = _mm_set_pd(20., 1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_getmant_round_sd() { + let a = _mm_set1_pd(20.); + let b = _mm_set1_pd(10.); + let r = _mm_mask_getmant_round_sd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0, a, b); + let e = _mm_set_pd(20., 20.); + assert_eq_m128d(r, e); + let r = _mm_mask_getmant_round_sd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0b11111111, a, b); + let e = _mm_set_pd(20., 1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_getmant_round_sd() { + let a = _mm_set1_pd(20.); + let b = _mm_set1_pd(10.); + let r = _mm_maskz_getmant_round_sd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0, a, b); + let e = 
_mm_set_pd(20., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_getmant_round_sd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0b11111111, a, b); + let e = _mm_set_pd(20., 1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_roundscale_round_ss() { + let a = _mm_set1_ps(2.2); + let b = _mm_set1_ps(1.1); + let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_roundscale_round_ss() { + let a = _mm_set1_ps(2.2); + let b = _mm_set1_ps(1.1); + let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2); + assert_eq_m128(r, e); + let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_roundscale_round_ss() { + let a = _mm_set1_ps(2.2); + let b = _mm_set1_ps(1.1); + let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0); + assert_eq_m128(r, e); + let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_roundscale_round_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_pd(2.2, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_roundscale_round_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + let e = _mm_set_pd(2.2, 2.2); + assert_eq_m128d(r, e); + let r = 
_mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_pd(2.2, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_roundscale_round_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_pd(2.2, 0.0); + assert_eq_m128d(r, e); + let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_pd(2.2, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_scalef_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(1., 1., 1., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_scalef_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 1., 1., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_scalef_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(3.); + let r = + _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, + ); + let e = _mm_set_ps(1., 1., 1., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_scalef_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_scalef_round_sd::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_scalef_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, + ); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_scalef_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = + _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, + ); + let e = _mm_set_pd(1., 8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_ps(1., 1., 1., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128(r, a); + let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = 
_mm_set1_ps(3.); + let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128(r, c); + let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_ps(3., 3., 3., 5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_pd(1., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128d(r, a); + let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_pd(1., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fmadd_round_sd::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_pd(1., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_pd(3., 5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_ps(1., 1., 1., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128(r, a); + let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_ps(1., 1., 1., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmsub_round_ss() { 
+ let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128(r, c); + let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_ps(3., 3., 3., -1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_pd(1., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128d(r, a); + let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_pd(1., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_pd(1., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128d(r, c); + let r = 
_mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_pd(3., -1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fnmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128(r, a); + let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmadd_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128(r, c); + let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_ps(3., 3., 3., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] 
+ unsafe fn test_mm_fnmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmadd_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_pd(3., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fnmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = 
_mm_set_ps(1., 1., 1., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128(r, a); + let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_ps(1., 1., 1., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_ps(1., 1., 1., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmsub_round_ss() { + let a = _mm_set1_ps(1.); + let b = _mm_set1_ps(2.); + let c = _mm_set1_ps(3.); + let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128(r, c); + let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_ps(3., 3., 3., -5.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fnmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_set_pd(1., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fnmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = 
_mm_set1_pd(3.); + let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, + ); + let e = _mm_set_pd(1., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fnmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, + ); + let e = _mm_set_pd(1., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask3_fnmsub_round_sd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let c = _mm_set1_pd(3.); + let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, + ); + let e = _mm_set_pd(3., -5.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fixupimm_ss() { + let a = _mm_set_ps(0., 0., 0., f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_fixupimm_ss::<5>(a, b, c); + let e = _mm_set_ps(0., 0., 0., -0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fixupimm_ss() { + let a = _mm_set_ps(0., 0., 0., f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c); + let e = _mm_set_ps(0., 0., 0., -0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fixupimm_ss() { 
+ let a = _mm_set_ps(0., 0., 0., f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c); + let e = _mm_set_ps(0., 0., 0., 0.0); + assert_eq_m128(r, e); + let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c); + let e = _mm_set_ps(0., 0., 0., -0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fixupimm_sd() { + let a = _mm_set_pd(0., f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_fixupimm_sd::<5>(a, b, c); + let e = _mm_set_pd(0., -0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fixupimm_sd() { + let a = _mm_set_pd(0., f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c); + let e = _mm_set_pd(0., -0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fixupimm_sd() { + let a = _mm_set_pd(0., f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c); + let e = _mm_set_pd(0., 0.0); + assert_eq_m128d(r, e); + let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c); + let e = _mm_set_pd(0., -0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fixupimm_round_ss() { + let a = _mm_set_ps(1., 0., 0., f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c); + let e = _mm_set_ps(1., 0., 0., -0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fixupimm_round_ss() { + let a = _mm_set_ps(0., 0., 0., f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, 
b, c); + let e = _mm_set_ps(0., 0., 0., -0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fixupimm_round_ss() { + let a = _mm_set_ps(0., 0., 0., f32::NAN); + let b = _mm_set1_ps(f32::MAX); + let c = _mm_set1_epi32(i32::MAX); + let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c); + let e = _mm_set_ps(0., 0., 0., 0.0); + assert_eq_m128(r, e); + let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c); + let e = _mm_set_ps(0., 0., 0., -0.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_fixupimm_round_sd() { + let a = _mm_set_pd(0., f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c); + let e = _mm_set_pd(0., -0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_fixupimm_round_sd() { + let a = _mm_set_pd(0., f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c); + let e = _mm_set_pd(0., -0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_fixupimm_round_sd() { + let a = _mm_set_pd(0., f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c); + let e = _mm_set_pd(0., 0.0); + assert_eq_m128d(r, e); + let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c); + let e = _mm_set_pd(0., -0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cvtss_sd() { + let a = _mm_set_pd(6., -7.5); + let b = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_mask_cvtss_sd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = 
_mm_mask_cvtss_sd(a, 0b11111111, a, b); + let e = _mm_set_pd(6., -1.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_cvtss_sd() { + let a = _mm_set_pd(6., -7.5); + let b = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_maskz_cvtss_sd(0, a, b); + let e = _mm_set_pd(6., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_cvtss_sd(0b11111111, a, b); + let e = _mm_set_pd(6., -1.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cvtsd_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b = _mm_set_pd(6., -7.5); + let r = _mm_mask_cvtsd_ss(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b); + let e = _mm_set_ps(0., -0.5, 1., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_cvtsd_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtsd_ss(0, a, b); + let e = _mm_set_ps(0., -0.5, 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_cvtsd_ss(0b11111111, a, b); + let e = _mm_set_ps(0., -0.5, 1., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_sd() { + let a = _mm_set_pd(6., -7.5); + let b = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm_set_pd(6., -1.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cvt_roundss_sd() { + let a = _mm_set_pd(6., -7.5); + let b = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); + let e = _mm_set_pd(6., -1.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_cvt_roundss_sd() { + let a = _mm_set_pd(6., -7.5); + let b = _mm_set_ps(0., -0.5, 1., -1.5); + let r = 
_mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + let e = _mm_set_pd(6., 0.); + assert_eq_m128d(r, e); + let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); + let e = _mm_set_pd(6., -1.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b = _mm_set_pd(6., -7.5); + let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cvt_roundsd_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b = _mm_set_pd(6., -7.5); + let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b); + assert_eq_m128(r, a); + let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, + ); + let e = _mm_set_ps(0., -0.5, 1., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_maskz_cvt_roundsd_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_ps(0., -0.5, 1., 0.); + assert_eq_m128(r, e); + let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, + ); + let e = _mm_set_ps(0., -0.5, 1., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_si32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_i32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_u32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_i32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_i32(a); + let e: i32 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_u32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_u32(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_si32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_i32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_u32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_i32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_i32(a); + let e: i32 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_u32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_u32(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi32_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i32 = 9; + let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm_cvt_roundsi32_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i32 = 9; + let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu32_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u32 = 9; + let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti32_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i32 = 9; + let r = _mm_cvti32_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti32_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i32 = 9; + let r = _mm_cvti32_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_si32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_i32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_u32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_i32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_i32(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_u32() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_u32(a); + let e: 
u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_si32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_i32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_u32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_i32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_i32(a); + let e: i32 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_u32() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_u32(a); + let e: u32 = u32::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu32_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u32 = 9; + let r = _mm_cvtu32_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu32_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u32 = 9; + let r = _mm_cvtu32_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_comi_round_ss() { + let a = _mm_set1_ps(2.2); + let b = _mm_set1_ps(1.1); + let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b); + let e: i32 = 0; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_comi_round_sd() { + let a = _mm_set1_pd(2.2); + let b = _mm_set1_pd(1.1); + let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b); + let e: i32 = 0; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe 
fn test_mm512_cvtsi512_si32() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_cvtsi512_si32(a); + let e: i32 = 1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtss_f32() { + let a = _mm512_setr_ps( + 312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ); + assert_eq!(_mm512_cvtss_f32(a), 312.0134); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsd_f64() { + let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8)); + assert_eq!(r, -1.1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_pd() { + let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); + let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); + let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b); + let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_pd() { + let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); + let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); + let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b); + let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_pd() { + let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); + let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); + let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b); + let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expandloadu_epi32() { + let src = _mm512_set1_epi32(42); + let a = &[1_i32, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p)); + let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expandloadu_epi32() { + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_maskz_expandloadu_epi32(m, black_box(p)); + let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expandloadu_epi32() { + let src = _mm256_set1_epi32(42); + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p)); + let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expandloadu_epi32() { + let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_maskz_expandloadu_epi32(m, black_box(p)); + let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expandloadu_epi32() { + let src = _mm_set1_epi32(42); + let a = &[1_i32, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b11111000; + let r = _mm_mask_expandloadu_epi32(src, m, black_box(p)); + let e = _mm_set_epi32(1, 42, 42, 42); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expandloadu_epi32() { + let a = &[1_i32, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b11111000; + let r = _mm_maskz_expandloadu_epi32(m, black_box(p)); + let e = _mm_set_epi32(1, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe 
fn test_mm512_mask_expandloadu_epi64() { + let src = _mm512_set1_epi64(42); + let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p)); + let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expandloadu_epi64() { + let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm512_maskz_expandloadu_epi64(m, black_box(p)); + let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expandloadu_epi64() { + let src = _mm256_set1_epi64x(42); + let a = &[1_i64, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p)); + let e = _mm256_set_epi64x(1, 42, 42, 42); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expandloadu_epi64() { + let a = &[1_i64, 2, 3, 4]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_maskz_expandloadu_epi64(m, black_box(p)); + let e = _mm256_set_epi64x(1, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expandloadu_epi64() { + let src = _mm_set1_epi64x(42); + let a = &[1_i64, 2]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_mask_expandloadu_epi64(src, m, black_box(p)); + let e = _mm_set_epi64x(42, 42); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expandloadu_epi64() { + let a = &[1_i64, 2]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_maskz_expandloadu_epi64(m, black_box(p)); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expandloadu_ps() { + let src = _mm512_set1_ps(42.); + let a = &[ + 1.0f32, 2., 
3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_mask_expandloadu_ps(src, m, black_box(p)); + let e = _mm512_set_ps( + 8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expandloadu_ps() { + let a = &[ + 1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm512_maskz_expandloadu_ps(m, black_box(p)); + let e = _mm512_set_ps( + 8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expandloadu_ps() { + let src = _mm256_set1_ps(42.); + let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_mask_expandloadu_ps(src, m, black_box(p)); + let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expandloadu_ps() { + let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_maskz_expandloadu_ps(m, black_box(p)); + let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expandloadu_ps() { + let src = _mm_set1_ps(42.); + let a = &[1.0f32, 2., 3., 4.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_mask_expandloadu_ps(src, m, black_box(p)); + let e = _mm_set_ps(1., 42., 42., 42.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expandloadu_ps() { + let a = &[1.0f32, 2., 3., 4.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_maskz_expandloadu_ps(m, black_box(p)); + let e = _mm_set_ps(1., 0., 0., 
0.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expandloadu_pd() { + let src = _mm512_set1_pd(42.); + let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm512_mask_expandloadu_pd(src, m, black_box(p)); + let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expandloadu_pd() { + let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm512_maskz_expandloadu_pd(m, black_box(p)); + let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expandloadu_pd() { + let src = _mm256_set1_pd(42.); + let a = &[1.0f64, 2., 3., 4.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_mask_expandloadu_pd(src, m, black_box(p)); + let e = _mm256_set_pd(1., 42., 42., 42.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expandloadu_pd() { + let a = &[1.0f64, 2., 3., 4.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm256_maskz_expandloadu_pd(m, black_box(p)); + let e = _mm256_set_pd(1., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expandloadu_pd() { + let src = _mm_set1_pd(42.); + let a = &[1.0f64, 2.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_mask_expandloadu_pd(src, m, black_box(p)); + let e = _mm_set_pd(42., 42.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expandloadu_pd() { + let a = &[1.0f64, 2.]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_maskz_expandloadu_pd(m, black_box(p)); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs 
b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs new file mode 100644 index 000000000000..0a81a0581f97 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs @@ -0,0 +1,27263 @@ +use crate::arch::asm; +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::{fmaf16, simd::*}; +use crate::ptr; + +/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_set_ph( + e7: f16, + e6: f16, + e5: f16, + e4: f16, + e3: f16, + e2: f16, + e1: f16, + e0: f16, +) -> __m128h { + __m128h([e0, e1, e2, e3, e4, e5, e6, e7]) +} + +/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_set_ph( + e15: f16, + e14: f16, + e13: f16, + e12: f16, + e11: f16, + e10: f16, + e9: f16, + e8: f16, + e7: f16, + e6: f16, + e5: f16, + e4: f16, + e3: f16, + e2: f16, + e1: f16, + e0: f16, +) -> __m256h { + __m256h([ + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ]) +} + +/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_set_ph( + e31: f16, + e30: f16, + e29: f16, + e28: f16, + e27: f16, + e26: f16, + e25: f16, + e24: f16, + e23: f16, + e22: f16, + e21: f16, + e20: f16, + e19: f16, + e18: f16, + e17: f16, + e16: f16, + e15: f16, + e14: f16, + e13: f16, + e12: f16, + e11: f16, + e10: f16, + e9: f16, + e8: f16, + e7: f16, + e6: f16, + e5: f16, + e4: f16, + e3: f16, + e2: f16, + e1: f16, + e0: f16, +) -> __m512h { + __m512h([ + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, + e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, + ]) +} + +/// Copy half-precision (16-bit) floating-point elements from a to the lower element of dst and zero +/// the upper 7 elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_set_sh(a: f16) -> __m128h { + __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) +} + +/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_set1_ph(a: f16) -> __m128h { + unsafe { transmute(f16x8::splat(a)) } +} + +/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_set1_ph(a: f16) -> __m256h { + unsafe { transmute(f16x16::splat(a)) } +} + +/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_set1_ph(a: f16) -> __m512h { + unsafe { transmute(f16x32::splat(a)) } +} + +/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_setr_ph( + e0: f16, + e1: f16, + e2: f16, + e3: f16, + e4: f16, + e5: f16, + e6: f16, + e7: f16, +) -> __m128h { + __m128h([e0, e1, e2, e3, e4, e5, e6, e7]) +} + +/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_setr_ph( + e0: f16, + e1: f16, + e2: f16, + e3: f16, + e4: f16, + e5: f16, + e6: f16, + e7: f16, + e8: f16, + e9: f16, + e10: f16, + e11: f16, + e12: f16, + e13: f16, + e14: f16, + e15: f16, +) -> __m256h { + __m256h([ + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ]) +} + +/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_setr_ph( + e0: f16, + e1: f16, + e2: f16, + e3: f16, + e4: f16, + e5: f16, + e6: f16, + e7: f16, + e8: f16, + e9: f16, + e10: f16, + e11: f16, + e12: f16, + e13: f16, + e14: f16, + e15: f16, + e16: f16, + e17: f16, + e18: f16, + e19: f16, + e20: f16, + e21: f16, + e22: f16, + e23: f16, + e24: f16, + e25: f16, + e26: f16, + e27: f16, + e28: f16, + e29: f16, + e30: f16, + e31: f16, +) -> __m512h { + __m512h([ + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, + e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, + ]) +} + +/// Return vector of type __m128h with all elements set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_setzero_ph() -> __m128h { + unsafe { transmute(f16x8::ZERO) } +} + +/// Return vector of type __m256h with all elements set to zero. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_setzero_ph() -> __m256h { + f16x16::ZERO.as_m256h() +} + +/// Return vector of type __m512h with all elements set to zero. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_setzero_ph() -> __m512h { + f16x32::ZERO.as_m512h() +} + +/// Return vector of type `__m128h` with indetermination elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit). +/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_undefined_ph() -> __m128h { + f16x8::ZERO.as_m128h() +} + +/// Return vector of type `__m256h` with indetermination elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit). +/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_undefined_ph() -> __m256h { + f16x16::ZERO.as_m256h() +} + +/// Return vector of type `__m512h` with indetermination elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit). +/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_undefined_ph() -> __m512h { + f16x32::ZERO.as_m512h() +} + +/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_castpd_ph(a: __m128d) -> __m128h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castpd_ph(a: __m256d) -> __m256h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castpd_ph(a: __m512d) -> __m512h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_castph_pd(a: __m128h) -> __m128d { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castph_pd(a: __m256h) -> __m256d { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph_pd(a: __m512h) -> __m512d { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_castps_ph(a: __m128) -> __m128h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castps_ph(a: __m256) -> __m256h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castps_ph(a: __m512) -> __m512h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_castph_ps(a: __m128h) -> __m128 { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castph_ps(a: __m256h) -> __m256 { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph_ps(a: __m512h) -> __m512 { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_castsi128_ph(a: __m128i) -> __m128h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_castph_si128(a: __m128h) -> __m128i { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castph_si256(a: __m256h) -> __m256i { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph_si512(a: __m512h) -> __m512i { + unsafe { transmute(a) } +} + +/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +} + +/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +} + +/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and +/// does not generate any instructions, thus it has zero latency. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} + +/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined. +/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, +/// but most of the time it does not generate any instructions. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h { + unsafe { + simd_shuffle!( + a, + _mm_undefined_ph(), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ) + } +} + +/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined. +/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, +/// but most of the time it does not generate any instructions. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm_undefined_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8 + ] + ) + } +} + +/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined. +/// In practice, the upper elements are zeroed. 
This intrinsic can generate the `vzeroupper` instruction, +/// but most of the time it does not generate any instructions. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16 + ] + ) + } +} + +/// Cast vector of type `__m256h` to type `__m128h`. The upper 8 elements of the result are zeroed. +/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate +/// any instructions. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h { + unsafe { + simd_shuffle!( + a, + _mm_setzero_ph(), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ) + } +} + +/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed. +/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate +/// any instructions. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm256_setzero_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16 + ] + ) + } +} + +/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed. +/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate +/// any instructions. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm_setzero_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8 + ] + ) + } +} + +macro_rules! cmp_asm { // FIXME: use LLVM intrinsics + ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{ + let dst: $mask_type; + asm!( + "vcmpph {k}, {a}, {b}, {imm8}", + k = lateout(kreg) dst, + a = in($reg) $a, + b = in($reg) $b, + imm8 = const IMM5, + options(pure, nomem, nostack) + ); + dst + }}; + ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{ + let dst: $mask_type; + asm!( + "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}", + k = lateout(kreg) dst, + mask = in(kreg) $mask, + a = in($reg) $a, + b = in($reg) $b, + imm8 = const IMM5, + options(pure, nomem, nostack) + ); + dst + }}; +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison +/// operand specified by imm8, and store the results in mask vector k. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cmp_ph_mask(a: __m128h, b: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask8, xmm_reg, a, b) + } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison +/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are +/// zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cmp_ph_mask(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask8, k1, xmm_reg, a, b) + } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison +/// operand specified by imm8, and store the results in mask vector k. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
+    unsafe {
+        // IMM5 selects the vcmpph predicate; only 5 bits are valid.
+        static_assert_uimm_bits!(IMM5, 5);
+        cmp_asm!(__mmask16, ymm_reg, a, b)
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
+    k1: __mmask16,
+    a: __m256h,
+    b: __m256h,
+) -> __mmask16 {
+    unsafe {
+        static_assert_uimm_bits!(IMM5, 5);
+        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
+    unsafe {
+        // IMM5 selects the vcmpph predicate; only 5 bits are valid.
+        static_assert_uimm_bits!(IMM5, 5);
+        cmp_asm!(__mmask32, zmm_reg, a, b)
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
+    k1: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __mmask32 {
+    unsafe {
+        static_assert_uimm_bits!(IMM5, 5);
+        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
+    a: __m512h,
+    b: __m512h,
+) -> __mmask32 {
+    unsafe {
+        static_assert_uimm_bits!(IMM5, 5);
+        static_assert_sae!(SAE);
+        // Only the suppress-all-exceptions form needs the explicit {sae}
+        // encoding; otherwise the plain vcmpph form is emitted.
+        if SAE == _MM_FROUND_NO_EXC {
+            let dst: __mmask32;
+            asm!(
+                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
+                k = lateout(kreg) dst,
+                a = in(zmm_reg) a,
+                b = in(zmm_reg) b,
+                imm8 = const IMM5,
+                options(pure, nomem, nostack)
+            );
+            dst
+        } else {
+            cmp_asm!(__mmask32, zmm_reg, a, b)
+        }
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
+    k1: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __mmask32 {
+    unsafe {
+        static_assert_uimm_bits!(IMM5, 5);
+        static_assert_sae!(SAE);
+        // Only the suppress-all-exceptions form needs the explicit {sae}
+        // encoding; otherwise the plain masked vcmpph form is emitted.
+        if SAE == _MM_FROUND_NO_EXC {
+            let dst: __mmask32;
+            asm!(
+                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
+                k = lateout(kreg) dst,
+                k1 = in(kreg) k1,
+                a = in(zmm_reg) a,
+                b = in(zmm_reg) b,
+                imm8 = const IMM5,
+                options(pure, nomem, nostack)
+            );
+            dst
+        } else {
+            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
+        }
+    }
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
+/// passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
+    static_assert_uimm_bits!(IMM5, 5);
+    static_assert_sae!(SAE);
+    // All-ones mask: compare unconditionally; only bit 0 of the result is meaningful.
+    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
+/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
+    k1: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __mmask8 {
+    unsafe {
+        static_assert_uimm_bits!(IMM5, 5);
+        static_assert_sae!(SAE);
+        vcmpsh(a, b, IMM5, k1, SAE)
+    }
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
+    static_assert_uimm_bits!(IMM5, 5);
+    // Non-round form defers to the current MXCSR rounding/exception behavior.
+    _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
+    static_assert_uimm_bits!(IMM5, 5);
+    // Non-round form defers to the current MXCSR rounding/exception behavior.
+    _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and return the boolean result (0 or 1).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
+    unsafe {
+        static_assert_uimm_bits!(IMM5, 5);
+        static_assert_sae!(SAE);
+        vcomish(a, b, IMM5, SAE)
+    }
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and return the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
+    static_assert_uimm_bits!(IMM5, 5);
+    // Non-round form defers to the current MXCSR rounding/exception behavior.
+    _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
+/// the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
+    _mm_comi_sh::<_CMP_EQ_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
+/// and return the boolean result (0 or 1).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_GE_OS>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return +/// the boolean result (0 or 1). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_GT_OS>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and +/// return the boolean result (0 or 1). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_LE_OS>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return +/// the boolean result (0 or 1). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_LT_OS>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return +/// the boolean result (0 or 1). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_NEQ_OS>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and +/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_EQ_OQ>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal, +/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_GE_OQ>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return +/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_GT_OQ>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and +/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_LE_OQ>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return +/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_LT_OQ>(a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return +/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 { + _mm_comi_sh::<_CMP_NEQ_OQ>(a, b) +} + +/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into +/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h { + *mem_addr.cast() +} + +/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into +/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h { + *mem_addr.cast() +} + +/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into +/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h { + *mem_addr.cast() +} + +/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector, +/// and zero the upper elements +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h { + _mm_set_sh(*mem_addr) +} + +/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector +/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h { + let mut dst = src; + asm!( + vpl!("vmovsh {dst}{{{k}}}"), + dst = inout(xmm_reg) dst, + k = in(kreg) k, + p = in(reg) mem_addr, + options(pure, readonly, nostack, preserves_flags) + ); + dst +} + +/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector +/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h { + let mut dst: __m128h; + asm!( + vpl!("vmovsh {dst}{{{k}}}{{z}}"), + dst = out(xmm_reg) dst, + k = in(kreg) k, + p = in(reg) mem_addr, + options(pure, readonly, nostack, preserves_flags) + ); + dst +} + +/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into +/// a new vector. The address does not need to be aligned to any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h { + ptr::read_unaligned(mem_addr.cast()) +} + +/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into +/// a new vector. The address does not need to be aligned to any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h { + ptr::read_unaligned(mem_addr.cast()) +} + +/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into +/// a new vector. The address does not need to be aligned to any particular boundary. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h { + ptr::read_unaligned(mem_addr.cast()) +} + +/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst +/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper +/// 7 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let mut mov: f16 = simd_extract!(src, 0); + if (k & 1) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) + } +} + +/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst +/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed +/// elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let mut mov: f16 = 0.; + if (k & 1) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) + } +} + +/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst, +/// and copy the upper 7 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h { + unsafe { + let mov: f16 = simd_extract!(b, 0); + simd_insert!(a, 0, mov) + } +} + +/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. +/// The address must be aligned to 16 bytes or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) { + *mem_addr.cast() = a; +} + +/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. +/// The address must be aligned to 32 bytes or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) { + *mem_addr.cast() = a; +} + +/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. +/// The address must be aligned to 64 bytes or a general-protection exception may be generated. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) { + *mem_addr.cast() = a; +} + +/// Store the lower half-precision (16-bit) floating-point element from a into memory. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) { + *mem_addr = simd_extract!(a, 0); +} + +/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) { + asm!( + vps!("vmovdqu16", "{{{k}}}, {src}"), + p = in(reg) mem_addr, + k = in(kreg) k, + src = in(xmm_reg) a, + options(nostack, preserves_flags) + ); +} + +/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. +/// The address does not need to be aligned to any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) { + ptr::write_unaligned(mem_addr.cast(), a); +} + +/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. 
+/// The address does not need to be aligned to any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) { + ptr::write_unaligned(mem_addr.cast(), a); +} + +/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. +/// The address does not need to be aligned to any particular boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) { + ptr::write_unaligned(mem_addr.cast(), a); +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_add(a, b) } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_add_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_add_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_add(a, b) } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_add_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_add_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_add(a, b) } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_add_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vaddph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_add_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } +} + +/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vaddph(a, b, ROUNDING)
+    }
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_add_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, src)
+    }
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_add_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, _mm512_setzero_ph())
+    }
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vaddsh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vaddsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the +/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using +/// zeromask k (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vaddsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_sub(a, b) } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_sub_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_sub_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_sub(a, b) } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_sub_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_sub_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_sub(a, b) } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_sub_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsubph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_sub_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } +} + +/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. 
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vsubph(a, b, ROUNDING)
+    }
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_sub_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, src)
+    }
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_sub_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, _mm512_setzero_ph())
+    }
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vsubsh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsubsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the +/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using +/// zeromask k (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsubsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_mul(a, b) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_mul_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_mul_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_mul(a, b) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_mul_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_mul_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_mul(a, b) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_mul_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vmulph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_mul_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vmulph(a, b, ROUNDING)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_mul_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, src)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_mul_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, _mm512_setzero_ph())
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vmulsh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vmulsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the +/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using +/// zeromask k (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vmulsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_div(a, b) } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_div_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_div_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_div(a, b) } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_div_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_div_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_div(a, b) } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using +/// writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_div_ph(a, b); + simd_select_bitmask(k, r, src) + } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using +/// zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vdivph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_div_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } +} + +/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. 
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vdivph(a, b, ROUNDING)
+    }
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, src)
+    }
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
+        simd_select_bitmask(k, r, _mm512_setzero_ph())
+    }
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vdivsh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vdivsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the +/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using +/// zeromask k (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vdivsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). 
Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) } +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) } +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b) +} + +/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
+}
+
+/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vfmulcph_512(
+            transmute(a),
+            transmute(b),
+            transmute(src),
+            k,
+            ROUNDING,
+        ))
+    }
+}
+
+/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using +/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed +/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using +/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements +/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, +/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mul_round_sch(a: __m128h, b: __m128h) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_mul_round_sch::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using +/// writemask k (the element is copied from src when 
mask bit 0 is not set), and copy the upper 6 packed +/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_mul_round_sch( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmulcsh( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } +} + +/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using +/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements +/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_mul_round_sch( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_mul_round_sch::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { + _mm_mul_pch(a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). 
Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_pch(src, k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_mul_pch(k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { + _mm256_mul_pch(a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_mul_pch(src, k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + _mm256_maskz_mul_pch(k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. 
Each complex number is composed +/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { + _mm512_mul_pch(a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_mul_pch(src, k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_maskz_mul_pch(k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed +/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fmul_round_pch(a: __m512h, b: __m512h) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mul_round_pch::(a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when corresponding mask bit is not set). 
Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fmul_round_pch( + src: __m512h, + k: __mmask16, + a: __m512h, + b: __m512h, +) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_mul_round_pch::(src, k, a, b) +} + +/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmul_round_pch( + k: __mmask16, + a: __m512h, + b: __m512h, +) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_maskz_mul_round_pch::(k, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is +/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { + _mm_mul_sch(a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when mask bit 0 is not set). 
Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_mul_sch(src, k, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_mul_sch(k, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed +/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmul_round_sch(a: __m128h, b: __m128h) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mul_round_sch::(a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element +/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmul_round_sch( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_mul_round_sch::(src, k, a, b) +} + +/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision +/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmul_round_sch( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_maskz_mul_round_sch::(k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cmul_round_pch(a: __m512h, b: __m512h) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_cmul_round_pch::(_mm512_undefined_ph(), 0xffff, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store 
the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>( + src: __m512h, + k: __mmask16, + a: __m512h, + b: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmulcph_512( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>( + k: __mmask16, + a: __m512h, + b: __m512h, +) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst.
Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). 
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Multiply the
lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmulcsh( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { + _mm_cmul_pch(a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_pch(src, k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_cmul_pch(k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { + _mm256_cmul_pch(a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_cmul_pch(src, k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + _mm256_maskz_cmul_pch(k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { + _mm512_cmul_pch(a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_cmul_pch(src, k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { + _mm512_maskz_cmul_pch(k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_cmul_round_pch::<ROUNDING>(a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>( + src: __m512h, + k: __mmask16, + a: __m512h, + b: __m512h, +) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and +/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>( + k: __mmask16, + a: __m512h, + b: __m512h, +) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { + _mm_cmul_sch(a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_cmul_sch(src, k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_maskz_cmul_sch(k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_cmul_round_sch::<ROUNDING>(a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b) +} + +/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, +/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b) +} + +/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing +/// the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_abs_ph(v2: __m128h) -> __m128h { + unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) } +} + +/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing +/// the result in dst.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_abs_ph(v2: __m256h) -> __m256h { + unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) } +} + +/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing +/// the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_abs_ph(v2: __m512h) -> __m512h { + unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex +/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines +/// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate +/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_conj_pch(a: __m128h) -> __m128h { + unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k +/// (the element is copied from src when corresponding mask bit is not set). 
Each complex number is composed of two +/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number +/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { + let r: __m128 = transmute(_mm_conj_pch(a)); + transmute(simd_select_bitmask(k, r, transmute(src))) + } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k +/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { + _mm_mask_conj_pch(_mm_setzero_ph(), k, a) +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number +/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_conj_pch(a: __m256h) -> __m256h { + unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k +/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two +/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h { + unsafe { + let r: __m256 = transmute(_mm256_conj_pch(a)); + transmute(simd_select_bitmask(k, r, transmute(src))) + } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k +/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { + _mm256_mask_conj_pch(_mm256_setzero_ph(), k, a) +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number +/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex +/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_conj_pch(a: __m512h) -> __m512h { + unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k +/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two +/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h { + unsafe { + let r: __m512 = transmute(_mm512_conj_pch(a)); + transmute(simd_select_bitmask(k, r, transmute(src))) + } +} + +/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k +/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { + _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + _mm_mask3_fmadd_pch(a, b, c, 0xff) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from a when the corresponding +/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from c when the corresponding +/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + transmute(vfmaddcph_mask3_128( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + transmute(vfmaddcph_maskz_128( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + _mm256_mask3_fmadd_pch(a, b, c, 0xff) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { + unsafe { + let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from c when the corresponding +/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { + unsafe { + transmute(vfmaddcph_mask3_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { + transmute(vfmaddcph_maskz_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { + _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from c when the corresponding +/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { + _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fmadd_round_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask3_fmadd_round_pch::(a, b, c, 0xffff) +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fmadd_round_pch( + a: __m512h, + k: __mmask16, + b: __m512h, + c: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using writemask k (the element is copied from c when the corresponding +/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fmadd_round_pch( + a: __m512h, + b: __m512h, + c: __m512h, + k: __mmask16, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcph_mask3_512( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } +} + +/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, +/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask +/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point +/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmadd_round_pch( + k: __mmask16, + a: __m512h, + b: __m512h, + c: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcph_maskz_512( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the +/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) +/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst using writemask k (elements are copied from a when +/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, +/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst using writemask k (elements are copied from c when +/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, +/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask +/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each +/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst. Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmadd_round_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcsh_mask( + transmute(a), + transmute(b), + transmute(c), + 0xff, + ROUNDING, + )) + } +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst using writemask k (elements are copied from a when +/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, +/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmadd_round_sch( + a: __m128h, + k: __mmask8, + b: __m128h, + c: __m128h, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let a = transmute(a); + let r = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what CLang does + transmute(_mm_mask_move_ss(a, k, a, r)) + } +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst using writemask k (elements are copied from c when +/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. +/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, +/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmadd_round_sch( + a: __m128h, + b: __m128h, + c: __m128h, + k: __mmask8, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let c = transmute(c); + let r = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING); + transmute(_mm_move_ss(c, r)) + } +} + +/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and +/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask +/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each +/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which +/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmadd_round_sch( + k: __mmask8, + a: __m128h, + b: __m128h, + c: __m128h, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcsh_maskz( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed +/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number +/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    // Unmasked form: delegate to the mask3 variant with an all-ones mask.
+    _mm_mask3_fcmadd_pch(a, b, c, 0xff)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
+        transmute(simd_select_bitmask(k, r, transmute(a)))
+    }
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+    // The LLVM intrinsic performs the merge-with-c masking itself.
+    unsafe {
+        transmute(vfcmaddcph_mask3_128(
+            transmute(a),
+            transmute(b),
+            transmute(c),
+            k,
+        ))
+    }
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    // The LLVM intrinsic performs the zero-masking itself.
+    unsafe {
+        transmute(vfcmaddcph_maskz_128(
+            transmute(a),
+            transmute(b),
+            transmute(c),
+            k,
+        ))
+    }
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
+/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    // Unmasked form: delegate to the mask3 variant with an all-ones mask.
+    _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { + unsafe { + let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is +/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { + unsafe { + transmute(vfcmaddcph_mask3_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is +/// zeroed out when the corresponding mask bit is not set). 
Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { + transmute(vfcmaddcph_maskz_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed +/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number +/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is +/// copied from a when the corresponding mask bit is not set). 
Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { + _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is +/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { + _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is +/// zeroed out when the corresponding mask bit is not set). 
Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) +} + +/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate +/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed +/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number +/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
+    a: __m512h,
+    k: __mmask16,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
+        transmute(simd_select_bitmask(k, r, transmute(a)))
+    }
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
+/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
+/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+    k: __mmask16,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vfcmaddcph_mask3_512(
+            transmute(a),
+            transmute(b),
+            transmute(c),
+            k,
+            ROUNDING,
+        ))
+    }
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
+/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
+/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vfcmaddcph_maskz_512(
+            transmute(a),
+            transmute(b),
+            transmute(c),
+            k,
+            ROUNDING,
+        ))
+    }
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) +} + +/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, +/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using +/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper +/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) +} + +/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, +/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using +/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper +/// 6 packed elements from a to the upper elements of dst. 
Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfcmaddcsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) +} + +/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, +/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using +/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper +/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent +/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, +/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vfcmaddcsh_mask(
+            transmute(a),
+            transmute(b),
+            transmute(c),
+            0xff,
+            ROUNDING,
+        ))
+    }
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
+    a: __m128h,
+    k: __mmask8,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let a = transmute(a);
+        let r = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
+        transmute(_mm_mask_move_ss(a, k, a, r))
+    }
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+    k: __mmask8,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        let c = transmute(c);
+        let r = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
+        transmute(_mm_move_ss(c, r))
+    }
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
+/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
+/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
+/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vfcmaddcsh_maskz(
+            transmute(a),
+            transmute(b),
+            transmute(c),
+            k,
+            ROUNDING,
+        ))
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(a, b, c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(a, b, c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(a, b, c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate +/// result to packed elements in c, and store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    unsafe {
+        // ROUNDING is the const generic consumed by static_assert_rounding!
+        // and forwarded to the LLVM intrinsic; rustc_legacy_const_generics(3)
+        // maps it onto the 4th positional argument for C-style callers.
+        static_assert_rounding!(ROUNDING);
+        vfmaddph_512(a, b, c, ROUNDING)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    k: __mmask32,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lanes with a clear mask bit keep the corresponding element of `a`.
+        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), a)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+    k: __mmask32,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lanes with a clear mask bit keep the corresponding element of `c`.
+        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), c)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lanes with a clear mask bit are zeroed.
+        simd_select_bitmask(
+            k,
+            _mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
+            _mm512_setzero_ph(),
+        )
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(extracta, extractb, extractc); + simd_insert!(a, 0, r) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate +/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element +/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmadd: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmadd = fmaf16(fmadd, extractb, extractc); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate +/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element +/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the +/// upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fmadd: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fmadd = fmaf16(extracta, extractb, fmadd); + } + simd_insert!(c, 0, fmadd) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate +/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmadd: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmadd = fmaf16(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmadd) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate +/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper +/// 7 packed elements from a to the upper elements of dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Scalar form: fma on lane 0 only, upper lanes taken from `a`.
+        let extracta: f16 = simd_extract!(a, 0);
+        let extractb: f16 = simd_extract!(b, 0);
+        let extractc: f16 = simd_extract!(c, 0);
+        let r = vfmaddsh(extracta, extractb, extractc, ROUNDING);
+        simd_insert!(a, 0, r)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
+    a: __m128h,
+    k: __mmask8,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // If mask bit 0 is clear, lane 0 of `a` passes through unchanged.
+        let mut fmadd: f16 = simd_extract!(a, 0);
+        if k & 1 != 0 {
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fmadd)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+    k: __mmask8,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // mask3 variant: result (and the pass-through lanes) live in `c`.
+        let mut fmadd: f16 = simd_extract!(c, 0);
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
+        }
+        simd_insert!(c, 0, fmadd)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Zeromask: lane 0 becomes 0.0 when mask bit 0 is clear.
+        let mut fmadd: f16 = 0.0;
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fmadd)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst.
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements +/// in c from the intermediate result, and store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fmsub is expressed as fmadd with a negated `c`.
+        vfmaddph_512(a, b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    k: __mmask32,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lanes with a clear mask bit keep the corresponding element of `a`.
+        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+    k: __mmask32,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lanes with a clear mask bit keep the corresponding element of `c`.
+        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fmsub_round_ph( + k: __mmask32, + a: __m512h, + b: __m512h, + c: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fmsub_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper +/// 7 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(extracta, extractb, -extractc); + simd_insert!(a, 0, r) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element +/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmsub: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = fmaf16(fmsub, extractb, -extractc); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element +/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the +/// upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fmsub: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fmsub = fmaf16(extracta, extractb, -fmsub); + } + simd_insert!(c, 0, fmsub) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmsub: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = fmaf16(extracta, extractb, -extractc); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper +/// 7 packed elements from a to the upper elements of dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fmsub_round_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = vfmaddsh(extracta, extractb, -extractc, ROUNDING); + simd_insert!(a, 0, r) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element +/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fmsub_round_sh( + a: __m128h, + k: __mmask8, + b: __m128h, + c: __m128h, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element +/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the +/// upper elements of dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fmsub_round_sh( + a: __m128h, + b: __m128h, + c: __m128h, + k: __mmask8, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING); + } + simd_insert!(c, 0, fmsub) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements +/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fmsub_round_sh( + k: __mmask8, + a: __m128h, + b: __m128h, + c: __m128h, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fnmadd_round_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddph_512(simd_neg(a), b, c, ROUNDING) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from a when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fnmadd_round_ph( + a: __m512h, + k: __mmask32, + b: __m512h, + c: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fnmadd_round_ph::(a, b, c), a) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using writemask k (the element is copied +/// from c when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask3_fnmadd_round_ph( + a: __m512h, + b: __m512h, + c: __m512h, + k: __mmask32, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fnmadd_round_ph::(a, b, c), c) + } +} + +/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate +/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed +/// out when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_fnmadd_round_ph( + k: __mmask32, + a: __m512h, + b: __m512h, + c: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fnmadd_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate +/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed +/// elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(-extracta, extractb, extractc); + simd_insert!(a, 0, r) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate +/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element +/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper +/// elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fnmadd: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmadd = fmaf16(-fnmadd, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate +/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element +/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper +/// elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fnmadd: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fnmadd = fmaf16(-extracta, extractb, fnmadd); + } + simd_insert!(c, 0, fnmadd) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate +/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper +/// elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fnmadd: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmadd = fmaf16(-extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) + } +} + +/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate +/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed +/// elements from a to the upper elements of dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fnmadd on lane 0: c - (a * b), done by negating the multiplicand before the fused op.
+        let extracta: f16 = simd_extract!(a, 0);
+        let extractb: f16 = simd_extract!(b, 0);
+        let extractc: f16 = simd_extract!(c, 0);
+        let r = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
+        simd_insert!(a, 0, r)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
+    a: __m128h,
+    k: __mmask8,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lane 0 of `a` passes through unchanged unless mask bit 0 is set.
+        let mut fnmadd: f16 = simd_extract!(a, 0);
+        if k & 1 != 0 {
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+    k: __mmask8,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // mask3 variant: the result (and the pass-through lanes) live in `c`.
+        let mut fnmadd: f16 = simd_extract!(c, 0);
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
+        }
+        simd_insert!(c, 0, fnmadd)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Zeromask: lane 0 becomes 0.0 when mask bit 0 is clear.
+        let mut fnmadd: f16 = 0.0;
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmadd)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    // -(a * b) - c, expressed as fma(-a, b, -c) so a single fused operation is used.
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    // -(a * b) - c, expressed as fma(-a, b, -c) so a single fused operation is used.
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    // -(a * b) - c, expressed as fma(-a, b, -c) so a single fused operation is used.
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // -(a * b) - c, expressed as fma(-a, b, -c) with an explicit rounding mode.
+        vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    k: __mmask32,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+    k: __mmask32,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(
+            k,
+            _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
+            _mm512_setzero_ph(),
+        )
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        // Scalar lane: -(a * b) - c, computed as a single fused fmaf16(-a, b, -c).
+        let extracta: f16 = simd_extract!(a, 0);
+        let extractb: f16 = simd_extract!(b, 0);
+        let extractc: f16 = simd_extract!(c, 0);
+        let r = fmaf16(-extracta, extractb, -extractc);
+        simd_insert!(a, 0, r)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        // Lane 0 of `a` passes through unchanged unless mask bit 0 is set.
+        let mut fnmsub: f16 = simd_extract!(a, 0);
+        if k & 1 != 0 {
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fnmsub = fmaf16(-fnmsub, extractb, -extractc);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+    unsafe {
+        // mask3 variant: the result (and the pass-through lanes) live in `c`.
+        let mut fnmsub: f16 = simd_extract!(c, 0);
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            fnmsub = fmaf16(-extracta, extractb, -fnmsub);
+        }
+        simd_insert!(c, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        // Zeromask: lane 0 becomes 0.0 when mask bit 0 is clear.
+        let mut fnmsub: f16 = 0.0;
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fnmsub = fmaf16(-extracta, extractb, -extractc);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // fnmsub on lane 0: -(a * b) - c, via vfmaddsh(-a, b, -c) with an explicit rounding mode.
+        let extracta: f16 = simd_extract!(a, 0);
+        let extractb: f16 = simd_extract!(b, 0);
+        let extractc: f16 = simd_extract!(c, 0);
+        let r = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
+        simd_insert!(a, 0, r)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
+    a: __m128h,
+    k: __mmask8,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Lane 0 of `a` passes through unchanged unless mask bit 0 is set.
+        let mut fnmsub: f16 = simd_extract!(a, 0);
+        if k & 1 != 0 {
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+    k: __mmask8,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // mask3 variant: the result (and the pass-through lanes) live in `c`.
+        let mut fnmsub: f16 = simd_extract!(c, 0);
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
+        }
+        simd_insert!(c, 0, fnmsub)
+    }
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+    c: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        // Zeromask: lane 0 becomes 0.0 when mask bit 0 is clear.
+        let mut fnmsub: f16 = 0.0;
+        if k & 1 != 0 {
+            let extracta: f16 = simd_extract!(a, 0);
+            let extractb: f16 = simd_extract!(b, 0);
+            let extractc: f16 = simd_extract!(c, 0);
+            fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
+        }
+        simd_insert!(a, 0, fnmsub)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    // Delegates directly to the 128-bit vfmaddsubph intrinsic binding.
+    unsafe { vfmaddsubph_128(a, b, c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    // Delegates directly to the 256-bit vfmaddsubph intrinsic binding.
+    unsafe { vfmaddsubph_256(a, b, c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    // Defers to the rounding variant using the current MXCSR rounding direction.
+    _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmaddsubph_512(a, b, c, ROUNDING)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    k: __mmask32,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+    k: __mmask32,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(
+            k,
+            _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
+            _mm512_setzero_ph(),
+        )
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    // fmsubadd is implemented as fmaddsub with `c` negated, which swaps the alternation.
+    unsafe { vfmaddsubph_128(a, b, simd_neg(c)) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { vfmaddsubph_256(a, b, simd_neg(c)) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    k: __mmask32,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+    k: __mmask32,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
+    }
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+    c: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(
+            k,
+            _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
+            _mm512_setzero_ph(),
+        )
+    }
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
+    _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
+/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+    // The intrinsic binding takes the merge source and mask directly, so no
+    // separate `simd_select_bitmask` step is needed here.
+    unsafe { vrcpph_128(a, src, k) }
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
+/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
+    _mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
+    _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
+/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+    unsafe { vrcpph_256(a, src, k) }
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
+/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
+    _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
+    _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
+/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+    unsafe { vrcpph_512(a, src, k) }
+}
+
+/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
+/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
+    _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
+/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
+/// store the result in the lower element of dst using writemask k (the element is copied from src when
+/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    unsafe { vrcpsh(a, b, src, k) }
+}
+
+/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
+/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
+    _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+    unsafe { vrsqrtph_128(a, src, k) }
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
+    _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
+    _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+    unsafe { vrsqrtph_256(a, src, k) }
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
+    _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { + _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point +/// elements in a, and store the results in dst using writemask k (elements are copied from src when +/// the corresponding mask bit is not set). +/// The maximum relative error for this approximation is less than `1.5*2^-12`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { + unsafe { vrsqrtph_512(a, src, k) } +} + +/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point +/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// The maximum relative error for this approximation is less than `1.5*2^-12`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { + _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a) +} + +/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point +/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a +/// to the upper elements of dst. +/// The maximum relative error for this approximation is less than `1.5*2^-12`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrsqrtsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point +/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src +/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. +/// The maximum relative error for this approximation is less than `1.5*2^-12`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrsqrtsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { vrsqrtsh(a, b, src, k) } +} + +/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point +/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when +/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. +/// The maximum relative error for this approximation is less than `1.5*2^-12`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrsqrtsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_sqrt_ph(a: __m128h) -> __m128h { + unsafe { simd_fsqrt(a) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h { + unsafe { simd_fsqrt(a) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h { + unsafe { simd_fsqrt(a) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vsqrtph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) } +} + +/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the +/// results in dst. 
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vsqrtph_512(a, ROUNDING)
+    }
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src)
+    }
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph())
+    }
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask
+/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask
+/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vsqrtsh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
+/// value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
+    unsafe { vmaxph_128(a, b) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
+/// value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
+    unsafe { vmaxph_256(a, b) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
+/// value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
+    _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
+    unsafe {
+        static_assert_sae!(SAE);
+        vmaxph_512(a, b, SAE)
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_max_round_ph<const SAE: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_sae!(SAE);
+        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src)
+    }
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+    unsafe {
+        static_assert_sae!(SAE);
+        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph())
+    }
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
+/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
+/// when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
+/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
+/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmaxsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value +/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and +/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard +/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmaxsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_max_sh(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value +/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the +/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_max_round_sh(a: __m128h, b: __m128h) -> __m128h { + static_assert_sae!(SAE); + _mm_mask_max_round_sh::(_mm_undefined_ph(), 0xff, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value +/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), +/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by +/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic +/// (IEEE 754) maximum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_max_round_sh( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_sae!(SAE); + vmaxsh(a, b, src, k, SAE) + } +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value +/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and +/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by +/// passing _MM_FROUND_NO_EXC in the sae parameter. 
Does not follow the IEEE Standard for Floating-Point Arithmetic +/// (IEEE 754) maximum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_max_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + static_assert_sae!(SAE); + _mm_mask_max_round_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value +/// when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { vminph_128(a, b) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are +/// NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are +/// NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value +/// when inputs are NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { vminph_256(a, b) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are +/// NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are +/// NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value +/// when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { + _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are +/// NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are +/// NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vminph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not +/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vminph, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_min_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_sae!(SAE); + vminph_512(a, b, SAE) + } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the +/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vminph, SAE = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_min_round_ph( + src: __m512h, + k: __mmask32, + a: __m512h, + b: __m512h, +) -> __m512h { + unsafe { + static_assert_sae!(SAE); + simd_select_bitmask(k, _mm512_min_round_ph::(a, b), src) + } +} + +/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum +/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the +/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vminph, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_min_round_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_sae!(SAE); + simd_select_bitmask(k, _mm512_min_round_ph::(a, b), _mm512_setzero_ph()) + } +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum +/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements +/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when +/// inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum +/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 +/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow +/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value +/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and +/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard +/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_min_sh(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value +/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the +/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminsh, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_min_round_sh(a: __m128h, b: __m128h) -> __m128h { + static_assert_sae!(SAE); + _mm_mask_min_round_sh::(_mm_undefined_ph(), 0xff, a, b) +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value +/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), +/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by +/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic +/// (IEEE 754) minimum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminsh, SAE = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_min_round_sh( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_sae!(SAE); + vminsh(a, b, src, k, SAE) + } +} + +/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value +/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and +/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by +/// passing _MM_FROUND_NO_EXC in the sae parameter. 
Does not follow the IEEE Standard for Floating-Point Arithmetic +/// (IEEE 754) minimum value when inputs are NaN or signed-zero values. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vminsh, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_min_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + static_assert_sae!(SAE); + _mm_mask_min_round_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst. +/// This intrinsic essentially calculates `floor(log2(x))` for each element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getexp_ph(a: __m128h) -> __m128h { + _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { vgetexpph_128(a, src, k) } +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask +/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { + _mm_mask_getexp_ph(_mm_setzero_ph(), k, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst. +/// This intrinsic essentially calculates `floor(log2(x))` for each element. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_getexp_ph(a: __m256h) -> __m256h { + _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { vgetexpph_256(a, src, k) } +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask +/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { + _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst. +/// This intrinsic essentially calculates `floor(log2(x))` for each element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_getexp_ph(a: __m512h) -> __m512h { + _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { + _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask +/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { + _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst. +/// This intrinsic essentially calculates `floor(log2(x))` for each element. 
Exceptions can be suppressed +/// by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_getexp_round_ph(a: __m512h) -> __m512h { + static_assert_sae!(SAE); + _mm512_mask_getexp_round_ph::(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k +/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_getexp_round_ph( + src: __m512h, + k: __mmask32, + a: __m512h, +) -> __m512h { + unsafe { + static_assert_sae!(SAE); + vgetexpph_512(a, src, k, SAE) + } +} + +/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision +/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask +/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates +/// `floor(log2(x))` for each element. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_getexp_round_ph(k: __mmask32, a: __m512h) -> __m512h { + static_assert_sae!(SAE); + _mm512_mask_getexp_round_ph::(_mm512_setzero_ph(), k, a) +} + +/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision +/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element +/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially +/// calculates `floor(log2(x))` for the lower element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision +/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element +/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 +/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` +/// for the lower element. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision +/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element +/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed +/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the +/// lower element. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpsh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision +/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element +/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially +/// calculates `floor(log2(x))` for the lower element. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getexp_round_sh(a: __m128h, b: __m128h) -> __m128h { + static_assert_sae!(SAE); + _mm_mask_getexp_round_sh::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision +/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element +/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 +/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` +/// for the lower element. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_getexp_round_sh( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_sae!(SAE); + vgetexpsh(a, b, src, k, SAE) + } +} + +/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision +/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element +/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed +/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the +/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_getexp_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + static_assert_sae!(SAE); + _mm_mask_getexp_round_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends +/// on the interval range defined by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getmant_ph( + a: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm_mask_getmant_ph::(_mm_undefined_ph(), 0xff, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_getmant_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m128h, + k: __mmask8, + a: __m128h, +) -> __m128h { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + vgetmantph_128(a, (SIGN << 2) | NORM, src, k) + } +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_getmant_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm_mask_getmant_ph::(_mm_setzero_ph(), k, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends +/// on the interval range defined by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_getmant_ph( + a: __m256h, +) -> __m256h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm256_mask_getmant_ph::(_mm256_undefined_ph(), 0xffff, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_getmant_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m256h, + k: __mmask16, + a: __m256h, +) -> __m256h { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + vgetmantph_256(a, (SIGN << 2) | NORM, src, k) + } +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_getmant_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask16, + a: __m256h, +) -> __m256h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm256_mask_getmant_ph::(_mm256_setzero_ph(), k, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends +/// on the interval range defined by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(1, 2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_getmant_ph( + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm512_mask_getmant_ph::(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_getmant_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m512h, + k: __mmask32, + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm512_mask_getmant_round_ph::(src, k, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_getmant_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask32, + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm512_mask_getmant_ph::(_mm512_setzero_ph(), k, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends +/// on the interval range defined by norm and the sign depends on sign and the source sign. 
Exceptions can +/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] +#[rustc_legacy_const_generics(1, 2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_getmant_round_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + _mm512_mask_getmant_round_ph::(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// in the sae parameter +/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4, 5)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_getmant_round_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + src: __m512h, + k: __mmask32, + a: __m512h, +) -> __m512h { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE) + } +} + +/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined +/// by norm and the sign depends on sign and the source sign. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// in the sae parameter +/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_getmant_round_ph< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + k: __mmask32, + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + _mm512_mask_getmant_round_ph::(_mm512_setzero_ph(), k, a) +} + +/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store +/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper +/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends +/// on the interval range defined by norm and the sign depends on sign and the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getmant_sh( + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm_mask_getmant_sh::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store +/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), +/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates +/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and +/// the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(4, 5)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_getmant_sh< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm_mask_getmant_round_sh::(src, k, a, b) +} + +/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store +/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), +/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates +/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and +/// the source sign. 
+/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] +#[rustc_legacy_const_generics(3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_getmant_sh< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, +>( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + _mm_mask_getmant_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store +/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper +/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends +/// on the interval range defined by norm and the sign depends on sign and the source sign. 
Exceptions can +/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getmant_round_sh< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + _mm_mask_getmant_round_sh::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store +/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), +/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates +/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and +/// the source sign. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5, 6)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_getmant_round_sh< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE) + } +} + +/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store +/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), +/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates +/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and +/// the source sign. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// The mantissa is normalized to the interval specified by interv, which can take the following values: +/// +/// _MM_MANT_NORM_1_2 // interval [1, 2) +/// _MM_MANT_NORM_p5_2 // interval [0.5, 2) +/// _MM_MANT_NORM_p5_1 // interval [0.5, 1) +/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) +/// +/// The sign is determined by sc which can take the following values: +/// +/// _MM_MANT_SIGN_src // sign = sign(src) +/// _MM_MANT_SIGN_zero // sign = 0 +/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4, 5)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_getmant_round_sh< + const NORM: _MM_MANTISSA_NORM_ENUM, + const SIGN: _MM_MANTISSA_SIGN_ENUM, + const SAE: i32, +>( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + _mm_mask_getmant_round_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_roundscale_ph(a: __m128h) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_roundscale_ph::(_mm_undefined_ph(), 0xff, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when +/// the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_roundscale_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vrndscaleph_128(a, IMM8, src, k) + } +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_roundscale_ph(k: __mmask8, a: __m128h) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_roundscale_ph::(_mm_setzero_ph(), k, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_roundscale_ph(a: __m256h) -> __m256h { + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_roundscale_ph::(_mm256_undefined_ph(), 0xffff, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when +/// the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_roundscale_ph( + src: __m256h, + k: __mmask16, + a: __m256h, +) -> __m256h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vrndscaleph_256(a, IMM8, src, k) + } +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_roundscale_ph(k: __mmask16, a: __m256h) -> __m256h { + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_roundscale_ph::(_mm256_setzero_ph(), k, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_roundscale_ph(a: __m512h) -> __m512h { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_roundscale_ph::(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when +/// the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_roundscale_ph( + src: __m512h, + k: __mmask32, + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_roundscale_round_ph::(src, k, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_roundscale_ph(k: __mmask32, a: __m512h) -> __m512h { + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_roundscale_ph::(_mm512_setzero_ph(), k, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst. 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// in the sae parameter +/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(1, 2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_roundscale_round_ph(a: __m512h) -> __m512h { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm512_mask_roundscale_round_ph::(_mm512_undefined_ph(), 0xffffffff, a) +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when +/// the corresponding mask bit is not set). 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC +/// in the sae parameter +/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_roundscale_round_ph( + src: __m512h, + k: __mmask32, + a: __m512h, +) -> __m512h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + vrndscaleph_512(a, IMM8, src, k, SAE) + } +} + +/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits +/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). 
Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_roundscale_round_ph( + k: __mmask32, + a: __m512h, +) -> __m512h { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm512_mask_roundscale_round_ph::(_mm512_setzero_ph(), k, a) +} + +/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits +/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements +/// from a to the upper elements of dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_roundscale_sh(a: __m128h, b: __m128h) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_roundscale_sh::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits +/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied +/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_roundscale_sh( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_roundscale_round_sh::(src, k, a, b) +} + +/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits +/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed +/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_roundscale_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_roundscale_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits +/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements +/// from a to the upper elements of dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_roundscale_round_sh(a: __m128h, b: __m128h) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm_mask_roundscale_round_sh::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits +/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied +/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(4, 5)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_roundscale_round_sh( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + vrndscalesh(a, b, src, k, IMM8, SAE) + } +} + +/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits +/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed +/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. 
+/// +/// Rounding is done according to the imm8 parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] : round down +/// * [`_MM_FROUND_TO_POS_INF`] : round up +/// * [`_MM_FROUND_TO_ZERO`] : truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] +#[rustc_legacy_const_generics(3, 4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_roundscale_round_sh( + k: __mmask8, + a: __m128h, + b: __m128h, +) -> __m128h { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + _mm_mask_roundscale_round_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { + _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { vscalefph_128(a, b, src, k) } +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { vscalefph_256(a, b, src, k) } +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vscalefph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_scalef_round_ph(a: __m512h, b: __m512h) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_scalef_round_ph::(_mm512_undefined_ph(), 0xffffffff, a, b) +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_scalef_round_ph( + src: __m512h, + k: __mmask32, + a: __m512h, + b: __m512h, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vscalefph_512(a, b, src, k, ROUNDING) + } +} + +/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store +/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_scalef_round_ph( + k: __mmask32, + a: __m512h, + b: __m512h, +) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_scalef_round_ph::(_mm512_setzero_ph(), k, a, b) +} + +/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store +/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper +/// elements of dst. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vscalefsh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        vreduceph_128(a, IMM8, src, k)
+    }
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        vreduceph_256(a, IMM8, src, k)
+    }
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+) -> __m512h {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        static_assert_sae!(SAE);
+        vreduceph_512(a, IMM8, src, k, SAE)
+    }
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
+    k: __mmask32,
+    a: __m512h,
+) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
+/// upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
+/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
+/// a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_reduce_sh<const IMM8: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
+/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
+/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        static_assert_sae!(SAE);
+        vreducesh(a, b, src, k, IMM8, SAE)
+    }
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
+/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
+/// * [`_MM_FROUND_TO_NEG_INF`] : round down
+/// * [`_MM_FROUND_TO_POS_INF`] : round up
+/// * [`_MM_FROUND_TO_ZERO`] : truncate
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
+/// sum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
+    unsafe {
+        let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+        let a = _mm_add_ph(a, b);
+        let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+        let a = _mm_add_ph(a, b);
+        simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1)
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
+/// sum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
+    unsafe {
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        _mm_reduce_add_ph(_mm_add_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
+/// sum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
+    unsafe {
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        let q = simd_shuffle!(
+            a,
+            a,
+            [
+                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+            ]
+        );
+        _mm256_reduce_add_ph(_mm256_add_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
+/// the product of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
+    unsafe {
+        let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+        let a = _mm_mul_ph(a, b);
+        let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+        let a = _mm_mul_ph(a, b);
+        simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1)
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
+/// the product of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
+    unsafe {
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        _mm_reduce_mul_ph(_mm_mul_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
+/// the product of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
+    unsafe {
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        let q = simd_shuffle!(
+            a,
+            a,
+            [
+                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+            ]
+        );
+        _mm256_reduce_mul_ph(_mm256_mul_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
+/// minimum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_min_ph(a: __m128h) -> f16 {
+    unsafe {
+        let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+        let a = _mm_min_ph(a, b);
+        let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+        let a = _mm_min_ph(a, b);
+        let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
+        simd_extract!(_mm_min_sh(a, b), 0)
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
+/// minimum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
+    unsafe {
+        // Split into 128-bit halves, combine them, then reduce within 128 bits.
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        _mm_reduce_min_ph(_mm_min_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
+/// minimum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
+    unsafe {
+        // Split into 256-bit halves, combine them, then reduce within 256 bits.
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        let q = simd_shuffle!(
+            a,
+            a,
+            [
+                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+            ]
+        );
+        _mm256_reduce_min_ph(_mm256_min_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
+/// maximum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_reduce_max_ph(a: __m128h) -> f16 {
+    unsafe {
+        // Tree reduction: fold 8 lanes -> 4 -> 2, then take the max of the final pair.
+        let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+        let a = _mm_max_ph(a, b);
+        let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+        let a = _mm_max_ph(a, b);
+        let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
+        simd_extract!(_mm_max_sh(a, b), 0)
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
+/// maximum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
+    unsafe {
+        // Split into 128-bit halves, combine them, then reduce within 128 bits.
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        _mm_reduce_max_ph(_mm_max_ph(p, q))
+    }
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
+/// maximum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
+    unsafe {
+        // Split into 256-bit halves, combine them, then reduce within 256 bits.
+        let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        let q = simd_shuffle!(
+            a,
+            a,
+            [
+                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+            ]
+        );
+        _mm256_reduce_max_ph(_mm256_max_ph(p, q))
+    }
+}
+
+// Emits `vfpclassph` via inline asm; `IMM8` is taken from the enclosing
+// const-generic context of the caller.
+macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
+    // Unmasked form: classify all lanes of the source register.
+    ($mask_type: ty, $reg: ident, $a: expr) => {{
+        let dst: $mask_type;
+        asm!(
+            "vfpclassph {k}, {src}, {imm8}",
+            k = lateout(kreg) dst,
+            src = in($reg) $a,
+            imm8 = const IMM8,
+            options(pure, nomem, nostack)
+        );
+        dst
+    }};
+    // Masked form: result bits are zeroed where the mask bit is clear.
+    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
+        let dst: $mask_type;
+        asm!(
+            "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
+            k = lateout(kreg) dst,
+            mask = in(kreg) $mask,
+            src = in($reg) $a,
+            imm8 = const IMM8,
+            options(pure, nomem, nostack)
+        );
+        dst
+    }};
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k.
+/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fpclass_ph_mask(a: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask8, xmm_reg, a) + } +} + +/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fpclass_ph_mask(k1: __mmask8, a: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask8, k1, xmm_reg, a) + } +} + +/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_fpclass_ph_mask(a: __m256h) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask16, ymm_reg, a) + } +} + +/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_fpclass_ph_mask(k1: __mmask16, a: __m256h) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask16, k1, ymm_reg, a) + } +} + +/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k. 
+/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_fpclass_ph_mask(a: __m512h) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask32, zmm_reg, a) + } +} + +/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified +/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_fpclass_ph_mask(k1: __mmask32, a: __m512h) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask32, k1, zmm_reg, a) + } +} + +/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified +/// by imm8, and store the result in mask vector k. 
+/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_fpclass_sh_mask(a: __m128h) -> __mmask8 { + _mm_mask_fpclass_sh_mask::(0xff, a) +} + +/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified +/// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// imm can be a combination of: +/// +/// 0x01 // QNaN +/// 0x02 // Positive Zero +/// 0x04 // Negative Zero +/// 0x08 // Positive Infinity +/// 0x10 // Negative Infinity +/// 0x20 // Denormal +/// 0x40 // Negative +/// 0x80 // SNaN +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_fpclass_sh_mask(k1: __mmask8, a: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vfpclasssh(a, IMM8, k1) + } +} + +/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, +/// and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, b, a) } +} + +/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, b, a) } +} + +/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, b, a) } +} + +/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector +/// and index in idx, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h { + _mm_castsi128_ph(_mm_permutex2var_epi16( + _mm_castph_si128(a), + idx, + _mm_castph_si128(b), + )) +} + +/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector +/// and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h { + _mm256_castsi256_ph(_mm256_permutex2var_epi16( + _mm256_castph_si256(a), + idx, + _mm256_castph_si256(b), + )) +} + +/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector +/// and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h { + _mm512_castsi512_ph(_mm512_permutex2var_epi16( + _mm512_castph_si512(a), + idx, + _mm512_castph_si512(b), + )) +} + +/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, +/// and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { + _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a))) +} + +/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { + _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a))) +} + +/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { + _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a))) +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { + unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { + _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { + unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { + _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a) +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { + unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { + _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a) +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvt_roundepi16_ph(a: __m512i) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtw2ph_512(a.as_i16x32(), ROUNDING) + } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvt_roundepi16_ph( + src: __m512h, + k: __mmask32, + a: __m512i, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::(a), src) + } +} + +/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvt_roundepi16_ph(k: __mmask32, a: __m512i) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundepi16_ph::(_mm512_setzero_ph(), k, a) +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { + unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { + _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { + unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { + _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a) +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { + unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuw2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { + _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a) +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvt_roundepu16_ph(a: __m512i) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtuw2ph_512(a.as_u16x32(), ROUNDING) + } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvt_roundepu16_ph( + src: __m512h, + k: __mmask32, + a: __m512i, +) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::(a), src) + } +} + +/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvt_roundepu16_ph(k: __mmask32, a: __m512i) -> __m512h { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundepu16_ph::(_mm512_setzero_ph(), k, a) +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { + _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a) +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). The upper 64 bits of dst are zeroed out. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) } +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { + _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { + unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) } +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { + _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { + unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) } +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtdq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { + _mm512_mask_cvtepi32_ph(f16x16::ZERO.as_m256h(), k, a) +} + +/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
+    }
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
+    src: __m256h,
+    k: __mmask16,
+    a: __m512i,
+) -> __m256h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
+    }
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
+}
+
+/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsi2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
+    unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
+}
+
+/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtsi2sh(a, b, ROUNDING)
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
+    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 64 bits of dst are zeroed out.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) } +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { + _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { + unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) } +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { + _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { + unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) } +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtudq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { + _mm512_mask_cvtepu32_ph(f16x16::ZERO.as_m256h(), k, a) +} + +/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
+    src: __m256h,
+    k: __mmask16,
+    a: __m512i,
+) -> __m256h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
+    }
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
+}
+
+/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtusi2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
+    unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
+}
+
+/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtusi2sh(a, b, ROUNDING)
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
+    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 96 bits of dst are zeroed out.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) } +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// The upper 96 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { + _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { + _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a) +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). The upper 64 bits of dst are zeroed out. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) } +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { + _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h { + unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) } +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { + _mm512_mask_cvtepi64_ph(f16x8::ZERO.as_m128h(), k, a) +} + +/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m512i,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
+    }
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
+    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 96 bits of dst are zeroed out.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) } +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// The upper 96 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { + _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { + _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a) +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) } +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { + _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { + unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) } +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { + _mm512_mask_cvtepu64_ph(f16x8::ZERO.as_m128h(), k, a) +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvt_roundepu64_ph(a: __m512i) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtuqq2ph_512(a.as_u64x8(), ROUNDING) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding +/// mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvt_roundepu64_ph( + src: __m128h, + k: __mmask8, + a: __m512i, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::(a), src) + } +} + +/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
+    _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h { + unsafe { vcvtps2phx_128(a, src, k) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { + _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h { + _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst +/// when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h { + unsafe { vcvtps2phx_256(a, src, k) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { + _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h { + _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), 0xffff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst +/// when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h { + unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtps2phx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { + _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), k, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtx_roundps_ph(a: __m512) -> __m256h { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvtx_roundps_ph::(f16x16::ZERO.as_m256h(), 0xffff, a) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst +/// when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtx_roundps_ph( + src: __m256h, + k: __mmask16, + a: __m512, +) -> __m256h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtps2phx_512(a, src, k, ROUNDING) + } +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtss2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { + _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst using writemask k (the element +/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtss2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h { + unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper +/// elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtss2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h { + _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed +/// elements from a to the upper elements of dst. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundss_sh(a: __m128h, b: __m128) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_cvt_roundss_sh::(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst using writemask k (the element +/// if copied from src when mask bit 0 is not 
set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvt_roundss_sh( + src: __m128h, + k: __mmask8, + a: __m128h, + b: __m128, +) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtss2sh(a, b, src, k, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper +/// elements of dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvt_roundss_sh( + k: __mmask8, + a: __m128h, + b: __m128, +) -> __m128h { + static_assert_rounding!(ROUNDING); + _mm_mask_cvt_roundss_sh::(f16x8::ZERO.as_m128h(), k, a, b) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h { + _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst +/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h { + unsafe { vcvtpd2ph_128(a, src, k) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { + _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { + _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst +/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h { + unsafe { vcvtpd2ph_256(a, src, k) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { + _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { + _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), 0xff, a) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst +/// when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtpd2ph))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h { + unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
+    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), k, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m512d,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtpd2ph_512(a, src, k, ROUNDING)
+    }
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsd2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { + _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) +} + +/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst using writemask k (the element +/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the +/// upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsd2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h { + unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) +/// floating-point elements, store the result in the lower element of dst using zeromask k (the element +/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper +/// elements of dst. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
+    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point elements, store the result in the lower element of dst using writemask k (the element
+/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128d,
+) -> __m128h {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtsd2sh(a, b, src, k, ROUNDING)
+    }
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128d,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { + _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { + _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { + _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvtph2w_512( + a, + src.as_i16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { + _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and +/// store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512h,
+) -> __m512i {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i { + _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { + _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { + _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvtph2uw_512( + a, + src.as_u16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uw))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { + _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, +/// and store the results in dst. +/// +/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512h,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
+    _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { + _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { + _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { + _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding +/// mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvttph2w_512( + a, + src.as_i16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding +/// mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2w))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { + _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with +/// truncation, and store the results in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512h,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
+    _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+    unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
+    _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
+    _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+    unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
+    _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
+    _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+    unsafe {
+        transmute(vcvttph2uw_512(
+            a,
+            src.as_u16x32(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
+    _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512h,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+    unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
+    _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+    unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
+    _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
+    _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
+    unsafe {
+        transmute(vcvtph2dq_512(
+            a,
+            src.as_i32x16(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
+    _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2si))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
+    unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
+/// the result in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        vcvtsh2si32(a, ROUNDING)
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+    unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
+    _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+    unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
+    _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
+    _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
+    unsafe {
+        transmute(vcvtph2udq_512(
+            a,
+            src.as_u32x16(),
+            k,
+            _MM_FROUND_CUR_DIRECTION,
+        ))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
+    _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2usi))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
+    unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
+/// the result in dst.
+///
+/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
+    unsafe {
+        // NOTE(review): the const parameter is named SAE but is validated with
+        // static_assert_rounding! and forwarded as a rounding mode — confirm
+        // against upstream stdarch before renaming.
+        static_assert_rounding!(SAE);
+        vcvtsh2usi32(a, SAE)
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
+    _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+    unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2dq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2dq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { + _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2dq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2dq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { + _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2dq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { + _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2dq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { + unsafe { + transmute(vcvttph2dq_512( + a, + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
+    _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2si))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
+    unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
+/// the result in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
+    unsafe {
+        static_assert_sae!(SAE);
+        vcvttsh2si32(a, SAE)
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
+    _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { + _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { + _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { + _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { + unsafe { + transmute(vcvttph2udq_512( + a, + src.as_u32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2udq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { + _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    unsafe {
+        static_assert_sae!(SAE);
+        transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2usi))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
+    unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
+/// the result in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
+    unsafe {
+        static_assert_sae!(SAE);
+        vcvttsh2usi32(a, SAE)
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i { + _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { + _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { + _mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { + _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvtph2qq_512( + a, + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { + _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and +/// store the results in dst. 
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m128h,
+) -> __m512i {
+    unsafe {
+        static_assert_rounding!(ROUNDING);
+        transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
+    }
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
+/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
+/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
+/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
+/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
+    _mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { + _mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { + _mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { + _mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvtph2uqq_512( + a, + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { + _mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvt_roundph_epu64(a: __m128h) -> __m512i { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundph_epu64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvt_roundph_epu64( + src: __m512i, + k: __mmask8, + a: __m128h, +) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING)) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvt_roundph_epu64(k: __mmask8, a: __m128h) -> __m512i { + static_assert_rounding!(ROUNDING); + _mm512_mask_cvt_roundph_epu64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i { + _mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { + _mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { + _mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { + _mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvttph2qq_512( + a, + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2qq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { + _mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtt_roundph_epi64(a: __m128h) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundph_epi64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtt_roundph_epi64( + src: __m512i, + k: __mmask8, + a: __m128h, +) -> __m512i { + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE)) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtt_roundph_epi64(k: __mmask8, a: __m128h) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundph_epi64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i { + _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i { + _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { + _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { + _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { + _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvttph2uqq_512( + a, + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2uqq))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { + _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtt_roundph_epu64(a: __m128h) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundph_epu64::(_mm512_undefined_epi32(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtt_roundph_epu64( + src: __m512i, + k: __mmask8, + a: __m128h, +) -> __m512i { + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE)) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and +/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtt_roundph_epu64(k: __mmask8, a: __m128h) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvtt_roundph_epu64::(_mm512_setzero_si512(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 { + _mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 { + unsafe { vcvtph2psx_128(a, src, k) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { + _mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { + _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 { + unsafe { vcvtph2psx_256(a, src, k) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { + _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { + _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 { + unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2psx))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { + _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtx_roundph_ps(a: __m256h) -> __m512 { + static_assert_sae!(SAE); + _mm512_mask_cvtx_roundph_ps::(_mm512_setzero_ps(), 0xffff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtx_roundph_ps( + src: __m512, + k: __mmask16, + a: __m256h, +) -> __m512 { + unsafe { + static_assert_sae!(SAE); + vcvtph2psx_512(a, src, k, SAE) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtx_roundph_ps(k: __mmask16, a: __m256h) -> __m512 { + static_assert_sae!(SAE); + _mm512_mask_cvtx_roundph_ps::(_mm512_setzero_ps(), k, a) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) +/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed +/// elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2ss))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { + _mm_mask_cvtsh_ss(a, 0xff, a, b) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) +/// floating-point element, store the result in the lower element of dst using writemask k (the element is +/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the +/// upper elements of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2ss))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 { + unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) +/// floating-point element, store the result in the lower element of dst using zeromask k (the element is +/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements +/// of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2ss))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { + _mm_mask_cvtsh_ss(_mm_set_ss(0.0), k, a, b) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) +/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements +/// from a to the upper elements of dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundsh_ss(a: __m128, b: __m128h) -> __m128 { + static_assert_sae!(SAE); + _mm_mask_cvt_roundsh_ss::(_mm_undefined_ps(), 0xff, a, b) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) +/// floating-point element, store the result in the lower element of dst using writemask k (the element is +/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the +/// upper elements of dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvt_roundsh_ss( + src: __m128, + k: __mmask8, + a: __m128, + b: __m128h, +) -> __m128 { + unsafe { + static_assert_sae!(SAE); + vcvtsh2ss(a, b, src, k, SAE) + } +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) +/// floating-point element, store the result in the lower element of dst using zeromask k (the element is +/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements +/// of dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvt_roundsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { + static_assert_sae!(SAE); + _mm_mask_cvt_roundsh_ss::(_mm_set_ss(0.0), k, a, b) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtph_pd(a: __m128h) -> __m128d { + _mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d { + unsafe { vcvtph2pd_128(a, src, k) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { + _mm_mask_cvtph_pd(_mm_setzero_pd(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d { + _mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d { + unsafe { vcvtph2pd_256(a, src, k) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { + _mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d { + _mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d { + unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2pd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { + _mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvt_roundph_pd(a: __m128h) -> __m512d { + static_assert_sae!(SAE); + _mm512_mask_cvt_roundph_pd::(_mm512_setzero_pd(), 0xff, a) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to +/// dst when the corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_mask_cvt_roundph_pd( + src: __m512d, + k: __mmask8, + a: __m128h, +) -> __m512d { + unsafe { + static_assert_sae!(SAE); + vcvtph2pd_512(a, src, k, SAE) + } +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) +/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the +/// corresponding mask bit is not set). +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_maskz_cvt_roundph_pd(k: __mmask8, a: __m128h) -> __m512d { + static_assert_sae!(SAE); + _mm512_mask_cvt_roundph_pd::(_mm512_setzero_pd(), k, a) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) +/// floating-point element, store the result in the lower element of dst, and copy the upper element +/// from a to the upper element of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2sd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { + _mm_mask_cvtsh_sd(a, 0xff, a, b) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) +/// floating-point element, store the result in the lower element of dst using writemask k (the element is +/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element +/// of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2sd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d { + unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) +/// floating-point element, store the result in the lower element of dst using zeromask k (the element is +/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2sd))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { + _mm_mask_cvtsh_sd(_mm_set_sd(0.0), k, a, b) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) +/// floating-point element, store the result in the lower element of dst, and copy the upper element from a +/// to the upper element of dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundsh_sd(a: __m128d, b: __m128h) -> __m128d { + static_assert_sae!(SAE); + _mm_mask_cvt_roundsh_sd::(a, 0xff, a, b) +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) +/// floating-point element, store the result in the lower element of dst using writemask k (the element is +/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element +/// of dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_mask_cvt_roundsh_sd( + src: __m128d, + k: __mmask8, + a: __m128d, + b: __m128h, +) -> __m128d { + unsafe { + static_assert_sae!(SAE); + vcvtsh2sd(a, b, src, k, SAE) + } +} + +/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) +/// floating-point element, store the result in the lower element of dst using zeromask k (the element is +/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_maskz_cvt_roundsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { + static_assert_sae!(SAE); + _mm_mask_cvt_roundsh_sd::(_mm_set_sd(0.0), k, a, b) +} + +/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsh_h(a: __m128h) -> f16 { + unsafe { simd_extract!(a, 0) } +} + +/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm256_cvtsh_h(a: __m256h) -> f16 { + unsafe { simd_extract!(a, 0) } +} + +/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm512_cvtsh_h(a: __m512h) -> f16 { + unsafe { simd_extract!(a, 0) } +} + +/// Copy the lower 16-bit integer in a to dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 { + unsafe { simd_extract!(a.as_i16x8(), 0) } +} + +/// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsi16_si128(a: i16) -> __m128i { + unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"] + fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8; + #[link_name = "llvm.x86.avx512fp16.vcomi.sh"] + fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32; + + #[link_name = "llvm.x86.avx512fp16.add.ph.512"] + fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.sub.ph.512"] + fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mul.ph.512"] + fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.div.ph.512"] + fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; + + #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"] + fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"] + fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"] + fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + #[link_name = 
"llvm.x86.avx512fp16.mask.div.sh.round"] + fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"] + fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"] + fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"] + fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512; + #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"] + fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128; + + #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"] + fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"] + fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"] + fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512; + #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"] + fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128; + + #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"] + fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"] + fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"] + fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"] + fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"] + fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; + 
#[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"] + fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; + #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"] + fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; + #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"] + fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; + + #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"] + fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"] + fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"] + fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"] + fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"] + fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) + -> __m512; + #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"] + fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) + -> __m512; + #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"] + fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; + #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"] + fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; + + #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"] + fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"] + fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16; + + #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"] + fn vfmaddsubph_128(a: __m128h, b: 
__m128h, c: __m128h) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"] + fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"] + fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; + + #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"] + fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"] + fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"] + fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"] + fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"] + fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"] + fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"] + fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"] + fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"] + fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"] + fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.max.ph.128"] + fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.max.ph.256"] + fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.max.ph.512"] + fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"] + fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, 
sae: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.min.ph.128"] + fn vminph_128(a: __m128h, b: __m128h) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.min.ph.256"] + fn vminph_256(a: __m256h, b: __m256h) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.min.ph.512"] + fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"] + fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"] + fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"] + fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"] + fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"] + fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"] + fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"] + fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"] + fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"] + fn vgetmantsh( + a: __m128h, + b: __m128h, + imm8: i32, + src: __m128h, + k: __mmask8, + sae: i32, + ) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"] + fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"] + fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"] + fn 
vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"] + fn vrndscalesh( + a: __m128h, + b: __m128h, + src: __m128h, + k: __mmask8, + imm8: i32, + sae: i32, + ) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"] + fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"] + fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"] + fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"] + fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"] + fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"] + fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"] + fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; + #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"] + fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32) + -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"] + fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8; + + #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"] + fn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"] + fn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h; + #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"] + fn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h; + #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i16"] + fn vcvtuw2ph_128(a: u16x8, 
rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i16"] + fn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h; + #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32i16"] + fn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"] + fn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"] + fn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"] + fn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"] + fn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"] + fn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i32"] + fn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i32"] + fn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"] + fn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"] + fn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"] + fn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"] + fn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"] + fn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"] + fn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i64"] + fn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h; + + #[link_name = 
"llvm.x86.avx512fp16.mask.vcvtps2phx.128"] + fn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"] + fn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"] + fn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"] + fn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"] + fn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"] + fn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"] + fn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"] + fn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"] + fn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"] + fn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"] + fn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"] + fn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"] + fn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"] + fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"] + fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; + 
#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"] + fn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"] + fn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"] + fn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"] + fn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"] + fn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"] + fn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"] + fn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"] + fn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16; + #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"] + fn vcvtsh2si32(a: __m128h, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"] + fn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"] + fn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"] + fn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16; + #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"] + fn vcvtsh2usi32(a: __m128h, sae: i32) -> u32; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"] + fn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"] + fn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"] + fn vcvttph2dq_512(a: __m256h, src: 
i32x16, k: __mmask16, sae: i32) -> i32x16; + #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"] + fn vcvttsh2si32(a: __m128h, sae: i32) -> i32; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"] + fn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"] + fn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"] + fn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16; + #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"] + fn vcvttsh2usi32(a: __m128h, sae: i32) -> u32; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"] + fn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"] + fn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"] + fn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"] + fn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"] + fn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"] + fn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"] + fn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"] + fn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"] + fn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"] + fn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; + #[link_name = 
"llvm.x86.avx512fp16.mask.vcvttph2uqq.256"] + fn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; + #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"] + fn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"] + fn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"] + fn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"] + fn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"] + fn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128; + + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"] + fn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"] + fn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"] + fn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d; + #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"] + fn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d; + +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + use crate::mem::transmute; + use crate::ptr::{addr_of, addr_of_mut}; + use stdarch_test::simd_test; + + #[target_feature(enable = "avx512fp16")] + unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h { + _mm_setr_ph(re, im, re, im, re, im, re, im) + } + + #[target_feature(enable = "avx512fp16")] + unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h { + _mm256_setr_ph( + re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, + ) + } + + #[target_feature(enable = "avx512fp16")] + unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h { + _mm512_setr_ph( + re, im, re, im, re, im, 
re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, + re, im, re, im, re, im, re, im, re, im, + ) + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_set_ph() { + let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_set_ph() { + let r = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let e = _mm256_setr_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_set_ph() { + let r = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let e = _mm512_setr_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_set_sh() { + let r = _mm_set_sh(1.0); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_set1_ph() { + let r = _mm_set1_ph(1.0); + let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_set1_ph() { + let r = _mm256_set1_ph(1.0); + let e = _mm256_set_ph( + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_set1_ph() { + let r = 
_mm512_set1_ph(1.0); + let e = _mm512_set_ph( + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_setr_ph() { + let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_setr_ph() { + let r = _mm256_setr_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let e = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_setr_ph() { + let r = _mm512_setr_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let e = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_setzero_ph() { + let r = _mm_setzero_ph(); + let e = _mm_set1_ph(0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_setzero_ph() { + let r = _mm256_setzero_ph(); + let e = _mm256_set1_ph(0.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_setzero_ph() { + let r = _mm512_setzero_ph(); + let e = _mm512_set1_ph(0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_castsi128_ph() { + let a = 
_mm_set1_epi16(0x3c00); + let r = _mm_castsi128_ph(a); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_castsi256_ph() { + let a = _mm256_set1_epi16(0x3c00); + let r = _mm256_castsi256_ph(a); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_castsi512_ph() { + let a = _mm512_set1_epi16(0x3c00); + let r = _mm512_castsi512_ph(a); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_castph_si128() { + let a = _mm_set1_ph(1.0); + let r = _mm_castph_si128(a); + let e = _mm_set1_epi16(0x3c00); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm256_castph_si256() { + let a = _mm256_set1_ph(1.0); + let r = _mm256_castph_si256(a); + let e = _mm256_set1_epi16(0x3c00); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_castph_si512() { + let a = _mm512_set1_ph(1.0); + let r = _mm512_castph_si512(a); + let e = _mm512_set1_epi16(0x3c00); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_castps_ph() { + let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00)); + let r = _mm_castps_ph(a); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_castps_ph() { + let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00)); + let r = _mm256_castps_ph(a); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_castps_ph() { + let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00)); + let r = _mm512_castps_ph(a); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_castph_ps() { + let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000)); + let r = 
_mm_castph_ps(a); + let e = _mm_set1_ps(1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm256_castph_ps() { + let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000)); + let r = _mm256_castph_ps(a); + let e = _mm256_set1_ps(1.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_castph_ps() { + let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000)); + let r = _mm512_castph_ps(a); + let e = _mm512_set1_ps(1.0); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_castpd_ph() { + let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00)); + let r = _mm_castpd_ph(a); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_castpd_ph() { + let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00)); + let r = _mm256_castpd_ph(a); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_castpd_ph() { + let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00)); + let r = _mm512_castpd_ph(a); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_castph_pd() { + let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000)); + let r = _mm_castph_pd(a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm256_castph_pd() { + let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000)); + let r = _mm256_castph_pd(a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_castph_pd() { + let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000)); + let r = _mm512_castph_pd(a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn 
test_mm256_castph256_ph128() { + let a = _mm256_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm256_castph256_ph128(a); + let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm512_castph512_ph128() { + let a = _mm512_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_castph512_ph128(a); + let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm512_castph512_ph256() { + let a = _mm512_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., + ); + let r = _mm512_castph512_ph256(a); + let e = _mm256_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_castph128_ph256() { + let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_castph128_ph256(a); + assert_eq_m128h(_mm256_castph256_ph128(r), a); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm512_castph128_ph512() { + let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_castph128_ph512(a); + assert_eq_m128h(_mm512_castph512_ph128(r), a); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm512_castph256_ph512() { + let a = _mm256_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_castph256_ph512(a); + assert_eq_m256h(_mm512_castph512_ph256(r), a); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_zextph128_ph256() { + let a = _mm_setr_ph(1., 2., 3., 4., 5., 
6., 7., 8.); + let r = _mm256_zextph128_ph256(a); + let e = _mm256_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_zextph128_ph512() { + let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_zextph128_ph512(a); + let e = _mm512_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_zextph256_ph512() { + let a = _mm256_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + let r = _mm512_zextph256_ph512(a); + let e = _mm512_setr_ph( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cmp_ph_mask() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); + let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); + assert_eq!(r, 0b11110000); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cmp_ph_mask() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); + let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b); + assert_eq!(r, 0b01010000); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cmp_ph_mask() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, + -16.0, + ); + let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); + assert_eq!(r, 0b1111000011110000); + 
} + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cmp_ph_mask() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, + -16.0, + ); + let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b); + assert_eq!(r, 0b0101000001010000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cmp_ph_mask() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, + -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, + -29.0, -30.0, -31.0, -32.0, + ); + let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); + assert_eq!(r, 0b11110000111100001111000011110000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cmp_ph_mask() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, + -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, + -29.0, -30.0, -31.0, -32.0, + ); + let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b); + assert_eq!(r, 0b01010000010100000101000001010000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cmp_round_ph_mask() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 
19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, + -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, + -29.0, -30.0, -31.0, -32.0, + ); + let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); + assert_eq!(r, 0b11110000111100001111000011110000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cmp_round_ph_mask() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, + -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, + -29.0, -30.0, -31.0, -32.0, + ); + let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + b, + ); + assert_eq!(r, 0b01010000010100000101000001010000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cmp_round_sh_mask() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cmp_round_sh_mask() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cmp_sh_mask() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cmp_sh_mask() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); 
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b); + assert_eq!(r, 0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comi_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comi_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comieq_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_comieq_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comige_sh() { + let a = _mm_set_sh(2.0); + let b = _mm_set_sh(1.0); + let r = _mm_comige_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comigt_sh() { + let a = _mm_set_sh(2.0); + let b = _mm_set_sh(1.0); + let r = _mm_comigt_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comile_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_comile_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comilt_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_comilt_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_comineq_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_comineq_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_ucomieq_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(1.0); + let r = _mm_ucomieq_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_ucomige_sh() { + let a = _mm_set_sh(2.0); + let b = _mm_set_sh(1.0); + let r = _mm_ucomige_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = 
"avx512fp16")] + unsafe fn test_mm_ucomigt_sh() { + let a = _mm_set_sh(2.0); + let b = _mm_set_sh(1.0); + let r = _mm_ucomigt_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_ucomile_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_ucomile_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_ucomilt_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_ucomilt_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_ucomineq_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_ucomineq_sh(a, b); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_load_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_load_ph(addr_of!(a).cast()); + assert_eq_m128h(a, b); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_load_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_load_ph(addr_of!(a).cast()); + assert_eq_m256h(a, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_load_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_load_ph(addr_of!(a).cast()); + assert_eq_m512h(a, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_load_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_load_sh(addr_of!(a).cast()); + assert_eq_m128h(a, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_load_sh() { + let a = _mm_set_sh(1.0); + let src = _mm_set_sh(2.); + let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast()); + assert_eq_m128h(a, b); + let b = _mm_mask_load_sh(src, 0, 
addr_of!(a).cast()); + assert_eq_m128h(src, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_load_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_maskz_load_sh(1, addr_of!(a).cast()); + assert_eq_m128h(a, b); + let b = _mm_maskz_load_sh(0, addr_of!(a).cast()); + assert_eq_m128h(_mm_setzero_ph(), b); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_loadu_ph() { + let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let r = _mm_loadu_ph(array.as_ptr()); + let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_loadu_ph() { + let array = [ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ]; + let r = _mm256_loadu_ph(array.as_ptr()); + let e = _mm256_setr_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_loadu_ph() { + let array = [ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ]; + let r = _mm512_loadu_ph(array.as_ptr()); + let e = _mm512_setr_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_move_sh() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_sh(9.0); + let r = _mm_move_sh(a, b); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_move_sh() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = 
_mm_set_sh(9.0); + let src = _mm_set_sh(10.0); + let r = _mm_mask_move_sh(src, 0, a, b); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_move_sh() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_sh(9.0); + let r = _mm_maskz_move_sh(0, a, b); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_store_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let mut b = _mm_setzero_ph(); + _mm_store_ph(addr_of_mut!(b).cast(), a); + assert_eq_m128h(a, b); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_store_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let mut b = _mm256_setzero_ph(); + _mm256_store_ph(addr_of_mut!(b).cast(), a); + assert_eq_m256h(a, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_store_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let mut b = _mm512_setzero_ph(); + _mm512_store_ph(addr_of_mut!(b).cast(), a); + assert_eq_m512h(a, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_store_sh() { + let a = _mm_set_sh(1.0); + let mut b = _mm_setzero_ph(); + _mm_store_sh(addr_of_mut!(b).cast(), a); + assert_eq_m128h(a, b); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_store_sh() { + let a = _mm_set_sh(1.0); + let mut b = _mm_setzero_ph(); + _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a); + assert_eq_m128h(_mm_setzero_ph(), b); + _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a); + assert_eq_m128h(a, b); + } + + #[simd_test(enable = 
"avx512fp16,avx512vl")] + unsafe fn test_mm_storeu_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let mut array = [0.0; 8]; + _mm_storeu_ph(array.as_mut_ptr(), a); + assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr())); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_storeu_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let mut array = [0.0; 16]; + _mm256_storeu_ph(array.as_mut_ptr(), a); + assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr())); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_storeu_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let mut array = [0.0; 32]; + _mm512_storeu_ph(array.as_mut_ptr(), a); + assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr())); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_add_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let r = _mm_add_ph(a, b); + let e = _mm_set1_ph(9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_add_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_add_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_add_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let r = _mm_maskz_add_ph(0b01010101, a, b); + let e = _mm_set_ph(0., 9., 0., 9., 
0., 9., 0., 9.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_add_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let r = _mm256_add_ph(a, b); + let e = _mm256_set1_ph(17.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_add_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let src = _mm256_set_ph( + 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., + ); + let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_add_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let r = _mm256_maskz_add_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_add_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 
22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_add_ph(a, b); + let e = _mm512_set1_ph(33.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_add_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let src = _mm512_set_ph( + 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., + 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., + ); + let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., + 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_add_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., + 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 
33., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_add_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ph(33.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_add_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let src = _mm512_set_ph( + 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., + 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., + ); + let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., + 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_add_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., + 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_add_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_sh(3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_add_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_set_sh(3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_add_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = + _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_set_sh(3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_add_sh() { + let a = _mm_set_sh(1.0); + let b = 
_mm_set_sh(2.0); + let r = _mm_add_sh(a, b); + let e = _mm_set_sh(3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_add_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_add_sh(src, 0, a, b); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = _mm_mask_add_sh(src, 1, a, b); + let e = _mm_set_sh(3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_add_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_maskz_add_sh(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_add_sh(1, a, b); + let e = _mm_set_sh(3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_sub_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let r = _mm_sub_ph(a, b); + let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_sub_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_sub_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_sub_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let r = _mm_maskz_sub_ph(0b01010101, a, b); + let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_sub_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 
10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let r = _mm256_sub_ph(a, b); + let e = _mm256_set_ph( + -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, + 15.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_sub_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let src = _mm256_set_ph( + 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., + ); + let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_sub_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_sub_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 
4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_sub_ph(a, b); + let e = _mm512_set_ph( + -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, + -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, + 23.0, 25.0, 27.0, 29.0, 31.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_sub_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let src = _mm512_set_ph( + 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., + 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., + ); + let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., + 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_sub_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0., -29., 0., -25., 0., -21., 0., -17., 0., 
-13., 0., -9., 0., -5., 0., -1., 0., 3., + 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_sub_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set_ph( + -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, + -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, + 23.0, 25.0, 27.0, 29.0, 31.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_sub_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let src = _mm512_set_ph( + 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., + 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., + ); + let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., + 50., 
3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_sub_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., + 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_sub_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_sh(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_sub_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_set_sh(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_sub_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = + _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = 
_mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_set_sh(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_sub_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_sub_sh(a, b); + let e = _mm_set_sh(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_sub_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_sub_sh(src, 0, a, b); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = _mm_mask_sub_sh(src, 1, a, b); + let e = _mm_set_sh(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_sub_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_maskz_sub_sh(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_sub_sh(1, a, b); + let e = _mm_set_sh(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mul_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let r = _mm_mul_ph(a, b); + let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_mul_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_mul_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_mul_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(8.0, 7.0, 6.0, 
5.0, 4.0, 3.0, 2.0, 1.0); + let r = _mm_maskz_mul_ph(0b01010101, a, b); + let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mul_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let r = _mm256_mul_ph(a, b); + let e = _mm256_set_ph( + 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0, + 30.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_mul_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let src = _mm256_set_ph( + 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., + ); + let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_mul_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, + ); + let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mul_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 
14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_mul_ph(a, b); + let e = _mm512_set_ph( + 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, + 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, + 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_mul_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let src = _mm512_set_ph( + 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., + 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., + ); + let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., + 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_mul_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = 
_mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., + 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mul_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set_ph( + 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, + 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, + 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_mul_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let src = _mm512_set_ph( + 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 
45., 46., 47., 48., 49., 50., + 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., + ); + let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., + 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_mul_round_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, + 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, + 3.0, 2.0, 1.0, + ); + let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., + 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mul_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_sh(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_mul_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = 
_mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_set_sh(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_mul_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = + _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_set_sh(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mul_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_mul_sh(a, b); + let e = _mm_set_sh(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_mul_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_mul_sh(src, 0, a, b); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = _mm_mask_mul_sh(src, 1, a, b); + let e = _mm_set_sh(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_mul_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_maskz_mul_sh(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_mul_sh(1, a, b); + let e = _mm_set_sh(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_div_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let r = _mm_div_ph(a, b); + let e = _mm_set1_ph(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_div_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0); + let r = _mm_mask_div_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 
0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_div_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let r = _mm_maskz_div_ph(0b01010101, a, b); + let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_div_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let r = _mm256_div_ph(a, b); + let e = _mm256_set1_ph(0.5); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_div_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let src = _mm256_set_ph( + 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, + 19.0, + ); + let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_div_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let r = _mm256_maskz_div_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_div_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let r = _mm512_div_ph(a, b); + let e = _mm512_set1_ph(0.5); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_div_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let src = _mm512_set_ph( + 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, + 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, + 33.0, 34.0, 35.0, + ); + let r = 
_mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, + 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_div_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, + 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_div_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ph(0.5); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_div_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let src = _mm512_set_ph( + 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, + 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, + 33.0, 34.0, 35.0, + ); + let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, + 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_div_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let r = _mm512_maskz_div_round_ph::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, + 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_div_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_sh(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_div_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_set_sh(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_div_round_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = + _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_set_sh(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_div_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_div_sh(a, b); + let e = _mm_set_sh(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_div_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let src = _mm_set_sh(4.0); + let r = _mm_mask_div_sh(src, 0, a, b); + let e = _mm_set_sh(4.0); + assert_eq_m128h(r, e); + let r = 
_mm_mask_div_sh(src, 1, a, b); + let e = _mm_set_sh(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_div_sh() { + let a = _mm_set_sh(1.0); + let b = _mm_set_sh(2.0); + let r = _mm_maskz_div_sh(0, a, b); + let e = _mm_set_sh(0.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_div_sh(1, a, b); + let e = _mm_set_sh(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 1.0); + let r = _mm_mul_pch(a, b); + let e = _mm_set1_pch(-1.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_mul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 1.0); + let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); + let r = _mm_mask_mul_pch(src, 0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_mul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 1.0); + let r = _mm_maskz_mul_pch(0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 1.0); + let r = _mm256_mul_pch(a, b); + let e = _mm256_set1_pch(-1.0, 0.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_mul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 1.0); + let src = _mm256_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + ); + let r = _mm256_mask_mul_pch(src, 0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 
16.0, 17.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_mul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 1.0); + let r = _mm256_maskz_mul_pch(0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_mul_pch(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_mul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_mul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT 
| _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_mul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_mul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_mul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let src = 
_mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_mul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = + _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = _mm_mul_sch(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_mul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_mul_sch(src, 0, a, b); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_mul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = _mm_maskz_mul_sch(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 1.0); + let r = _mm_fmul_pch(a, b); + let e = _mm_set1_pch(-1.0, 0.0); + assert_eq_m128h(r, e); + } + + 
#[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 1.0); + let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); + let r = _mm_mask_fmul_pch(src, 0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 1.0); + let r = _mm_maskz_fmul_pch(0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 1.0); + let r = _mm256_fmul_pch(a, b); + let e = _mm256_set1_pch(-1.0, 0.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 1.0); + let src = _mm256_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + ); + let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 1.0); + let r = _mm256_maskz_fmul_pch(0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_fmul_pch(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + 
assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + b, + ); + 
let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = + _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 
3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = _mm_fmul_sch(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_fmul_sch(src, 0, a, b); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let r = _mm_maskz_fmul_sch(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, -1.0); + let r = _mm_cmul_pch(a, b); + let e = _mm_set1_pch(-1.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, -1.0); + let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); + let r = _mm_mask_cmul_pch(src, 0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, -1.0); + let r = _mm_maskz_cmul_pch(0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 
0.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, -1.0); + let r = _mm256_cmul_pch(a, b); + let e = _mm256_set1_pch(-1.0, 0.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, -1.0); + let src = _mm256_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + ); + let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, -1.0); + let r = _mm256_maskz_cmul_pch(0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_cmul_pch(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, 
-1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = _mm_cmul_sch(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_cmul_sch(src, 0, a, b); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = _mm_maskz_cmul_sch(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_cmul_round_sch::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = + _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fcmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, -1.0); + let r = _mm_fcmul_pch(a, b); + let e = _mm_set1_pch(-1.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fcmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, -1.0); + let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); + let r = _mm_mask_fcmul_pch(src, 0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fcmul_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, -1.0); + let r = _mm_maskz_fcmul_pch(0b0101, a, b); + let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fcmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, -1.0); + let r = _mm256_fcmul_pch(a, b); + let e = _mm256_set1_pch(-1.0, 0.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fcmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, -1.0); + let src = _mm256_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + ); + let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fcmul_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, -1.0); + let r = _mm256_maskz_fcmul_pch(0b01010101, a, b); + let e = _mm256_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fcmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_fcmul_pch(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fcmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fcmul_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, 
e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fcmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pch(-1.0, 0.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fcmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, + -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fcmul_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, -1.0); + let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + b, + ); + let e = _mm512_setr_ph( + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fcmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = _mm_fcmul_sch(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + 
unsafe fn test_mm_mask_fcmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_fcmul_sch(src, 0, a, b); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fcmul_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = _mm_maskz_fcmul_sch(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fcmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fcmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); + let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fcmul_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); + let r = + _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 
7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_abs_ph() { + let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0); + let r = _mm_abs_ph(a); + let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_abs_ph() { + let a = _mm256_set_ph( + -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, + -14.0, + ); + let r = _mm256_abs_ph(a); + let e = _mm256_set_ph( + 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_abs_ph() { + let a = _mm512_set_ph( + -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, + -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, + 27.0, -28.0, 29.0, -30.0, + ); + let r = _mm512_abs_ph(a); + let e = _mm512_set_ph( + 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, + 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, + 29.0, 30.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_conj_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let r = _mm_conj_pch(a); + let e = _mm_set1_pch(0.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_conj_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); + let r = _mm_mask_conj_pch(src, 0b0101, a); + let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_conj_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let r = _mm_maskz_conj_pch(0b0101, a); + let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 
0.0, -1.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_conj_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let r = _mm256_conj_pch(a); + let e = _mm256_set1_pch(0.0, -1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_conj_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let src = _mm256_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + ); + let r = _mm256_mask_conj_pch(src, 0b01010101, a); + let e = _mm256_setr_ph( + 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_conj_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let r = _mm256_maskz_conj_pch(0b01010101, a); + let e = _mm256_setr_ph( + 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_conj_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let r = _mm512_conj_pch(a); + let e = _mm512_set1_pch(0.0, -1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_conj_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let src = _mm512_setr_ph( + 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, + 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, + ); + let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a); + let e = _mm512_setr_ph( + 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, + 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0, + 33.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_conj_pch() { + let a = 
_mm512_set1_pch(0.0, 1.0); + let r = _mm512_maskz_conj_pch(0b0101010101010101, a); + let e = _mm512_setr_ph( + 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, + 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_fmadd_pch(a, b, c); + let e = _mm_set1_pch(-2.0, 3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_mask_fmadd_pch(a, 0b0101, b, c); + let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101); + let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_maskz_fmadd_pch(0b0101, a, b, c); + let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_fmadd_pch(a, b, c); + let e = _mm256_set1_pch(-2.0, 3.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fmadd_pch() { + let a = 
_mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c); + let e = _mm256_setr_ph( + -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101); + let e = _mm256_setr_ph( + -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c); + let e = _mm256_setr_ph( + -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_fmadd_pch(a, b, c); + let e = _mm512_set1_pch(-2.0, 3.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c); + let e = _mm512_setr_ph( + -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, + -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmadd_pch() { + let a = 
_mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101); + let e = _mm512_setr_ph( + -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, + -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c); + let e = _mm512_setr_ph( + -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, + -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = + _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pch(-2.0, 3.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b0101010101010101, + b, + c, + ); + let e = _mm512_setr_ph( + -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, + -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = 
_mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b0101010101010101, + ); + let e = _mm512_setr_ph( + -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, + -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + b, + c, + ); + let e = _mm512_setr_ph( + -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, + -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_fmadd_sch(a, b, c); + let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask_fmadd_sch(a, 0, b, c); + let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_mask_fmadd_sch(a, 1, b, c); + let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fmadd_sch() { + let a = 
_mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask3_fmadd_sch(a, b, c, 0); + let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + let r = _mm_mask3_fmadd_sch(a, b, c, 1); + let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_maskz_fmadd_sch(0, a, b, c); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_fmadd_sch(1, a, b, c); + let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_mask_fmadd_round_sch::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 1, b, c, + ); + let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 1, + ); + let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 1, a, b, c, + ); + let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fcmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_fcmadd_pch(a, b, c); + let e = _mm_set1_pch(2.0, 3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fcmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = 
_mm_set1_pch(0.0, 3.0); + let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c); + let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fcmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101); + let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fcmadd_pch() { + let a = _mm_set1_pch(0.0, 1.0); + let b = _mm_set1_pch(0.0, 2.0); + let c = _mm_set1_pch(0.0, 3.0); + let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c); + let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fcmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_fcmadd_pch(a, b, c); + let e = _mm256_set1_pch(2.0, 3.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fcmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c); + let e = _mm256_setr_ph( + 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fcmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101); + let e = _mm256_setr_ph( + 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn 
test_mm256_maskz_fcmadd_pch() { + let a = _mm256_set1_pch(0.0, 1.0); + let b = _mm256_set1_pch(0.0, 2.0); + let c = _mm256_set1_pch(0.0, 3.0); + let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c); + let e = _mm256_setr_ph( + 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fcmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_fcmadd_pch(a, b, c); + let e = _mm512_set1_pch(2.0, 3.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fcmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c); + let e = _mm512_setr_ph( + 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, + 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fcmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101); + let e = _mm512_setr_ph( + 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, + 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fcmadd_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c); + let e = _mm512_setr_ph( + 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, + 3.0, 0.0, 0.0, 2.0, 3.0, 
0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fcmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = + _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pch(2.0, 3.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fcmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b0101010101010101, + b, + c, + ); + let e = _mm512_setr_ph( + 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, + 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fcmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b0101010101010101, + ); + let e = _mm512_setr_ph( + 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, + 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fcmadd_round_pch() { + let a = _mm512_set1_pch(0.0, 1.0); + let b = _mm512_set1_pch(0.0, 2.0); + let c = _mm512_set1_pch(0.0, 3.0); + let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + b, + c, + ); + let e = _mm512_setr_ph( + 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 
0.0, 0.0, 2.0, + 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fcmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_fcmadd_sch(a, b, c); + let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fcmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask_fcmadd_sch(a, 0, b, c); + let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_mask_fcmadd_sch(a, 1, b, c); + let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fcmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask3_fcmadd_sch(a, b, c, 0); + let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + let r = _mm_mask3_fcmadd_sch(a, b, c, 1); + let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fcmadd_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_maskz_fcmadd_sch(0, a, b, c); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 
4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_fcmadd_sch(1, a, b, c); + let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fcmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fcmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 1, b, c, + ); + let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fcmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + assert_eq_m128h(r, e); + let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 1, + ); + let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); 
+ assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fcmadd_round_sch() { + let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); + let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); + let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 1, a, b, c, + ); + let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_fmadd_ph(a, b, c); + let e = _mm_set1_ph(5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c); + let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101); + let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c); + let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fmadd_ph() { + 
let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_fmadd_ph(a, b, c); + let e = _mm256_set1_ph(5.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c); + let e = _mm256_set_ph( + 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101); + let e = _mm256_set_ph( + 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c); + let e = _mm256_set_ph( + 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fmadd_ph(a, b, c); + let e = _mm512_set1_ph(5.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c); + let e = _mm512_set_ph( + 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, + 5.0, 1.0, 5.0, 1.0, 
5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101); + let e = _mm512_set_ph( + 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, + 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c); + let e = _mm512_set_ph( + 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, + 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ph(5.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b01010101010101010101010101010101, + b, + c, + ); + let e = _mm512_set_ph( + 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, + 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn 
test_mm512_mask3_fmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b01010101010101010101010101010101, + ); + let e = _mm512_set_ph( + 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, + 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + c, + ); + let e = _mm512_set_ph( + 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, + 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fmadd_sh(a, b, c); + let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fmadd_sh(a, 0, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fmadd_sh(a, 1, b, c); + let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = 
"avx512fp16")] + unsafe fn test_mm_mask3_fmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fmadd_sh(a, b, c, 0); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fmadd_sh(a, b, c, 1); + let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fmadd_sh(0, a, b, c); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fmadd_sh(1, a, b, c); + let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fmadd_round_sh::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 1, b, c, + ); + let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 1, + ); + let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 1, a, b, c, + ); + let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_fmsub_ph(a, b, c); + let e = _mm_set1_ph(-1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c); + let e = 
_mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101); + let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c); + let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_fmsub_ph(a, b, c); + let e = _mm256_set1_ph(-1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c); + let e = _mm256_set_ph( + 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101); + let e = _mm256_set_ph( + 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = 
_mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c); + let e = _mm256_set_ph( + 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fmsub_ph(a, b, c); + let e = _mm512_set1_ph(-1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c); + let e = _mm512_set_ph( + 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, + 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101); + let e = _mm512_set_ph( + 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, + 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c); + let e = _mm512_set_ph( + 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, + 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn 
test_mm512_fmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ph(-1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b01010101010101010101010101010101, + b, + c, + ); + let e = _mm512_set_ph( + 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, + 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b01010101010101010101010101010101, + ); + let e = _mm512_set_ph( + 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, + 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + c, + ); + let e = _mm512_set_ph( + 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, + 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + 
#[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fmsub_sh(a, b, c); + let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fmsub_sh(a, 0, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fmsub_sh(a, 1, b, c); + let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fmsub_sh(a, b, c, 0); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fmsub_sh(a, b, c, 1); + let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fmsub_sh(0, a, b, c); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fmsub_sh(1, a, b, c); + let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + 
#[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 1, b, c, + ); + let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 1, + ); + let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); 
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 1, a, b, c, + ); + let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fnmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_fnmadd_ph(a, b, c); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fnmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c); + let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fnmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101); + let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fnmadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c); + let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fnmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_fnmadd_ph(a, b, c); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe 
fn test_mm256_mask_fnmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c); + let e = _mm256_set_ph( + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fnmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101); + let e = _mm256_set_ph( + 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fnmadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c); + let e = _mm256_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fnmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fnmadd_ph(a, b, c); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fnmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c); + let e = _mm512_set_ph( + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fnmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + 
let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101); + let e = _mm512_set_ph( + 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, + 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fnmadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fnmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = + _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fnmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b01010101010101010101010101010101, + b, + c, + ); + let e = _mm512_set_ph( + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fnmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 
0b01010101010101010101010101010101, + ); + let e = _mm512_set_ph( + 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, + 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fnmadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + c, + ); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fnmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fnmadd_sh(a, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fnmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fnmadd_sh(a, 0, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fnmadd_sh(a, 1, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fnmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 
35., 36.); + let r = _mm_mask3_fnmadd_sh(a, b, c, 0); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fnmadd_sh(a, b, c, 1); + let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fnmadd_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fnmadd_sh(0, a, b, c); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fnmadd_sh(1, a, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fnmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fnmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 1, b, c, + ); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn 
test_mm_mask3_fnmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 1, + ); + let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fnmadd_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 1, a, b, c, + ); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fnmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_fnmsub_ph(a, b, c); + let e = _mm_set1_ph(-5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fnmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c); + let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fnmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = 
_mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101); + let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fnmsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c); + let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fnmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_fnmsub_ph(a, b, c); + let e = _mm256_set1_ph(-5.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fnmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c); + let e = _mm256_set_ph( + 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fnmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101); + let e = _mm256_set_ph( + 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fnmsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c); + let e = _mm256_set_ph( + 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, + ); + assert_eq_m256h(r, e); + } + + 
#[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fnmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fnmsub_ph(a, b, c); + let e = _mm512_set1_ph(-5.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fnmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c); + let e = _mm512_set_ph( + 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, + 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fnmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101); + let e = _mm512_set_ph( + 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, + 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fnmsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c); + let e = _mm512_set_ph( + 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, + 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fnmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = + _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_ph(-5.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fnmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b01010101010101010101010101010101, + b, + c, + ); + let e = _mm512_set_ph( + 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, + 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fnmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b01010101010101010101010101010101, + ); + let e = _mm512_set_ph( + 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, + 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fnmsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + c, + ); + let e = _mm512_set_ph( + 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, + 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fnmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 
26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fnmsub_sh(a, b, c); + let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fnmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fnmsub_sh(a, 0, b, c); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fnmsub_sh(a, 1, b, c); + let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fnmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fnmsub_sh(a, b, c, 0); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fnmsub_sh(a, b, c, 1); + let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fnmsub_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fnmsub_sh(0, a, b, c); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fnmsub_sh(1, a, b, c); + let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fnmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 
25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fnmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 1, b, c, + ); + let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask3_fnmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 1, + ); + let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_fnmsub_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); + let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + let e = 
_mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 1, a, b, c, + ); + let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fmaddsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_fmaddsub_ph(a, b, c); + let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fmaddsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c); + let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fmaddsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011); + let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fmaddsub_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c); + let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fmaddsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_fmaddsub_ph(a, b, c); + let e = _mm256_set_ph( + 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn 
test_mm256_mask_fmaddsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c); + let e = _mm256_set_ph( + 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fmaddsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011); + let e = _mm256_set_ph( + 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fmaddsub_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c); + let e = _mm256_set_ph( + 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmaddsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fmaddsub_ph(a, b, c); + let e = _mm512_set_ph( + 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, + 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmaddsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c); + let e = _mm512_set_ph( + 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, + 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 
5.0, -1.0, 1.0, 1.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmaddsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011); + let e = _mm512_set_ph( + 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, + 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmaddsub_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c); + let e = _mm512_set_ph( + 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, + 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmaddsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = + _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set_ph( + 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, + 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmaddsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00110011001100110011001100110011, + b, + c, + ); + let e = _mm512_set_ph( + 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 
5.0, -1.0, + 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmaddsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00110011001100110011001100110011, + ); + let e = _mm512_set_ph( + 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, + 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmaddsub_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00110011001100110011001100110011, + a, + b, + c, + ); + let e = _mm512_set_ph( + 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, + 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fmsubadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_fmsubadd_ph(a, b, c); + let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fmsubadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c); + let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask3_fmsubadd_ph() { + let a = 
_mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011); + let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_fmsubadd_ph() { + let a = _mm_set1_ph(1.0); + let b = _mm_set1_ph(2.0); + let c = _mm_set1_ph(3.0); + let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c); + let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fmsubadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_fmsubadd_ph(a, b, c); + let e = _mm256_set_ph( + -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fmsubadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c); + let e = _mm256_set_ph( + 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask3_fmsubadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011); + let e = _mm256_set_ph( + 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_fmsubadd_ph() { + let a = _mm256_set1_ph(1.0); + let b = _mm256_set1_ph(2.0); + let c = _mm256_set1_ph(3.0); + let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c); + let e = _mm256_set_ph( + 0.0, 0.0, -1.0, 
5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmsubadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_fmsubadd_ph(a, b, c); + let e = _mm512_set_ph( + -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, + -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmsubadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c); + let e = _mm512_set_ph( + 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, + 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmsubadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011); + let e = _mm512_set_ph( + 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, + 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmsubadd_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c); + let e = _mm512_set_ph( + 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, + 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, + ); + 
assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fmsubadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = + _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set_ph( + -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, + -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fmsubadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + 0b00110011001100110011001100110011, + b, + c, + ); + let e = _mm512_set_ph( + 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, + 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask3_fmsubadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, + b, + c, + 0b00110011001100110011001100110011, + ); + let e = _mm512_set_ph( + 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, + 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_fmsubadd_round_ph() { + let a = _mm512_set1_ph(1.0); + let b = _mm512_set1_ph(2.0); + let c = _mm512_set1_ph(3.0); + let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00110011001100110011001100110011, + a, 
+ b, + c, + ); + let e = _mm512_set_ph( + 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, + 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_rcp_ph() { + let a = _mm_set1_ph(2.0); + let r = _mm_rcp_ph(a); + let e = _mm_set1_ph(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_rcp_ph() { + let a = _mm_set1_ph(2.0); + let src = _mm_set1_ph(1.0); + let r = _mm_mask_rcp_ph(src, 0b01010101, a); + let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_rcp_ph() { + let a = _mm_set1_ph(2.0); + let r = _mm_maskz_rcp_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_rcp_ph() { + let a = _mm256_set1_ph(2.0); + let r = _mm256_rcp_ph(a); + let e = _mm256_set1_ph(0.5); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_rcp_ph() { + let a = _mm256_set1_ph(2.0); + let src = _mm256_set1_ph(1.0); + let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_rcp_ph() { + let a = _mm256_set1_ph(2.0); + let r = _mm256_maskz_rcp_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_rcp_ph() { + let a = _mm512_set1_ph(2.0); + let r = _mm512_rcp_ph(a); + let e = _mm512_set1_ph(0.5); + 
assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_rcp_ph() { + let a = _mm512_set1_ph(2.0); + let src = _mm512_set1_ph(1.0); + let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, + 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_rcp_ph() { + let a = _mm512_set1_ph(2.0); + let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, + 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_rcp_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_rcp_sh(a, b); + let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_rcp_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_rcp_sh(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_rcp_sh(src, 1, a, b); + let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_rcp_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = 
_mm_maskz_rcp_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_rcp_sh(1, a, b); + let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_rsqrt_ph() { + let a = _mm_set1_ph(4.0); + let r = _mm_rsqrt_ph(a); + let e = _mm_set1_ph(0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_rsqrt_ph() { + let a = _mm_set1_ph(4.0); + let src = _mm_set1_ph(1.0); + let r = _mm_mask_rsqrt_ph(src, 0b01010101, a); + let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_rsqrt_ph() { + let a = _mm_set1_ph(4.0); + let r = _mm_maskz_rsqrt_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_rsqrt_ph() { + let a = _mm256_set1_ph(4.0); + let r = _mm256_rsqrt_ph(a); + let e = _mm256_set1_ph(0.5); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_rsqrt_ph() { + let a = _mm256_set1_ph(4.0); + let src = _mm256_set1_ph(1.0); + let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_rsqrt_ph() { + let a = _mm256_set1_ph(4.0); + let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_rsqrt_ph() { + let a = _mm512_set1_ph(4.0); + let r = 
_mm512_rsqrt_ph(a); + let e = _mm512_set1_ph(0.5); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_rsqrt_ph() { + let a = _mm512_set1_ph(4.0); + let src = _mm512_set1_ph(1.0); + let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, + 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_rsqrt_ph() { + let a = _mm512_set1_ph(4.0); + let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, + 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_rsqrt_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let r = _mm_rsqrt_sh(a, b); + let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_rsqrt_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_rsqrt_sh(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_rsqrt_sh(src, 1, a, b); + let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_rsqrt_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let 
b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let r = _mm_maskz_rsqrt_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_rsqrt_sh(1, a, b); + let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_sqrt_ph() { + let a = _mm_set1_ph(4.0); + let r = _mm_sqrt_ph(a); + let e = _mm_set1_ph(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_sqrt_ph() { + let a = _mm_set1_ph(4.0); + let src = _mm_set1_ph(1.0); + let r = _mm_mask_sqrt_ph(src, 0b01010101, a); + let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_sqrt_ph() { + let a = _mm_set1_ph(4.0); + let r = _mm_maskz_sqrt_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_sqrt_ph() { + let a = _mm256_set1_ph(4.0); + let r = _mm256_sqrt_ph(a); + let e = _mm256_set1_ph(2.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_sqrt_ph() { + let a = _mm256_set1_ph(4.0); + let src = _mm256_set1_ph(1.0); + let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_sqrt_ph() { + let a = _mm256_set1_ph(4.0); + let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn 
test_mm512_sqrt_ph() { + let a = _mm512_set1_ph(4.0); + let r = _mm512_sqrt_ph(a); + let e = _mm512_set1_ph(2.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_sqrt_ph() { + let a = _mm512_set1_ph(4.0); + let src = _mm512_set1_ph(1.0); + let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, + 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_sqrt_ph() { + let a = _mm512_set1_ph(4.0); + let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, + 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_sqrt_round_ph() { + let a = _mm512_set1_ph(4.0); + let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_ph(2.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_sqrt_round_ph() { + let a = _mm512_set1_ph(4.0); + let src = _mm512_set1_ph(1.0); + let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, + 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_sqrt_round_ph() { + let a = _mm512_set1_ph(4.0); + let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 
0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, + 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_sqrt_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let r = _mm_sqrt_sh(a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_sqrt_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_sqrt_sh(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_sqrt_sh(src, 1, a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_sqrt_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let r = _mm_maskz_sqrt_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_sqrt_sh(1, a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_sqrt_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(2.0, 
10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_sqrt_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_sqrt_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); + let r = + _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_max_ph() { + let a = _mm_set1_ph(2.0); + let b = _mm_set1_ph(1.0); + let r = _mm_max_ph(a, b); + let e = _mm_set1_ph(2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_max_ph() { + let a = _mm_set1_ph(2.0); + let b = _mm_set1_ph(1.0); + let src = _mm_set1_ph(3.0); + let r = _mm_mask_max_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn 
test_mm_maskz_max_ph() { + let a = _mm_set1_ph(2.0); + let b = _mm_set1_ph(1.0); + let r = _mm_maskz_max_ph(0b01010101, a, b); + let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_max_ph() { + let a = _mm256_set1_ph(2.0); + let b = _mm256_set1_ph(1.0); + let r = _mm256_max_ph(a, b); + let e = _mm256_set1_ph(2.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_max_ph() { + let a = _mm256_set1_ph(2.0); + let b = _mm256_set1_ph(1.0); + let src = _mm256_set1_ph(3.0); + let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_max_ph() { + let a = _mm256_set1_ph(2.0); + let b = _mm256_set1_ph(1.0); + let r = _mm256_maskz_max_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_max_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_max_ph(a, b); + let e = _mm512_set1_ph(2.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_max_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let src = _mm512_set1_ph(3.0); + let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, + 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_max_ph() { + let a = _mm512_set1_ph(2.0); 
+ let b = _mm512_set1_ph(1.0); + let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, + 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_max_round_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ph(2.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_max_round_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let src = _mm512_set1_ph(3.0); + let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, + 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_max_round_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, + 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_max_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_max_sh(a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + 
assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_max_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_max_sh(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_max_sh(src, 1, a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_max_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_maskz_max_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_max_sh(1, a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_max_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_max_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = 
_mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_max_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = + _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_min_ph() { + let a = _mm_set1_ph(2.0); + let b = _mm_set1_ph(1.0); + let r = _mm_min_ph(a, b); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_min_ph() { + let a = _mm_set1_ph(2.0); + let b = _mm_set1_ph(1.0); + let src = _mm_set1_ph(3.0); + let r = _mm_mask_min_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_min_ph() { + let a = _mm_set1_ph(2.0); + let b = _mm_set1_ph(1.0); + let r = _mm_maskz_min_ph(0b01010101, a, b); + let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_min_ph() { + let a = _mm256_set1_ph(2.0); + let b = _mm256_set1_ph(1.0); + let r = _mm256_min_ph(a, b); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_min_ph() { + let a = _mm256_set1_ph(2.0); + let b = 
_mm256_set1_ph(1.0); + let src = _mm256_set1_ph(3.0); + let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_min_ph() { + let a = _mm256_set1_ph(2.0); + let b = _mm256_set1_ph(1.0); + let r = _mm256_maskz_min_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_min_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_min_ph(a, b); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_min_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let src = _mm512_set1_ph(3.0); + let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, + 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_min_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_min_round_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = 
_mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_min_round_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let src = _mm512_set1_ph(3.0); + let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, + 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_min_round_ph() { + let a = _mm512_set1_ph(2.0); + let b = _mm512_set1_ph(1.0); + let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_min_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_min_sh(a, b); + let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_min_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_min_sh(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_min_sh(src, 1, a, b); + let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + 
assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_min_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_maskz_min_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_maskz_min_sh(1, a, b); + let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_min_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_min_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); + let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_min_round_sh() { + let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); + let r = + _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 
15.0, 16.0); + assert_eq_m128h(r, e); + let r = + _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_getexp_ph() { + let a = _mm_set1_ph(3.0); + let r = _mm_getexp_ph(a); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_getexp_ph() { + let a = _mm_set1_ph(3.0); + let src = _mm_set1_ph(4.0); + let r = _mm_mask_getexp_ph(src, 0b01010101, a); + let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_getexp_ph() { + let a = _mm_set1_ph(3.0); + let r = _mm_maskz_getexp_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_getexp_ph() { + let a = _mm256_set1_ph(3.0); + let r = _mm256_getexp_ph(a); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_getexp_ph() { + let a = _mm256_set1_ph(3.0); + let src = _mm256_set1_ph(4.0); + let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_getexp_ph() { + let a = _mm256_set1_ph(3.0); + let r = _mm256_maskz_getexp_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_getexp_ph() { + let a = _mm512_set1_ph(3.0); + let r = _mm512_getexp_ph(a); + 
let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_getexp_ph() { + let a = _mm512_set1_ph(3.0); + let src = _mm512_set1_ph(4.0); + let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, + 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_getexp_ph() { + let a = _mm512_set1_ph(3.0); + let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_getexp_round_ph() { + let a = _mm512_set1_ph(3.0); + let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_getexp_round_ph() { + let a = _mm512_set1_ph(3.0); + let src = _mm512_set1_ph(4.0); + let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, + 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_getexp_round_ph() { + let a = _mm512_set1_ph(3.0); + let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 
1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_getexp_sh() { + let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_getexp_sh(a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_getexp_sh() { + let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_getexp_sh(src, 0, a, b); + let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_getexp_sh(src, 1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_getexp_sh() { + let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_getexp_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_getexp_sh(1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_getexp_round_sh() { + let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_getexp_round_sh() { + let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let src = 
_mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b); + let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_getexp_round_sh() { + let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_getmant_ph() { + let a = _mm_set1_ph(10.0); + let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); + let e = _mm_set1_ph(1.25); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_getmant_ph() { + let a = _mm_set1_ph(10.0); + let src = _mm_set1_ph(20.0); + let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a); + let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_getmant_ph() { + let a = _mm_set1_ph(10.0); + let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a); + let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_getmant_ph() { + let a = _mm256_set1_ph(10.0); + let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); + let e = 
_mm256_set1_ph(1.25); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_getmant_ph() { + let a = _mm256_set1_ph(10.0); + let src = _mm256_set1_ph(20.0); + let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( + src, + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, + 20.0, 1.25, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_getmant_ph() { + let a = _mm256_set1_ph(10.0); + let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_getmant_ph() { + let a = _mm512_set1_ph(10.0); + let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); + let e = _mm512_set1_ph(1.25); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_getmant_ph() { + let a = _mm512_set1_ph(10.0); + let src = _mm512_set1_ph(20.0); + let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, + 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, + 20.0, 1.25, 20.0, 1.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_getmant_ph() { + let a = _mm512_set1_ph(10.0); + let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 
1.25, 0.0, 1.25, + 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_getmant_round_ph() { + let a = _mm512_set1_ph(10.0); + let r = + _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( + a, + ); + let e = _mm512_set1_ph(1.25); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_getmant_round_ph() { + let a = _mm512_set1_ph(10.0); + let src = _mm512_set1_ph(20.0); + let r = _mm512_mask_getmant_round_ph::< + _MM_MANT_NORM_P75_1P5, + _MM_MANT_SIGN_NAN, + _MM_FROUND_NO_EXC, + >(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, + 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, + 20.0, 1.25, 20.0, 1.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_getmant_round_ph() { + let a = _mm512_set1_ph(10.0); + let r = _mm512_maskz_getmant_round_ph::< + _MM_MANT_NORM_P75_1P5, + _MM_MANT_SIGN_NAN, + _MM_FROUND_NO_EXC, + >(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, + 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_getmant_sh() { + let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b); + let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_getmant_sh() { + let a = _mm_setr_ph(15.0, 10., 
11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b); + let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b); + let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_getmant_sh() { + let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b); + let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_getmant_round_sh() { + let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( + a, b, + ); + let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_getmant_round_sh() { + let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_getmant_round_sh::< + _MM_MANT_NORM_P75_1P5, + _MM_MANT_SIGN_NAN, + _MM_FROUND_NO_EXC, + >(src, 0, a, b); + let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = 
_mm_mask_getmant_round_sh::< + _MM_MANT_NORM_P75_1P5, + _MM_MANT_SIGN_NAN, + _MM_FROUND_NO_EXC, + >(src, 1, a, b); + let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_getmant_round_sh() { + let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_getmant_round_sh::< + _MM_MANT_NORM_P75_1P5, + _MM_MANT_SIGN_NAN, + _MM_FROUND_NO_EXC, + >(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_getmant_round_sh::< + _MM_MANT_NORM_P75_1P5, + _MM_MANT_SIGN_NAN, + _MM_FROUND_NO_EXC, + >(1, a, b); + let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_roundscale_ph() { + let a = _mm_set1_ph(1.1); + let r = _mm_roundscale_ph::<0>(a); + let e = _mm_set1_ph(1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_roundscale_ph() { + let a = _mm_set1_ph(1.1); + let src = _mm_set1_ph(2.0); + let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a); + let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_roundscale_ph() { + let a = _mm_set1_ph(1.1); + let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a); + let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_roundscale_ph() { + let a = _mm256_set1_ph(1.1); + let r = _mm256_roundscale_ph::<0>(a); + let e = _mm256_set1_ph(1.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_roundscale_ph() { + let a = _mm256_set1_ph(1.1); + let src = 
_mm256_set1_ph(2.0); + let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_roundscale_ph() { + let a = _mm256_set1_ph(1.1); + let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_roundscale_ph() { + let a = _mm512_set1_ph(1.1); + let r = _mm512_roundscale_ph::<0>(a); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_roundscale_ph() { + let a = _mm512_set1_ph(1.1); + let src = _mm512_set1_ph(2.0); + let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, + 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_roundscale_ph() { + let a = _mm512_set1_ph(1.1); + let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_roundscale_round_ph() { + let a = _mm512_set1_ph(1.1); + let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a); + let e = _mm512_set1_ph(1.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_roundscale_round_ph() { + 
let a = _mm512_set1_ph(1.1); + let src = _mm512_set1_ph(2.0); + let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, + 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_roundscale_round_ph() { + let a = _mm512_set1_ph(1.1); + let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_roundscale_sh() { + let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_roundscale_sh::<0>(a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_roundscale_sh() { + let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_roundscale_sh() { + let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); + let r = 
_mm_maskz_roundscale_sh::<0>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_roundscale_sh::<0>(1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_roundscale_round_sh() { + let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_roundscale_round_sh() { + let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b); + let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_roundscale_round_sh() { + let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b); + let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_scalef_ph() { + let a = _mm_set1_ph(1.); + let b = _mm_set1_ph(3.); + let r = _mm_scalef_ph(a, b); + let e = _mm_set1_ph(8.0); + 
assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_scalef_ph() { + let a = _mm_set1_ph(1.); + let b = _mm_set1_ph(3.); + let src = _mm_set1_ph(2.); + let r = _mm_mask_scalef_ph(src, 0b01010101, a, b); + let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_scalef_ph() { + let a = _mm_set1_ph(1.); + let b = _mm_set1_ph(3.); + let r = _mm_maskz_scalef_ph(0b01010101, a, b); + let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_scalef_ph() { + let a = _mm256_set1_ph(1.); + let b = _mm256_set1_ph(3.); + let r = _mm256_scalef_ph(a, b); + let e = _mm256_set1_ph(8.0); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_scalef_ph() { + let a = _mm256_set1_ph(1.); + let b = _mm256_set1_ph(3.); + let src = _mm256_set1_ph(2.); + let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b); + let e = _mm256_set_ph( + 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_scalef_ph() { + let a = _mm256_set1_ph(1.); + let b = _mm256_set1_ph(3.); + let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_scalef_ph() { + let a = _mm512_set1_ph(1.); + let b = _mm512_set1_ph(3.); + let r = _mm512_scalef_ph(a, b); + let e = _mm512_set1_ph(8.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_scalef_ph() { + let a = _mm512_set1_ph(1.); + let b = _mm512_set1_ph(3.); + let src = 
_mm512_set1_ph(2.); + let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, + 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_scalef_ph() { + let a = _mm512_set1_ph(1.); + let b = _mm512_set1_ph(3.); + let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, + 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_scalef_round_ph() { + let a = _mm512_set1_ph(1.); + let b = _mm512_set1_ph(3.); + let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_ph(8.0); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_scalef_round_ph() { + let a = _mm512_set1_ph(1.); + let b = _mm512_set1_ph(3.); + let src = _mm512_set1_ph(2.); + let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, + 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_scalef_round_ph() { + let a = _mm512_set1_ph(1.); + let b = _mm512_set1_ph(3.); + let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + b, + ); + let e = _mm512_set_ph( + 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 
0.0, 8.0, 0.0, 8.0, 0.0, + 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_scalef_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_scalef_sh(a, b); + let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_scalef_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_scalef_sh(src, 0, a, b); + let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_scalef_sh(src, 1, a, b); + let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_scalef_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_scalef_sh(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_scalef_sh(1, a, b); + let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_scalef_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_scalef_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); 
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_scalef_round_sh() { + let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); + let r = + _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = + _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_reduce_ph() { + let a = _mm_set1_ph(1.25); + let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm_set1_ph(0.25); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_reduce_ph() { + let a = _mm_set1_ph(1.25); + let src = _mm_set1_ph(2.0); + let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a); + let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_reduce_ph() { + let a = _mm_set1_ph(1.25); + let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a); + let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = 
"avx512fp16,avx512vl")] + unsafe fn test_mm256_reduce_ph() { + let a = _mm256_set1_ph(1.25); + let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm256_set1_ph(0.25); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_reduce_ph() { + let a = _mm256_set1_ph(1.25); + let src = _mm256_set1_ph(2.0); + let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_reduce_ph() { + let a = _mm256_set1_ph(1.25); + let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_reduce_ph() { + let a = _mm512_set1_ph(1.25); + let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); + let e = _mm512_set1_ph(0.25); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_reduce_ph() { + let a = _mm512_set1_ph(1.25); + let src = _mm512_set1_ph(2.0); + let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, + 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_reduce_ph() { + let a = _mm512_set1_ph(1.25); + let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 
0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, + 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_reduce_round_ph() { + let a = _mm512_set1_ph(1.25); + let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); + let e = _mm512_set1_ph(0.25); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_reduce_round_ph() { + let a = _mm512_set1_ph(1.25); + let src = _mm512_set1_ph(2.0); + let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, + 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_reduce_round_ph() { + let a = _mm512_set1_ph(1.25); + let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, + 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_reduce_sh() { + let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); + let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_reduce_sh() { + let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + let b = 
_mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b); + let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b); + let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_reduce_sh() { + let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b); + let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_reduce_round_sh() { + let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); + let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); + let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_reduce_round_sh() { + let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); + let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); + let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + src, 0, a, b, + ); + let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( + src, 1, a, b, + ); + let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 
15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_reduce_round_sh() { + let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); + let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); + let r = + _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b); + let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + let r = + _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b); + let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_reduce_add_ph() { + let a = _mm_set1_ph(2.0); + let r = _mm_reduce_add_ph(a); + assert_eq!(r, 16.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_reduce_add_ph() { + let a = _mm256_set1_ph(2.0); + let r = _mm256_reduce_add_ph(a); + assert_eq!(r, 32.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_reduce_add_ph() { + let a = _mm512_set1_ph(2.0); + let r = _mm512_reduce_add_ph(a); + assert_eq!(r, 64.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_reduce_mul_ph() { + let a = _mm_set1_ph(2.0); + let r = _mm_reduce_mul_ph(a); + assert_eq!(r, 256.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_reduce_mul_ph() { + let a = _mm256_set1_ph(2.0); + let r = _mm256_reduce_mul_ph(a); + assert_eq!(r, 65536.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_reduce_mul_ph() { + let a = _mm512_set1_ph(2.0); + let r = _mm512_reduce_mul_ph(a); + assert_eq!(r, 16777216.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_reduce_max_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_reduce_max_ph(a); + assert_eq!(r, 8.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_reduce_max_ph() { 
+ let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_reduce_max_ph(a); + assert_eq!(r, 16.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_reduce_max_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_reduce_max_ph(a); + assert_eq!(r, 32.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_reduce_min_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_reduce_min_ph(a); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_reduce_min_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_reduce_min_ph(a); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_reduce_min_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_reduce_min_ph(a); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_fpclass_ph_mask() { + let a = _mm_set_ph( + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + ); + let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities + assert_eq!(r, 0b01100000); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_fpclass_ph_mask() { + let a = _mm_set_ph( + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + ); + let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a); + assert_eq!(r, 0b01000000); + 
} + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_fpclass_ph_mask() { + let a = _mm256_set_ph( + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + ); + let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities + assert_eq!(r, 0b0110000001100000); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_fpclass_ph_mask() { + let a = _mm256_set_ph( + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + ); + let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a); + assert_eq!(r, 0b0100000001000000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_fpclass_ph_mask() { + let a = _mm512_set_ph( + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + ); + let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities + assert_eq!(r, 0b01100000011000000110000001100000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_fpclass_ph_mask() { + let a = _mm512_set_ph( + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 5.9e-8, // Denormal + 1., + f16::INFINITY, + f16::NEG_INFINITY, + 0.0, + -0.0, + -2.0, + f16::NAN, + 
5.9e-8, // Denormal + ); + let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a); + assert_eq!(r, 0b01000000010000000100000001000000); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_fpclass_sh_mask() { + let a = _mm_set_sh(f16::INFINITY); + let r = _mm_fpclass_sh_mask::<0x18>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_fpclass_sh_mask() { + let a = _mm_set_sh(f16::INFINITY); + let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a); + assert_eq!(r, 0); + let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_blend_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0); + let r = _mm_mask_blend_ph(0b01010101, a, b); + let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_blend_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_set_ph( + -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, + -14.0, -15.0, -16.0, + ); + let r = _mm256_mask_blend_ph(0b0101010101010101, a, b); + let e = _mm256_set_ph( + 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, + -16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_blend_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_set_ph( + -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, + -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, 
-21.0, -22.0, -23.0, -24.0, -25.0, + -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0, + ); + let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b); + let e = _mm512_set_ph( + 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, + -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0, + 29.0, -30.0, 31.0, -32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_permutex2var_ph() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14); + let r = _mm_permutex2var_ph(a, idx, b); + let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_permutex2var_ph() { + let a = _mm256_setr_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let b = _mm256_setr_ph( + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let r = _mm256_permutex2var_ph(a, idx, b); + let e = _mm256_setr_ph( + 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, + 31.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_permutex2var_ph() { + let a = _mm512_setr_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let b = _mm512_setr_ph( + 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, + 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, + 61.0, 62.0, 63.0, 
64.0, + ); + let idx = _mm512_set_epi16( + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20, + 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, + ); + let r = _mm512_permutex2var_ph(a, idx, b); + let e = _mm512_setr_ph( + 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, + 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0, + 59.0, 61.0, 63.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_permutexvar_ph() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7); + let r = _mm_permutexvar_ph(idx, a); + let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_permutexvar_ph() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + let r = _mm256_permutexvar_ph(idx, a); + let e = _mm256_setr_ph( + 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_permutexvar_ph() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let idx = _mm512_set_epi16( + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31, + ); + let r = _mm512_permutexvar_ph(idx, a); + let e = _mm512_setr_ph( + 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, + 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, + 30.0, 32.0, 
+ ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtepi16_ph() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm_cvtepi16_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtepi16_ph() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtepi16_ph() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm_maskz_cvtepi16_ph(0b01010101, a); + let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtepi16_ph() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_cvtepi16_ph(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtepi16_ph() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi16_ph() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a); + let e = 
_mm256_set_ph( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtepi16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_cvtepi16_ph(a); + let e = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtepi16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let src = _mm512_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., + 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., + ); + let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., + 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtepi16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., + 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundepi16_ph() { + let a = 
_mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundepi16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let src = _mm512_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., + 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., + ); + let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., + 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundepi16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., + 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtepu16_ph() { + let a = 
_mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm_cvtepu16_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtepu16_ph() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtepu16_ph() { + let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm_maskz_cvtepu16_ph(0b01010101, a); + let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtepu16_ph() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_cvtepu16_ph(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtepu16_ph() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu16_ph() { + let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., + ); + assert_eq_m256h(r, e); + } + + 
#[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtepu16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_cvtepu16_ph(a); + let e = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtepu16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let src = _mm512_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., + 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., + ); + let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., + 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtepu16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a); + let e = _mm512_set_ph( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., + 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundepu16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 
32, + ); + let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundepu16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let src = _mm512_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., + 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., + ); + let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., + 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundepu16_ph() { + let a = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_ph( + 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., + 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., + ); + assert_eq_m512h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtepi32_ph() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_cvtepi32_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + assert_eq_m128h(r, e); + } + + 
#[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_ph() { + let a = _mm_set_epi32(1, 2, 3, 4); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtepi32_ph(src, 0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_ph() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_cvtepi32_ph(0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtepi32_ph() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtepi32_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_ph() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_ph() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_cvtepi32_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtepi32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_cvtepi32_ph(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtepi32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + 
let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtepi32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundepi32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundepi32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundepi32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 4.0, 
0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvti32_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvti32_sh(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundi32_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtepu32_ph() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_cvtepu32_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_ph() { + let a = _mm_set_epi32(1, 2, 3, 4); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtepu32_ph(src, 0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_ph() { + let a = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_cvtepu32_ph(0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtepu32_ph() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtepu32_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_ph() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r 
= _mm256_mask_cvtepu32_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_ph() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_cvtepu32_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtepu32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_cvtepu32_ph(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtepu32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtepu32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundepu32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 
+ ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundepu32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, + 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundepu32_ph() { + let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtu32_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtu32_sh(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundu32_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtepi64_ph() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_cvtepi64_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_ph() { + let a = _mm_set_epi64x(1, 
2); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtepi64_ph(src, 0b01, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_ph() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_maskz_cvtepi64_ph(0b01, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtepi64_ph() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepi64_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_ph() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_ph() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepi64_ph(0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtepi64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvtepi64_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtepi64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn 
test_mm512_maskz_cvtepi64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvtepi64_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundepi64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundepi64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0b01010101, a, + ); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundepi64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101, a, + ); + let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtepu64_ph() { + let a = _mm_set_epi64x(1, 2); + let r = _mm_cvtepu64_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtepu64_ph() { + let a = _mm_set_epi64x(1, 2); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtepu64_ph(src, 0b01, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtepu64_ph() { + let a = _mm_set_epi64x(1, 2); + let r = 
_mm_maskz_cvtepu64_ph(0b01, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtepu64_ph() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepu64_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtepu64_ph() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu64_ph() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepu64_ph(0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtepu64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_cvtepu64_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtepu64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtepu64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvtepu64_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundepu64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); 
+ let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundepu64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0b01010101, a, + ); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundepu64_ph() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101, a, + ); + let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtxps_ph() { + let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtxps_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtxps_ph() { + let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtxps_ph(src, 0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtxps_ph() { + let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvtxps_ph(0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtxps_ph() { + let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvtxps_ph(a); + let e = _mm_set_ph(1.0, 2.0, 
3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtxps_ph() { + let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtxps_ph() { + let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_maskz_cvtxps_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtxps_ph() { + let a = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtxps_ph(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtxps_ph() { + let a = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a); + let e = _mm256_set_ph( + 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtxps_ph() { + let a = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + 
assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtx_roundps_ph() { + let a = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtx_roundps_ph() { + let a = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm256_set_ph( + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + ); + let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, + 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtx_roundps_ph() { + let a = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + ); + let e = _mm256_set_ph( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m256h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtss_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtss_sh(a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvtss_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let 
b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); + let r = _mm_mask_cvtss_sh(src, 0, a, b); + let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = _mm_mask_cvtss_sh(src, 1, a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvtss_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvtss_sh(0, a, b); + let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = _mm_maskz_cvtss_sh(1, a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundss_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvt_roundss_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); + let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, b, + ); + let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvt_roundss_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = 
_mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = + _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = + _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtpd_ph() { + let a = _mm_set_pd(1.0, 2.0); + let r = _mm_cvtpd_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtpd_ph() { + let a = _mm_set_pd(1.0, 2.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm_mask_cvtpd_ph(src, 0b01, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_ph() { + let a = _mm_set_pd(1.0, 2.0); + let r = _mm_maskz_cvtpd_ph(0b01, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtpd_ph() { + let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + let r = _mm256_cvtpd_ph(a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_ph() { + let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm256_mask_cvtpd_ph(src, 0b0101, a); + let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_ph() { + let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + let r = _mm256_maskz_cvtpd_ph(0b0101, a); + let e = 
_mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtpd_ph() { + let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvtpd_ph(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtpd_ph() { + let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtpd_ph() { + let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvtpd_ph(0b01010101, a); + let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundpd_ph() { + let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundpd_ph() { + let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0b01010101, a, + ); + let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundpd_ph() { + let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101, a, + ); + let e = 
_mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsd_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_pd(1.0, 2.0); + let r = _mm_cvtsd_sh(a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvtsd_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_pd(1.0, 2.0); + let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); + let r = _mm_mask_cvtsd_sh(src, 0, a, b); + let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = _mm_mask_cvtsd_sh(src, 1, a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvtsd_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_pd(1.0, 2.0); + let r = _mm_maskz_cvtsd_sh(0, a, b); + let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = _mm_maskz_cvtsd_sh(1, a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsd_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_pd(1.0, 2.0); + let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvt_roundsd_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_pd(1.0, 2.0); + let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); + let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 
src, 0, a, b, + ); + let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 1, a, b, + ); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvt_roundsd_sh() { + let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); + let b = _mm_setr_pd(1.0, 2.0); + let r = + _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + let r = + _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); + let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_epi16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttph_epi16(a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_epi16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm_mask_cvttph_epi16(src, 0b01010101, a); + let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtph_epi16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_maskz_cvttph_epi16(0b01010101, a); + let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_epi16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_cvttph_epi16(a); + let e 
= _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_epi16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm256_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a); + let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_epi16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a); + let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvttph_epi16(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = 
_mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 10, 2, 12, 
4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_epu16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttph_epu16(a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_epu16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm_mask_cvttph_epu16(src, 0b01010101, a); + let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtph_epu16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_maskz_cvttph_epu16(0b01010101, a); + let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_epu16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_cvttph_epu16(a); + let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_epu16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm256_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a); + let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_epu16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a); + let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvttph_epu16(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 
18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 
30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvttph_epi16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttph_epi16(a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvttph_epi16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm_mask_cvttph_epi16(src, 0b01010101, a); + let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvttph_epi16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_maskz_cvttph_epi16(0b01010101, a); + let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvttph_epi16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_cvttph_epi16(a); + let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] 
+ unsafe fn test_mm256_mask_cvttph_epi16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm256_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a); + let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvttph_epi16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a); + let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvttph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvttph_epi16(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvttph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 
20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvttph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtt_roundph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtt_roundph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtt_roundph_epi16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvttph_epu16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttph_epu16(a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvttph_epu16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm_mask_cvttph_epu16(src, 0b01010101, a); + let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvttph_epu16() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_maskz_cvttph_epu16(0b01010101, a); + let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvttph_epu16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_cvttph_epu16(a); + let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvttph_epu16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 
10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm256_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a); + let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvttph_epu16() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a); + let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvttph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvttph_epu16(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvttph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512fp16")] + unsafe fn test_mm512_maskz_cvttph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtt_roundph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtt_roundph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let src = _mm512_set_epi16( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + ); + let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( + src, + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, + 24, 34, 26, 36, 28, 38, 30, 40, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtt_roundph_epu16() { + let a = _mm512_set_ph( + 1.0, 2.0, 3.0, 
4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( + 0b01010101010101010101010101010101, + a, + ); + let e = _mm512_set_epi16( + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, + 0, 28, 0, 30, 0, 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_epi32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtph_epi32(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_epi32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let src = _mm_set_epi32(10, 11, 12, 13); + let r = _mm_mask_cvtph_epi32(src, 0b0101, a); + let e = _mm_set_epi32(10, 2, 12, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtph_epi32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvtph_epi32(0b0101, a); + let e = _mm_set_epi32(0, 2, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_epi32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvtph_epi32(a); + let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_epi32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a); + let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_epi32() { + 
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_maskz_cvtph_epi32(0b01010101, a); + let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtph_epi32(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 
16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + ); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + ); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_i32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsh_i32(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsh_i32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_epu32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtph_epu32(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_epu32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let src = _mm_set_epi32(10, 11, 12, 13); + let r = _mm_mask_cvtph_epu32(src, 0b0101, a); + let e = _mm_set_epi32(10, 2, 12, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtph_epu32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvtph_epu32(0b0101, 
a); + let e = _mm_set_epi32(0, 2, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_epu32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvtph_epu32(a); + let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_epu32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a); + let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_epu32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_maskz_cvtph_epu32(0b01010101, a); + let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtph_epu32(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 
16.0, + ); + let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, + 0b0101010101010101, + a, + ); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b0101010101010101, + a, + ); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_u32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsh_u32(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsh_u32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvttph_epi32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_cvttph_epi32(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvttph_epi32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let src = _mm_set_epi32(10, 11, 12, 13); + let r = _mm_mask_cvttph_epi32(src, 0b0101, a); + let e = _mm_set_epi32(10, 2, 12, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvttph_epi32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvttph_epi32(0b0101, a); + let e = _mm_set_epi32(0, 2, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvttph_epi32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvttph_epi32(a); + let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvttph_epi32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); + let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a); + let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvttph_epi32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_maskz_cvttph_epi32(0b01010101, a); + let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvttph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = 
_mm512_cvttph_epi32(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvttph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvttph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtt_roundph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtt_roundph_epi32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtt_roundph_epi32() { + let a = 
_mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvttsh_i32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttsh_i32(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtt_roundsh_i32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvttph_epu32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_cvttph_epu32(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvttph_epu32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let src = _mm_set_epi32(10, 11, 12, 13); + let r = _mm_mask_cvttph_epu32(src, 0b0101, a); + let e = _mm_set_epi32(10, 2, 12, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvttph_epu32() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvttph_epu32(0b0101, a); + let e = _mm_set_epi32(0, 2, 0, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvttph_epu32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvttph_epu32(a); + let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvttph_epu32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let src = _mm256_set_epi32(10, 
11, 12, 13, 14, 15, 16, 17); + let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a); + let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvttph_epu32() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_maskz_cvttph_epu32(0b01010101, a); + let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvttph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvttph_epu32(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvttph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvttph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtt_roundph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtt_roundph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let src = _mm512_set_epi32( + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ); + let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); + let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtt_roundph_epu32() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); + let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvttsh_u32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttsh_u32(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtt_roundsh_u32() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_cvtph_epi64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_epi64() { + let src = _mm_set_epi64x(3, 4); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_mask_cvtph_epi64(src, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn 
test_mm_maskz_cvtph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_maskz_cvtph_epi64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_cvtph_epi64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_epi64() { + let src = _mm256_set_epi64x(5, 6, 7, 8); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_mask_cvtph_epi64(src, 0b0101, a); + let e = _mm256_set_epi64x(5, 2, 7, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_maskz_cvtph_epi64(0b0101, a); + let e = _mm256_set_epi64x(0, 2, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvtph_epi64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_epi64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvtph_epi64(0b01010101, a); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn 
test_mm512_cvt_roundph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_epi64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0b01010101, a, + ); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101, a, + ); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_cvtph_epu64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_epu64() { + let src = _mm_set_epi64x(3, 4); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_mask_cvtph_epu64(src, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_maskz_cvtph_epu64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = 
_mm256_cvtph_epu64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_epu64() { + let src = _mm256_set_epi64x(5, 6, 7, 8); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_mask_cvtph_epu64(src, 0b0101, a); + let e = _mm256_set_epi64x(5, 2, 7, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_maskz_cvtph_epu64(0b0101, a); + let e = _mm256_set_epi64x(0, 2, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvtph_epu64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_epu64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvtph_epu64(0b01010101, a); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_epu64() { + let src = 
_mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0b01010101, a, + ); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b01010101, a, + ); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvttph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_cvttph_epi64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvttph_epi64() { + let src = _mm_set_epi64x(3, 4); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_mask_cvttph_epi64(src, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvttph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_maskz_cvttph_epi64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvttph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_cvttph_epi64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvttph_epi64() { + let src = _mm256_set_epi64x(5, 6, 7, 8); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_mask_cvttph_epi64(src, 0b0101, a); + let e = _mm256_set_epi64x(5, 2, 
7, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvttph_epi64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_maskz_cvttph_epi64(0b0101, a); + let e = _mm256_set_epi64x(0, 2, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvttph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvttph_epi64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvttph_epi64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvttph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvttph_epi64(0b01010101, a); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtt_roundph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtt_roundph_epi64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtt_roundph_epi64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 
4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvttph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_cvttph_epu64(a); + let e = _mm_set_epi64x(1, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvttph_epu64() { + let src = _mm_set_epi64x(3, 4); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_mask_cvttph_epu64(src, 0b01, a); + let e = _mm_set_epi64x(3, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvttph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_maskz_cvttph_epu64(0b01, a); + let e = _mm_set_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvttph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_cvttph_epu64(a); + let e = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvttph_epu64() { + let src = _mm256_set_epi64x(5, 6, 7, 8); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_mask_cvttph_epu64(src, 0b0101, a); + let e = _mm256_set_epi64x(5, 2, 7, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvttph_epu64() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_maskz_cvttph_epu64(0b0101, a); + let e = _mm256_set_epi64x(0, 2, 0, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvttph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = 
_mm512_cvttph_epu64(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvttph_epu64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvttph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvttph_epu64(0b01010101, a); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtt_roundph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtt_roundph_epu64() { + let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); + let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtt_roundph_epu64() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a); + let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtxph_ps() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtxph_ps(a); + let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = 
"avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtxph_ps() { + let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_mask_cvtxph_ps(src, 0b0101, a); + let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtxph_ps() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm_maskz_cvtxph_ps(0b0101, a); + let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtxph_ps() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvtxph_ps(a); + let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtxph_ps() { + let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a); + let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtxph_ps() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_maskz_cvtxph_ps(0b01010101, a); + let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtxph_ps() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtxph_ps(a); + let e = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtxph_ps() { + let src = _mm512_set_ps( + 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, + 24.0, 25.0, + ); + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a); + let e = _mm512_set_ps( + 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, + 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtxph_ps() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a); + let e = _mm512_set_ps( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtx_roundph_ps() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_ps( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtx_roundph_ps() { + let src = _mm512_set_ps( + 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, + 24.0, 25.0, + ); + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); + let e = _mm512_set_ps( + 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, + 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtx_roundph_ps() { + let a = _mm256_set_ph( + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 
13.0, 14.0, 15.0, 16.0, + ); + let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); + let e = _mm512_set_ps( + 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_ss() { + let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_cvtsh_ss(a, b); + let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvtsh_ss() { + let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); + let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_mask_cvtsh_ss(src, 0, a, b); + let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + let r = _mm_mask_cvtsh_ss(src, 1, a, b); + let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvtsh_ss() { + let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_maskz_cvtsh_ss(0, a, b); + let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + let r = _mm_maskz_cvtsh_ss(1, a, b); + let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsh_ss() { + let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b); + let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvt_roundsh_ss() { + let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); + let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 
15.0, 16.0); + let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b); + let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b); + let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvt_roundsh_ss() { + let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b); + let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b); + let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtph_pd() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_cvtph_pd(a); + let e = _mm_set_pd(1.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_mask_cvtph_pd() { + let src = _mm_set_pd(10.0, 11.0); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_mask_cvtph_pd(src, 0b01, a); + let e = _mm_set_pd(10.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_maskz_cvtph_pd() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); + let r = _mm_maskz_cvtph_pd(0b01, a); + let e = _mm_set_pd(0.0, 2.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_cvtph_pd() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_cvtph_pd(a); + let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_mask_cvtph_pd() { + let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0); + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); 
+ let r = _mm256_mask_cvtph_pd(src, 0b0101, a); + let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_pd() { + let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); + let r = _mm256_maskz_cvtph_pd(0b0101, a); + let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtph_pd() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvtph_pd(a); + let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvtph_pd() { + let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvtph_pd(src, 0b01010101, a); + let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvtph_pd() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvtph_pd(0b01010101, a); + let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvt_roundph_pd() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a); + let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_mask_cvt_roundph_pd() { + let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); + let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); + assert_eq_m512d(r, 
e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_maskz_cvt_roundph_pd() { + let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a); + let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_sd() { + let a = _mm_setr_pd(2.0, 20.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_cvtsh_sd(a, b); + let e = _mm_setr_pd(1.0, 20.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvtsh_sd() { + let src = _mm_setr_pd(3.0, 11.0); + let a = _mm_setr_pd(2.0, 20.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_mask_cvtsh_sd(src, 0, a, b); + let e = _mm_setr_pd(3.0, 20.0); + assert_eq_m128d(r, e); + let r = _mm_mask_cvtsh_sd(src, 1, a, b); + let e = _mm_setr_pd(1.0, 20.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvtsh_sd() { + let a = _mm_setr_pd(2.0, 20.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_maskz_cvtsh_sd(0, a, b); + let e = _mm_setr_pd(0.0, 20.0); + assert_eq_m128d(r, e); + let r = _mm_maskz_cvtsh_sd(1, a, b); + let e = _mm_setr_pd(1.0, 20.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsh_sd() { + let a = _mm_setr_pd(2.0, 20.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b); + let e = _mm_setr_pd(1.0, 20.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_mask_cvt_roundsh_sd() { + let src = _mm_setr_pd(3.0, 11.0); + let a = _mm_setr_pd(2.0, 20.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, 
b); + let e = _mm_setr_pd(3.0, 20.0); + assert_eq_m128d(r, e); + let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b); + let e = _mm_setr_pd(1.0, 20.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_maskz_cvt_roundsh_sd() { + let a = _mm_setr_pd(2.0, 20.0); + let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); + let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b); + let e = _mm_setr_pd(0.0, 20.0); + assert_eq_m128d(r, e); + let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b); + let e = _mm_setr_pd(1.0, 20.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_h() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsh_h(a); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm256_cvtsh_h() { + let a = _mm256_setr_ph( + 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ); + let r = _mm256_cvtsh_h(a); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm512_cvtsh_h() { + let a = _mm512_setr_ph( + 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, + 31.0, 32.0, + ); + let r = _mm512_cvtsh_h(a); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsi128_si16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm_cvtsi128_si16(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsi16_si128() { + let a = 1; + let r = _mm_cvtsi16_si128(a); + let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs b/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs new file mode 100644 index 000000000000..7c9d07f69095 
--- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs @@ -0,0 +1,693 @@ +use crate::core_arch::x86::*; +use crate::intrinsics::simd::simd_select_bitmask; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { vpmadd52huq_512(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are copied +/// from `k` when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. 
Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { vpmadd52luq_512(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are copied +/// from `k` when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64) +#[inline] +#[target_feature(enable = "avxifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52huq_256(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52huq_256(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are copied +/// from `k` when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64) +#[inline] +#[target_feature(enable = "avxifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52luq_256(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52luq_256(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are copied +/// from `k` when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64) +#[inline] +#[target_feature(enable = "avxifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52huq_128(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52huq_128(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are copied +/// from `k` when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52huq))] +pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64) +#[inline] +#[target_feature(enable = "avxifma")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52luq_128(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52luq_128(a, b, c) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are copied +/// from `k` when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) } +} + +/// Multiply packed unsigned 52-bit integers in each 64-bit element of +/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit +/// unsigned integer from the intermediate result with the +/// corresponding unsigned 64-bit integer in `a`, and store the +/// results in `dst` using writemask `k` (elements are zeroed +/// out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64) +#[inline] +#[target_feature(enable = "avx512ifma,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmadd52luq))] +pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"] + fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i; + #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"] + fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i; + #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"] + fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i; + #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"] + fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i; + #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"] + fn 
vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i; + #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"] + fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i; +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + const K: __mmask8 = 0b01101101; + + #[simd_test(enable = "avx512ifma")] + unsafe fn test_mm512_madd52hi_epu64() { + let a = _mm512_set1_epi64(10 << 40); + let b = _mm512_set1_epi64((11 << 40) + 4); + let c = _mm512_set1_epi64((12 << 40) + 3); + + let actual = _mm512_madd52hi_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let expected = _mm512_set1_epi64(11030549757952); + + assert_eq_m512i(expected, actual); + } + + #[simd_test(enable = "avx512ifma")] + unsafe fn test_mm512_mask_madd52hi_epu64() { + let a = _mm512_set1_epi64(10 << 40); + let b = _mm512_set1_epi64((11 << 40) + 4); + let c = _mm512_set1_epi64((12 << 40) + 3); + + let actual = _mm512_mask_madd52hi_epu64(a, K, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let mut expected = _mm512_set1_epi64(11030549757952); + expected = _mm512_mask_blend_epi64(K, a, expected); + + assert_eq_m512i(expected, actual); + } + + #[simd_test(enable = "avx512ifma")] + unsafe fn test_mm512_maskz_madd52hi_epu64() { + let a = _mm512_set1_epi64(10 << 40); + let b = _mm512_set1_epi64((11 << 40) + 4); + let c = _mm512_set1_epi64((12 << 40) + 3); + + let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let mut expected = _mm512_set1_epi64(11030549757952); + expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected); + + assert_eq_m512i(expected, actual); + } + + #[simd_test(enable = "avx512ifma")] + unsafe fn test_mm512_madd52lo_epu64() { + let a = _mm512_set1_epi64(10 << 40); + let b = _mm512_set1_epi64((11 << 40) + 4); + let c = _mm512_set1_epi64((12 << 40) + 3); + + let actual = 
_mm512_madd52lo_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let expected = _mm512_set1_epi64(100055558127628); + + assert_eq_m512i(expected, actual); + } + + #[simd_test(enable = "avx512ifma")] + unsafe fn test_mm512_mask_madd52lo_epu64() { + let a = _mm512_set1_epi64(10 << 40); + let b = _mm512_set1_epi64((11 << 40) + 4); + let c = _mm512_set1_epi64((12 << 40) + 3); + + let actual = _mm512_mask_madd52lo_epu64(a, K, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let mut expected = _mm512_set1_epi64(100055558127628); + expected = _mm512_mask_blend_epi64(K, a, expected); + + assert_eq_m512i(expected, actual); + } + + #[simd_test(enable = "avx512ifma")] + unsafe fn test_mm512_maskz_madd52lo_epu64() { + let a = _mm512_set1_epi64(10 << 40); + let b = _mm512_set1_epi64((11 << 40) + 4); + let c = _mm512_set1_epi64((12 << 40) + 3); + + let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let mut expected = _mm512_set1_epi64(100055558127628); + expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected); + + assert_eq_m512i(expected, actual); + } + + #[simd_test(enable = "avxifma")] + unsafe fn test_mm256_madd52hi_avx_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_madd52hi_avx_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let expected = _mm256_set1_epi64x(11030549757952); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm256_madd52hi_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_madd52hi_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let 
expected = _mm256_set1_epi64x(11030549757952); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm256_mask_madd52hi_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_mask_madd52hi_epu64(a, K, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let mut expected = _mm256_set1_epi64x(11030549757952); + expected = _mm256_mask_blend_epi64(K, a, expected); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm256_maskz_madd52hi_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let mut expected = _mm256_set1_epi64x(11030549757952); + expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avxifma")] + unsafe fn test_mm256_madd52lo_avx_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_madd52lo_avx_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let expected = _mm256_set1_epi64x(100055558127628); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm256_madd52lo_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_madd52lo_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let expected = _mm256_set1_epi64x(100055558127628); + + assert_eq_m256i(expected, actual); + 
} + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm256_mask_madd52lo_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_mask_madd52lo_epu64(a, K, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let mut expected = _mm256_set1_epi64x(100055558127628); + expected = _mm256_mask_blend_epi64(K, a, expected); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm256_maskz_madd52lo_epu64() { + let a = _mm256_set1_epi64x(10 << 40); + let b = _mm256_set1_epi64x((11 << 40) + 4); + let c = _mm256_set1_epi64x((12 << 40) + 3); + + let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let mut expected = _mm256_set1_epi64x(100055558127628); + expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected); + + assert_eq_m256i(expected, actual); + } + + #[simd_test(enable = "avxifma")] + unsafe fn test_mm_madd52hi_avx_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_madd52hi_avx_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let expected = _mm_set1_epi64x(11030549757952); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm_madd52hi_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_madd52hi_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let expected = _mm_set1_epi64x(11030549757952); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm_mask_madd52hi_epu64() { + let a = _mm_set1_epi64x(10 
<< 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_mask_madd52hi_epu64(a, K, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let mut expected = _mm_set1_epi64x(11030549757952); + expected = _mm_mask_blend_epi64(K, a, expected); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm_maskz_madd52hi_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_maskz_madd52hi_epu64(K, a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) + let mut expected = _mm_set1_epi64x(11030549757952); + expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avxifma")] + unsafe fn test_mm_madd52lo_avx_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_madd52lo_avx_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let expected = _mm_set1_epi64x(100055558127628); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm_madd52lo_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_madd52lo_epu64(a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let expected = _mm_set1_epi64x(100055558127628); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm_mask_madd52lo_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_mask_madd52lo_epu64(a, K, b, c); + + // (10 << 40) + 
((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let mut expected = _mm_set1_epi64x(100055558127628); + expected = _mm_mask_blend_epi64(K, a, expected); + + assert_eq_m128i(expected, actual); + } + + #[simd_test(enable = "avx512ifma,avx512vl")] + unsafe fn test_mm_maskz_madd52lo_epu64() { + let a = _mm_set1_epi64x(10 << 40); + let b = _mm_set1_epi64x((11 << 40) + 4); + let c = _mm_set1_epi64x((12 << 40) + 3); + + let actual = _mm_maskz_madd52lo_epu64(K, a, b, c); + + // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) + let mut expected = _mm_set1_epi64x(100055558127628); + expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected); + + assert_eq_m128i(expected, actual); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vbmi.rs b/library/stdarch/crates/core_arch/src/x86/avx512vbmi.rs new file mode 100644 index 000000000000..3527ccc9e44a --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512vbmi.rs @@ -0,0 +1,960 @@ +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b +pub fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64())) } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2b))] +pub fn _mm512_mask_permutex2var_epi8( + a: __m512i, + k: __mmask64, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); + transmute(simd_select_bitmask(k, permute, a.as_i8x64())) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b +pub fn _mm512_maskz_permutex2var_epi8( + k: __mmask64, + a: __m512i, + idx: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); + transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2b))] +pub fn _mm512_mask2_permutex2var_epi8( + a: __m512i, + idx: __m512i, + k: __mmask64, + b: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); + transmute(simd_select_bitmask(k, permute, idx.as_i8x64())) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b +pub fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2b))] +pub fn _mm256_mask_permutex2var_epi8( + a: __m256i, + k: __mmask32, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); + transmute(simd_select_bitmask(k, permute, a.as_i8x32())) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b +pub fn _mm256_maskz_permutex2var_epi8( + k: __mmask32, + a: __m256i, + idx: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); + transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2b))] +pub fn _mm256_mask2_permutex2var_epi8( + a: __m256i, + idx: __m256i, + k: __mmask32, + b: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); + transmute(simd_select_bitmask(k, permute, idx.as_i8x32())) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b +pub fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermt2b))] +pub fn _mm_mask_permutex2var_epi8(a: __m128i, k: __mmask16, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); + transmute(simd_select_bitmask(k, permute, a.as_i8x16())) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b +pub fn _mm_maskz_permutex2var_epi8(k: __mmask16, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); + transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) + } +} + +/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermi2b))] +pub fn _mm_mask2_permutex2var_epi8(a: __m128i, idx: __m128i, k: __mmask16, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); + transmute(simd_select_bitmask(k, permute, idx.as_i8x16())) + } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermb(a.as_i8x64(), idx.as_i8x64())) } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm512_mask_permutexvar_epi8( + src: __m512i, + k: __mmask64, + idx: __m512i, + a: __m512i, +) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); + transmute(simd_select_bitmask(k, permute, src.as_i8x64())) + } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); + transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) + } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i { + unsafe { transmute(vpermb256(a.as_i8x32(), idx.as_i8x32())) } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm256_mask_permutexvar_epi8( + src: __m256i, + k: __mmask32, + idx: __m256i, + a: __m256i, +) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); + transmute(simd_select_bitmask(k, permute, src.as_i8x32())) + } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); + transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) + } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i { + unsafe { transmute(vpermb128(a.as_i8x16(), idx.as_i8x16())) } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm_mask_permutexvar_epi8(src: __m128i, k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); + transmute(simd_select_bitmask(k, permute, src.as_i8x16())) + } +} + +/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpermb))] +pub fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); + transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) + } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm512_mask_multishift_epi64_epi8( + src: __m512i, + k: __mmask64, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, multishift, src.as_i8x64())) + } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025) +#[inline] +#[target_feature(enable = "avx512vbmi")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, multishift, i8x64::ZERO)) + } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm256_mask_multishift_epi64_epi8( + src: __m256i, + k: __mmask32, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, multishift, src.as_i8x32())) + } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, multishift, i8x32::ZERO)) + } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_multishift_epi64_epi8&expand=4020) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm_mask_multishift_epi64_epi8( + src: __m128i, + k: __mmask16, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, multishift, src.as_i8x16())) + } +} + +/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019) +#[inline] +#[target_feature(enable = "avx512vbmi,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpmultishiftqb))] +pub fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, multishift, i8x16::ZERO)) + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"] + fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64; + #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"] + fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32; + #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"] + fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16; + + #[link_name = "llvm.x86.avx512.permvar.qi.512"] + fn vpermb(a: i8x64, idx: i8x64) -> i8x64; + #[link_name = "llvm.x86.avx512.permvar.qi.256"] + fn vpermb256(a: i8x32, idx: i8x32) -> i8x32; + #[link_name = "llvm.x86.avx512.permvar.qi.128"] + fn vpermb128(a: i8x16, idx: i8x16) -> i8x16; + + #[link_name = "llvm.x86.avx512.pmultishift.qb.512"] + fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64; + #[link_name = "llvm.x86.avx512.pmultishift.qb.256"] + fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32; + #[link_name = "llvm.x86.avx512.pmultishift.qb.128"] + fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16; +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 
55, 56, 57, 58, 59, 60, 61, 62, 63); + #[rustfmt::skip] + let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, + 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, + 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, + 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); + let b = _mm512_set1_epi8(100); + let r = _mm512_permutex2var_epi8(a, idx, b); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, + 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, + 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, + 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_mask_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + #[rustfmt::skip] + let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, + 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, + 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, + 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); + let b = _mm512_set1_epi8(100); + let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutex2var_epi8( + a, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + idx, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, + 54, 100, 53, 
100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, + 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, + 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_maskz_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + #[rustfmt::skip] + let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, + 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, + 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, + 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); + let b = _mm512_set1_epi8(100); + let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutex2var_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + idx, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, + 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, + 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, + 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_mask2_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 
55, 56, 57, 58, 59, 60, 61, 62, 63); + #[rustfmt::skip] + let idx = _mm512_set_epi8(1, 1<<6, 2, 1<<6, 3, 1<<6, 4, 1<<6, 5, 1<<6, 6, 1<<6, 7, 1<<6, 8, 1<<6, + 9, 1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6, + 17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6, + 25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6); + let b = _mm512_set1_epi8(100); + let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b); + assert_eq_m512i(r, idx); + let r = _mm512_mask2_permutex2var_epi8( + a, + idx, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + b, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100, + 54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100, + 46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100, + 38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm256_set1_epi8(100); + let r = _mm256_permutex2var_epi8(a, idx, b); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm256_set1_epi8(100); + let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm256_set1_epi8(100); + let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #[rustfmt::skip] + let idx = _mm256_set_epi8(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 
1<<5, 8, 1<<5, + 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); + let b = _mm256_set1_epi8(100); + let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b); + assert_eq_m256i(r, idx); + let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, + 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_permutex2var_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); + let b = _mm_set1_epi8(100); + let r = _mm_permutex2var_epi8(a, idx, b); + let e = _mm_set_epi8( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_mask_permutex2var_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); + let b = _mm_set1_epi8(100); + let r = _mm_mask_permutex2var_epi8(a, 0, idx, b); + assert_eq_m128i(r, a); + let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b); + let e = _mm_set_epi8( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); + let b = _mm_set1_epi8(100); + let r = 
_mm_maskz_permutex2var_epi8(0, a, idx, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b); + let e = _mm_set_epi8( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4); + let b = _mm_set1_epi8(100); + let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b); + assert_eq_m128i(r, idx); + let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b); + let e = _mm_set_epi8( + 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_permutexvar_epi8() { + let idx = _mm512_set1_epi8(1); + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_permutexvar_epi8(idx, a); + let e = _mm512_set1_epi8(62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_mask_permutexvar_epi8() { + let idx = _mm512_set1_epi8(1); + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutexvar_epi8( + a, + 
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + idx, + a, + ); + let e = _mm512_set1_epi8(62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_maskz_permutexvar_epi8() { + let idx = _mm512_set1_epi8(1); + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_maskz_permutexvar_epi8(0, idx, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutexvar_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + idx, + a, + ); + let e = _mm512_set1_epi8(62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_permutexvar_epi8() { + let idx = _mm256_set1_epi8(1); + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_permutexvar_epi8(idx, a); + let e = _mm256_set1_epi8(30); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_epi8() { + let idx = _mm256_set1_epi8(1); + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a); + let e = _mm256_set1_epi8(30); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_epi8() { + let idx = _mm256_set1_epi8(1); + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_maskz_permutexvar_epi8(0, idx, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a); + let e = _mm256_set1_epi8(30); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_permutexvar_epi8() { + let idx = _mm_set1_epi8(1); + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_permutexvar_epi8(idx, a); + let e = _mm_set1_epi8(14); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_mask_permutexvar_epi8() { + let idx = _mm_set1_epi8(1); + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_mask_permutexvar_epi8(a, 0, idx, a); + assert_eq_m128i(r, a); + let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a); + let e = _mm_set1_epi8(14); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_maskz_permutexvar_epi8() { + let idx = _mm_set1_epi8(1); + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_permutexvar_epi8(0, idx, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a); + let e = _mm_set1_epi8(14); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_multishift_epi64_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_multishift_epi64_epi8(a, b); + let e = _mm512_set1_epi8(1 << 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_mask_multishift_epi64_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b); + assert_eq_m512i(r, a); + let r = 
_mm512_mask_multishift_epi64_epi8( + a, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + let e = _mm512_set1_epi8(1 << 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi")] + unsafe fn test_mm512_maskz_multishift_epi64_epi8() { + let a = _mm512_set1_epi8(1); + let b = _mm512_set1_epi8(1); + let r = _mm512_maskz_multishift_epi64_epi8(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_multishift_epi64_epi8( + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + a, + b, + ); + let e = _mm512_set1_epi8(1 << 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_multishift_epi64_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let r = _mm256_multishift_epi64_epi8(a, b); + let e = _mm256_set1_epi8(1 << 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_mask_multishift_epi64_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); + let e = _mm256_set1_epi8(1 << 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm256_maskz_multishift_epi64_epi8() { + let a = _mm256_set1_epi8(1); + let b = _mm256_set1_epi8(1); + let r = _mm256_maskz_multishift_epi64_epi8(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b); + let e = _mm256_set1_epi8(1 << 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_multishift_epi64_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let r = _mm_multishift_epi64_epi8(a, b); + let e = _mm_set1_epi8(1 << 7); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_mask_multishift_epi64_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b); + let e = _mm_set1_epi8(1 << 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi,avx512vl")] + unsafe fn test_mm_maskz_multishift_epi64_epi8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(1); + let r = _mm_maskz_multishift_epi64_epi8(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b); + let e = _mm_set1_epi8(1 << 7); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs b/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs new file mode 100644 index 000000000000..c722f7b370ff --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs @@ -0,0 +1,3941 @@ +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[cfg_attr(test, assert_instr(vpexpandw))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_expandloadu_epi16( + src: __m512i, + k: __mmask32, + mem_addr: *const i16, +) -> __m512i { + transmute(expandloadw_512(mem_addr, src.as_i16x32(), k)) +} + +/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[cfg_attr(test, assert_instr(vpexpandw))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i { + _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandw))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_expandloadu_epi16( + src: __m256i, + k: __mmask16, + mem_addr: *const i16, +) -> __m256i { + transmute(expandloadw_256(mem_addr, src.as_i16x16(), k)) +} + +/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandw))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i { + _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandw))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_expandloadu_epi16( + src: __m128i, + k: __mmask8, + mem_addr: *const i16, +) -> __m128i { + transmute(expandloadw_128(mem_addr, src.as_i16x8(), k)) +} + +/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandw))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i { + _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr) +} + +/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[cfg_attr(test, assert_instr(vpexpandb))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_mask_expandloadu_epi8( + src: __m512i, + k: __mmask64, + mem_addr: *const i8, +) -> __m512i { + transmute(expandloadb_512(mem_addr, src.as_i8x64(), k)) +} + +/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[cfg_attr(test, assert_instr(vpexpandb))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i { + _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr) +} + +/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandb))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_mask_expandloadu_epi8( + src: __m256i, + k: __mmask32, + mem_addr: *const i8, +) -> __m256i { + transmute(expandloadb_256(mem_addr, src.as_i8x32(), k)) +} + +/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandb))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i { + _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr) +} + +/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandb))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_mask_expandloadu_epi8( + src: __m128i, + k: __mmask16, + mem_addr: *const i8, +) -> __m128i { + transmute(expandloadb_128(mem_addr, src.as_i8x16(), k)) +} + +/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[cfg_attr(test, assert_instr(vpexpandb))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i { + _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr) +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask32, a: __m512i) { + vcompressstorew(base_addr as *mut _, a.as_i16x32(), k) +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask16, a: __m256i) { + vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k) +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask8, a: __m128i) { + vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k) +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask64, a: __m512i) { + vcompressstoreb(base_addr, a.as_i8x64(), k) +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask32, a: __m256i) { + vcompressstoreb256(base_addr, a.as_i8x32(), k) +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask16, a: __m128i) { + vcompressstoreb128(base_addr, a.as_i8x16(), k) +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) } +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) } +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) } +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) } +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) } +} + +/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressw))] +pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) } +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) } +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) } +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) } +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) } +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) } +} + +/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpcompressb))] +pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) } +} + +/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandw))] +pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) } +} + +/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandw))] +pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) } +} + +/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandw))] +pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) } +} + +/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandw))] +pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) } +} + +/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandw))] +pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) } +} + +/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandw))] +pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) } +} + +/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandb))] +pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) } +} + +/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandb))] +pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) } +} + +/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandb))] +pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) } +} + +/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandb))] +pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) } +} + +/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandb))] +pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) } +} + +/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpexpandb))] +pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. 
+/// Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
+#[inline]
+#[target_feature(enable = "avx512vbmi2")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpshldvq))]
+pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+    unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
+}
+
+/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
+#[inline]
+#[target_feature(enable = "avx512vbmi2")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpshldvq))]
+pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
+    unsafe {
+        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
+        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
+    }
+}
+
+/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, a.as_i64x4())) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, a.as_i64x2())) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvq))] +pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, a.as_i32x16())) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, a.as_i32x8())) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, a.as_i32x4())) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvd))] +pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, a.as_i16x32())) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, a.as_i16x16())) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, a.as_i16x8())) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldvw))] +pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, a.as_i64x8())) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, a.as_i64x4())) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, a.as_i64x2())) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvq))] +pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, a.as_i32x16())) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, a.as_i32x8())) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, a.as_i32x4())) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvd))] +pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, a.as_i16x32())) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, a.as_i16x16())) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, a.as_i16x8())) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshrdvw))] +pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64)) +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shldi_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64)) +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shldi_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64)) +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shldi_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8)) +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shldi_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8)) +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shldi_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8)) +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shldi_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } +} + +/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shldi_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shldi_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16)) +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shldi_epi16( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64)) +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shrdi_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64)) +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shrdi_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64)) +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shrdi_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } +} + +/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8)) +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shrdi_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8)) +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shrdi_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8)) +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shrdi_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } +} + +/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_shrdi_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095) +#[inline] +#[target_feature(enable = "avx512vbmi2")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_shrdi_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
+#[inline]
+#[target_feature(enable = "avx512vbmi2,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
+#[rustc_legacy_const_generics(3)]
+pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
+        // Zeromask variant: lanes with a clear mask bit become zero.
+        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
+    }
+}
+
+/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
+#[inline]
+#[target_feature(enable = "avx512vbmi2,avx512vl")]
+#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
+#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
+#[rustc_legacy_const_generics(2)]
+pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+    // IMM8 is validated at compile time; delegate to the variable-shift form
+    // with the immediate broadcast across all lanes.
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
+}
+
+/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_shrdi_epi16( + src: __m128i, + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } +} + +/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089) +#[inline] +#[target_feature(enable = "avx512vbmi2,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_shrdi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"] + fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32); + #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"] + fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"] + fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"] + fn 
vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64); + #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"] + fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32); + #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"] + fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16); + + #[link_name = "llvm.x86.avx512.mask.compress.w.512"] + fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32; + #[link_name = "llvm.x86.avx512.mask.compress.w.256"] + fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.compress.w.128"] + fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; + + #[link_name = "llvm.x86.avx512.mask.compress.b.512"] + fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64; + #[link_name = "llvm.x86.avx512.mask.compress.b.256"] + fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; + #[link_name = "llvm.x86.avx512.mask.compress.b.128"] + fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; + + #[link_name = "llvm.x86.avx512.mask.expand.w.512"] + fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32; + #[link_name = "llvm.x86.avx512.mask.expand.w.256"] + fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.expand.w.128"] + fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8; + + #[link_name = "llvm.x86.avx512.mask.expand.b.512"] + fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64; + #[link_name = "llvm.x86.avx512.mask.expand.b.256"] + fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32; + #[link_name = "llvm.x86.avx512.mask.expand.b.128"] + fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; + + #[link_name = "llvm.fshl.v8i64"] + fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; + #[link_name = "llvm.fshl.v4i64"] + fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; + #[link_name = "llvm.fshl.v2i64"] + fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; + #[link_name = 
"llvm.fshl.v16i32"] + fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; + #[link_name = "llvm.fshl.v8i32"] + fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; + #[link_name = "llvm.fshl.v4i32"] + fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; + #[link_name = "llvm.fshl.v32i16"] + fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; + #[link_name = "llvm.fshl.v16i16"] + fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; + #[link_name = "llvm.fshl.v8i16"] + fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; + + #[link_name = "llvm.fshr.v8i64"] + fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; + #[link_name = "llvm.fshr.v4i64"] + fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; + #[link_name = "llvm.fshr.v2i64"] + fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; + #[link_name = "llvm.fshr.v16i32"] + fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; + #[link_name = "llvm.fshr.v8i32"] + fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; + #[link_name = "llvm.fshr.v4i32"] + fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; + #[link_name = "llvm.fshr.v32i16"] + fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; + #[link_name = "llvm.fshr.v16i16"] + fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; + #[link_name = "llvm.fshr.v8i16"] + fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; + + #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"] + fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"] + fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.expand.load.b.256"] + fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32; + #[link_name = "llvm.x86.avx512.mask.expand.load.w.256"] + fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.expand.load.b.512"] + fn expandloadb_512(mem_addr: 
*const i8, a: i8x64, mask: u64) -> i8x64; + #[link_name = "llvm.x86.avx512.mask.expand.load.w.512"] + fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32; +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::hint::black_box; + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_compress_epi16() { + let src = _mm512_set1_epi16(200); + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_compress_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_compress_epi16() { + let src = _mm256_set1_epi16(200); + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a); + let e = _mm256_set_epi16( + 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_compress_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a); + let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_compress_epi16() { + let src = _mm_set1_epi16(200); + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_mask_compress_epi16(src, 0b01010101, a); + let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_compress_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_maskz_compress_epi16(0b01010101, a); + let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_compress_epi8() { + let src = _mm512_set1_epi8(100); + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_mask_compress_epi8( + src, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_compress_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 
40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_maskz_compress_epi8( + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_compress_epi8() { + let src = _mm256_set1_epi8(100); + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_compress_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_compress_epi8() { + let src = _mm_set1_epi8(100); + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a); + let e = _mm_set_epi8( + 100, 100, 100, 100, 100, 100, 100, 
100, 1, 3, 5, 7, 9, 11, 13, 15, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_compress_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_compress_epi8(0b01010101_01010101, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_expand_epi16() { + let src = _mm512_set1_epi16(200); + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm512_set_epi16( + 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23, + 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_expand_epi16() { + #[rustfmt::skip] + let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, + 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_expand_epi16() { + let src = _mm256_set1_epi16(200); + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a); + let e = _mm256_set_epi16( + 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn 
test_mm256_maskz_expand_epi16() { + let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a); + let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_expand_epi16() { + let src = _mm_set1_epi16(200); + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_mask_expand_epi16(src, 0b01010101, a); + let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_expand_epi16() { + let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_maskz_expand_epi16(0b01010101, a); + let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_expand_epi8() { + let src = _mm512_set1_epi8(100); + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_mask_expand_epi8( + src, + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39, + 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47, + 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55, + 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_expand_epi8() { + #[rustfmt::skip] + let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); + let r = _mm512_maskz_expand_epi8( + 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101, + a, + ); + #[rustfmt::skip] + let e = _mm512_set_epi8( + 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39, + 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47, + 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55, + 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_expand_epi8() { + let src = _mm256_set1_epi8(100); + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23, + 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_expand_epi8() { + #[rustfmt::skip] + let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a); + #[rustfmt::skip] + let e = _mm256_set_epi8( + 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, + 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_expand_epi8() { + let src = _mm_set1_epi8(100); + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_mask_expand_epi8(src, 
0b01010101_01010101, a); + let e = _mm_set_epi8( + 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_expand_epi8() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_expand_epi8(0b01010101_01010101, a); + let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shldv_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(1 << 63); + let c = _mm512_set1_epi64(2); + let r = _mm512_shldv_epi64(a, b, c); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shldv_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(1 << 63); + let c = _mm512_set1_epi64(2); + let r = _mm512_mask_shldv_epi64(a, 0, b, c); + assert_eq_m512i(r, a); + let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shldv_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(1 << 63); + let c = _mm512_set1_epi64(2); + let r = _mm512_maskz_shldv_epi64(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shldv_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(1 << 63); + let c = _mm256_set1_epi64x(2); + let r = _mm256_shldv_epi64(a, b, c); + let e = _mm256_set1_epi64x(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shldv_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(1 
<< 63); + let c = _mm256_set1_epi64x(2); + let r = _mm256_mask_shldv_epi64(a, 0, b, c); + assert_eq_m256i(r, a); + let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c); + let e = _mm256_set1_epi64x(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shldv_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(1 << 63); + let c = _mm256_set1_epi64x(2); + let r = _mm256_maskz_shldv_epi64(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c); + let e = _mm256_set1_epi64x(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shldv_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(1 << 63); + let c = _mm_set1_epi64x(2); + let r = _mm_shldv_epi64(a, b, c); + let e = _mm_set1_epi64x(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shldv_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(1 << 63); + let c = _mm_set1_epi64x(2); + let r = _mm_mask_shldv_epi64(a, 0, b, c); + assert_eq_m128i(r, a); + let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c); + let e = _mm_set1_epi64x(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shldv_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(1 << 63); + let c = _mm_set1_epi64x(2); + let r = _mm_maskz_shldv_epi64(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c); + let e = _mm_set1_epi64x(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shldv_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(1 << 31); + let c = _mm512_set1_epi32(2); + let r = _mm512_shldv_epi32(a, b, c); + let e = _mm512_set1_epi32(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512vbmi2")] + unsafe fn test_mm512_mask_shldv_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(1 << 31); + let c = _mm512_set1_epi32(2); + let r = _mm512_mask_shldv_epi32(a, 0, b, c); + assert_eq_m512i(r, a); + let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c); + let e = _mm512_set1_epi32(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shldv_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(1 << 31); + let c = _mm512_set1_epi32(2); + let r = _mm512_maskz_shldv_epi32(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c); + let e = _mm512_set1_epi32(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shldv_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(1 << 31); + let c = _mm256_set1_epi32(2); + let r = _mm256_shldv_epi32(a, b, c); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shldv_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(1 << 31); + let c = _mm256_set1_epi32(2); + let r = _mm256_mask_shldv_epi32(a, 0, b, c); + assert_eq_m256i(r, a); + let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shldv_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(1 << 31); + let c = _mm256_set1_epi32(2); + let r = _mm256_maskz_shldv_epi32(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shldv_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(1 << 
31); + let c = _mm_set1_epi32(2); + let r = _mm_shldv_epi32(a, b, c); + let e = _mm_set1_epi32(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shldv_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(1 << 31); + let c = _mm_set1_epi32(2); + let r = _mm_mask_shldv_epi32(a, 0, b, c); + assert_eq_m128i(r, a); + let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c); + let e = _mm_set1_epi32(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shldv_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(1 << 31); + let c = _mm_set1_epi32(2); + let r = _mm_maskz_shldv_epi32(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c); + let e = _mm_set1_epi32(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shldv_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1 << 15); + let c = _mm512_set1_epi16(2); + let r = _mm512_shldv_epi16(a, b, c); + let e = _mm512_set1_epi16(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shldv_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1 << 15); + let c = _mm512_set1_epi16(2); + let r = _mm512_mask_shldv_epi16(a, 0, b, c); + assert_eq_m512i(r, a); + let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c); + let e = _mm512_set1_epi16(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shldv_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1 << 15); + let c = _mm512_set1_epi16(2); + let r = _mm512_maskz_shldv_epi16(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c); + let e = _mm512_set1_epi16(6); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shldv_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1 << 15); + let c = _mm256_set1_epi16(2); + let r = _mm256_shldv_epi16(a, b, c); + let e = _mm256_set1_epi16(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shldv_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1 << 15); + let c = _mm256_set1_epi16(2); + let r = _mm256_mask_shldv_epi16(a, 0, b, c); + assert_eq_m256i(r, a); + let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c); + let e = _mm256_set1_epi16(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shldv_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1 << 15); + let c = _mm256_set1_epi16(2); + let r = _mm256_maskz_shldv_epi16(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c); + let e = _mm256_set1_epi16(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shldv_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1 << 15); + let c = _mm_set1_epi16(2); + let r = _mm_shldv_epi16(a, b, c); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shldv_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1 << 15); + let c = _mm_set1_epi16(2); + let r = _mm_mask_shldv_epi16(a, 0, b, c); + assert_eq_m128i(r, a); + let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shldv_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1 << 15); + let c = _mm_set1_epi16(2); + let r = _mm_maskz_shldv_epi16(0, a, b, c); + assert_eq_m128i(r, 
_mm_setzero_si128()); + let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shrdv_epi64() { + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); + let c = _mm512_set1_epi64(1); + let r = _mm512_shrdv_epi64(a, b, c); + let e = _mm512_set1_epi64(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shrdv_epi64() { + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); + let c = _mm512_set1_epi64(1); + let r = _mm512_mask_shrdv_epi64(a, 0, b, c); + assert_eq_m512i(r, a); + let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c); + let e = _mm512_set1_epi64(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shrdv_epi64() { + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); + let c = _mm512_set1_epi64(1); + let r = _mm512_maskz_shrdv_epi64(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c); + let e = _mm512_set1_epi64(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shrdv_epi64() { + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); + let c = _mm256_set1_epi64x(1); + let r = _mm256_shrdv_epi64(a, b, c); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shrdv_epi64() { + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); + let c = _mm256_set1_epi64x(1); + let r = _mm256_mask_shrdv_epi64(a, 0, b, c); + assert_eq_m256i(r, a); + let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shrdv_epi64() { + let a = _mm256_set1_epi64x(2); + let b = 
_mm256_set1_epi64x(8); + let c = _mm256_set1_epi64x(1); + let r = _mm256_maskz_shrdv_epi64(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shrdv_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); + let c = _mm_set1_epi64x(1); + let r = _mm_shrdv_epi64(a, b, c); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shrdv_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); + let c = _mm_set1_epi64x(1); + let r = _mm_mask_shrdv_epi64(a, 0, b, c); + assert_eq_m128i(r, a); + let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shrdv_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); + let c = _mm_set1_epi64x(1); + let r = _mm_maskz_shrdv_epi64(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shrdv_epi32() { + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); + let c = _mm512_set1_epi32(1); + let r = _mm512_shrdv_epi32(a, b, c); + let e = _mm512_set1_epi32(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shrdv_epi32() { + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); + let c = _mm512_set1_epi32(1); + let r = _mm512_mask_shrdv_epi32(a, 0, b, c); + assert_eq_m512i(r, a); + let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c); + let e = _mm512_set1_epi32(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn 
test_mm512_maskz_shrdv_epi32() { + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); + let c = _mm512_set1_epi32(1); + let r = _mm512_maskz_shrdv_epi32(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c); + let e = _mm512_set1_epi32(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shrdv_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); + let c = _mm256_set1_epi32(1); + let r = _mm256_shrdv_epi32(a, b, c); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shrdv_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); + let c = _mm256_set1_epi32(1); + let r = _mm256_mask_shrdv_epi32(a, 0, b, c); + assert_eq_m256i(r, a); + let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shrdv_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); + let c = _mm256_set1_epi32(1); + let r = _mm256_maskz_shrdv_epi32(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shrdv_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); + let c = _mm_set1_epi32(1); + let r = _mm_shrdv_epi32(a, b, c); + let e = _mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shrdv_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); + let c = _mm_set1_epi32(1); + let r = _mm_mask_shrdv_epi32(a, 0, b, c); + assert_eq_m128i(r, a); + let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c); + let e = 
_mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shrdv_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); + let c = _mm_set1_epi32(1); + let r = _mm_maskz_shrdv_epi32(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c); + let e = _mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shrdv_epi16() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); + let c = _mm512_set1_epi16(1); + let r = _mm512_shrdv_epi16(a, b, c); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shrdv_epi16() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); + let c = _mm512_set1_epi16(1); + let r = _mm512_mask_shrdv_epi16(a, 0, b, c); + assert_eq_m512i(r, a); + let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shrdv_epi16() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); + let c = _mm512_set1_epi16(1); + let r = _mm512_maskz_shrdv_epi16(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shrdv_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); + let c = _mm256_set1_epi16(1); + let r = _mm256_shrdv_epi16(a, b, c); + let e = _mm256_set1_epi16(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shrdv_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); + let c = _mm256_set1_epi16(1); + 
let r = _mm256_mask_shrdv_epi16(a, 0, b, c); + assert_eq_m256i(r, a); + let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c); + let e = _mm256_set1_epi16(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shrdv_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); + let c = _mm256_set1_epi16(1); + let r = _mm256_maskz_shrdv_epi16(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c); + let e = _mm256_set1_epi16(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shrdv_epi16() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); + let c = _mm_set1_epi16(1); + let r = _mm_shrdv_epi16(a, b, c); + let e = _mm_set1_epi16(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shrdv_epi16() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); + let c = _mm_set1_epi16(1); + let r = _mm_mask_shrdv_epi16(a, 0, b, c); + assert_eq_m128i(r, a); + let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c); + let e = _mm_set1_epi16(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shrdv_epi16() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); + let c = _mm_set1_epi16(1); + let r = _mm_maskz_shrdv_epi16(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c); + let e = _mm_set1_epi16(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shldi_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(1 << 63); + let r = _mm512_shldi_epi64::<2>(a, b); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shldi_epi64() { + let a = _mm512_set1_epi64(1); + let b = 
_mm512_set1_epi64(1 << 63); + let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shldi_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(1 << 63); + let r = _mm512_maskz_shldi_epi64::<2>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shldi_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(1 << 63); + let r = _mm256_shldi_epi64::<2>(a, b); + let e = _mm256_set1_epi64x(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shldi_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(1 << 63); + let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shldi_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(1 << 63); + let r = _mm256_maskz_shldi_epi64::<2>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b); + let e = _mm256_set1_epi64x(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shldi_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(1 << 63); + let r = _mm_shldi_epi64::<2>(a, b); + let e = _mm_set1_epi64x(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shldi_epi64() { + let a = _mm_set1_epi64x(1); + let b = 
_mm_set1_epi64x(1 << 63); + let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shldi_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(1 << 63); + let r = _mm_maskz_shldi_epi64::<2>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b); + let e = _mm_set1_epi64x(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shldi_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(1 << 31); + let r = _mm512_shldi_epi32::<2>(a, b); + let e = _mm512_set1_epi32(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shldi_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(1 << 31); + let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shldi_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_set1_epi32(1 << 31); + let r = _mm512_maskz_shldi_epi32::<2>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shldi_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(1 << 31); + let r = _mm256_shldi_epi32::<2>(a, b); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shldi_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(1 << 
31); + let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shldi_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set1_epi32(1 << 31); + let r = _mm256_maskz_shldi_epi32::<2>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b); + let e = _mm256_set1_epi32(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shldi_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(1 << 31); + let r = _mm_shldi_epi32::<2>(a, b); + let e = _mm_set1_epi32(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shldi_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(1 << 31); + let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b); + let e = _mm_set1_epi32(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shldi_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set1_epi32(1 << 31); + let r = _mm_maskz_shldi_epi32::<2>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b); + let e = _mm_set1_epi32(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shldi_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1 << 15); + let r = _mm512_shldi_epi16::<2>(a, b); + let e = _mm512_set1_epi16(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shldi_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1 << 15); + let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b); + 
assert_eq_m512i(r, a); + let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b); + let e = _mm512_set1_epi16(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shldi_epi16() { + let a = _mm512_set1_epi16(1); + let b = _mm512_set1_epi16(1 << 15); + let r = _mm512_maskz_shldi_epi16::<2>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b); + let e = _mm512_set1_epi16(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shldi_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1 << 15); + let r = _mm256_shldi_epi16::<2>(a, b); + let e = _mm256_set1_epi16(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shldi_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1 << 15); + let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b); + let e = _mm256_set1_epi16(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shldi_epi16() { + let a = _mm256_set1_epi16(1); + let b = _mm256_set1_epi16(1 << 15); + let r = _mm256_maskz_shldi_epi16::<2>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b); + let e = _mm256_set1_epi16(6); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shldi_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1 << 15); + let r = _mm_shldi_epi16::<2>(a, b); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shldi_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1 << 15); + 
let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shldi_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(1 << 15); + let r = _mm_maskz_shldi_epi16::<2>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b); + let e = _mm_set1_epi16(6); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shrdi_epi64() { + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); + let r = _mm512_shrdi_epi64::<1>(a, b); + let e = _mm512_set1_epi64(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shrdi_epi64() { + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); + let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b); + let e = _mm512_set1_epi64(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shrdi_epi64() { + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); + let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b); + let e = _mm512_set1_epi64(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shrdi_epi64() { + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); + let r = _mm256_shrdi_epi64::<1>(a, b); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shrdi_epi64() { + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); + let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b); + assert_eq_m256i(r, 
a); + let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shrdi_epi64() { + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); + let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b); + let e = _mm256_set1_epi64x(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shrdi_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); + let r = _mm_shrdi_epi64::<1>(a, b); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shrdi_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); + let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shrdi_epi64() { + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); + let r = _mm_maskz_shrdi_epi64::<1>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b); + let e = _mm_set1_epi64x(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shrdi_epi32() { + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); + let r = _mm512_shrdi_epi32::<1>(a, b); + let e = _mm512_set1_epi32(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shrdi_epi32() { + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); + let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b); + let e = 
_mm512_set1_epi32(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shrdi_epi32() { + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); + let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shrdi_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); + let r = _mm256_shrdi_epi32::<1>(a, b); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shrdi_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); + let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shrdi_epi32() { + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); + let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b); + let e = _mm256_set1_epi32(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shrdi_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); + let r = _mm_shrdi_epi32::<1>(a, b); + let e = _mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shrdi_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); + let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b); + let e = _mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shrdi_epi32() { + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); + let r = _mm_maskz_shrdi_epi32::<1>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b); + let e = _mm_set1_epi32(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_shrdi_epi16() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); + let r = _mm512_shrdi_epi16::<1>(a, b); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_shrdi_epi16() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); + let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_shrdi_epi16() { + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); + let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b); + let e = _mm512_set1_epi16(1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_shrdi_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); + let r = _mm256_shrdi_epi16::<1>(a, b); + let e = _mm256_set1_epi16(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_shrdi_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); + let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b); + let e = _mm256_set1_epi16(1); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_shrdi_epi16() { + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); + let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b); + let e = _mm256_set1_epi16(1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_shrdi_epi16() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); + let r = _mm_shrdi_epi16::<1>(a, b); + let e = _mm_set1_epi16(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_shrdi_epi16() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); + let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b); + let e = _mm_set1_epi16(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_shrdi_epi16() { + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); + let r = _mm_maskz_shrdi_epi16::<1>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b); + let e = _mm_set1_epi16(1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_expandloadu_epi16() { + let src = _mm512_set1_epi16(42); + let a = &[ + 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010_11110000_00001111; + let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p)); + let e = _mm512_set_epi16( + 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42, + 42, 42, 42, 42, 42, 4, 3, 2, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_expandloadu_epi16() { + 
let a = &[ + 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010_11110000_00001111; + let r = _mm512_maskz_expandloadu_epi16(m, black_box(p)); + let e = _mm512_set_epi16( + 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 3, 2, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_expandloadu_epi16() { + let src = _mm256_set1_epi16(42); + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p)); + let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_expandloadu_epi16() { + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm256_maskz_expandloadu_epi16(m, black_box(p)); + let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_expandloadu_epi16() { + let src = _mm_set1_epi16(42); + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_mask_expandloadu_epi16(src, m, black_box(p)); + let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_expandloadu_epi16() { + let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; + let p = a.as_ptr(); + let m = 0b11101000; + let r = _mm_maskz_expandloadu_epi16(m, black_box(p)); + let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn 
test_mm512_mask_expandloadu_epi8() { + let src = _mm512_set1_epi8(42); + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101; + let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p)); + let e = _mm512_set_epi8( + 32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42, + 42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42, + 42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_maskz_expandloadu_epi8() { + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101; + let r = _mm512_maskz_expandloadu_epi8(m, black_box(p)); + let e = _mm512_set_epi8( + 32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0, + 0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, + 7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_expandloadu_epi8() { + let src = _mm256_set1_epi8(42); + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010_11110000_00001111; + let r = 
_mm256_mask_expandloadu_epi8(src, m, black_box(p)); + let e = _mm256_set_epi8( + 16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42, + 42, 42, 42, 42, 42, 4, 3, 2, 1, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_maskz_expandloadu_epi8() { + let a = &[ + 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let p = a.as_ptr(); + let m = 0b11101000_11001010_11110000_00001111; + let r = _mm256_maskz_expandloadu_epi8(m, black_box(p)); + let e = _mm256_set_epi8( + 16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 3, 2, 1, + ); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_expandloadu_epi8() { + let src = _mm_set1_epi8(42); + let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm_mask_expandloadu_epi8(src, m, black_box(p)); + let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_maskz_expandloadu_epi8() { + let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let p = a.as_ptr(); + let m = 0b11101000_11001010; + let r = _mm_maskz_expandloadu_epi8(m, black_box(p)); + let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_compressstoreu_epi16() { + let a = _mm512_set_epi16( + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ); + let mut r = [0_i16; 32]; + _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i16; 32]); + _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 
0b11110000_11001010_11111111_00000000, a); + assert_eq!( + &r, + &[ + 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0 + ] + ); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm256_mask_compressstoreu_epi16() { + let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let mut r = [0_i16; 16]; + _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i16; 16]); + _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a); + assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_compressstoreu_epi16() { + let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); + let mut r = [0_i16; 8]; + _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i16; 8]); + _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a); + assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]); + } + + #[simd_test(enable = "avx512vbmi2")] + unsafe fn test_mm512_mask_compressstoreu_epi8() { + let a = _mm512_set_epi8( + 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, + 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ); + let mut r = [0_i8; 64]; + _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i8; 64]); + _mm512_mask_compressstoreu_epi8( + r.as_mut_ptr(), + 0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111, + a, + ); + assert_eq!( + &r, + &[ + 1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46, + 47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + ] + ); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn 
test_mm256_mask_compressstoreu_epi8() { + let a = _mm256_set_epi8( + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ); + let mut r = [0_i8; 32]; + _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i8; 32]); + _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a); + assert_eq!( + &r, + &[ + 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0 + ] + ); + } + + #[simd_test(enable = "avx512vbmi2,avx512vl")] + unsafe fn test_mm_mask_compressstoreu_epi8() { + let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let mut r = [0_i8; 16]; + _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); + assert_eq!(&r, &[0_i8; 16]); + _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a); + assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vnni.rs b/library/stdarch/crates/core_arch/src/x86/avx512vnni.rs new file mode 100644 index 000000000000..93ea01cbb45b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512vnni.rs @@ -0,0 +1,1699 @@ +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssd))] +pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpwssds))] +pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusd))] +pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212) +#[inline] +#[target_feature(enable = "avx512vnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695) +#[inline] +#[target_feature(enable = "avxvnni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206) +#[inline] +#[target_feature(enable = "avx512vnni,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpdpbusds))] +pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbssd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbssd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbssds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbssds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbsud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbsud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbsuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbsuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbuud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbuud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbuuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit +/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711) +#[inline] +#[target_feature(enable = "avxvnniint8")] +#[cfg_attr(test, assert_instr(vpdpbuuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwsud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwsud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwsuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwsuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwusd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwusd))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwusds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwusds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwuud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwuud))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwuuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit +/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding +/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749) +#[inline] +#[target_feature(enable = "avxvnniint16")] +#[cfg_attr(test, assert_instr(vpdpwuuds))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.vpdpwssd.512"] + fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.vpdpwssd.256"] + fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.vpdpwssd.128"] + fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx512.vpdpwssds.512"] + fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.vpdpwssds.256"] + fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.vpdpwssds.128"] + fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + + #[link_name = 
"llvm.x86.avx512.vpdpbusd.512"] + fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.vpdpbusd.256"] + fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.vpdpbusd.128"] + fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx512.vpdpbusds.512"] + fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.vpdpbusds.256"] + fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx512.vpdpbusds.128"] + fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.avx2.vpdpbssd.128"] + fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpbssd.256"] + fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpbssds.128"] + fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpbssds.256"] + fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpbsud.128"] + fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpbsud.256"] + fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpbsuds.128"] + fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpbsuds.256"] + fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpbuud.128"] + fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpbuud.256"] + fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpbuuds.128"] + fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpbuuds.256"] + fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpwsud.128"] + fn 
vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpwsud.256"] + fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpwsuds.128"] + fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpwsuds.256"] + fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpwusd.128"] + fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpwusd.256"] + fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpwusds.128"] + fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpwusds.256"] + fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpwuud.128"] + fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpwuud.256"] + fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; + + #[link_name = "llvm.x86.avx2.vpdpwuuds.128"] + fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.vpdpwuuds.256"] + fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; +} + +#[cfg(test)] +mod tests { + + use crate::core_arch::x86::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_dpwssd_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 16 | 1 << 0); + let b = _mm512_set1_epi32(1 << 16 | 1 << 0); + let r = _mm512_dpwssd_epi32(src, a, b); + let e = _mm512_set1_epi32(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_mask_dpwssd_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 16 | 1 << 0); + let b = _mm512_set1_epi32(1 << 16 | 1 << 0); + let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_dpwssd_epi32(src, 
0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_maskz_dpwssd_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 16 | 1 << 0); + let b = _mm512_set1_epi32(1 << 16 | 1 << 0); + let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b); + let e = _mm512_set1_epi32(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm256_dpwssd_avx_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwssd_avx_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_dpwssd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwssd_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_mask_dpwssd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_maskz_dpwssd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, 
b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm_dpwssd_avx_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwssd_avx_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_dpwssd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwssd_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_mask_dpwssd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_maskz_dpwssd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_dpwssds_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 16 | 1 << 0); + let b = _mm512_set1_epi32(1 << 16 | 1 << 0); + let r = _mm512_dpwssds_epi32(src, a, b); + let e = _mm512_set1_epi32(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_mask_dpwssds_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 16 | 1 << 0); + let b = 
_mm512_set1_epi32(1 << 16 | 1 << 0); + let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_maskz_dpwssds_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 16 | 1 << 0); + let b = _mm512_set1_epi32(1 << 16 | 1 << 0); + let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b); + let e = _mm512_set1_epi32(3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm256_dpwssds_avx_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwssds_avx_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_dpwssds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwssds_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_mask_dpwssds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_maskz_dpwssds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = 
_mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm_dpwssds_avx_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwssds_avx_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_dpwssds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwssds_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_mask_dpwssds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_maskz_dpwssds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_dpbusd_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm512_dpbusd_epi32(src, a, b); + 
let e = _mm512_set1_epi32(5); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_mask_dpbusd_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(5); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_maskz_dpbusd_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b); + let e = _mm512_set1_epi32(5); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm256_dpbusd_avx_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbusd_avx_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_dpbusd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbusd_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_mask_dpbusd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 
0); + let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_maskz_dpbusd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm_dpbusd_avx_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbusd_avx_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_dpbusd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbusd_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_mask_dpbusd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_maskz_dpbusd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 
<< 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_dpbusds_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm512_dpbusds_epi32(src, a, b); + let e = _mm512_set1_epi32(5); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_mask_dpbusds_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b); + let e = _mm512_set1_epi32(5); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512vnni")] + unsafe fn test_mm512_maskz_dpbusds_epi32() { + let src = _mm512_set1_epi32(1); + let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b); + let e = _mm512_set1_epi32(5); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm256_dpbusds_avx_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbusds_avx_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512vnni,avx512vl")] + unsafe fn test_mm256_dpbusds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbusds_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_mask_dpbusds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm256_maskz_dpbusds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnni")] + unsafe fn test_mm_dpbusds_avx_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbusds_avx_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_dpbusds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbusds_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512vnni,avx512vl")] + unsafe fn test_mm_mask_dpbusds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512vnni,avx512vl")] + unsafe fn test_mm_maskz_dpbusds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm_dpbssd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbssd_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm256_dpbssd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbssd_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm_dpbssds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbssds_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm256_dpbssds_epi32() { + let src = 
_mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbssds_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm_dpbsud_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbsud_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm256_dpbsud_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbsud_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm_dpbsuds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbsuds_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm256_dpbsuds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbsuds_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm_dpbuud_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbuud_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] 
+ unsafe fn test_mm256_dpbuud_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbuud_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm_dpbuuds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm_dpbuuds_epi32(src, a, b); + let e = _mm_set1_epi32(5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint8")] + unsafe fn test_mm256_dpbuuds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0); + let r = _mm256_dpbuuds_epi32(src, a, b); + let e = _mm256_set1_epi32(5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm_dpwsud_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwsud_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm256_dpwsud_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwsud_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm_dpwsuds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwsuds_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm256_dpwsuds_epi32() { + let src = 
_mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwsuds_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm_dpwusd_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwusd_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm256_dpwusd_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwusd_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm_dpwusds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwusds_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm256_dpwusds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwusds_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm_dpwuud_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwuud_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm256_dpwuud_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwuud_epi32(src, a, b); + let e = 
_mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm_dpwuuds_epi32() { + let src = _mm_set1_epi32(1); + let a = _mm_set1_epi32(1 << 16 | 1 << 0); + let b = _mm_set1_epi32(1 << 16 | 1 << 0); + let r = _mm_dpwuuds_epi32(src, a, b); + let e = _mm_set1_epi32(3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avxvnniint16")] + unsafe fn test_mm256_dpwuuds_epi32() { + let src = _mm256_set1_epi32(1); + let a = _mm256_set1_epi32(1 << 16 | 1 << 0); + let b = _mm256_set1_epi32(1 << 16 | 1 << 0); + let r = _mm256_dpwuuds_epi32(src, a, b); + let e = _mm256_set1_epi32(3); + assert_eq_m256i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs b/library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs new file mode 100644 index 000000000000..e47a14b24dfc --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs @@ -0,0 +1,573 @@ +//! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ) +//! +//! The intrinsics here correspond to those in the `immintrin.h` C header. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::simd::*; +use crate::core_arch::x86::__m128i; +use crate::core_arch::x86::__m256i; +use crate::core_arch::x86::__m512i; +use crate::core_arch::x86::__mmask8; +use crate::core_arch::x86::__mmask16; +use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask}; +use crate::mem::transmute; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i32x16())) } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x16()), + i32x16::ZERO, + )) + } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x16()), + src.as_i32x16(), + )) + } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i32x8())) } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x8()), + i32x8::ZERO, + )) + } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x8()), + src.as_i32x8(), + )) + } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i32x4())) } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x4()), + i32x4::ZERO, + )) + } +} + +/// For each packed 32-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi32) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntd))] +pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x4()), + src.as_i32x4(), + )) + } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i64x8())) } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x8()), + i64x8::ZERO, + )) + } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x8()), + src.as_i64x8(), + )) + } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i64x4())) } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x4()), + i64x4::ZERO, + )) + } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x4()), + src.as_i64x4(), + )) + } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i64x2())) } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x2()), + i64x2::ZERO, + )) + } +} + +/// For each packed 64-bit integer maps the value to the number of logical 1 bits. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi64) +#[inline] +#[target_feature(enable = "avx512vpopcntdq,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpopcntq))] +pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x2()), + src.as_i64x2(), + )) + } +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_popcnt_epi32() { + let test_data = _mm512_set_epi32( + 0, + 1, + -1, + 2, + 7, + 0xFF_FE, + 0x7F_FF_FF_FF, + -100, + 0x40_00_00_00, + 103, + 371, + 552, + 432_948, + 818_826_998, + 255, + 256, + ); + let actual_result = _mm512_popcnt_epi32(test_data); + let reference_result = + _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_mask_popcnt_epi32() { + let test_data = _mm512_set_epi32( + 0, + 1, + -1, + 2, + 7, + 0xFF_FE, + 0x7F_FF_FF_FF, + -100, + 0x40_00_00_00, + 103, + 371, + 552, + 432_948, + 818_826_998, + 255, + 256, + ); + let mask = 0xFF_00; + let actual_result = _mm512_mask_popcnt_epi32(test_data, mask, test_data); + let reference_result = _mm512_set_epi32( + 0, + 1, + 32, + 1, + 3, + 15, + 31, + 28, + 0x40_00_00_00, + 103, + 371, + 552, + 432_948, + 818_826_998, + 255, + 256, + ); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_maskz_popcnt_epi32() { + let test_data = _mm512_set_epi32( + 0, + 1, + -1, + 2, + 7, + 0xFF_FE, + 0x7F_FF_FF_FF, + -100, + 0x40_00_00_00, + 103, + 371, + 552, + 432_948, + 818_826_998, + 255, + 256, + ); + let mask = 0xFF_00; + let actual_result = 
_mm512_maskz_popcnt_epi32(mask, test_data); + let reference_result = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi32() { + let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); + let actual_result = _mm256_popcnt_epi32(test_data); + let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm256_mask_popcnt_epi32() { + let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); + let mask = 0xF0; + let actual_result = _mm256_mask_popcnt_epi32(test_data, mask, test_data); + let reference_result = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm256_maskz_popcnt_epi32() { + let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); + let mask = 0xF0; + let actual_result = _mm256_maskz_popcnt_epi32(mask, test_data); + let reference_result = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi32() { + let test_data = _mm_set_epi32(0, 1, -1, -100); + let actual_result = _mm_popcnt_epi32(test_data); + let reference_result = _mm_set_epi32(0, 1, 32, 28); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm_mask_popcnt_epi32() { + let test_data = _mm_set_epi32(0, 1, -1, -100); + let mask = 0xE; + let actual_result = _mm_mask_popcnt_epi32(test_data, mask, test_data); + let reference_result = _mm_set_epi32(0, 1, 32, -100); + 
assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm_maskz_popcnt_epi32() { + let test_data = _mm_set_epi32(0, 1, -1, -100); + let mask = 0xE; + let actual_result = _mm_maskz_popcnt_epi32(mask, test_data); + let reference_result = _mm_set_epi32(0, 1, 32, 0); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_popcnt_epi64() { + let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); + let actual_result = _mm512_popcnt_epi64(test_data); + let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_mask_popcnt_epi64() { + let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); + let mask = 0xF0; + let actual_result = _mm512_mask_popcnt_epi64(test_data, mask, test_data); + let reference_result = + _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_maskz_popcnt_epi64() { + let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); + let mask = 0xF0; + let actual_result = _mm512_maskz_popcnt_epi64(mask, test_data); + let reference_result = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0); + assert_eq_m512i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm256_popcnt_epi64() { + let test_data = _mm256_set_epi64x(0, 1, -1, -100); + let actual_result = _mm256_popcnt_epi64(test_data); + let reference_result = _mm256_set_epi64x(0, 1, 64, 60); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn 
test_mm256_mask_popcnt_epi64() { + let test_data = _mm256_set_epi64x(0, 1, -1, -100); + let mask = 0xE; + let actual_result = _mm256_mask_popcnt_epi64(test_data, mask, test_data); + let reference_result = _mm256_set_epi64x(0, 1, 64, -100); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm256_maskz_popcnt_epi64() { + let test_data = _mm256_set_epi64x(0, 1, -1, -100); + let mask = 0xE; + let actual_result = _mm256_maskz_popcnt_epi64(mask, test_data); + let reference_result = _mm256_set_epi64x(0, 1, 64, 0); + assert_eq_m256i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm_popcnt_epi64() { + let test_data = _mm_set_epi64x(0, 1); + let actual_result = _mm_popcnt_epi64(test_data); + let reference_result = _mm_set_epi64x(0, 1); + assert_eq_m128i(actual_result, reference_result); + let test_data = _mm_set_epi64x(-1, -100); + let actual_result = _mm_popcnt_epi64(test_data); + let reference_result = _mm_set_epi64x(64, 60); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm_mask_popcnt_epi64() { + let test_data = _mm_set_epi64x(0, -100); + let mask = 0x2; + let actual_result = _mm_mask_popcnt_epi64(test_data, mask, test_data); + let reference_result = _mm_set_epi64x(0, -100); + assert_eq_m128i(actual_result, reference_result); + let test_data = _mm_set_epi64x(-1, 1); + let mask = 0x2; + let actual_result = _mm_mask_popcnt_epi64(test_data, mask, test_data); + let reference_result = _mm_set_epi64x(64, 1); + assert_eq_m128i(actual_result, reference_result); + } + + #[simd_test(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm_maskz_popcnt_epi64() { + let test_data = _mm_set_epi64x(0, 1); + let mask = 0x2; + let actual_result = _mm_maskz_popcnt_epi64(mask, test_data); + let reference_result = _mm_set_epi64x(0, 0); + assert_eq_m128i(actual_result, 
reference_result); + let test_data = _mm_set_epi64x(-1, -100); + let mask = 0x2; + let actual_result = _mm_maskz_popcnt_epi64(mask, test_data); + let reference_result = _mm_set_epi64x(64, 0); + assert_eq_m128i(actual_result, reference_result); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/avxneconvert.rs b/library/stdarch/crates/core_arch/src/x86/avxneconvert.rs new file mode 100644 index 000000000000..b92ec823ec64 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/avxneconvert.rs @@ -0,0 +1,371 @@ +use crate::arch::asm; +use crate::core_arch::x86::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location +/// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) +/// floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnebf16_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vbcstnebf162ps))] +#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] +pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 { + bcstnebf162ps_128(a) +} + +/// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location +/// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) floating-point +/// elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnebf16_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vbcstnebf162ps))] +#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] +pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 { + bcstnebf162ps_256(a) +} + +/// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting +/// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision +/// (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnesh_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vbcstnesh2ps))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 { + bcstnesh2ps_128(a) +} + +/// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting +/// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision +/// (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnesh_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vbcstnesh2ps))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 { + bcstnesh2ps_256(a) +} + +/// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneebf16_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneebf162ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 { + transmute(cvtneebf162ps_128(a)) +} + +/// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneebf16_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneebf162ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 { + transmute(cvtneebf162ps_256(a)) +} + +/// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneeph_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneeph2ps))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 { + transmute(cvtneeph2ps_128(a)) +} + +/// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneeph_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneeph2ps))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 { + transmute(cvtneeph2ps_256(a)) +} + +/// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneobf16_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneobf162ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 { + transmute(cvtneobf162ps_128(a)) +} + +/// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneobf16_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneobf162ps))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 { + transmute(cvtneobf162ps_256(a)) +} + +/// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneoph_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneoph2ps))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 { + transmute(cvtneoph2ps_128(a)) +} + +/// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at +/// location a to single precision (32-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneoph_ps) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneoph2ps))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 { + transmute(cvtneoph2ps_256(a)) +} + +/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point +/// elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_avx_pbh) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneps2bf16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "{{vex}}vcvtneps2bf16 {dst},{src}", + dst = lateout(xmm_reg) dst, + src = in(xmm_reg) a, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } +} + +/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point +/// elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneps_avx_pbh) +#[inline] +#[target_feature(enable = "avxneconvert")] +#[cfg_attr(test, assert_instr(vcvtneps2bf16))] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "{{vex}}vcvtneps2bf16 {dst},{src}", + dst = lateout(xmm_reg) dst, + src = in(ymm_reg) a, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.vbcstnebf162ps128"] + fn bcstnebf162ps_128(a: *const bf16) -> __m128; + #[link_name = "llvm.x86.vbcstnebf162ps256"] + fn bcstnebf162ps_256(a: *const bf16) -> __m256; + #[link_name = "llvm.x86.vbcstnesh2ps128"] + fn bcstnesh2ps_128(a: *const f16) -> __m128; + #[link_name = "llvm.x86.vbcstnesh2ps256"] + fn bcstnesh2ps_256(a: *const f16) -> __m256; + + #[link_name = "llvm.x86.vcvtneebf162ps128"] + fn cvtneebf162ps_128(a: *const __m128bh) -> __m128; + #[link_name = "llvm.x86.vcvtneebf162ps256"] + fn cvtneebf162ps_256(a: *const __m256bh) -> __m256; + #[link_name = "llvm.x86.vcvtneeph2ps128"] + fn cvtneeph2ps_128(a: *const __m128h) -> __m128; + #[link_name = "llvm.x86.vcvtneeph2ps256"] + fn cvtneeph2ps_256(a: *const __m256h) -> __m256; + + #[link_name = "llvm.x86.vcvtneobf162ps128"] + fn cvtneobf162ps_128(a: *const __m128bh) -> __m128; + #[link_name = "llvm.x86.vcvtneobf162ps256"] + fn cvtneobf162ps_256(a: *const __m256bh) -> __m256; + #[link_name = "llvm.x86.vcvtneoph2ps128"] + fn cvtneoph2ps_128(a: *const __m128h) -> __m128; + #[link_name = "llvm.x86.vcvtneoph2ps256"] + fn cvtneoph2ps_256(a: *const __m256h) -> __m256; +} + +#[cfg(test)] +mod tests { + use crate::core_arch::simd::{u16x4, u16x8}; + use crate::core_arch::x86::*; + use crate::mem::transmute_copy; + use std::ptr::addr_of; + use stdarch_test::simd_test; + + const BF16_ONE: u16 = 
0b0_01111111_0000000; + const BF16_TWO: u16 = 0b0_10000000_0000000; + const BF16_THREE: u16 = 0b0_10000000_1000000; + const BF16_FOUR: u16 = 0b0_10000001_0000000; + const BF16_FIVE: u16 = 0b0_10000001_0100000; + const BF16_SIX: u16 = 0b0_10000001_1000000; + const BF16_SEVEN: u16 = 0b0_10000001_1100000; + const BF16_EIGHT: u16 = 0b0_10000010_0000000; + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_bcstnebf16_ps() { + let a = bf16::from_bits(BF16_ONE); + let r = _mm_bcstnebf16_ps(addr_of!(a)); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_bcstnebf16_ps() { + let a = bf16::from_bits(BF16_ONE); + let r = _mm256_bcstnebf16_ps(addr_of!(a)); + let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_bcstnesh_ps() { + let a = 1.0_f16; + let r = _mm_bcstnesh_ps(addr_of!(a)); + let e = _mm_set_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_bcstnesh_ps() { + let a = 1.0_f16; + let r = _mm256_bcstnesh_ps(addr_of!(a)); + let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_cvtneebf16_ps() { + let a = __m128bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let r = _mm_cvtneebf16_ps(addr_of!(a)); + let e = _mm_setr_ps(1., 3., 5., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_cvtneebf16_ps() { + let a = __m256bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let r = _mm256_cvtneebf16_ps(addr_of!(a)); + let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.); + assert_eq_m256(r, e); + } + + 
#[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_cvtneeph_ps() { + let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + let r = _mm_cvtneeph_ps(addr_of!(a)); + let e = _mm_setr_ps(1., 3., 5., 7.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_cvtneeph_ps() { + let a = __m256h([ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ]); + let r = _mm256_cvtneeph_ps(addr_of!(a)); + let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_cvtneobf16_ps() { + let a = __m128bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let r = _mm_cvtneobf16_ps(addr_of!(a)); + let e = _mm_setr_ps(2., 4., 6., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_cvtneobf16_ps() { + let a = __m256bh([ + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ]); + let r = _mm256_cvtneobf16_ps(addr_of!(a)); + let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_cvtneoph_ps() { + let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + let r = _mm_cvtneoph_ps(addr_of!(a)); + let e = _mm_setr_ps(2., 4., 6., 8.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_cvtneoph_ps() { + let a = __m256h([ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ]); + let r = _mm256_cvtneoph_ps(addr_of!(a)); + let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm_cvtneps_avx_pbh() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let 
r: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(a)); + let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR); + assert_eq!(r, e); + } + + #[simd_test(enable = "avxneconvert")] + unsafe fn test_mm256_cvtneps_avx_pbh() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r: u16x8 = transmute(_mm256_cvtneps_avx_pbh(a)); + let e = u16x8::new( + BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT, + ); + assert_eq!(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs new file mode 100644 index 000000000000..eb7242944abc --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -0,0 +1,198 @@ +//! Bit Manipulation Instruction (BMI) Set 1.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Extracts bits in range [`start`, `start` + `length`) from `a` into +/// the least significant bits of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(bextr))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { + _bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32)) +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. 
+/// +/// Bits `[7,0]` of `control` specify the index to the first bit in the range +/// to be extracted, and bits `[15,8]` specify the length of the range. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr2_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(bextr))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _bextr2_u32(a: u32, control: u32) -> u32 { + unsafe { x86_bmi_bextr_32(a, control) } +} + +/// Bitwise logical `AND` of inverted `a` with `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_andn_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(andn))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _andn_u32(a: u32, b: u32) -> u32 { + !a & b +} + +/// Extracts lowest set isolated bit. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsi_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(blsi))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _blsi_u32(x: u32) -> u32 { + x & x.wrapping_neg() +} + +/// Gets mask up to lowest set bit. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsmsk_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(blsmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _blsmsk_u32(x: u32) -> u32 { + x ^ (x.wrapping_sub(1_u32)) +} + +/// Resets the lowest set bit of `x`. +/// +/// If `x` is `0`, sets CF. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsr_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(blsr))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _blsr_u32(x: u32) -> u32 { + x & (x.wrapping_sub(1)) +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is `0`, it returns its size in bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tzcnt_u16) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub fn _tzcnt_u16(x: u16) -> u16 { + x.trailing_zeros() as u16 +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is `0`, it returns its size in bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tzcnt_u32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _tzcnt_u32(x: u32) -> u32 { + x.trailing_zeros() +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is `0`, it returns its size in bits. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_tzcnt_32) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_tzcnt_32(x: u32) -> i32 { + x.trailing_zeros() as i32 +} + +unsafe extern "C" { + #[link_name = "llvm.x86.bmi.bextr.32"] + fn x86_bmi_bextr_32(x: u32, y: u32) -> u32; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "bmi1")] + unsafe fn test_bextr_u32() { + let r = _bextr_u32(0b0101_0000u32, 4, 4); + assert_eq!(r, 0b0000_0101u32); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_andn_u32() { + assert_eq!(_andn_u32(0, 0), 0); + assert_eq!(_andn_u32(0, 1), 1); + assert_eq!(_andn_u32(1, 0), 0); + assert_eq!(_andn_u32(1, 1), 0); + + let r = _andn_u32(0b0000_0000u32, 0b0000_0000u32); + assert_eq!(r, 0b0000_0000u32); + + let r = _andn_u32(0b0000_0000u32, 0b1111_1111u32); + assert_eq!(r, 0b1111_1111u32); + + let r = _andn_u32(0b1111_1111u32, 0b0000_0000u32); + assert_eq!(r, 0b0000_0000u32); + + let r = _andn_u32(0b1111_1111u32, 0b1111_1111u32); + assert_eq!(r, 0b0000_0000u32); + + let r = _andn_u32(0b0100_0000u32, 0b0101_1101u32); + assert_eq!(r, 0b0001_1101u32); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsi_u32() { + assert_eq!(_blsi_u32(0b1101_0000u32), 0b0001_0000u32); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsmsk_u32() { + let r = _blsmsk_u32(0b0011_0000u32); + assert_eq!(r, 0b0001_1111u32); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsr_u32() { + // TODO: test the behavior when the input is `0`. 
+ let r = _blsr_u32(0b0011_0000u32); + assert_eq!(r, 0b0010_0000u32); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_tzcnt_u16() { + assert_eq!(_tzcnt_u16(0b0000_0001u16), 0u16); + assert_eq!(_tzcnt_u16(0b0000_0000u16), 16u16); + assert_eq!(_tzcnt_u16(0b1001_0000u16), 4u16); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_tzcnt_u32() { + assert_eq!(_tzcnt_u32(0b0000_0001u32), 0u32); + assert_eq!(_tzcnt_u32(0b0000_0000u32), 32u32); + assert_eq!(_tzcnt_u32(0b1001_0000u32), 4u32); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/bmi2.rs b/library/stdarch/crates/core_arch/src/x86/bmi2.rs new file mode 100644 index 000000000000..83cf650923f7 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/bmi2.rs @@ -0,0 +1,133 @@ +//! Bit Manipulation Instruction (BMI) Set 2.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Unsigned multiply without affecting flags. +/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u32) +#[inline] +// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232 +#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))] +#[cfg_attr(all(test, target_arch = "x86"), assert_instr(mul))] +#[target_feature(enable = "bmi2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { + let result: u64 = (a as u64) * (b as u64); + *hi = (result >> 32) as u32; + result as u32 +} + +/// Zeroes higher bits of `a` >= `index`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(bzhi))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _bzhi_u32(a: u32, index: u32) -> u32 { + unsafe { x86_bmi2_bzhi_32(a, index) } +} + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(pdep))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _pdep_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pdep_32(a, mask) } +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(pext))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _pext_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pext_32(a, mask) } +} + +unsafe extern "C" { + #[link_name = "llvm.x86.bmi.bzhi.32"] + fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32; + #[link_name = "llvm.x86.bmi.pdep.32"] + fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32; + #[link_name = "llvm.x86.bmi.pext.32"] + fn x86_bmi2_pext_32(x: u32, y: u32) -> u32; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "bmi2")] + unsafe fn test_pext_u32() { + let n = 0b1011_1110_1001_0011u32; + + let m0 = 0b0110_0011_1000_0101u32; + let s0 = 0b0000_0000_0011_0101u32; + + let m1 = 0b1110_1011_1110_1111u32; + let s1 = 0b0001_0111_0100_0011u32; + + assert_eq!(_pext_u32(n, m0), s0); + assert_eq!(_pext_u32(n, m1), s1); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_pdep_u32() { + let n = 0b1011_1110_1001_0011u32; + + let m0 = 0b0110_0011_1000_0101u32; + let s0 = 0b0000_0010_0000_0101u32; + + let m1 = 0b1110_1011_1110_1111u32; + let s1 = 0b1110_1001_0010_0011u32; + + assert_eq!(_pdep_u32(n, m0), s0); + assert_eq!(_pdep_u32(n, m1), s1); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_bzhi_u32() { + let n = 0b1111_0010u32; + let s = 0b0001_0010u32; + assert_eq!(_bzhi_u32(n, 5), s); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_mulx_u32() { + let a: u32 = 4_294_967_200; + let b: u32 = 2; + let mut hi = 0; + let lo = _mulx_u32(a, b, &mut hi); + /* + result = 8589934400 + = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64 + ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32); + assert_eq!(hi, 0b0001u32); + } +} diff --git 
a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs new file mode 100644 index 000000000000..0db9acbd0ddf --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -0,0 +1,28 @@ +//! Byte swap intrinsics. +#![allow(clippy::module_name_repetitions)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Returns an integer with the reversed byte order of x +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bswap) +#[inline] +#[cfg_attr(test, assert_instr(bswap))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _bswap(x: i32) -> i32 { + x.swap_bytes() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bswap() { + unsafe { + assert_eq!(_bswap(0x0EADBE0F), 0x0FBEAD0E); + assert_eq!(_bswap(0x00000000), 0x00000000); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/bt.rs b/library/stdarch/crates/core_arch/src/x86/bt.rs new file mode 100644 index 000000000000..06cc2833f4e6 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/bt.rs @@ -0,0 +1,147 @@ +use crate::arch::asm; +#[cfg(test)] +use stdarch_test::assert_instr; + +// x32 wants to use a 32-bit address size, but asm! defaults to using the full +// register name (e.g. rax). We have to explicitly override the placeholder to +// use the 32-bit register name in that case. +#[cfg(target_pointer_width = "32")] +macro_rules! bt { + ($inst:expr) => { + concat!($inst, " {b:e}, ({p:e})") + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! bt { + ($inst:expr) => { + concat!($inst, " {b:e}, ({p})") + }; +} + +/// Returns the bit in position `b` of the memory addressed by `p`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittest) +#[inline] +#[cfg_attr(test, assert_instr(bt))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittest(p: *const i32, b: i32) -> u8 { + let r: u8; + asm!( + bt!("btl"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(readonly, nostack, pure, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then sets the bit to `1`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittestandset) +#[inline] +#[cfg_attr(test, assert_instr(bts))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandset(p: *mut i32, b: i32) -> u8 { + let r: u8; + asm!( + bt!("btsl"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then resets that bit to `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittestandreset) +#[inline] +#[cfg_attr(test, assert_instr(btr))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandreset(p: *mut i32, b: i32) -> u8 { + let r: u8; + asm!( + bt!("btrl"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then inverts that bit. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittestandcomplement) +#[inline] +#[cfg_attr(test, assert_instr(btc))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandcomplement(p: *mut i32, b: i32) -> u8 { + let r: u8; + asm!( + bt!("btcl"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittest() { + unsafe { + let a = 0b0101_0000i32; + assert_eq!(_bittest(&a as _, 4), 1); + assert_eq!(_bittest(&a as _, 5), 0); + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittestandset() { + unsafe { + let mut a = 0b0101_0000i32; + assert_eq!(_bittestandset(&mut a as _, 4), 1); + assert_eq!(_bittestandset(&mut a as _, 4), 1); + assert_eq!(_bittestandset(&mut a as _, 5), 0); + assert_eq!(_bittestandset(&mut a as _, 5), 1); + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittestandreset() { + unsafe { + let mut a = 0b0101_0000i32; + assert_eq!(_bittestandreset(&mut a as _, 4), 1); + assert_eq!(_bittestandreset(&mut a as _, 4), 0); + assert_eq!(_bittestandreset(&mut a as _, 5), 0); + assert_eq!(_bittestandreset(&mut a as _, 5), 0); + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittestandcomplement() { + unsafe { + let mut a = 0b0101_0000i32; + assert_eq!(_bittestandcomplement(&mut a as _, 4), 1); + assert_eq!(_bittestandcomplement(&mut a as _, 4), 0); + assert_eq!(_bittestandcomplement(&mut a as _, 4), 1); + assert_eq!(_bittestandcomplement(&mut a as _, 5), 0); + assert_eq!(_bittestandcomplement(&mut a as _, 5), 1); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs new file mode 100644 index 
000000000000..0634f10a99fd --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -0,0 +1,112 @@ +//! `cpuid` intrinsics +#![allow(clippy::module_name_repetitions)] + +use crate::arch::asm; +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Result of the `cpuid` instruction. +#[allow(clippy::missing_inline_in_public_items)] +// ^^ the derived impl of Debug for CpuidResult is not #[inline] and that's OK. +#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub struct CpuidResult { + /// EAX register. + #[stable(feature = "simd_x86", since = "1.27.0")] + pub eax: u32, + /// EBX register. + #[stable(feature = "simd_x86", since = "1.27.0")] + pub ebx: u32, + /// ECX register. + #[stable(feature = "simd_x86", since = "1.27.0")] + pub ecx: u32, + /// EDX register. + #[stable(feature = "simd_x86", since = "1.27.0")] + pub edx: u32, +} + +/// Returns the result of the `cpuid` instruction for a given `leaf` (`EAX`) +/// and `sub_leaf` (`ECX`). +/// +/// The highest-supported leaf value is returned by the first tuple argument of +/// [`__get_cpuid_max(0)`](fn.__get_cpuid_max.html). For leaves containing +/// sub-leaves, the second tuple argument returns the highest-supported +/// sub-leaf value. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which +/// information using the `EAX` and `ECX` registers, and the interpretation of +/// the results returned in `EAX`, `EBX`, `ECX`, and `EDX`. +/// +/// The references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. 
+/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: https://cdrdv2-public.intel.com/671110/325383-sdm-vol-2abcd.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[inline] +#[cfg_attr(test, assert_instr(cpuid))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult { + let eax; + let ebx; + let ecx; + let edx; + + // LLVM sometimes reserves `ebx` for its internal use, so we need to use + // a scratch register for it instead. + #[cfg(target_arch = "x86")] + { + asm!( + "mov {0}, ebx", + "cpuid", + "xchg {0}, ebx", + out(reg) ebx, + inout("eax") leaf => eax, + inout("ecx") sub_leaf => ecx, + out("edx") edx, + options(nostack, preserves_flags), + ); + } + #[cfg(target_arch = "x86_64")] + { + asm!( + "mov {0:r}, rbx", + "cpuid", + "xchg {0:r}, rbx", + out(reg) ebx, + inout("eax") leaf => eax, + inout("ecx") sub_leaf => ecx, + out("edx") edx, + options(nostack, preserves_flags), + ); + } + CpuidResult { eax, ebx, ecx, edx } +} + +/// See [`__cpuid_count`](fn.__cpuid_count.html). +#[inline] +#[cfg_attr(test, assert_instr(cpuid))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { + __cpuid_count(leaf, 0) +} + +/// Returns the highest-supported `leaf` (`EAX`) and sub-leaf (`ECX`) `cpuid` +/// values. +/// +/// If `cpuid` is supported, and `leaf` is zero, then the first tuple argument +/// contains the highest `leaf` value that `cpuid` supports. For `leaf`s +/// containing sub-leafs, the second tuple argument contains the +/// highest-supported sub-leaf value. +/// +/// See also [`__cpuid`](fn.__cpuid.html) and +/// [`__cpuid_count`](fn.__cpuid_count.html). +#[inline] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) { + let CpuidResult { eax, ebx, ..
} = __cpuid(leaf); + (eax, ebx) +} diff --git a/library/stdarch/crates/core_arch/src/x86/eflags.rs b/library/stdarch/crates/core_arch/src/x86/eflags.rs new file mode 100644 index 000000000000..5ae656db3876 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/eflags.rs @@ -0,0 +1,86 @@ +//! `i386` intrinsics + +use crate::arch::asm; + +/// Reads EFLAGS. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__readeflags) +#[cfg(target_arch = "x86")] +#[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.29.0", + note = "See issue #51810 - use inline assembly instead" +)] +#[doc(hidden)] +pub unsafe fn __readeflags() -> u32 { + let eflags: u32; + asm!("pushfd", "pop {}", out(reg) eflags, options(nomem, att_syntax)); + eflags +} + +/// Reads EFLAGS. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__readeflags) +#[cfg(target_arch = "x86_64")] +#[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.29.0", + note = "See issue #51810 - use inline assembly instead" +)] +#[doc(hidden)] +pub unsafe fn __readeflags() -> u64 { + let eflags: u64; + asm!("pushfq", "pop {}", out(reg) eflags, options(nomem, att_syntax)); + eflags +} + +/// Write EFLAGS. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__writeeflags) +#[cfg(target_arch = "x86")] +#[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.29.0", + note = "See issue #51810 - use inline assembly instead" +)] +#[doc(hidden)] +pub unsafe fn __writeeflags(eflags: u32) { + asm!("push {}", "popfd", in(reg) eflags, options(nomem, att_syntax)); +} + +/// Write EFLAGS. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__writeeflags) +#[cfg(target_arch = "x86_64")] +#[inline(always)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.29.0", + note = "See issue #51810 - use inline assembly instead" +)] +#[doc(hidden)] +pub unsafe fn __writeeflags(eflags: u64) { + asm!("push {}", "popfq", in(reg) eflags, options(nomem, att_syntax)); +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + #[allow(deprecated)] + fn test_readeflags() { + unsafe { + // reads eflags, writes them back, reads them again, + // and compare for equality: + let v = __readeflags(); + __writeeflags(v); + let u = __readeflags(); + assert_eq!(v, u); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs new file mode 100644 index 000000000000..7686b317d4d4 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -0,0 +1,149 @@ +//! [F16C intrinsics]. +//! +//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769 + +use crate::core_arch::{simd::*, x86::*}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.vcvtph2ps.128"] + fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4; + #[link_name = "llvm.x86.vcvtph2ps.256"] + fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8; + #[link_name = "llvm.x86.vcvtps2ph.128"] + fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8; + #[link_name = "llvm.x86.vcvtps2ph.256"] + fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8; +} + +/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of +/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide +/// vector. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps) +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtph2ps"))] +#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] +pub fn _mm_cvtph_ps(a: __m128i) -> __m128 { + unsafe { transmute(llvm_vcvtph2ps_128(transmute(a))) } +} + +/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector +/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps) +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtph2ps"))] +#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] +pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 { + unsafe { transmute(llvm_vcvtph2ps_256(transmute(a))) } +} + +/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x +/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit +/// vector. 
+/// +/// Rounding is done according to the `imm_rounding` parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph) +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] +pub fn _mm_cvtps_ph(a: __m128) -> __m128i { + static_assert_uimm_bits!(IMM_ROUNDING, 3); + unsafe { + let a = a.as_f32x4(); + let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING); + transmute(r) + } +} + +/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x +/// 16-bit half-precision float values stored in a 128-bit wide vector. 
+/// +/// Rounding is done according to the `imm_rounding` parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph) +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] +pub fn _mm256_cvtps_ph(a: __m256) -> __m128i { + static_assert_uimm_bits!(IMM_ROUNDING, 3); + unsafe { + let a = a.as_f32x8(); + let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING); + transmute(r) + } +} + +#[cfg(test)] +mod tests { + use crate::{core_arch::x86::*, mem::transmute}; + use stdarch_test::simd_test; + + const F16_ONE: i16 = 0x3c00; + const F16_TWO: i16 = 0x4000; + const F16_THREE: i16 = 0x4200; + const F16_FOUR: i16 = 0x4400; + const F16_FIVE: i16 = 0x4500; + const F16_SIX: i16 = 0x4600; + const F16_SEVEN: i16 = 0x4700; + const F16_EIGHT: i16 = 0x4800; + + #[simd_test(enable = "f16c")] + unsafe fn test_mm_cvtph_ps() { + let a = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR); + let r = _mm_cvtph_ps(a); + let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "f16c")] + unsafe fn test_mm256_cvtph_ps() { + let a = _mm_set_epi16( + F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT, + ); + let r = _mm256_cvtph_ps(a); + let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m256(r, e); + } + + 
#[simd_test(enable = "f16c")] + unsafe fn test_mm_cvtps_ph() { + let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "f16c")] + unsafe fn test_mm256_cvtps_ph() { + let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm_set_epi16( + F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT, + ); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs new file mode 100644 index 000000000000..d3988422b9a4 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/fma.rs @@ -0,0 +1,816 @@ +//! Fused Multiply-Add instruction set (FMA) +//! +//! The FMA instruction set is an extension to the 128 and 256-bit SSE +//! instructions in the x86 microprocessor instruction set to perform fused +//! multiply–add (FMA) operations. +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +//! System Instructions][amd64_ref]. +//! +//! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the +//! instructions available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! 
[wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate + +use crate::core_arch::x86::*; +use crate::intrinsics::simd::{simd_fma, simd_neg}; +use crate::intrinsics::{fmaf32, fmaf64}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and add the intermediate result to packed elements in `c`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(a, b, c) } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and add the intermediate result to packed elements in `c`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(a, b, c) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and add the intermediate result to packed elements in `c`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(a, b, c) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and add the intermediate result to packed elements in `c`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(a, b, c) } +} + +/// Multiplies the lower double-precision (64-bit) floating-point elements in +/// `a` and `b`, and add the intermediate result to the lower element in `c`. +/// Stores the result in the lower element of the returned value, and copy the +/// upper element from `a` to the upper elements of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_sd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) + ) + } +} + +/// Multiplies the lower single-precision (32-bit) floating-point elements in +/// `a` and `b`, and add the intermediate result to the lower element in `c`. +/// Stores the result in the lower element of the returned value, and copy the +/// 3 upper elements from `a` to the upper elements of the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ss) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c)) + ) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and alternatively add and subtract packed elements in `c` to/from +/// the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmaddsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [2, 1]) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and alternatively add and subtract packed elements in `c` to/from +/// the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmaddsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and alternatively add and subtract packed elements in `c` to/from +/// the intermediate result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmaddsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and alternatively add and subtract packed elements in `c` to/from +/// the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmaddsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the intermediate result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsub213ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsub213ps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(a, b, simd_neg(c)) } +} + +/// Multiplies the lower double-precision (64-bit) floating-point elements in +/// `a` and `b`, and subtract the lower element in `c` from the intermediate +/// result. Store the result in the lower element of the returned value, and +/// copy the upper element from `a` to the upper elements of the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_sd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) + ) + } +} + +/// Multiplies the lower single-precision (32-bit) floating-point elements in +/// `a` and `b`, and subtract the lower element in `c` from the intermediate +/// result. Store the result in the lower element of the returned value, and +/// copy the 3 upper elements from `a` to the upper elements of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ss) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c)) + ) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and alternatively subtract and add packed elements in `c` from/to +/// the intermediate result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsubadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 3]) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and alternatively subtract and add packed elements in `c` from/to +/// the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsubadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 5, 2, 7]) + } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and alternatively subtract and add packed elements in `c` from/to +/// the intermediate result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsubadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 5, 2, 7]) + } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and alternatively subtract and add packed elements in `c` from/to +/// the intermediate result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfmsubadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and add the negated intermediate result to packed elements in `c`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and add the negated intermediate result to packed elements in `c`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and add the negated intermediate result to packed elements in `c`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and add the negated intermediate result to packed elements in `c`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(simd_neg(a), b, c) } +} + +/// Multiplies the lower double-precision (64-bit) floating-point elements in +/// `a` and `b`, and add the negated intermediate result to the lower element +/// in `c`. Store the result in the lower element of the returned value, and +/// copy the upper element from `a` to the upper elements of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_sd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) + ) + } +} + +/// Multiplies the lower single-precision (32-bit) floating-point elements in +/// `a` and `b`, and add the negated intermediate result to the lower element +/// in `c`. Store the result in the lower element of the returned value, and +/// copy the 3 upper elements from `a` to the upper elements of the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ss) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmadd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c)) + ) + } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the negated intermediate +/// result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the negated intermediate +/// result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_pd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the negated intermediate +/// result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and subtract packed elements in `c` from the negated intermediate +/// result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_ps) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } +} + +/// Multiplies the lower double-precision (64-bit) floating-point elements in +/// `a` and `b`, and subtract packed elements in `c` from the negated +/// intermediate result. Store the result in the lower element of the returned +/// value, and copy the upper element from `a` to the upper elements of the +/// result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_sd) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) + ) + } +} + +/// Multiplies the lower single-precision (32-bit) floating-point elements in +/// `a` and `b`, and subtract packed elements in `c` from the negated +/// intermediate result. 
Store the result in the lower element of the +/// returned value, and copy the 3 upper elements from `a` to the upper +/// elements of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ss) +#[inline] +#[target_feature(enable = "fma")] +#[cfg_attr(test, assert_instr(vfnmsub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c)) + ) + } +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmadd_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(9., 15.); + assert_eq_m128d(_mm_fmadd_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmadd_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 3., 7., 2.); + let c = _mm256_setr_pd(4., 9., 1., 7.); + let r = _mm256_setr_pd(9., 15., 22., 15.); + assert_eq_m256d(_mm256_fmadd_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmadd_ps() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(9., 15., 22., 15.); + assert_eq_m128(_mm_fmadd_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmadd_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); + let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); + let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); + let r = _mm256_setr_ps(9., 15., 22., 15., -5., -49., -2., -31.); + assert_eq_m256(_mm256_fmadd_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmadd_sd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c 
= _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(9., 2.); + assert_eq_m128d(_mm_fmadd_sd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmadd_ss() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(9., 2., 3., 4.); + assert_eq_m128(_mm_fmadd_ss(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmaddsub_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(1., 15.); + assert_eq_m128d(_mm_fmaddsub_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmaddsub_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 3., 7., 2.); + let c = _mm256_setr_pd(4., 9., 1., 7.); + let r = _mm256_setr_pd(1., 15., 20., 15.); + assert_eq_m256d(_mm256_fmaddsub_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmaddsub_ps() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(1., 15., 20., 15.); + assert_eq_m128(_mm_fmaddsub_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmaddsub_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); + let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); + let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); + let r = _mm256_setr_ps(1., 15., 20., 15., 5., -49., 2., -31.); + assert_eq_m256(_mm256_fmaddsub_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmsub_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(1., -3.); + assert_eq_m128d(_mm_fmsub_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmsub_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 3., 7., 2.); + let c = _mm256_setr_pd(4., 9., 1., 
7.); + let r = _mm256_setr_pd(1., -3., 20., 1.); + assert_eq_m256d(_mm256_fmsub_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmsub_ps() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(1., -3., 20., 1.); + assert_eq_m128(_mm_fmsub_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmsub_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); + let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); + let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); + let r = _mm256_setr_ps(1., -3., 20., 1., 5., -71., 2., -25.); + assert_eq_m256(_mm256_fmsub_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmsub_sd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(1., 2.); + assert_eq_m128d(_mm_fmsub_sd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmsub_ss() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(1., 2., 3., 4.); + assert_eq_m128(_mm_fmsub_ss(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmsubadd_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(9., -3.); + assert_eq_m128d(_mm_fmsubadd_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmsubadd_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 3., 7., 2.); + let c = _mm256_setr_pd(4., 9., 1., 7.); + let r = _mm256_setr_pd(9., -3., 22., 1.); + assert_eq_m256d(_mm256_fmsubadd_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fmsubadd_ps() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = 
_mm_setr_ps(9., -3., 22., 1.); + assert_eq_m128(_mm_fmsubadd_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fmsubadd_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); + let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); + let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); + let r = _mm256_setr_ps(9., -3., 22., 1., -5., -71., -2., -25.); + assert_eq_m256(_mm256_fmsubadd_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmadd_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(-1., 3.); + assert_eq_m128d(_mm_fnmadd_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fnmadd_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 3., 7., 2.); + let c = _mm256_setr_pd(4., 9., 1., 7.); + let r = _mm256_setr_pd(-1., 3., -20., -1.); + assert_eq_m256d(_mm256_fnmadd_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmadd_ps() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(-1., 3., -20., -1.); + assert_eq_m128(_mm_fnmadd_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fnmadd_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); + let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); + let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); + let r = _mm256_setr_ps(-1., 3., -20., -1., -5., 71., -2., 25.); + assert_eq_m256(_mm256_fnmadd_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmadd_sd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(-1., 2.); + assert_eq_m128d(_mm_fnmadd_sd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmadd_ss() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = 
_mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(-1., 2., 3., 4.); + assert_eq_m128(_mm_fnmadd_ss(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmsub_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(-9., -15.); + assert_eq_m128d(_mm_fnmsub_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fnmsub_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 3., 7., 2.); + let c = _mm256_setr_pd(4., 9., 1., 7.); + let r = _mm256_setr_pd(-9., -15., -22., -15.); + assert_eq_m256d(_mm256_fnmsub_pd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmsub_ps() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(-9., -15., -22., -15.); + assert_eq_m128(_mm_fnmsub_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm256_fnmsub_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); + let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); + let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); + let r = _mm256_setr_ps(-9., -15., -22., -15., 5., 49., 2., 31.); + assert_eq_m256(_mm256_fnmsub_ps(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmsub_sd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 3.); + let c = _mm_setr_pd(4., 9.); + let r = _mm_setr_pd(-9., 2.); + assert_eq_m128d(_mm_fnmsub_sd(a, b, c), r); + } + + #[simd_test(enable = "fma")] + unsafe fn test_mm_fnmsub_ss() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let b = _mm_setr_ps(5., 3., 7., 2.); + let c = _mm_setr_ps(4., 9., 1., 7.); + let r = _mm_setr_ps(-9., 2., 3., 4.); + assert_eq_m128(_mm_fnmsub_ss(a, b, c), r); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/fxsr.rs b/library/stdarch/crates/core_arch/src/x86/fxsr.rs new file mode 
100644 index 000000000000..71fd52ca1496 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/fxsr.rs @@ -0,0 +1,88 @@ +//! FXSR floating-point context fast save and restore. + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.fxsave"] + fn fxsave(p: *mut u8); + #[link_name = "llvm.x86.fxrstor"] + fn fxrstor(p: *const u8); +} + +/// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the +/// 512-byte-long 16-byte-aligned memory region `mem_addr`. +/// +/// A misaligned destination operand raises a general-protection (#GP) or an +/// alignment check exception (#AC). +/// +/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. +/// +/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html +/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxsave) +#[inline] +#[target_feature(enable = "fxsr")] +#[cfg_attr(test, assert_instr(fxsave))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _fxsave(mem_addr: *mut u8) { + fxsave(mem_addr) +} + +/// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the +/// 512-byte-long 16-byte-aligned memory region `mem_addr`. +/// +/// The contents of this memory region should have been written to by a +/// previous +/// `_fxsave` or `_fxsave64` intrinsic. +/// +/// A misaligned destination operand raises a general-protection (#GP) or an +/// alignment check exception (#AC). +/// +/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. 
+/// +/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html +/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxrstor) +#[inline] +#[target_feature(enable = "fxsr")] +#[cfg_attr(test, assert_instr(fxrstor))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _fxrstor(mem_addr: *const u8) { + fxrstor(mem_addr) +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + use std::{cmp::PartialEq, fmt}; + use stdarch_test::simd_test; + + #[repr(align(16))] + struct FxsaveArea { + data: [u8; 512], // 512 bytes + } + + impl FxsaveArea { + fn new() -> FxsaveArea { + FxsaveArea { data: [0; 512] } + } + fn ptr(&mut self) -> *mut u8 { + self.data.as_mut_ptr() + } + } + + #[simd_test(enable = "fxsr")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_fxsave() { + let mut a = FxsaveArea::new(); + let mut b = FxsaveArea::new(); + + fxsr::_fxsave(a.ptr()); + fxsr::_fxrstor(a.ptr()); + fxsr::_fxsave(b.ptr()); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/gfni.rs b/library/stdarch/crates/core_arch/src/x86/gfni.rs new file mode 100644 index 000000000000..9386684abaef --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/gfni.rs @@ -0,0 +1,1549 @@ +//! Galois Field New Instructions (GFNI) +//! +//! The intrinsics here correspond to those in the `immintrin.h` C header. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! 
[intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::simd::i8x16; +use crate::core_arch::simd::i8x32; +use crate::core_arch::simd::i8x64; +use crate::core_arch::x86::__m128i; +use crate::core_arch::x86::__m256i; +use crate::core_arch::x86::__m512i; +use crate::core_arch::x86::__mmask16; +use crate::core_arch::x86::__mmask32; +use crate::core_arch::x86::__mmask64; +use crate::intrinsics::simd::simd_select_bitmask; +use crate::mem::transmute; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.vgf2p8affineinvqb.512"] + fn vgf2p8affineinvqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64; + #[link_name = "llvm.x86.vgf2p8affineinvqb.256"] + fn vgf2p8affineinvqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32; + #[link_name = "llvm.x86.vgf2p8affineinvqb.128"] + fn vgf2p8affineinvqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16; + #[link_name = "llvm.x86.vgf2p8affineqb.512"] + fn vgf2p8affineqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64; + #[link_name = "llvm.x86.vgf2p8affineqb.256"] + fn vgf2p8affineqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32; + #[link_name = "llvm.x86.vgf2p8affineqb.128"] + fn vgf2p8affineqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16; + #[link_name = "llvm.x86.vgf2p8mulb.512"] + fn vgf2p8mulb_512(a: i8x64, b: i8x64) -> i8x64; + #[link_name = "llvm.x86.vgf2p8mulb.256"] + fn vgf2p8mulb_256(a: i8x32, b: i8x32) -> i8x32; + #[link_name = "llvm.x86.vgf2p8mulb.128"] + fn vgf2p8mulb_128(a: i8x16, b: i8x16) -> i8x16; +} + +// LLVM requires AVX512BW for a lot of these instructions, see +// https://github.com/llvm/llvm-project/blob/release/9.x/clang/include/clang/Basic/BuiltinsX86.def#L457 +// however our tests also require the target feature list to match Intel's +// which *doesn't* require AVX512BW but only AVX512F, so we added the redundant AVX512F +// 
requirement (for now) +// also see +// https://github.com/llvm/llvm-project/blob/release/9.x/clang/lib/Headers/gfniintrin.h +// for forcing GFNI, BW and optionally VL extension + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64())) } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm512_mask_gf2p8mul_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()), + src.as_i8x64(), + )) + } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. 
+/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + let zero = i8x64::ZERO; + unsafe { + transmute(simd_select_bitmask( + k, + vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()), + zero, + )) + } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32())) } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm256_mask_gf2p8mul_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()), + src.as_i8x32(), + )) + } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + let zero = i8x32::ZERO; + unsafe { + transmute(simd_select_bitmask( + k, + vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()), + zero, + )) + } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(gf2p8mulb))] +pub fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16())) } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm_mask_gf2p8mul_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()), + src.as_i8x16(), + )) + } +} + +/// Performs a multiplication in GF(2^8) on the packed bytes. +/// The field is in polynomial representation with the reduction polynomial +/// x^8 + x^4 + x^3 + x + 1. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_gf2p8mul_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8mulb))] +pub fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let zero = i8x16::ZERO; + transmute(simd_select_bitmask( + k, + vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()), + zero, + )) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_gf2p8affine_epi64_epi8(x: __m512i, a: __m512i) -> __m512i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x64(); + let a = a.as_i8x64(); + unsafe { + let r = vgf2p8affineqb_512(x, a, b); + transmute(r) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_gf2p8affine_epi64_epi8( + k: __mmask64, + x: __m512i, + a: __m512i, +) -> __m512i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let zero = i8x64::ZERO; + let x = x.as_i8x64(); + let a = a.as_i8x64(); + unsafe { + let r = vgf2p8affineqb_512(x, a, b); + transmute(simd_select_bitmask(k, r, zero)) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_gf2p8affine_epi64_epi8( + src: __m512i, + k: __mmask64, + x: __m512i, + a: __m512i, +) -> __m512i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x64(); + let a = a.as_i8x64(); + unsafe { + let r = vgf2p8affineqb_512(x, a, b); + transmute(simd_select_bitmask(k, r, src.as_i8x64())) + } +} + +/// Performs an affine transformation on the packed bytes in x. 
+/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_gf2p8affine_epi64_epi8(x: __m256i, a: __m256i) -> __m256i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x32(); + let a = a.as_i8x32(); + unsafe { + let r = vgf2p8affineqb_256(x, a, b); + transmute(r) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_gf2p8affine_epi64_epi8( + k: __mmask32, + x: __m256i, + a: __m256i, +) -> __m256i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let zero = i8x32::ZERO; + let x = x.as_i8x32(); + let a = a.as_i8x32(); + unsafe { + let r = vgf2p8affineqb_256(x, a, b); + transmute(simd_select_bitmask(k, r, zero)) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_gf2p8affine_epi64_epi8( + src: __m256i, + k: __mmask32, + x: __m256i, + a: __m256i, +) -> __m256i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x32(); + let a = a.as_i8x32(); + unsafe { + let r = vgf2p8affineqb_256(x, a, b); + transmute(simd_select_bitmask(k, r, src.as_i8x32())) + } +} + +/// Performs an affine transformation on the packed bytes in x. 
+/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_gf2p8affine_epi64_epi8(x: __m128i, a: __m128i) -> __m128i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x16(); + let a = a.as_i8x16(); + unsafe { + let r = vgf2p8affineqb_128(x, a, b); + transmute(r) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_gf2p8affine_epi64_epi8( + k: __mmask16, + x: __m128i, + a: __m128i, +) -> __m128i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let zero = i8x16::ZERO; + let x = x.as_i8x16(); + let a = a.as_i8x16(); + unsafe { + let r = vgf2p8affineqb_128(x, a, b); + transmute(simd_select_bitmask(k, r, zero)) + } +} + +/// Performs an affine transformation on the packed bytes in x. +/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_gf2p8affine_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_gf2p8affine_epi64_epi8( + src: __m128i, + k: __mmask16, + x: __m128i, + a: __m128i, +) -> __m128i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x16(); + let a = a.as_i8x16(); + unsafe { + let r = vgf2p8affineqb_128(x, a, b); + transmute(simd_select_bitmask(k, r, src.as_i8x16())) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. 
+/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_gf2p8affineinv_epi64_epi8(x: __m512i, a: __m512i) -> __m512i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x64(); + let a = a.as_i8x64(); + unsafe { + let r = vgf2p8affineinvqb_512(x, a, b); + transmute(r) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_gf2p8affineinv_epi64_epi8( + k: __mmask64, + x: __m512i, + a: __m512i, +) -> __m512i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let zero = i8x64::ZERO; + let x = x.as_i8x64(); + let a = a.as_i8x64(); + unsafe { + let r = vgf2p8affineinvqb_512(x, a, b); + transmute(simd_select_bitmask(k, r, zero)) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm512_mask_gf2p8affineinv_epi64_epi8( + src: __m512i, + k: __mmask64, + x: __m512i, + a: __m512i, +) -> __m512i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x64(); + let a = a.as_i8x64(); + unsafe { + let r = vgf2p8affineinvqb_512(x, a, b); + transmute(simd_select_bitmask(k, r, src.as_i8x64())) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_gf2p8affineinv_epi64_epi8(x: __m256i, a: __m256i) -> __m256i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x32(); + let a = a.as_i8x32(); + unsafe { + let r = vgf2p8affineinvqb_256(x, a, b); + transmute(r) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. 
+/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm256_maskz_gf2p8affineinv_epi64_epi8( + k: __mmask32, + x: __m256i, + a: __m256i, +) -> __m256i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let zero = i8x32::ZERO; + let x = x.as_i8x32(); + let a = a.as_i8x32(); + unsafe { + let r = vgf2p8affineinvqb_256(x, a, b); + transmute(simd_select_bitmask(k, r, zero)) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm256_mask_gf2p8affineinv_epi64_epi8( + src: __m256i, + k: __mmask32, + x: __m256i, + a: __m256i, +) -> __m256i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x32(); + let a = a.as_i8x32(); + unsafe { + let r = vgf2p8affineinvqb_256(x, a, b); + transmute(simd_select_bitmask(k, r, src.as_i8x32())) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_gf2p8affineinv_epi64_epi8(x: __m128i, a: __m128i) -> __m128i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x16(); + let a = a.as_i8x16(); + unsafe { + let r = vgf2p8affineinvqb_128(x, a, b); + transmute(r) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. 
+/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_gf2p8affineinv_epi64_epi8( + k: __mmask16, + x: __m128i, + a: __m128i, +) -> __m128i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let zero = i8x16::ZERO; + let x = x.as_i8x16(); + let a = a.as_i8x16(); + unsafe { + let r = vgf2p8affineinvqb_128(x, a, b); + transmute(simd_select_bitmask(k, r, zero)) + } +} + +/// Performs an affine transformation on the inverted packed bytes in x. +/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix +/// and b being a constant 8-bit immediate value. +/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1. +/// The inverse of 0 is 0. +/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a. +/// +/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. +/// Otherwise the computation result is written into the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_gf2p8affineinv_epi64_epi8) +#[inline] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[rustc_legacy_const_generics(4)] +pub fn _mm_mask_gf2p8affineinv_epi64_epi8( + src: __m128i, + k: __mmask16, + x: __m128i, + a: __m128i, +) -> __m128i { + static_assert_uimm_bits!(B, 8); + let b = B as u8; + let x = x.as_i8x16(); + let a = a.as_i8x16(); + unsafe { + let r = vgf2p8affineinvqb_128(x, a, b); + transmute(simd_select_bitmask(k, r, src.as_i8x16())) + } +} + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __mXXXi happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use core::hint::black_box; + use core::intrinsics::size_of; + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + fn mulbyte(left: u8, right: u8) -> u8 { + // this implementation follows the description in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8mul_epi8 + const REDUCTION_POLYNOMIAL: u16 = 0x11b; + let left: u16 = left.into(); + let right: u16 = right.into(); + let mut carryless_product: u16 = 0; + + // Carryless multiplication + for i in 0..8 { + if ((left >> i) & 0x01) != 0 { + carryless_product ^= right << i; + } + } + + // reduction, adding in "0" where appropriate to clear out high bits + // note that REDUCTION_POLYNOMIAL is zero in this context + for i in (8..=14).rev() { + if ((carryless_product >> i) & 0x01) != 0 { + carryless_product ^= REDUCTION_POLYNOMIAL << (i - 8); + } + } + + carryless_product as u8 + } + + const NUM_TEST_WORDS_512: usize = 4; + const NUM_TEST_WORDS_256: usize = NUM_TEST_WORDS_512 * 2; + const 
NUM_TEST_WORDS_128: usize = NUM_TEST_WORDS_256 * 2; + const NUM_TEST_ENTRIES: usize = NUM_TEST_WORDS_512 * 64; + const NUM_TEST_WORDS_64: usize = NUM_TEST_WORDS_128 * 2; + const NUM_BYTES: usize = 256; + const NUM_BYTES_WORDS_128: usize = NUM_BYTES / 16; + const NUM_BYTES_WORDS_256: usize = NUM_BYTES_WORDS_128 / 2; + const NUM_BYTES_WORDS_512: usize = NUM_BYTES_WORDS_256 / 2; + + fn parity(input: u8) -> u8 { + let mut accumulator = 0; + for i in 0..8 { + accumulator ^= (input >> i) & 0x01; + } + accumulator + } + + fn mat_vec_multiply_affine(matrix: u64, x: u8, b: u8) -> u8 { + // this implementation follows the description in + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_gf2p8affine_epi64_epi8 + let mut accumulator = 0; + + for bit in 0..8 { + accumulator |= parity(x & matrix.to_le_bytes()[bit]) << (7 - bit); + } + + accumulator ^ b + } + + fn generate_affine_mul_test_data( + immediate: u8, + ) -> ( + [u64; NUM_TEST_WORDS_64], + [u8; NUM_TEST_ENTRIES], + [u8; NUM_TEST_ENTRIES], + ) { + let mut left: [u64; NUM_TEST_WORDS_64] = [0; NUM_TEST_WORDS_64]; + let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES]; + let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES]; + + for i in 0..NUM_TEST_WORDS_64 { + left[i] = (i as u64) * 103 * 101; + for j in 0..8 { + let j64 = j as u64; + right[i * 8 + j] = ((left[i] + j64) % 256) as u8; + result[i * 8 + j] = mat_vec_multiply_affine(left[i], right[i * 8 + j], immediate); + } + } + + (left, right, result) + } + + fn generate_inv_tests_data() -> ([u8; NUM_BYTES], [u8; NUM_BYTES]) { + let mut input: [u8; NUM_BYTES] = [0; NUM_BYTES]; + let mut result: [u8; NUM_BYTES] = [0; NUM_BYTES]; + + for i in 0..NUM_BYTES { + input[i] = (i % 256) as u8; + result[i] = if i == 0 { 0 } else { 1 }; + } + + (input, result) + } + + const AES_S_BOX: [u8; NUM_BYTES] = [ + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, + 0x76, 0xca, 0x82, 0xc9, 0x7d, 
0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, + 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, + 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, + 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, + 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, + 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, + 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, + 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, + 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, + 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, + 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25, + 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, + 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, + 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, + 0x16, + ]; + + fn generate_byte_mul_test_data() -> ( + [u8; NUM_TEST_ENTRIES], + [u8; NUM_TEST_ENTRIES], + [u8; NUM_TEST_ENTRIES], + ) { + let mut left: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES]; + let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES]; + let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES]; + + for i in 0..NUM_TEST_ENTRIES { + left[i] = (i % 256) as u8; + right[i] = left[i].wrapping_mul(101); + result[i] = mulbyte(left[i], right[i]); + } + + (left, right, result) + } + + #[target_feature(enable = "sse2")] + 
#[stable(feature = "stdarch_x86_avx512", since = "1.89")] + unsafe fn load_m128i_word(data: &[T], word_index: usize) -> __m128i { + let byte_offset = word_index * 16 / size_of::(); + let pointer = data.as_ptr().add(byte_offset) as *const __m128i; + _mm_loadu_si128(black_box(pointer)) + } + + #[target_feature(enable = "avx")] + #[stable(feature = "stdarch_x86_avx512", since = "1.89")] + unsafe fn load_m256i_word(data: &[T], word_index: usize) -> __m256i { + let byte_offset = word_index * 32 / size_of::(); + let pointer = data.as_ptr().add(byte_offset) as *const __m256i; + _mm256_loadu_si256(black_box(pointer)) + } + + #[target_feature(enable = "avx512f")] + #[stable(feature = "stdarch_x86_avx512", since = "1.89")] + unsafe fn load_m512i_word(data: &[T], word_index: usize) -> __m512i { + let byte_offset = word_index * 64 / size_of::(); + let pointer = data.as_ptr().add(byte_offset) as *const _; + _mm512_loadu_si512(black_box(pointer)) + } + + #[simd_test(enable = "gfni,avx512f")] + unsafe fn test_mm512_gf2p8mul_epi8() { + let (left, right, expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_512 { + let left = load_m512i_word(&left, i); + let right = load_m512i_word(&right, i); + let expected = load_m512i_word(&expected, i); + let result = _mm512_gf2p8mul_epi8(left, right); + assert_eq_m512i(result, expected); + } + } + + #[simd_test(enable = "gfni,avx512bw")] + unsafe fn test_mm512_maskz_gf2p8mul_epi8() { + let (left, right, _expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_512 { + let left = load_m512i_word(&left, i); + let right = load_m512i_word(&right, i); + let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right); + assert_eq_m512i(result_zero, _mm512_setzero_si512()); + let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00; + let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00; + let expected_result = _mm512_gf2p8mul_epi8(left, right); + let result_masked = _mm512_maskz_gf2p8mul_epi8(mask_bytes, left, right); 
+ let expected_masked = + _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result); + assert_eq_m512i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw")] + unsafe fn test_mm512_mask_gf2p8mul_epi8() { + let (left, right, _expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_512 { + let left = load_m512i_word(&left, i); + let right = load_m512i_word(&right, i); + let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right); + assert_eq_m512i(result_left, left); + let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00; + let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00; + let expected_result = _mm512_gf2p8mul_epi8(left, right); + let result_masked = _mm512_mask_gf2p8mul_epi8(left, mask_bytes, left, right); + let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result); + assert_eq_m512i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx")] + unsafe fn test_mm256_gf2p8mul_epi8() { + let (left, right, expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_256 { + let left = load_m256i_word(&left, i); + let right = load_m256i_word(&right, i); + let expected = load_m256i_word(&expected, i); + let result = _mm256_gf2p8mul_epi8(left, right); + assert_eq_m256i(result, expected); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_gf2p8mul_epi8() { + let (left, right, _expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_256 { + let left = load_m256i_word(&left, i); + let right = load_m256i_word(&right, i); + let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right); + assert_eq_m256i(result_zero, _mm256_setzero_si256()); + let mask_bytes: __mmask32 = 0x0F_F0_FF_00; + const MASK_WORDS: i32 = 0b01_10_11_00; + let expected_result = _mm256_gf2p8mul_epi8(left, right); + let result_masked = _mm256_maskz_gf2p8mul_epi8(mask_bytes, left, right); + let expected_masked = + 
_mm256_blend_epi32::(_mm256_setzero_si256(), expected_result); + assert_eq_m256i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_gf2p8mul_epi8() { + let (left, right, _expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_256 { + let left = load_m256i_word(&left, i); + let right = load_m256i_word(&right, i); + let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right); + assert_eq_m256i(result_left, left); + let mask_bytes: __mmask32 = 0x0F_F0_FF_00; + const MASK_WORDS: i32 = 0b01_10_11_00; + let expected_result = _mm256_gf2p8mul_epi8(left, right); + let result_masked = _mm256_mask_gf2p8mul_epi8(left, mask_bytes, left, right); + let expected_masked = _mm256_blend_epi32::(left, expected_result); + assert_eq_m256i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni")] + unsafe fn test_mm_gf2p8mul_epi8() { + let (left, right, expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_128 { + let left = load_m128i_word(&left, i); + let right = load_m128i_word(&right, i); + let expected = load_m128i_word(&expected, i); + let result = _mm_gf2p8mul_epi8(left, right); + assert_eq_m128i(result, expected); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm_maskz_gf2p8mul_epi8() { + let (left, right, _expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_128 { + let left = load_m128i_word(&left, i); + let right = load_m128i_word(&right, i); + let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right); + assert_eq_m128i(result_zero, _mm_setzero_si128()); + let mask_bytes: __mmask16 = 0x0F_F0; + const MASK_WORDS: i32 = 0b01_10; + let expected_result = _mm_gf2p8mul_epi8(left, right); + let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right); + let expected_masked = + _mm_blend_epi32::(_mm_setzero_si128(), expected_result); + assert_eq_m128i(result_masked, expected_masked); + } + } + + 
#[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm_mask_gf2p8mul_epi8() { + let (left, right, _expected) = generate_byte_mul_test_data(); + + for i in 0..NUM_TEST_WORDS_128 { + let left = load_m128i_word(&left, i); + let right = load_m128i_word(&right, i); + let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right); + assert_eq_m128i(result_left, left); + let mask_bytes: __mmask16 = 0x0F_F0; + const MASK_WORDS: i32 = 0b01_10; + let expected_result = _mm_gf2p8mul_epi8(left, right); + let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right); + let expected_masked = _mm_blend_epi32::(left, expected_result); + assert_eq_m128i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512f")] + unsafe fn test_mm512_gf2p8affine_epi64_epi8() { + let identity: i64 = 0x01_02_04_08_10_20_40_80; + const IDENTITY_BYTE: i32 = 0; + let constant: i64 = 0; + const CONSTANT_BYTE: i32 = 0x63; + let identity = _mm512_set1_epi64(identity); + let constant = _mm512_set1_epi64(constant); + let constant_reference = _mm512_set1_epi8(CONSTANT_BYTE as i8); + + let (bytes, more_bytes, _) = generate_byte_mul_test_data(); + let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_512 { + let data = load_m512i_word(&bytes, i); + let result = _mm512_gf2p8affine_epi64_epi8::(data, identity); + assert_eq_m512i(result, data); + let result = _mm512_gf2p8affine_epi64_epi8::(data, constant); + assert_eq_m512i(result, constant_reference); + let data = load_m512i_word(&more_bytes, i); + let result = _mm512_gf2p8affine_epi64_epi8::(data, identity); + assert_eq_m512i(result, data); + let result = _mm512_gf2p8affine_epi64_epi8::(data, constant); + assert_eq_m512i(result, constant_reference); + + let matrix = load_m512i_word(&matrices, i); + let vector = load_m512i_word(&vectors, i); + let reference = load_m512i_word(&references, i); + + let result = _mm512_gf2p8affine_epi64_epi8::(vector, 
matrix); + assert_eq_m512i(result, reference); + } + } + + #[simd_test(enable = "gfni,avx512bw")] + unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_512 { + let matrix = load_m512i_word(&matrices, i); + let vector = load_m512i_word(&vectors, i); + let result_zero = + _mm512_maskz_gf2p8affine_epi64_epi8::(0, vector, matrix); + assert_eq_m512i(result_zero, _mm512_setzero_si512()); + let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00; + let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00; + let expected_result = _mm512_gf2p8affine_epi64_epi8::(vector, matrix); + let result_masked = + _mm512_maskz_gf2p8affine_epi64_epi8::(mask_bytes, vector, matrix); + let expected_masked = + _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result); + assert_eq_m512i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw")] + unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_512 { + let left = load_m512i_word(&vectors, i); + let right = load_m512i_word(&matrices, i); + let result_left = + _mm512_mask_gf2p8affine_epi64_epi8::(left, 0, left, right); + assert_eq_m512i(result_left, left); + let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00; + let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00; + let expected_result = _mm512_gf2p8affine_epi64_epi8::(left, right); + let result_masked = + _mm512_mask_gf2p8affine_epi64_epi8::(left, mask_bytes, left, right); + let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result); + assert_eq_m512i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx")] + unsafe fn test_mm256_gf2p8affine_epi64_epi8() { + let identity: i64 = 
0x01_02_04_08_10_20_40_80; + const IDENTITY_BYTE: i32 = 0; + let constant: i64 = 0; + const CONSTANT_BYTE: i32 = 0x63; + let identity = _mm256_set1_epi64x(identity); + let constant = _mm256_set1_epi64x(constant); + let constant_reference = _mm256_set1_epi8(CONSTANT_BYTE as i8); + + let (bytes, more_bytes, _) = generate_byte_mul_test_data(); + let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_256 { + let data = load_m256i_word(&bytes, i); + let result = _mm256_gf2p8affine_epi64_epi8::(data, identity); + assert_eq_m256i(result, data); + let result = _mm256_gf2p8affine_epi64_epi8::(data, constant); + assert_eq_m256i(result, constant_reference); + let data = load_m256i_word(&more_bytes, i); + let result = _mm256_gf2p8affine_epi64_epi8::(data, identity); + assert_eq_m256i(result, data); + let result = _mm256_gf2p8affine_epi64_epi8::(data, constant); + assert_eq_m256i(result, constant_reference); + + let matrix = load_m256i_word(&matrices, i); + let vector = load_m256i_word(&vectors, i); + let reference = load_m256i_word(&references, i); + + let result = _mm256_gf2p8affine_epi64_epi8::(vector, matrix); + assert_eq_m256i(result, reference); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_256 { + let matrix = load_m256i_word(&matrices, i); + let vector = load_m256i_word(&vectors, i); + let result_zero = + _mm256_maskz_gf2p8affine_epi64_epi8::(0, vector, matrix); + assert_eq_m256i(result_zero, _mm256_setzero_si256()); + let mask_bytes: __mmask32 = 0xFF_0F_F0_00; + const MASK_WORDS: i32 = 0b11_01_10_00; + let expected_result = _mm256_gf2p8affine_epi64_epi8::(vector, matrix); + let result_masked = + _mm256_maskz_gf2p8affine_epi64_epi8::(mask_bytes, vector, matrix); + let 
expected_masked = + _mm256_blend_epi32::(_mm256_setzero_si256(), expected_result); + assert_eq_m256i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_256 { + let left = load_m256i_word(&vectors, i); + let right = load_m256i_word(&matrices, i); + let result_left = + _mm256_mask_gf2p8affine_epi64_epi8::(left, 0, left, right); + assert_eq_m256i(result_left, left); + let mask_bytes: __mmask32 = 0xFF_0F_F0_00; + const MASK_WORDS: i32 = 0b11_01_10_00; + let expected_result = _mm256_gf2p8affine_epi64_epi8::(left, right); + let result_masked = + _mm256_mask_gf2p8affine_epi64_epi8::(left, mask_bytes, left, right); + let expected_masked = _mm256_blend_epi32::(left, expected_result); + assert_eq_m256i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni")] + unsafe fn test_mm_gf2p8affine_epi64_epi8() { + let identity: i64 = 0x01_02_04_08_10_20_40_80; + const IDENTITY_BYTE: i32 = 0; + let constant: i64 = 0; + const CONSTANT_BYTE: i32 = 0x63; + let identity = _mm_set1_epi64x(identity); + let constant = _mm_set1_epi64x(constant); + let constant_reference = _mm_set1_epi8(CONSTANT_BYTE as i8); + + let (bytes, more_bytes, _) = generate_byte_mul_test_data(); + let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_128 { + let data = load_m128i_word(&bytes, i); + let result = _mm_gf2p8affine_epi64_epi8::(data, identity); + assert_eq_m128i(result, data); + let result = _mm_gf2p8affine_epi64_epi8::(data, constant); + assert_eq_m128i(result, constant_reference); + let data = load_m128i_word(&more_bytes, i); + let result = _mm_gf2p8affine_epi64_epi8::(data, identity); + assert_eq_m128i(result, data); + let result = _mm_gf2p8affine_epi64_epi8::(data, 
constant); + assert_eq_m128i(result, constant_reference); + + let matrix = load_m128i_word(&matrices, i); + let vector = load_m128i_word(&vectors, i); + let reference = load_m128i_word(&references, i); + + let result = _mm_gf2p8affine_epi64_epi8::(vector, matrix); + assert_eq_m128i(result, reference); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_128 { + let matrix = load_m128i_word(&matrices, i); + let vector = load_m128i_word(&vectors, i); + let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::(0, vector, matrix); + assert_eq_m128i(result_zero, _mm_setzero_si128()); + let mask_bytes: __mmask16 = 0x0F_F0; + const MASK_WORDS: i32 = 0b01_10; + let expected_result = _mm_gf2p8affine_epi64_epi8::(vector, matrix); + let result_masked = + _mm_maskz_gf2p8affine_epi64_epi8::(mask_bytes, vector, matrix); + let expected_masked = + _mm_blend_epi32::(_mm_setzero_si128(), expected_result); + assert_eq_m128i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_128 { + let left = load_m128i_word(&vectors, i); + let right = load_m128i_word(&matrices, i); + let result_left = + _mm_mask_gf2p8affine_epi64_epi8::(left, 0, left, right); + assert_eq_m128i(result_left, left); + let mask_bytes: __mmask16 = 0x0F_F0; + const MASK_WORDS: i32 = 0b01_10; + let expected_result = _mm_gf2p8affine_epi64_epi8::(left, right); + let result_masked = + _mm_mask_gf2p8affine_epi64_epi8::(left, mask_bytes, left, right); + let expected_masked = _mm_blend_epi32::(left, expected_result); + assert_eq_m128i(result_masked, expected_masked); + } + } 
+ + #[simd_test(enable = "gfni,avx512f")] + unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() { + let identity: i64 = 0x01_02_04_08_10_20_40_80; + const IDENTITY_BYTE: i32 = 0; + const CONSTANT_BYTE: i32 = 0x63; + let identity = _mm512_set1_epi64(identity); + + // validate inversion + let (inputs, results) = generate_inv_tests_data(); + + for i in 0..NUM_BYTES_WORDS_512 { + let input = load_m512i_word(&inputs, i); + let reference = load_m512i_word(&results, i); + let result = _mm512_gf2p8affineinv_epi64_epi8::(input, identity); + let remultiplied = _mm512_gf2p8mul_epi8(result, input); + assert_eq_m512i(remultiplied, reference); + } + + // validate subsequent affine operation + let (matrices, vectors, _affine_expected) = + generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_512 { + let vector = load_m512i_word(&vectors, i); + let matrix = load_m512i_word(&matrices, i); + + let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::(vector, identity); + let reference = _mm512_gf2p8affine_epi64_epi8::(inv_vec, matrix); + let result = _mm512_gf2p8affineinv_epi64_epi8::(vector, matrix); + assert_eq_m512i(result, reference); + } + + // validate everything by virtue of checking against the AES SBox + const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8; + let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX); + + for i in 0..NUM_BYTES_WORDS_512 { + let reference = load_m512i_word(&AES_S_BOX, i); + let input = load_m512i_word(&inputs, i); + let result = _mm512_gf2p8affineinv_epi64_epi8::(input, sbox_matrix); + assert_eq_m512i(result, reference); + } + } + + #[simd_test(enable = "gfni,avx512bw")] + unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_512 { + let matrix = load_m512i_word(&matrices, i); + let vector = load_m512i_word(&vectors, i); + let result_zero = + 
_mm512_maskz_gf2p8affineinv_epi64_epi8::(0, vector, matrix); + assert_eq_m512i(result_zero, _mm512_setzero_si512()); + let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00; + let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00; + let expected_result = _mm512_gf2p8affineinv_epi64_epi8::(vector, matrix); + let result_masked = + _mm512_maskz_gf2p8affineinv_epi64_epi8::(mask_bytes, vector, matrix); + let expected_masked = + _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result); + assert_eq_m512i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw")] + unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_512 { + let left = load_m512i_word(&vectors, i); + let right = load_m512i_word(&matrices, i); + let result_left = + _mm512_mask_gf2p8affineinv_epi64_epi8::(left, 0, left, right); + assert_eq_m512i(result_left, left); + let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00; + let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00; + let expected_result = _mm512_gf2p8affineinv_epi64_epi8::(left, right); + let result_masked = _mm512_mask_gf2p8affineinv_epi64_epi8::( + left, mask_bytes, left, right, + ); + let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result); + assert_eq_m512i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx")] + unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() { + let identity: i64 = 0x01_02_04_08_10_20_40_80; + const IDENTITY_BYTE: i32 = 0; + const CONSTANT_BYTE: i32 = 0x63; + let identity = _mm256_set1_epi64x(identity); + + // validate inversion + let (inputs, results) = generate_inv_tests_data(); + + for i in 0..NUM_BYTES_WORDS_256 { + let input = load_m256i_word(&inputs, i); + let reference = load_m256i_word(&results, i); + let result = _mm256_gf2p8affineinv_epi64_epi8::(input, identity); 
+ let remultiplied = _mm256_gf2p8mul_epi8(result, input); + assert_eq_m256i(remultiplied, reference); + } + + // validate subsequent affine operation + let (matrices, vectors, _affine_expected) = + generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_256 { + let vector = load_m256i_word(&vectors, i); + let matrix = load_m256i_word(&matrices, i); + + let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::(vector, identity); + let reference = _mm256_gf2p8affine_epi64_epi8::(inv_vec, matrix); + let result = _mm256_gf2p8affineinv_epi64_epi8::(vector, matrix); + assert_eq_m256i(result, reference); + } + + // validate everything by virtue of checking against the AES SBox + const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8; + let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX); + + for i in 0..NUM_BYTES_WORDS_256 { + let reference = load_m256i_word(&AES_S_BOX, i); + let input = load_m256i_word(&inputs, i); + let result = _mm256_gf2p8affineinv_epi64_epi8::(input, sbox_matrix); + assert_eq_m256i(result, reference); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_256 { + let matrix = load_m256i_word(&matrices, i); + let vector = load_m256i_word(&vectors, i); + let result_zero = + _mm256_maskz_gf2p8affineinv_epi64_epi8::(0, vector, matrix); + assert_eq_m256i(result_zero, _mm256_setzero_si256()); + let mask_bytes: __mmask32 = 0xFF_0F_F0_00; + const MASK_WORDS: i32 = 0b11_01_10_00; + let expected_result = _mm256_gf2p8affineinv_epi64_epi8::(vector, matrix); + let result_masked = + _mm256_maskz_gf2p8affineinv_epi64_epi8::(mask_bytes, vector, matrix); + let expected_masked = + _mm256_blend_epi32::(_mm256_setzero_si256(), expected_result); + assert_eq_m256i(result_masked, expected_masked); + } + } + + #[simd_test(enable = 
"gfni,avx512bw,avx512vl")] + unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_256 { + let left = load_m256i_word(&vectors, i); + let right = load_m256i_word(&matrices, i); + let result_left = + _mm256_mask_gf2p8affineinv_epi64_epi8::(left, 0, left, right); + assert_eq_m256i(result_left, left); + let mask_bytes: __mmask32 = 0xFF_0F_F0_00; + const MASK_WORDS: i32 = 0b11_01_10_00; + let expected_result = _mm256_gf2p8affineinv_epi64_epi8::(left, right); + let result_masked = _mm256_mask_gf2p8affineinv_epi64_epi8::( + left, mask_bytes, left, right, + ); + let expected_masked = _mm256_blend_epi32::(left, expected_result); + assert_eq_m256i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni")] + unsafe fn test_mm_gf2p8affineinv_epi64_epi8() { + let identity: i64 = 0x01_02_04_08_10_20_40_80; + const IDENTITY_BYTE: i32 = 0; + const CONSTANT_BYTE: i32 = 0x63; + let identity = _mm_set1_epi64x(identity); + + // validate inversion + let (inputs, results) = generate_inv_tests_data(); + + for i in 0..NUM_BYTES_WORDS_128 { + let input = load_m128i_word(&inputs, i); + let reference = load_m128i_word(&results, i); + let result = _mm_gf2p8affineinv_epi64_epi8::(input, identity); + let remultiplied = _mm_gf2p8mul_epi8(result, input); + assert_eq_m128i(remultiplied, reference); + } + + // validate subsequent affine operation + let (matrices, vectors, _affine_expected) = + generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_128 { + let vector = load_m128i_word(&vectors, i); + let matrix = load_m128i_word(&matrices, i); + + let inv_vec = _mm_gf2p8affineinv_epi64_epi8::(vector, identity); + let reference = _mm_gf2p8affine_epi64_epi8::(inv_vec, matrix); + let result = _mm_gf2p8affineinv_epi64_epi8::(vector, matrix); + assert_eq_m128i(result, reference); + } + + // validate 
everything by virtue of checking against the AES SBox + const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8; + let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX); + + for i in 0..NUM_BYTES_WORDS_128 { + let reference = load_m128i_word(&AES_S_BOX, i); + let input = load_m128i_word(&inputs, i); + let result = _mm_gf2p8affineinv_epi64_epi8::(input, sbox_matrix); + assert_eq_m128i(result, reference); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_128 { + let matrix = load_m128i_word(&matrices, i); + let vector = load_m128i_word(&vectors, i); + let result_zero = + _mm_maskz_gf2p8affineinv_epi64_epi8::(0, vector, matrix); + assert_eq_m128i(result_zero, _mm_setzero_si128()); + let mask_bytes: __mmask16 = 0x0F_F0; + const MASK_WORDS: i32 = 0b01_10; + let expected_result = _mm_gf2p8affineinv_epi64_epi8::(vector, matrix); + let result_masked = + _mm_maskz_gf2p8affineinv_epi64_epi8::(mask_bytes, vector, matrix); + let expected_masked = + _mm_blend_epi32::(_mm_setzero_si128(), expected_result); + assert_eq_m128i(result_masked, expected_masked); + } + } + + #[simd_test(enable = "gfni,avx512bw,avx512vl")] + unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() { + const CONSTANT_BYTE: i32 = 0x63; + let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); + + for i in 0..NUM_TEST_WORDS_128 { + let left = load_m128i_word(&vectors, i); + let right = load_m128i_word(&matrices, i); + let result_left = + _mm_mask_gf2p8affineinv_epi64_epi8::(left, 0, left, right); + assert_eq_m128i(result_left, left); + let mask_bytes: __mmask16 = 0x0F_F0; + const MASK_WORDS: i32 = 0b01_10; + let expected_result = _mm_gf2p8affineinv_epi64_epi8::(left, right); + let result_masked = + _mm_mask_gf2p8affineinv_epi64_epi8::(left, mask_bytes, left, 
right); + let expected_masked = _mm_blend_epi32::(left, expected_result); + assert_eq_m128i(result_masked, expected_masked); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/kl.rs b/library/stdarch/crates/core_arch/src/x86/kl.rs new file mode 100644 index 000000000000..eb9eb83f4115 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/kl.rs @@ -0,0 +1,526 @@ +//! AES Key Locker Intrinsics +//! +//! The Intrinsics here correspond to those in the `keylockerintrin.h` C header. + +use crate::core_arch::x86::__m128i; +use crate::ptr; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[repr(C, packed)] +struct EncodeKey128Output(u32, __m128i, __m128i, __m128i, __m128i, __m128i, __m128i); + +#[repr(C, packed)] +struct EncodeKey256Output( + u32, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, +); + +#[repr(C, packed)] +struct AesOutput(u8, __m128i); + +#[repr(C, packed)] +struct WideAesOutput( + u8, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, + __m128i, +); + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.loadiwkey"] + fn loadiwkey(integrity_key: __m128i, key_lo: __m128i, key_hi: __m128i, control: u32); + + #[link_name = "llvm.x86.encodekey128"] + fn encodekey128(key_metadata: u32, key: __m128i) -> EncodeKey128Output; + #[link_name = "llvm.x86.encodekey256"] + fn encodekey256(key_metadata: u32, key_lo: __m128i, key_hi: __m128i) -> EncodeKey256Output; + + #[link_name = "llvm.x86.aesenc128kl"] + fn aesenc128kl(data: __m128i, handle: *const u8) -> AesOutput; + #[link_name = "llvm.x86.aesdec128kl"] + fn aesdec128kl(data: __m128i, handle: *const u8) -> AesOutput; + #[link_name = "llvm.x86.aesenc256kl"] + fn aesenc256kl(data: __m128i, handle: *const u8) -> AesOutput; + #[link_name = "llvm.x86.aesdec256kl"] + fn aesdec256kl(data: __m128i, handle: *const u8) -> AesOutput; + + #[link_name = "llvm.x86.aesencwide128kl"] + fn aesencwide128kl( + handle: *const u8, 
+ i0: __m128i, + i1: __m128i, + i2: __m128i, + i3: __m128i, + i4: __m128i, + i5: __m128i, + i6: __m128i, + i7: __m128i, + ) -> WideAesOutput; + #[link_name = "llvm.x86.aesdecwide128kl"] + fn aesdecwide128kl( + handle: *const u8, + i0: __m128i, + i1: __m128i, + i2: __m128i, + i3: __m128i, + i4: __m128i, + i5: __m128i, + i6: __m128i, + i7: __m128i, + ) -> WideAesOutput; + #[link_name = "llvm.x86.aesencwide256kl"] + fn aesencwide256kl( + handle: *const u8, + i0: __m128i, + i1: __m128i, + i2: __m128i, + i3: __m128i, + i4: __m128i, + i5: __m128i, + i6: __m128i, + i7: __m128i, + ) -> WideAesOutput; + #[link_name = "llvm.x86.aesdecwide256kl"] + fn aesdecwide256kl( + handle: *const u8, + i0: __m128i, + i1: __m128i, + i2: __m128i, + i3: __m128i, + i4: __m128i, + i5: __m128i, + i6: __m128i, + i7: __m128i, + ) -> WideAesOutput; +} + +/// Load internal wrapping key (IWKey). The 32-bit unsigned integer `control` specifies IWKey's KeySource +/// and whether backing up the key is permitted. IWKey's 256-bit encryption key is loaded from `key_lo` +/// and `key_hi`. +/// +/// - `control[0]`: NoBackup bit. If set, the IWKey cannot be backed up. +/// - `control[1:4]`: KeySource bits. These bits specify the encoding method of the IWKey. The only +/// allowed values are `0` (AES GCM SIV wrapping algorithm with the specified key) and `1` (AES GCM +/// SIV wrapping algorithm with random keys enforced by hardware). After calling `_mm_loadiwkey` with +/// KeySource set to `1`, software must check `ZF` to ensure that the key was loaded successfully. +/// Using any other value may result in a General Protection Exception. +/// - `control[5:31]`: Reserved for future use, must be set to `0`. +/// +/// Note that setting the NoBackup bit and using the KeySource value `1` requires hardware support. These +/// permissions can be found by calling `__cpuid(0x19)` and checking the `ECX[0:1]` bits. Failing to follow +/// these restrictions may result in a General Protection Exception. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadiwkey) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(loadiwkey))] +pub unsafe fn _mm_loadiwkey( + control: u32, + integrity_key: __m128i, + key_lo: __m128i, + key_hi: __m128i, +) { + loadiwkey(integrity_key, key_lo, key_hi, control); +} + +/// Wrap a 128-bit AES key into a 384-bit key handle and stores it in `handle`. Returns the `control` +/// parameter used to create the IWKey. +/// +/// - `key_params[0]`: If set, this key can only be used by the Kernel. +/// - `key_params[1]`: If set, this key can not be used to encrypt. +/// - `key_params[2]`: If set, this key can not be used to decrypt. +/// - `key_params[31:3]`: Reserved for future use, must be set to `0`. +/// +/// Note that these restrictions need hardware support, and the supported restrictions can be found by +/// calling `__cpuid(0x19)` and checking the `EAX[0:2]` bits. Failing to follow these restrictions may +/// result in a General Protection Exception. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_encodekey128_u32) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(encodekey128))] +pub unsafe fn _mm_encodekey128_u32(key_params: u32, key: __m128i, handle: *mut u8) -> u32 { + let EncodeKey128Output(control, key0, key1, key2, _, _, _) = encodekey128(key_params, key); + ptr::write_unaligned(handle.cast(), [key0, key1, key2]); + control +} + +/// Wrap a 256-bit AES key into a 512-bit key handle and stores it in `handle`. Returns the `control` +/// parameter used to create the IWKey. +/// +/// - `key_params[0]`: If set, this key can only be used by the Kernel. 
+/// - `key_params[1]`: If set, this key can not be used to encrypt. +/// - `key_params[2]`: If set, this key can not be used to decrypt. +/// - `key_params[31:3]`: Reserved for future use, must be set to `0`. +/// +/// Note that these restrictions need hardware support, and the supported restrictions can be found by +/// calling `__cpuid(0x19)` and checking the `EAX[0:2]` bits. Failing to follow these restrictions may +/// result in a General Protection Exception. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_encodekey256_u32) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(encodekey256))] +pub unsafe fn _mm_encodekey256_u32( + key_params: u32, + key_lo: __m128i, + key_hi: __m128i, + handle: *mut u8, +) -> u32 { + let EncodeKey256Output(control, key0, key1, key2, key3, _, _, _) = + encodekey256(key_params, key_lo, key_hi); + ptr::write_unaligned(handle.cast(), [key0, key1, key2, key3]); + control +} + +/// Encrypt 10 rounds of unsigned 8-bit integers in `input` using 128-bit AES key specified in the +/// 384-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc128kl_u8) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesenc128kl))] +pub unsafe fn _mm_aesenc128kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { + let AesOutput(status, result) = aesenc128kl(input, handle); + *output = result; + status +} + +/// Decrypt 10 rounds of unsigned 8-bit integers in `input` using 128-bit AES key specified in the +/// 384-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec128kl_u8) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesdec128kl))] +pub unsafe fn _mm_aesdec128kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { + let AesOutput(status, result) = aesdec128kl(input, handle); + *output = result; + status +} + +/// Encrypt 14 rounds of unsigned 8-bit integers in `input` using 256-bit AES key specified in the +/// 512-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc256kl_u8) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesenc256kl))] +pub unsafe fn _mm_aesenc256kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { + let AesOutput(status, result) = aesenc256kl(input, handle); + *output = result; + status +} + +/// Decrypt 14 rounds of unsigned 8-bit integers in `input` using 256-bit AES key specified in the +/// 512-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec256kl_u8) +#[inline] +#[target_feature(enable = "kl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesdec256kl))] +pub unsafe fn _mm_aesdec256kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { + let AesOutput(status, result) = aesdec256kl(input, handle); + *output = result; + status +} + +/// Encrypt 10 rounds of 8 groups of unsigned 8-bit integers in `input` using 128-bit AES key specified +/// in the 384-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesencwide128kl_u8) +#[inline] +#[target_feature(enable = "widekl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesencwide128kl))] +pub unsafe fn _mm_aesencwide128kl_u8( + output: *mut __m128i, + input: *const __m128i, + handle: *const u8, +) -> u8 { + let input = &*ptr::slice_from_raw_parts(input, 8); + let WideAesOutput(status, out0, out1, out2, out3, out4, out5, out6, out7) = aesencwide128kl( + handle, input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7], + ); + *output.cast() = [out0, out1, out2, out3, out4, out5, out6, out7]; + status +} + +/// Decrypt 10 rounds of 8 groups of unsigned 8-bit integers in `input` using 128-bit AES key specified +/// in the 384-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdecwide128kl_u8) +#[inline] +#[target_feature(enable = "widekl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesdecwide128kl))] +pub unsafe fn _mm_aesdecwide128kl_u8( + output: *mut __m128i, + input: *const __m128i, + handle: *const u8, +) -> u8 { + let input = &*ptr::slice_from_raw_parts(input, 8); + let WideAesOutput(status, out0, out1, out2, out3, out4, out5, out6, out7) = aesdecwide128kl( + handle, input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7], + ); + *output.cast() = [out0, out1, out2, out3, out4, out5, out6, out7]; + status +} + +/// Encrypt 14 rounds of 8 groups of unsigned 8-bit integers in `input` using 256-bit AES key specified +/// in the 512-bit key handle `handle`. 
Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesencwide256kl_u8) +#[inline] +#[target_feature(enable = "widekl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesencwide256kl))] +pub unsafe fn _mm_aesencwide256kl_u8( + output: *mut __m128i, + input: *const __m128i, + handle: *const u8, +) -> u8 { + let input = &*ptr::slice_from_raw_parts(input, 8); + let WideAesOutput(status, out0, out1, out2, out3, out4, out5, out6, out7) = aesencwide256kl( + handle, input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7], + ); + *output.cast() = [out0, out1, out2, out3, out4, out5, out6, out7]; + status +} + +/// Decrypt 14 rounds of 8 groups of unsigned 8-bit integers in `input` using 256-bit AES key specified +/// in the 512-bit key handle `handle`. Store the resulting unsigned 8-bit integers into the corresponding +/// elements of `output`. Returns `0` if the operation was successful, and `1` if the operation failed +/// due to a handle violation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdecwide256kl_u8) +#[inline] +#[target_feature(enable = "widekl")] +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[cfg_attr(test, assert_instr(aesdecwide256kl))] +pub unsafe fn _mm_aesdecwide256kl_u8( + output: *mut __m128i, + input: *const __m128i, + handle: *const u8, +) -> u8 { + let input = &*ptr::slice_from_raw_parts(input, 8); + let WideAesOutput(status, out0, out1, out2, out3, out4, out5, out6, out7) = aesdecwide256kl( + handle, input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7], + ); + *output.cast() = [out0, out1, out2, out3, out4, out5, out6, out7]; + status +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + use stdarch_test::simd_test; + + #[target_feature(enable = "kl")] + unsafe fn encodekey128() -> [u8; 48] { + let mut handle = [0; 48]; + let _ = _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr()); + handle + } + + #[target_feature(enable = "kl")] + unsafe fn encodekey256() -> [u8; 64] { + let mut handle = [0; 64]; + let _ = _mm_encodekey256_u32( + 0, + _mm_setzero_si128(), + _mm_setzero_si128(), + handle.as_mut_ptr(), + ); + handle + } + + #[simd_test(enable = "kl")] + unsafe fn test_mm_encodekey128_u32() { + encodekey128(); + } + + #[simd_test(enable = "kl")] + unsafe fn test_mm_encodekey256_u32() { + encodekey256(); + } + + #[simd_test(enable = "kl")] + unsafe fn test_mm_aesenc128kl_u8() { + let mut buffer = _mm_setzero_si128(); + let key = encodekey128(); + + for _ in 0..100 { + let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + + assert_eq_m128i(buffer, _mm_setzero_si128()); + } + + #[simd_test(enable = "kl")] + unsafe fn test_mm_aesdec128kl_u8() { + let mut buffer = 
_mm_setzero_si128(); + let key = encodekey128(); + + for _ in 0..100 { + let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + + assert_eq_m128i(buffer, _mm_setzero_si128()); + } + + #[simd_test(enable = "kl")] + unsafe fn test_mm_aesenc256kl_u8() { + let mut buffer = _mm_setzero_si128(); + let key = encodekey256(); + + for _ in 0..100 { + let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + + assert_eq_m128i(buffer, _mm_setzero_si128()); + } + + #[simd_test(enable = "kl")] + unsafe fn test_mm_aesdec256kl_u8() { + let mut buffer = _mm_setzero_si128(); + let key = encodekey256(); + + for _ in 0..100 { + let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()); + assert_eq!(status, 0); + } + + assert_eq_m128i(buffer, _mm_setzero_si128()); + } + + #[simd_test(enable = "widekl")] + unsafe fn test_mm_aesencwide128kl_u8() { + let mut buffer = [_mm_setzero_si128(); 8]; + let key = encodekey128(); + + for _ in 0..100 { + let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + + for elem in buffer { + assert_eq_m128i(elem, _mm_setzero_si128()); + } + } + + #[simd_test(enable = "widekl")] + unsafe fn test_mm_aesdecwide128kl_u8() { + let mut buffer = [_mm_setzero_si128(); 8]; + let key = encodekey128(); + + for _ in 0..100 { + let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + 
assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + + for elem in buffer { + assert_eq_m128i(elem, _mm_setzero_si128()); + } + } + + #[simd_test(enable = "widekl")] + unsafe fn test_mm_aesencwide256kl_u8() { + let mut buffer = [_mm_setzero_si128(); 8]; + let key = encodekey256(); + + for _ in 0..100 { + let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + + for elem in buffer { + assert_eq_m128i(elem, _mm_setzero_si128()); + } + } + + #[simd_test(enable = "widekl")] + unsafe fn test_mm_aesdecwide256kl_u8() { + let mut buffer = [_mm_setzero_si128(); 8]; + let key = encodekey256(); + + for _ in 0..100 { + let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + for _ in 0..100 { + let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr()); + assert_eq!(status, 0); + } + + for elem in buffer { + assert_eq_m128i(elem, _mm_setzero_si128()); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/macros.rs b/library/stdarch/crates/core_arch/src/x86/macros.rs new file mode 100644 index 000000000000..9b9c24a447ec --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/macros.rs @@ -0,0 +1,98 @@ +//! Utility macros. + +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not a round number. +#[allow(unused)] +macro_rules! static_assert_rounding { + ($imm:ident) => { + static_assert!( + $imm == 4 || $imm == 8 || $imm == 9 || $imm == 10 || $imm == 11, + "Invalid IMM value" + ) + }; +} + +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not a sae number. 
+#[allow(unused)] +macro_rules! static_assert_sae { + ($imm:ident) => { + static_assert!($imm == 4 || $imm == 8, "Invalid IMM value") + }; +} + +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not an extended rounding number +#[allow(unused)] +macro_rules! static_assert_extended_rounding { + ($imm: ident) => { + static_assert!(($imm & 7) < 5 && ($imm & !15) == 0, "Invalid IMM value") + }; +} + +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not a mantissas sae number. +#[allow(unused)] +macro_rules! static_assert_mantissas_sae { + ($imm:ident) => { + static_assert!($imm == 4 || $imm == 8 || $imm == 12, "Invalid IMM value") + }; +} + +// Helper macro used to trigger const eval errors when the const generic immediate value `SCALE` is +// not valid for gather instructions: the only valid scale values are 1, 2, 4 and 8. +#[allow(unused)] +macro_rules! static_assert_imm8_scale { + ($imm:ident) => { + static_assert!( + $imm == 1 || $imm == 2 || $imm == 4 || $imm == 8, + "Invalid SCALE value" + ) + }; +} + +#[cfg(test)] +macro_rules! assert_approx_eq { + ($a:expr, $b:expr, $eps:expr) => {{ + let (a, b) = (&$a, &$b); + assert!( + (*a - *b).abs() < $eps, + "assertion failed: `(left !== right)` \ + (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)", + *a, + *b, + $eps, + (*a - *b).abs() + ); + }}; +} + +// x86-32 wants to use a 32-bit address size, but asm! defaults to using the full +// register name (e.g. rax). We have to explicitly override the placeholder to +// use the 32-bit register name in that case. + +#[cfg(target_pointer_width = "32")] +macro_rules! vpl { + ($inst:expr) => { + concat!($inst, ", [{p:e}]") + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! vpl { + ($inst:expr) => { + concat!($inst, ", [{p}]") + }; +} + +#[cfg(target_pointer_width = "32")] +macro_rules! 
vps { + ($inst1:expr, $inst2:expr) => { + concat!($inst1, " [{p:e}]", $inst2) + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! vps { + ($inst1:expr, $inst2:expr) => { + concat!($inst1, " [{p}]", $inst2) + }; +} diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs new file mode 100644 index 000000000000..8897258c7dc2 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -0,0 +1,776 @@ +//! `x86` and `x86_64` intrinsics. + +use crate::mem::transmute; + +#[macro_use] +mod macros; + +types! { + #![stable(feature = "simd_x86", since = "1.27.0")] + + /// 128-bit wide integer vector type, x86-specific + /// + /// This type is the same as the `__m128i` type defined by Intel, + /// representing a 128-bit SIMD register. Usage of this type typically + /// corresponds to the `sse` and up target features for x86/x86_64. + /// + /// Internally this type may be viewed as: + /// + /// * `i8x16` - sixteen `i8` variables packed together + /// * `i16x8` - eight `i16` variables packed together + /// * `i32x4` - four `i32` variables packed together + /// * `i64x2` - two `i64` variables packed together + /// + /// (as well as unsigned versions). Each intrinsic may interpret the + /// internal bits differently, check the documentation of the intrinsic + /// to see how it's being used. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + /// + /// Note that this means that an instance of `__m128i` typically just means + /// a "bag of bits" which is left up to interpretation at the point of use. 
+ /// + /// Most intrinsics using `__m128i` are prefixed with `_mm_` and the + /// integer types tend to correspond to suffixes like "epi8" or "epi32". + /// + /// # Examples + /// + /// ``` + /// #[cfg(target_arch = "x86")] + /// use std::arch::x86::*; + /// #[cfg(target_arch = "x86_64")] + /// use std::arch::x86_64::*; + /// + /// # fn main() { + /// # #[target_feature(enable = "sse2")] + /// # #[allow(unused_unsafe)] // temporary, to unstick CI + /// # unsafe fn foo() { unsafe { + /// let all_bytes_zero = _mm_setzero_si128(); + /// let all_bytes_one = _mm_set1_epi8(1); + /// let four_i32 = _mm_set_epi32(1, 2, 3, 4); + /// # }} + /// # if is_x86_feature_detected!("sse2") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m128i(2 x i64); + + /// 128-bit wide set of four `f32` types, x86-specific + /// + /// This type is the same as the `__m128` type defined by Intel, + /// representing a 128-bit SIMD register which internally is consisted of + /// four packed `f32` instances. Usage of this type typically corresponds + /// to the `sse` and up target features for x86/x86_64. + /// + /// Note that unlike `__m128i`, the integer version of the 128-bit + /// registers, this `__m128` type has *one* interpretation. Each instance + /// of `__m128` always corresponds to `f32x4`, or four `f32` types packed + /// together. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + /// + /// Most intrinsics using `__m128` are prefixed with `_mm_` and are + /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with + /// "pd" which is used for `__m128d`. 
+ /// + /// # Examples + /// + /// ``` + /// #[cfg(target_arch = "x86")] + /// use std::arch::x86::*; + /// #[cfg(target_arch = "x86_64")] + /// use std::arch::x86_64::*; + /// + /// # fn main() { + /// # #[target_feature(enable = "sse")] + /// # #[allow(unused_unsafe)] // temporary, to unstick CI + /// # unsafe fn foo() { unsafe { + /// let four_zeros = _mm_setzero_ps(); + /// let four_ones = _mm_set1_ps(1.0); + /// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + /// # }} + /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m128(4 x f32); + + /// 128-bit wide set of two `f64` types, x86-specific + /// + /// This type is the same as the `__m128d` type defined by Intel, + /// representing a 128-bit SIMD register which internally is consisted of + /// two packed `f64` instances. Usage of this type typically corresponds + /// to the `sse` and up target features for x86/x86_64. + /// + /// Note that unlike `__m128i`, the integer version of the 128-bit + /// registers, this `__m128d` type has *one* interpretation. Each instance + /// of `__m128d` always corresponds to `f64x2`, or two `f64` types packed + /// together. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + /// + /// Most intrinsics using `__m128d` are prefixed with `_mm_` and are + /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with + /// "ps" which is used for `__m128`. 
+ /// + /// # Examples + /// + /// ``` + /// #[cfg(target_arch = "x86")] + /// use std::arch::x86::*; + /// #[cfg(target_arch = "x86_64")] + /// use std::arch::x86_64::*; + /// + /// # fn main() { + /// # #[target_feature(enable = "sse2")] + /// # #[allow(unused_unsafe)] // temporary, to unstick CI + /// # unsafe fn foo() { unsafe { + /// let two_zeros = _mm_setzero_pd(); + /// let two_ones = _mm_set1_pd(1.0); + /// let two_floats = _mm_set_pd(1.0, 2.0); + /// # }} + /// # if is_x86_feature_detected!("sse2") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m128d(2 x f64); + + /// 256-bit wide integer vector type, x86-specific + /// + /// This type is the same as the `__m256i` type defined by Intel, + /// representing a 256-bit SIMD register. Usage of this type typically + /// corresponds to the `avx` and up target features for x86/x86_64. + /// + /// Internally this type may be viewed as: + /// + /// * `i8x32` - thirty two `i8` variables packed together + /// * `i16x16` - sixteen `i16` variables packed together + /// * `i32x8` - eight `i32` variables packed together + /// * `i64x4` - four `i64` variables packed together + /// + /// (as well as unsigned versions). Each intrinsic may interpret the + /// internal bits differently, check the documentation of the intrinsic + /// to see how it's being used. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + /// + /// Note that this means that an instance of `__m256i` typically just means + /// a "bag of bits" which is left up to interpretation at the point of use. 
+ /// + /// # Examples + /// + /// ``` + /// #[cfg(target_arch = "x86")] + /// use std::arch::x86::*; + /// #[cfg(target_arch = "x86_64")] + /// use std::arch::x86_64::*; + /// + /// # fn main() { + /// # #[target_feature(enable = "avx")] + /// # #[allow(unused_unsafe)] // temporary, to unstick CI + /// # unsafe fn foo() { unsafe { + /// let all_bytes_zero = _mm256_setzero_si256(); + /// let all_bytes_one = _mm256_set1_epi8(1); + /// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + /// # }} + /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m256i(4 x i64); + + /// 256-bit wide set of eight `f32` types, x86-specific + /// + /// This type is the same as the `__m256` type defined by Intel, + /// representing a 256-bit SIMD register which internally is consisted of + /// eight packed `f32` instances. Usage of this type typically corresponds + /// to the `avx` and up target features for x86/x86_64. + /// + /// Note that unlike `__m256i`, the integer version of the 256-bit + /// registers, this `__m256` type has *one* interpretation. Each instance + /// of `__m256` always corresponds to `f32x8`, or eight `f32` types packed + /// together. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding between two consecutive elements); however, the + /// alignment is different and equal to the size of the type. Note that the + /// ABI for function calls may *not* be the same. + /// + /// Most intrinsics using `__m256` are prefixed with `_mm256_` and are + /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with + /// "pd" which is used for `__m256d`. 
+ /// + /// # Examples + /// + /// ``` + /// #[cfg(target_arch = "x86")] + /// use std::arch::x86::*; + /// #[cfg(target_arch = "x86_64")] + /// use std::arch::x86_64::*; + /// + /// # fn main() { + /// # #[target_feature(enable = "avx")] + /// # #[allow(unused_unsafe)] // temporary, to unstick CI + /// # unsafe fn foo() { unsafe { + /// let eight_zeros = _mm256_setzero_ps(); + /// let eight_ones = _mm256_set1_ps(1.0); + /// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + /// # }} + /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m256(8 x f32); + + /// 256-bit wide set of four `f64` types, x86-specific + /// + /// This type is the same as the `__m256d` type defined by Intel, + /// representing a 256-bit SIMD register which internally is consisted of + /// four packed `f64` instances. Usage of this type typically corresponds + /// to the `avx` and up target features for x86/x86_64. + /// + /// Note that unlike `__m256i`, the integer version of the 256-bit + /// registers, this `__m256d` type has *one* interpretation. Each instance + /// of `__m256d` always corresponds to `f64x4`, or four `f64` types packed + /// together. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + /// + /// Most intrinsics using `__m256d` are prefixed with `_mm256_` and are + /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with + /// "ps" which is used for `__m256`. 
+ /// + /// # Examples + /// + /// ``` + /// #[cfg(target_arch = "x86")] + /// use std::arch::x86::*; + /// #[cfg(target_arch = "x86_64")] + /// use std::arch::x86_64::*; + /// + /// # fn main() { + /// # #[target_feature(enable = "avx")] + /// # #[allow(unused_unsafe)] // temporary, to unstick CI + /// # unsafe fn foo() { unsafe { + /// let four_zeros = _mm256_setzero_pd(); + /// let four_ones = _mm256_set1_pd(1.0); + /// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + /// # }} + /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m256d(4 x f64); +} + +types! { + #![stable(feature = "simd_avx512_types", since = "1.72.0")] + + /// 512-bit wide integer vector type, x86-specific + /// + /// This type is the same as the `__m512i` type defined by Intel, + /// representing a 512-bit SIMD register. Usage of this type typically + /// corresponds to the `avx512*` and up target features for x86/x86_64. + /// + /// Internally this type may be viewed as: + /// + /// * `i8x64` - sixty-four `i8` variables packed together + /// * `i16x32` - thirty-two `i16` variables packed together + /// * `i32x16` - sixteen `i32` variables packed together + /// * `i64x8` - eight `i64` variables packed together + /// + /// (as well as unsigned versions). Each intrinsic may interpret the + /// internal bits differently, check the documentation of the intrinsic + /// to see how it's being used. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + /// + /// Note that this means that an instance of `__m512i` typically just means + /// a "bag of bits" which is left up to interpretation at the point of use. 
+ pub struct __m512i(8 x i64); + + /// 512-bit wide set of sixteen `f32` types, x86-specific + /// + /// This type is the same as the `__m512` type defined by Intel, + /// representing a 512-bit SIMD register which internally is consisted of + /// eight packed `f32` instances. Usage of this type typically corresponds + /// to the `avx512*` and up target features for x86/x86_64. + /// + /// Note that unlike `__m512i`, the integer version of the 512-bit + /// registers, this `__m512` type has *one* interpretation. Each instance + /// of `__m512` always corresponds to `f32x16`, or sixteen `f32` types + /// packed together. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding between two consecutive elements); however, the + /// alignment is different and equal to the size of the type. Note that the + /// ABI for function calls may *not* be the same. + /// + /// Most intrinsics using `__m512` are prefixed with `_mm512_` and are + /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with + /// "pd" which is used for `__m512d`. + pub struct __m512(16 x f32); + + /// 512-bit wide set of eight `f64` types, x86-specific + /// + /// This type is the same as the `__m512d` type defined by Intel, + /// representing a 512-bit SIMD register which internally is consisted of + /// eight packed `f64` instances. Usage of this type typically corresponds + /// to the `avx` and up target features for x86/x86_64. + /// + /// Note that unlike `__m512i`, the integer version of the 512-bit + /// registers, this `__m512d` type has *one* interpretation. Each instance + /// of `__m512d` always corresponds to `f64x8`, or eight `f64` types packed + /// together. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. 
the in-memory order of elements is the same, and + /// there is no padding between two consecutive elements); however, the + /// alignment is different and equal to the size of the type. Note that the + /// ABI for function calls may *not* be the same. + /// + /// Most intrinsics using `__m512d` are prefixed with `_mm512_` and are + /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with + /// "ps" which is used for `__m512`. + pub struct __m512d(8 x f64); +} + +types! { + #![stable(feature = "stdarch_x86_avx512", since = "1.89")] + + /// 128-bit wide set of eight `u16` types, x86-specific + /// + /// This type is representing a 128-bit SIMD register which internally is consisted of + /// eight packed `u16` instances. Its purpose is for bf16 related intrinsic + /// implementations. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + pub struct __m128bh(8 x u16); + + /// 256-bit wide set of 16 `u16` types, x86-specific + /// + /// This type is the same as the `__m256bh` type defined by Intel, + /// representing a 256-bit SIMD register which internally is consisted of + /// 16 packed `u16` instances. Its purpose is for bf16 related intrinsic + /// implementations. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. 
+ pub struct __m256bh(16 x u16); + + /// 512-bit wide set of 32 `u16` types, x86-specific + /// + /// This type is the same as the `__m512bh` type defined by Intel, + /// representing a 512-bit SIMD register which internally is consisted of + /// 32 packed `u16` instances. Its purpose is for bf16 related intrinsic + /// implementations. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + pub struct __m512bh(32 x u16); +} + +types! { + #![unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] + + /// 128-bit wide set of 8 `f16` types, x86-specific + /// + /// This type is the same as the `__m128h` type defined by Intel, + /// representing a 128-bit SIMD register which internally is consisted of + /// 8 packed `f16` instances. its purpose is for f16 related intrinsic + /// implementations. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + pub struct __m128h(8 x f16); + + /// 256-bit wide set of 16 `f16` types, x86-specific + /// + /// This type is the same as the `__m256h` type defined by Intel, + /// representing a 256-bit SIMD register which internally is consisted of + /// 16 packed `f16` instances. its purpose is for f16 related intrinsic + /// implementations. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. 
the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + pub struct __m256h(16 x f16); + + /// 512-bit wide set of 32 `f16` types, x86-specific + /// + /// This type is the same as the `__m512h` type defined by Intel, + /// representing a 512-bit SIMD register which internally is consisted of + /// 32 packed `f16` instances. its purpose is for f16 related intrinsic + /// implementations. + /// + /// The in-memory representation of this type is the same as the one of an + /// equivalent array (i.e. the in-memory order of elements is the same, and + /// there is no padding); however, the alignment is different and equal to + /// the size of the type. Note that the ABI for function calls may *not* be + /// the same. + pub struct __m512h(32 x f16); +} + +/// The BFloat16 type used in AVX-512 intrinsics. +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +#[allow(non_camel_case_types)] +#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] +pub struct bf16(u16); + +impl bf16 { + /// Raw transmutation from `u16` + #[inline] + #[must_use] + #[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] + pub const fn from_bits(bits: u16) -> bf16 { + bf16(bits) + } + + /// Raw transmutation to `u16` + #[inline] + #[must_use = "this returns the result of the operation, without modifying the original"] + #[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] + pub const fn to_bits(self) -> u16 { + self.0 + } +} + +/// The `__mmask64` type used in AVX-512 intrinsics, a 64-bit integer +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type __mmask64 = u64; + +/// The `__mmask32` type used in AVX-512 intrinsics, a 32-bit integer +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type __mmask32 = 
u32; + +/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type __mmask16 = u16; + +/// The `__mmask8` type used in AVX-512 intrinsics, a 8-bit integer +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type __mmask8 = u8; + +/// The `_MM_CMPINT_ENUM` type used to specify comparison operations in AVX-512 intrinsics. +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type _MM_CMPINT_ENUM = i32; + +/// The `MM_MANTISSA_NORM_ENUM` type used to specify mantissa normalized operations in AVX-512 intrinsics. +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type _MM_MANTISSA_NORM_ENUM = i32; + +/// The `MM_MANTISSA_SIGN_ENUM` type used to specify mantissa signed operations in AVX-512 intrinsics. +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type _MM_MANTISSA_SIGN_ENUM = i32; + +/// The `MM_PERM_ENUM` type used to specify shuffle operations in AVX-512 intrinsics. +#[allow(non_camel_case_types)] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub type _MM_PERM_ENUM = i32; + +#[cfg(test)] +mod test; +#[cfg(test)] +pub use self::test::*; + +macro_rules! as_transmute { + ($from:ty => $as_from:ident, $($as_to:ident -> $to:ident),* $(,)?) 
=> { + impl $from {$( + #[inline] + pub(crate) fn $as_to(self) -> crate::core_arch::simd::$to { + unsafe { transmute(self) } + } + )*} + $( + impl crate::core_arch::simd::$to { + #[inline] + pub(crate) fn $as_from(self) -> $from { + unsafe { transmute(self) } + } + } + )* + }; +} + +as_transmute!(__m128i => + as_m128i, + as_u8x16 -> u8x16, + as_u16x8 -> u16x8, + as_u32x4 -> u32x4, + as_u64x2 -> u64x2, + as_i8x16 -> i8x16, + as_i16x8 -> i16x8, + as_i32x4 -> i32x4, + as_i64x2 -> i64x2, +); +as_transmute!(__m256i => + as_m256i, + as_u8x32 -> u8x32, + as_u16x16 -> u16x16, + as_u32x8 -> u32x8, + as_u64x4 -> u64x4, + as_i8x32 -> i8x32, + as_i16x16 -> i16x16, + as_i32x8 -> i32x8, + as_i64x4 -> i64x4, +); +as_transmute!(__m512i => + as_m512i, + as_u8x64 -> u8x64, + as_u16x32 -> u16x32, + as_u32x16 -> u32x16, + as_u64x8 -> u64x8, + as_i8x64 -> i8x64, + as_i16x32 -> i16x32, + as_i32x16 -> i32x16, + as_i64x8 -> i64x8, +); + +as_transmute!(__m128 => as_m128, as_f32x4 -> f32x4); +as_transmute!(__m128d => as_m128d, as_f64x2 -> f64x2); +as_transmute!(__m256 => as_m256, as_f32x8 -> f32x8); +as_transmute!(__m256d => as_m256d, as_f64x4 -> f64x4); +as_transmute!(__m512 => as_m512, as_f32x16 -> f32x16); +as_transmute!(__m512d => as_m512d, as_f64x8 -> f64x8); + +as_transmute!(__m128bh => + as_m128bh, + as_u16x8 -> u16x8, + as_u32x4 -> u32x4, + as_i16x8 -> i16x8, + as_i32x4 -> i32x4, +); +as_transmute!(__m256bh => + as_m256bh, + as_u16x16 -> u16x16, + as_u32x8 -> u32x8, + as_i16x16 -> i16x16, + as_i32x8 -> i32x8, +); +as_transmute!(__m512bh => + as_m512bh, + as_u16x32 -> u16x32, + as_u32x16 -> u32x16, + as_i16x32 -> i16x32, + as_i32x16 -> i32x16, +); + +as_transmute!(__m128h => as_m128h, as_f16x8 -> f16x8); +as_transmute!(__m256h => as_m256h, as_f16x16 -> f16x16); +as_transmute!(__m512h => as_m512h, as_f16x32 -> f16x32); + +mod eflags; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::eflags::*; + +mod fxsr; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use 
self::fxsr::*; + +mod bswap; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::bswap::*; + +mod rdtsc; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::rdtsc::*; + +mod cpuid; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::cpuid::*; +mod xsave; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::xsave::*; + +mod sse; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse::*; +mod sse2; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse2::*; +mod sse3; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse3::*; +mod ssse3; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::ssse3::*; +mod sse41; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse41::*; +mod sse42; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse42::*; +mod avx; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::avx::*; +mod avx2; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::avx2::*; +mod fma; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::fma::*; + +mod abm; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::abm::*; +mod bmi1; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::bmi1::*; + +mod bmi2; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::bmi2::*; + +mod sse4a; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse4a::*; + +mod tbm; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::tbm::*; + +mod pclmulqdq; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::pclmulqdq::*; + +mod aes; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::aes::*; + +mod rdrand; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::rdrand::*; + +mod sha; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sha::*; + +mod adx; +#[stable(feature = "simd_x86_adx", 
since = "1.33.0")] +pub use self::adx::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +mod avx512f; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512f::*; + +mod avx512bw; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512bw::*; + +mod avx512cd; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512cd::*; + +mod avx512dq; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512dq::*; + +mod avx512ifma; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512ifma::*; + +mod avx512vbmi; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512vbmi::*; + +mod avx512vbmi2; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512vbmi2::*; + +mod avx512vnni; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512vnni::*; + +mod avx512bitalg; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512bitalg::*; + +mod gfni; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::gfni::*; + +mod avx512vpopcntdq; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512vpopcntdq::*; + +mod vaes; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::vaes::*; + +mod vpclmulqdq; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::vpclmulqdq::*; + +mod bt; +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub use self::bt::*; + +mod rtm; +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub use self::rtm::*; + +mod f16c; +#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] +pub use self::f16c::*; + +mod avx512bf16; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512bf16::*; + +mod avxneconvert; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avxneconvert::*; + +mod avx512fp16; 
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub use self::avx512fp16::*; + +mod kl; +#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +pub use self::kl::*; diff --git a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs new file mode 100644 index 000000000000..cce6a51e2cd6 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs @@ -0,0 +1,66 @@ +//! Carry-less Multiplication (CLMUL) +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). +//! +//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::x86::__m128i; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.pclmulqdq"] + fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i; +} + +/// Performs a carry-less multiplication of two 64-bit polynomials over the +/// finite field GF(2). +/// +/// The immediate byte is used for determining which halves of `a` and `b` +/// should be used. Immediate bits other than 0 and 4 are ignored. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128) +#[inline] +#[target_feature(enable = "pclmulqdq")] +#[cfg_attr(test, assert_instr(pclmul, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_clmulepi64_si128(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pclmulqdq(a, b, IMM8 as u8) } +} + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. 
They should not + // be interpreted as integers; signedness does not make sense for them, but + // __m128i happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "pclmulqdq")] + unsafe fn test_mm_clmulepi64_si128() { + // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf + let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); + let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d); + let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451); + let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315); + let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); + let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); + + assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(a, b), r00); + assert_eq_m128i(_mm_clmulepi64_si128::<0x10>(a, b), r01); + assert_eq_m128i(_mm_clmulepi64_si128::<0x01>(a, b), r10); + assert_eq_m128i(_mm_clmulepi64_si128::<0x11>(a, b), r11); + + let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); + let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); + assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(a0, a0), r); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs new file mode 100644 index 000000000000..50097915213b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -0,0 +1,75 @@ +//! RDRAND and RDSEED instructions for returning random numbers from an Intel +//! on-chip hardware random number generator which has been seeded by an +//! on-chip entropy source. 
+#![allow(clippy::module_name_repetitions)] + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.rdrand.16"] + fn x86_rdrand16_step() -> (u16, i32); + #[link_name = "llvm.x86.rdrand.32"] + fn x86_rdrand32_step() -> (u32, i32); + #[link_name = "llvm.x86.rdseed.16"] + fn x86_rdseed16_step() -> (u16, i32); + #[link_name = "llvm.x86.rdseed.32"] + fn x86_rdseed32_step() -> (u32, i32); +} + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Read a hardware generated 16-bit random value and store the result in val. +/// Returns 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdrand16_step) +#[inline] +#[target_feature(enable = "rdrand")] +#[cfg_attr(test, assert_instr(rdrand))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { + let (v, flag) = x86_rdrand16_step(); + *val = v; + flag +} + +/// Read a hardware generated 32-bit random value and store the result in val. +/// Returns 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdrand32_step) +#[inline] +#[target_feature(enable = "rdrand")] +#[cfg_attr(test, assert_instr(rdrand))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { + let (v, flag) = x86_rdrand32_step(); + *val = v; + flag +} + +/// Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store +/// in val. Return 1 if a random value was generated, and 0 otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdseed16_step) +#[inline] +#[target_feature(enable = "rdseed")] +#[cfg_attr(test, assert_instr(rdseed))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 { + let (v, flag) = x86_rdseed16_step(); + *val = v; + flag +} + +/// Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store +/// in val. Return 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdseed32_step) +#[inline] +#[target_feature(enable = "rdseed")] +#[cfg_attr(test, assert_instr(rdseed))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdseed32_step(val: &mut u32) -> i32 { + let (v, flag) = x86_rdseed32_step(); + *val = v; + flag +} diff --git a/library/stdarch/crates/core_arch/src/x86/rdtsc.rs b/library/stdarch/crates/core_arch/src/x86/rdtsc.rs new file mode 100644 index 000000000000..3b348153d602 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/rdtsc.rs @@ -0,0 +1,79 @@ +//! RDTSC instructions. + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Reads the current value of the processor’s time-stamp counter. +/// +/// The processor monotonically increments the time-stamp counter MSR +/// every clock cycle and resets it to 0 whenever the processor is +/// reset. +/// +/// The RDTSC instruction is not a serializing instruction. It does +/// not necessarily wait until all previous instructions have been +/// executed before reading the counter. Similarly, subsequent +/// instructions may begin execution before the read operation is +/// performed. +/// +/// On processors that support the Intel 64 architecture, the +/// high-order 32 bits of each of RAX and RDX are cleared. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdtsc) +#[inline] +#[cfg_attr(test, assert_instr(rdtsc))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdtsc() -> u64 { + rdtsc() +} + +/// Reads the current value of the processor’s time-stamp counter and +/// the `IA32_TSC_AUX MSR`. +/// +/// The processor monotonically increments the time-stamp counter MSR +/// every clock cycle and resets it to 0 whenever the processor is +/// reset. +/// +/// The RDTSCP instruction waits until all previous instructions have +/// been executed before reading the counter. However, subsequent +/// instructions may begin execution before the read operation is +/// performed. +/// +/// On processors that support the Intel 64 architecture, the +/// high-order 32 bits of each of RAX, RDX, and RCX are cleared. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=__rdtscp) +#[inline] +#[cfg_attr(test, assert_instr(rdtscp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn __rdtscp(aux: *mut u32) -> u64 { + let (tsc, auxval) = rdtscp(); + *aux = auxval; + tsc +} + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.rdtsc"] + fn rdtsc() -> u64; + #[link_name = "llvm.x86.rdtscp"] + fn rdtscp() -> (u64, u32); +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse2")] + unsafe fn test_rdtsc() { + let r = _rdtsc(); + assert_ne!(r, 0); // The chances of this being 0 are infinitesimal + } + + #[simd_test(enable = "sse2")] + unsafe fn test_rdtscp() { + let mut aux = 0; + let r = __rdtscp(&mut aux); + assert_ne!(r, 0); // The chances of this being 0 are infinitesimal + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/rtm.rs b/library/stdarch/crates/core_arch/src/x86/rtm.rs new file mode 100644 index 000000000000..b807305d6aa8 --- 
/dev/null +++ b/library/stdarch/crates/core_arch/src/x86/rtm.rs @@ -0,0 +1,174 @@ +//! Intel's Restricted Transactional Memory (RTM). +//! +//! This CPU feature is available on Intel Broadwell or later CPUs (and some Haswell). +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_rtm] provides a quick overview of the assembly instructions, and +//! Intel's [programming considerations][intel_consid] details what sorts of instructions within a +//! transaction are likely to cause an abort. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_rtm]: https://en.wikipedia.org/wiki/Transactional_Synchronization_Extensions#Restricted_Transactional_Memory +//! [intel_consid]: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-intel-transactional-synchronization-extensions-intel-tsx-programming-considerations + +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "C" { + #[link_name = "llvm.x86.xbegin"] + fn x86_xbegin() -> i32; + #[link_name = "llvm.x86.xend"] + fn x86_xend(); + #[link_name = "llvm.x86.xabort"] + fn x86_xabort(imm8: i8); + #[link_name = "llvm.x86.xtest"] + fn x86_xtest() -> i32; +} + +/// Transaction successfully started. +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XBEGIN_STARTED: u32 = !0; + +/// Transaction explicitly aborted with xabort. The parameter passed to xabort is available with +/// `_xabort_code(status)`. +#[allow(clippy::identity_op)] +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XABORT_EXPLICIT: u32 = 1 << 0; + +/// Transaction retry is possible. 
+#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XABORT_RETRY: u32 = 1 << 1; + +/// Transaction abort due to a memory conflict with another thread. +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XABORT_CONFLICT: u32 = 1 << 2; + +/// Transaction abort due to the transaction using too much memory. +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XABORT_CAPACITY: u32 = 1 << 3; + +/// Transaction abort due to a debug trap. +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XABORT_DEBUG: u32 = 1 << 4; + +/// Transaction abort in a inner nested transaction. +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const _XABORT_NESTED: u32 = 1 << 5; + +/// Specifies the start of a restricted transactional memory (RTM) code region and returns a value +/// indicating status. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xbegin) +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xbegin))] +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub unsafe fn _xbegin() -> u32 { + x86_xbegin() as _ +} + +/// Specifies the end of a restricted transactional memory (RTM) code region. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xend) +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xend))] +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub unsafe fn _xend() { + x86_xend() +} + +/// Forces a restricted transactional memory (RTM) region to abort. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xabort) +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xabort, IMM8 = 0x0))] +#[rustc_legacy_const_generics(0)] +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub unsafe fn _xabort() { + static_assert_uimm_bits!(IMM8, 8); + x86_xabort(IMM8 as i8) +} + +/// Queries whether the processor is executing in a transactional region identified by restricted +/// transactional memory (RTM) or hardware lock elision (HLE). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xtest) +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xtest))] +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub unsafe fn _xtest() -> u8 { + x86_xtest() as _ +} + +/// Retrieves the parameter passed to [`_xabort`] when [`_xbegin`]'s status has the +/// `_XABORT_EXPLICIT` flag set. +#[inline] +#[unstable(feature = "stdarch_x86_rtm", issue = "111138")] +pub const fn _xabort_code(status: u32) -> u32 { + (status >> 24) & 0xFF +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "rtm")] + unsafe fn test_xbegin() { + let mut x = 0; + for _ in 0..10 { + let code = _xbegin(); + if code == _XBEGIN_STARTED { + x += 1; + _xend(); + assert_eq!(x, 1); + break; + } + assert_eq!(x, 0); + } + } + + #[simd_test(enable = "rtm")] + unsafe fn test_xabort() { + const ABORT_CODE: u32 = 42; + // aborting outside a transactional region does nothing + _xabort::(); + + for _ in 0..10 { + let mut x = 0; + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + x += 1; + rtm::_xabort::(); + } else if code & _XABORT_EXPLICIT != 0 { + let test_abort_code = rtm::_xabort_code(code); + assert_eq!(test_abort_code, ABORT_CODE); + } + assert_eq!(x, 0); + } + } + + #[simd_test(enable = "rtm")] + unsafe fn test_xtest() { + 
assert_eq!(_xtest(), 0); + + for _ in 0..10 { + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + let in_tx = _xtest(); + rtm::_xend(); + + // putting the assert inside the transaction would abort the transaction on fail + // without any output/panic/etc + assert_eq!(in_tx, 1); + break; + } + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sha.rs b/library/stdarch/crates/core_arch/src/x86/sha.rs new file mode 100644 index 000000000000..da568c449a6b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sha.rs @@ -0,0 +1,732 @@ +use crate::core_arch::{simd::*, x86::*}; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sha1msg1"] + fn sha1msg1(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha1msg2"] + fn sha1msg2(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha1nexte"] + fn sha1nexte(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha1rnds4"] + fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4; + #[link_name = "llvm.x86.sha256msg1"] + fn sha256msg1(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha256msg2"] + fn sha256msg2(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha256rnds2"] + fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4; + #[link_name = "llvm.x86.vsha512msg1"] + fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4; + #[link_name = "llvm.x86.vsha512msg2"] + fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4; + #[link_name = "llvm.x86.vsha512rnds2"] + fn vsha512rnds2(a: i64x4, b: i64x4, k: i64x2) -> i64x4; + #[link_name = "llvm.x86.vsm3msg1"] + fn vsm3msg1(a: i32x4, b: i32x4, c: i32x4) -> i32x4; + #[link_name = "llvm.x86.vsm3msg2"] + fn vsm3msg2(a: i32x4, b: i32x4, c: i32x4) -> i32x4; + #[link_name = "llvm.x86.vsm3rnds2"] + fn vsm3rnds2(a: i32x4, b: i32x4, c: i32x4, d: i32) -> i32x4; + #[link_name = "llvm.x86.vsm4key4128"] + fn vsm4key4128(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.vsm4key4256"] + fn vsm4key4256(a: i32x8, b: i32x8) -> i32x8; + 
#[link_name = "llvm.x86.vsm4rnds4128"] + fn vsm4rnds4128(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.vsm4rnds4256"] + fn vsm4rnds4256(a: i32x8, b: i32x8) -> i32x8; +} + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Performs an intermediate calculation for the next four SHA1 message values +/// (unsigned 32-bit integers) using previous message values from `a` and `b`, +/// and returning the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1msg1))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) } +} + +/// Performs the final calculation for the next four SHA1 message values +/// (unsigned 32-bit integers) using the intermediate result in `a` and the +/// previous message values in `b`, and returns the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1msg2))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) } +} + +/// Calculate SHA1 state variable E after four rounds of operation from the +/// current SHA1 state variable `a`, add that value to the scheduled values +/// (unsigned 32-bit integers) in `b`, and returns the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1nexte))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) } +} + +/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) +/// from `a` and some pre-computed sum of the next 4 round message values +/// (unsigned 32-bit integers), and state variable E from `b`, and return the +/// updated SHA1 state (A,B,C,D). `FUNC` contains the logic functions and round +/// constants. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(FUNC, 2); + unsafe { transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) } +} + +/// Performs an intermediate calculation for the next four SHA256 message values +/// (unsigned 32-bit integers) using previous message values from `a` and `b`, +/// and return the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha256msg1))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) } +} + +/// Performs the final calculation for the next four SHA256 message values +/// (unsigned 32-bit integers) using previous message values from `a` and `b`, +/// and return the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha256msg2))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) } +} + +/// Performs 2 rounds of SHA256 operation using an initial SHA256 state +/// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a +/// pre-computed sum of the next 2 round message values (unsigned 32-bit +/// integers) and the corresponding round constants from `k`, and store the +/// updated SHA256 state (A,B,E,F) in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32) +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha256rnds2))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i { + unsafe { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) } +} + +/// This intrinsic is one of the two SHA512 message scheduling instructions. +/// The intrinsic performs an intermediate calculation for the next four SHA512 +/// message qwords. 
The calculated results are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64) +#[inline] +#[target_feature(enable = "sha512,avx")] +#[cfg_attr(test, assert_instr(vsha512msg1))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i { + unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) } +} + +/// This intrinsic is one of the two SHA512 message scheduling instructions. +/// The intrinsic performs the final calculation for the next four SHA512 message +/// qwords. The calculated results are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64) +#[inline] +#[target_feature(enable = "sha512,avx")] +#[cfg_attr(test, assert_instr(vsha512msg2))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) } +} + +/// This intrinsic performs two rounds of SHA512 operation using initial SHA512 state +/// `(C,D,G,H)` from `a`, an initial SHA512 state `(A,B,E,F)` from `b`, and a +/// pre-computed sum of the next two round message qwords and the corresponding +/// round constants from `c` (only the two lower qwords of the third operand). The +/// updated SHA512 state `(A,B,E,F)` is written to dst, and dst can be used as the +/// updated state `(C,D,G,H)` in later rounds. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64) +#[inline] +#[target_feature(enable = "sha512,avx")] +#[cfg_attr(test, assert_instr(vsha512rnds2))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i { + unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) } +} + +/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs +/// an initial calculation for the next four SM3 message words. The calculated results +/// are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg1_epi32) +#[inline] +#[target_feature(enable = "sm3,avx")] +#[cfg_attr(test, assert_instr(vsm3msg1))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } +} + +/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs +/// the final calculation for the next four SM3 message words. The calculated results +/// are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg2_epi32) +#[inline] +#[target_feature(enable = "sm3,avx")] +#[cfg_attr(test, assert_instr(vsm3msg2))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } +} + +/// The intrinsic performs two rounds of SM3 operation using initial SM3 state `(C, D, G, H)` +/// from `a`, an initial SM3 states `(A, B, E, F)` from `b` and a pre-computed words from the +/// `c`. 
`a` with initial SM3 state of `(C, D, G, H)` assumes input of non-rotated left variables +/// from previous state. The updated SM3 state `(A, B, E, F)` is written to `a`. The `imm8` +/// should contain the even round number for the first of the two rounds computed by this instruction. +/// The computation masks the `imm8` value by ANDing it with `0x3E` so that only even round numbers +/// from 0 through 62 are used for this operation. The calculated results are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3rnds2_epi32) +#[inline] +#[target_feature(enable = "sm3,avx")] +#[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm_sm3rnds2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + static_assert!( + IMM8 == (IMM8 & 0x3e), + "IMM8 must be an even number in the range `0..=62`" + ); + unsafe { transmute(vsm3rnds2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4(), IMM8)) } +} + +/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent +/// 128-bit lanes. The calculated results are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4key4_epi32) +#[inline] +#[target_feature(enable = "sm4,avx")] +#[cfg_attr(test, assert_instr(vsm4key4))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) } +} + +/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent +/// 128-bit lanes. The calculated results are stored in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4key4_epi32) +#[inline] +#[target_feature(enable = "sm4,avx")] +#[cfg_attr(test, assert_instr(vsm4key4))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) } +} + +/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent +/// 128-bit lanes. The calculated results are stored in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4rnds4_epi32) +#[inline] +#[target_feature(enable = "sm4,avx")] +#[cfg_attr(test, assert_instr(vsm4rnds4))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) } +} + +/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent +/// 128-bit lanes. The calculated results are stored in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4rnds4_epi32) +#[inline] +#[target_feature(enable = "sm4,avx")] +#[cfg_attr(test, assert_instr(vsm4rnds4))] +#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) } +} + +#[cfg(test)] +mod tests { + use crate::{ + core_arch::{simd::*, x86::*}, + hint::black_box, + }; + use stdarch_test::simd_test; + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1msg1_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c); + let r = _mm_sha1msg1_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1msg2_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35); + let r = _mm_sha1msg2_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1nexte_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b); + let r = _mm_sha1nexte_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1rnds4_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = 
_mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f); + let r = _mm_sha1rnds4_epu32::<0>(a, b); + assert_eq_m128i(r, expected); + + let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe); + let r = _mm_sha1rnds4_epu32::<1>(a, b); + assert_eq_m128i(r, expected); + + let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001); + let r = _mm_sha1rnds4_epu32::<2>(a, b); + assert_eq_m128i(r, expected); + + let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b); + let r = _mm_sha1rnds4_epu32::<3>(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha256msg1_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee); + let r = _mm_sha256msg1_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha256msg2_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450); + let r = _mm_sha256msg2_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sha")] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha256rnds2_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let k = _mm_set_epi64x(0, 0x12835b01d807aa98); + let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19); + let r = _mm_sha256rnds2_epu32(a, b, k); + assert_eq_m128i(r, expected); + } + + static DATA_64: [u64; 10] = [ + 0x0011223344556677, + 0x8899aabbccddeeff, + 0xffeeddccbbaa9988, + 0x7766554433221100, + 0x0123456789abcdef, + 0xfedcba9876543210, + 0x02468ace13579bdf, + 
0xfdb97531eca86420, + 0x048c159d26ae37bf, + 0xfb73ea62d951c840, + ]; + + #[simd_test(enable = "sha512,avx")] + unsafe fn test_mm256_sha512msg1_epi64() { + fn s0(word: u64) -> u64 { + word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7) + } + + let A = &DATA_64[0..4]; + let B = &DATA_64[4..6]; + + let a = _mm256_loadu_si256(A.as_ptr().cast()); + let b = _mm_loadu_si128(B.as_ptr().cast()); + + let r = _mm256_sha512msg1_epi64(a, b); + + let e = _mm256_setr_epi64x( + A[0].wrapping_add(s0(A[1])) as _, + A[1].wrapping_add(s0(A[2])) as _, + A[2].wrapping_add(s0(A[3])) as _, + A[3].wrapping_add(s0(B[0])) as _, + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "sha512,avx")] + unsafe fn test_mm256_sha512msg2_epi64() { + fn s1(word: u64) -> u64 { + word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6) + } + + let A = &DATA_64[0..4]; + let B = &DATA_64[4..8]; + + let a = _mm256_loadu_si256(A.as_ptr().cast()); + let b = _mm256_loadu_si256(B.as_ptr().cast()); + + let r = _mm256_sha512msg2_epi64(a, b); + + let e0 = A[0].wrapping_add(s1(B[2])); + let e1 = A[1].wrapping_add(s1(B[3])); + let e = _mm256_setr_epi64x( + e0 as _, + e1 as _, + A[2].wrapping_add(s1(e0)) as _, + A[3].wrapping_add(s1(e1)) as _, + ); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "sha512,avx")] + unsafe fn test_mm256_sha512rnds2_epi64() { + fn cap_sigma0(word: u64) -> u64 { + word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39) + } + + fn cap_sigma1(word: u64) -> u64 { + word.rotate_right(14) ^ word.rotate_right(18) ^ word.rotate_right(41) + } + + fn maj(a: u64, b: u64, c: u64) -> u64 { + (a & b) ^ (a & c) ^ (b & c) + } + + fn ch(e: u64, f: u64, g: u64) -> u64 { + (e & f) ^ (g & !e) + } + + let A = &DATA_64[0..4]; + let B = &DATA_64[4..8]; + let K = &DATA_64[8..10]; + + let a = _mm256_loadu_si256(A.as_ptr().cast()); + let b = _mm256_loadu_si256(B.as_ptr().cast()); + let k = _mm_loadu_si128(K.as_ptr().cast()); + + let r = _mm256_sha512rnds2_epi64(a, b, k); + + 
let mut array = [B[3], B[2], A[3], A[2], B[1], B[0], A[1], A[0]]; + for i in 0..2 { + let new_d = ch(array[4], array[5], array[6]) + .wrapping_add(cap_sigma1(array[4])) + .wrapping_add(K[i]) + .wrapping_add(array[7]); + array[7] = new_d + .wrapping_add(maj(array[0], array[1], array[2])) + .wrapping_add(cap_sigma0(array[0])); + array[3] = new_d.wrapping_add(array[3]); + array.rotate_right(1); + } + let e = _mm256_setr_epi64x(array[5] as _, array[4] as _, array[1] as _, array[0] as _); + + assert_eq_m256i(r, e); + } + + static DATA_32: [u32; 16] = [ + 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff, 0xffeeddcc, 0xbbaa9988, 0x77665544, + 0x33221100, 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210, 0x02468ace, 0x13579bdf, + 0xfdb97531, 0xeca86420, + ]; + + #[simd_test(enable = "sm3,avx")] + unsafe fn test_mm_sm3msg1_epi32() { + fn p1(x: u32) -> u32 { + x ^ x.rotate_left(15) ^ x.rotate_left(23) + } + let A = &DATA_32[0..4]; + let B = &DATA_32[4..8]; + let C = &DATA_32[8..12]; + + let a = _mm_loadu_si128(A.as_ptr().cast()); + let b = _mm_loadu_si128(B.as_ptr().cast()); + let c = _mm_loadu_si128(C.as_ptr().cast()); + + let r = _mm_sm3msg1_epi32(a, b, c); + + let e = _mm_setr_epi32( + p1(A[0] ^ C[0] ^ B[0].rotate_left(15)) as _, + p1(A[1] ^ C[1] ^ B[1].rotate_left(15)) as _, + p1(A[2] ^ C[2] ^ B[2].rotate_left(15)) as _, + p1(A[3] ^ C[3]) as _, + ); + + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sm3,avx")] + unsafe fn test_mm_sm3msg2_epi32() { + let A = &DATA_32[0..4]; + let B = &DATA_32[4..8]; + let C = &DATA_32[8..12]; + + let a = _mm_loadu_si128(A.as_ptr().cast()); + let b = _mm_loadu_si128(B.as_ptr().cast()); + let c = _mm_loadu_si128(C.as_ptr().cast()); + + let r = _mm_sm3msg2_epi32(a, b, c); + + let e0 = B[0].rotate_left(7) ^ C[0] ^ A[0]; + let e = _mm_setr_epi32( + e0 as _, + (B[1].rotate_left(7) ^ C[1] ^ A[1]) as _, + (B[2].rotate_left(7) ^ C[2] ^ A[2]) as _, + (B[3].rotate_left(7) + ^ C[3] + ^ A[3] + ^ e0.rotate_left(6) + ^ e0.rotate_left(15) + ^ 
e0.rotate_left(30)) as _, + ); + + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sm3,avx")] + unsafe fn test_mm_sm3rnds2_epi32() { + fn p0(x: u32) -> u32 { + x ^ x.rotate_left(9) ^ x.rotate_left(17) + } + fn ff(x: u32, y: u32, z: u32, round: u32) -> u32 { + if round < 16 { + x ^ y ^ z + } else { + (x & y) | (x & z) | (y & z) + } + } + fn gg(x: u32, y: u32, z: u32, round: u32) -> u32 { + if round < 16 { + x ^ y ^ z + } else { + (x & y) | (!x & z) + } + } + + const ROUND: u32 = 30; + + let A = &DATA_32[0..4]; + let B = &DATA_32[4..8]; + let C = &DATA_32[8..12]; + + let a = _mm_loadu_si128(A.as_ptr().cast()); + let b = _mm_loadu_si128(B.as_ptr().cast()); + let c = _mm_loadu_si128(C.as_ptr().cast()); + + let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c); + + let CONST: u32 = if ROUND < 16 { 0x79cc4519 } else { 0x7a879d8a }; + + let mut array = [ + B[3], + B[2], + A[3].rotate_left(9), + A[2].rotate_left(9), + B[1], + B[0], + A[1].rotate_left(19), + A[0].rotate_left(19), + ]; + + for i in 0..2 { + let s1 = array[0] + .rotate_left(12) + .wrapping_add(array[4]) + .wrapping_add(CONST.rotate_left(ROUND as u32 + i as u32)) + .rotate_left(7); + let s2 = s1 ^ array[0].rotate_left(12); + + let t1 = ff(array[0], array[1], array[2], ROUND) + .wrapping_add(array[3]) + .wrapping_add(s2) + .wrapping_add(C[i] ^ C[i + 2]); + let t2 = gg(array[4], array[5], array[6], ROUND) + .wrapping_add(array[7]) + .wrapping_add(s1) + .wrapping_add(C[i]); + + array[3] = array[2]; + array[2] = array[1].rotate_left(9); + array[1] = array[0]; + array[0] = t1; + array[7] = array[6]; + array[6] = array[5].rotate_left(19); + array[5] = array[4]; + array[4] = p0(t2); + } + + let e = _mm_setr_epi32(array[5] as _, array[4] as _, array[1] as _, array[0] as _); + + assert_eq_m128i(r, e); + } + + fn lower_t(x: u32) -> u32 { + static SBOX: [u8; 256] = [ + 0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, + 0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 
0xAA, 0x44, 0x13, 0x26, + 0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, + 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, + 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, + 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2, + 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24, + 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, + 0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, + 0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, + 0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, + 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, + 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, + 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92, + 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1, + 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, + 0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, + 0xC6, 0x84, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, + 0xD7, 0xCB, 0x39, 0x48, + ]; + + ((SBOX[(x >> 24) as usize] as u32) << 24) + | ((SBOX[((x >> 16) & 0xff) as usize] as u32) << 16) + | ((SBOX[((x >> 8) & 0xff) as usize] as u32) << 8) + | (SBOX[(x & 0xff) as usize] as u32) + } + + #[simd_test(enable = "sm4,avx")] + unsafe fn test_mm_sm4key4_epi32() { + fn l_key(x: u32) -> u32 { + x ^ x.rotate_left(13) ^ x.rotate_left(23) + } + fn f_key(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { + x0 ^ l_key(lower_t(x1 ^ x2 ^ x3 ^ rk)) + } + + let A = &DATA_32[0..4]; + let B = &DATA_32[4..8]; + + let a = _mm_loadu_si128(A.as_ptr().cast()); + let b 
= _mm_loadu_si128(B.as_ptr().cast()); + + let r = _mm_sm4key4_epi32(a, b); + + let e0 = f_key(A[0], A[1], A[2], A[3], B[0]); + let e1 = f_key(A[1], A[2], A[3], e0, B[1]); + let e2 = f_key(A[2], A[3], e0, e1, B[2]); + let e3 = f_key(A[3], e0, e1, e2, B[3]); + let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _); + + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sm4,avx")] + unsafe fn test_mm256_sm4key4_epi32() { + let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast()); + let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast()); + let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast()); + let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast()); + + let a = _mm256_set_m128i(a_high, a_low); + let b = _mm256_set_m128i(b_high, b_low); + + let r = _mm256_sm4key4_epi32(a, b); + + let e_low = _mm_sm4key4_epi32(a_low, b_low); + let e_high = _mm_sm4key4_epi32(a_high, b_high); + let e = _mm256_set_m128i(e_high, e_low); + + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "sm4,avx")] + unsafe fn test_mm_sm4rnds4_epi32() { + fn l_rnd(x: u32) -> u32 { + x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24) + } + fn f_rnd(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { + x0 ^ l_rnd(lower_t(x1 ^ x2 ^ x3 ^ rk)) + } + + let A = &DATA_32[0..4]; + let B = &DATA_32[4..8]; + + let a = _mm_loadu_si128(A.as_ptr().cast()); + let b = _mm_loadu_si128(B.as_ptr().cast()); + + let r = _mm_sm4rnds4_epi32(a, b); + + let e0 = f_rnd(A[0], A[1], A[2], A[3], B[0]); + let e1 = f_rnd(A[1], A[2], A[3], e0, B[1]); + let e2 = f_rnd(A[2], A[3], e0, e1, B[2]); + let e3 = f_rnd(A[3], e0, e1, e2, B[3]); + let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _); + + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sm4,avx")] + unsafe fn test_mm256_sm4rnds4_epi32() { + let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast()); + let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast()); + let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast()); + let 
b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast()); + + let a = _mm256_set_m128i(a_high, a_low); + let b = _mm256_set_m128i(b_high, b_low); + + let r = _mm256_sm4rnds4_epi32(a, b); + + let e_low = _mm_sm4rnds4_epi32(a_low, b_low); + let e_high = _mm_sm4rnds4_epi32(a_high, b_high); + let e = _mm256_set_m128i(e_high, e_low); + + assert_eq_m256i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs new file mode 100644 index 000000000000..1eca66adc2c6 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -0,0 +1,3338 @@ +//! Streaming SIMD Extensions (SSE) + +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, + intrinsics::sqrtf32, + mem, ptr, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Adds the first component of `a` and `b`, the other components are copied +/// from `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(addss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) } +} + +/// Adds packed single-precision (32-bit) floating-point elements in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(addps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_add(a, b) } +} + +/// Subtracts the first component of `b` from `a`, the other components are +/// copied from `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(subss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) } +} + +/// Subtracts packed single-precision (32-bit) floating-point elements in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(subps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_sub(a, b) } +} + +/// Multiplies the first component of `a` and `b`, the other components are +/// copied from `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(mulss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) } +} + +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(mulps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_mul(a, b) } +} + +/// Divides the first component of `b` by `a`, the other components are +/// copied from `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(divss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) } +} + +/// Divides packed single-precision (32-bit) floating-point elements in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(divps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_div(a, b) } +} + +/// Returns the square root of the first single-precision (32-bit) +/// floating-point element in `a`, the other elements are unchanged. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(sqrtss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sqrt_ss(a: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) } +} + +/// Returns the square root of packed single-precision (32-bit) floating-point +/// elements in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(sqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sqrt_ps(a: __m128) -> __m128 { + unsafe { simd_fsqrt(a) } +} + +/// Returns the approximate reciprocal of the first single-precision +/// (32-bit) floating-point element in `a`, the other elements are unchanged. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(rcpss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_rcp_ss(a: __m128) -> __m128 { + unsafe { rcpss(a) } +} + +/// Returns the approximate reciprocal of packed single-precision (32-bit) +/// floating-point elements in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(rcpps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_rcp_ps(a: __m128) -> __m128 { + unsafe { rcpps(a) } +} + +/// Returns the approximate reciprocal square root of the first single-precision +/// (32-bit) floating-point element in `a`, the other elements are unchanged. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(rsqrtss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_rsqrt_ss(a: __m128) -> __m128 { + unsafe { rsqrtss(a) } +} + +/// Returns the approximate reciprocal square root of packed single-precision +/// (32-bit) floating-point elements in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(rsqrtps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_rsqrt_ps(a: __m128) -> __m128 { + unsafe { rsqrtps(a) } +} + +/// Compares the first single-precision (32-bit) floating-point element of `a` +/// and `b`, and return the minimum value in the first element of the return +/// value, the other elements are copied from `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(minss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { + unsafe { minss(a, b) } +} + +/// Compares packed single-precision (32-bit) floating-point elements in `a` and +/// `b`, and return the corresponding minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(minps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { + // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmin`. + unsafe { minps(a, b) } +} + +/// Compares the first single-precision (32-bit) floating-point element of `a` +/// and `b`, and return the maximum value in the first element of the return +/// value, the other elements are copied from `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(maxss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { + unsafe { maxss(a, b) } +} + +/// Compares packed single-precision (32-bit) floating-point elements in `a` and +/// `b`, and return the corresponding maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(maxps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { + // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmax`. 
+ unsafe { maxps(a, b) } +} + +/// Bitwise AND of packed single-precision (32-bit) floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_ps) +#[inline] +#[target_feature(enable = "sse")] +// i586 only seems to generate plain `and` instructions, so ignore it. +#[cfg_attr( + all(test, any(target_arch = "x86_64", target_feature = "sse2")), + assert_instr(andps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(simd_and(a, b)) + } +} + +/// Bitwise AND-NOT of packed single-precision (32-bit) floating-point +/// elements. +/// +/// Computes `!a & b` for each bit in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_ps) +#[inline] +#[target_feature(enable = "sse")] +// i586 only seems to generate plain `not` and `and` instructions, so ignore +// it. +#[cfg_attr( + all(test, any(target_arch = "x86_64", target_feature = "sse2")), + assert_instr(andnps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + let mask: __m128i = mem::transmute(i32x4::splat(-1)); + mem::transmute(simd_and(simd_xor(mask, a), b)) + } +} + +/// Bitwise OR of packed single-precision (32-bit) floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_ps) +#[inline] +#[target_feature(enable = "sse")] +// i586 only seems to generate plain `or` instructions, so we ignore it. 
+#[cfg_attr( + all(test, any(target_arch = "x86_64", target_feature = "sse2")), + assert_instr(orps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(simd_or(a, b)) + } +} + +/// Bitwise exclusive OR of packed single-precision (32-bit) floating-point +/// elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_ps) +#[inline] +#[target_feature(enable = "sse")] +// i586 only seems to generate plain `xor` instructions, so we ignore it. +#[cfg_attr( + all(test, any(target_arch = "x86_64", target_feature = "sse2")), + assert_instr(xorps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(simd_xor(a, b)) + } +} + +/// Compares the lowest `f32` of both inputs for equality. The lowest 32 bits of +/// the result will be `0xffffffff` if the two inputs are equal, or `0` +/// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpeqss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 0) } +} + +/// Compares the lowest `f32` of both inputs for less than. The lowest 32 bits +/// of the result will be `0xffffffff` if `a.extract(0)` is less than +/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the +/// upper 96 bits of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 1) } +} + +/// Compares the lowest `f32` of both inputs for less than or equal. The lowest +/// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than +/// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result +/// are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpless))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 2) } +} + +/// Compares the lowest `f32` of both inputs for greater than. The lowest 32 +/// bits of the result will be `0xffffffff` if `a.extract(0)` is greater +/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result +/// are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) } +} + +/// Compares the lowest `f32` of both inputs for greater than or equal. The +/// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is +/// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits +/// of the result are the upper 96 bits of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpless))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) } +} + +/// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits +/// of the result will be `0xffffffff` if `a.extract(0)` is not equal to +/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the +/// upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpneqss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 4) } +} + +/// Compares the lowest `f32` of both inputs for not-less-than. The lowest 32 +/// bits of the result will be `0xffffffff` if `a.extract(0)` is not less than +/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the +/// upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 5) } +} + +/// Compares the lowest `f32` of both inputs for not-less-than-or-equal. The +/// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not +/// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits +/// of the result are the upper 96 bits of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnless))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 6) } +} + +/// Compares the lowest `f32` of both inputs for not-greater-than. The lowest 32 +/// bits of the result will be `0xffffffff` if `a.extract(0)` is not greater +/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are +/// the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnltss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) } +} + +/// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. The +/// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not +/// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 +/// bits of the result are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnless))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) } +} + +/// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of +/// the result will be `0xffffffff` if neither of `a.extract(0)` or +/// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result +/// are the upper 96 bits of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpordss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 7) } +} + +/// Checks if the lowest `f32` of both inputs are unordered. The lowest 32 bits +/// of the result will be `0xffffffff` if any of `a.extract(0)` or +/// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result +/// are the upper 96 bits of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpunordss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 3) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input elements +/// were equal, or `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpeqps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 0) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is less than the corresponding element in `b`, or `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 1) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is less than or equal to the corresponding element in `b`, or `0` +/// otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 2) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is greater than the corresponding element in `b`, or `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 1) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is greater than or equal to the corresponding element in `b`, or `0` +/// otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 2) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input elements +/// are **not** equal, or `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpneqps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 4) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is **not** less than the corresponding element in `b`, or `0` +/// otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 5) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is **not** less than or equal to the corresponding element in `b`, or +/// `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 6) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is **not** greater than the corresponding element in `b`, or `0` +/// otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnltps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 5) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// The result in the output vector will be `0xffffffff` if the input element +/// in `a` is **not** greater than or equal to the corresponding element in `b`, +/// or `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cmpnleps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 6) } +} + +/// Compares each of the four floats in `a` to the corresponding element in `b`. +/// Returns four floats that have one of two possible bit patterns. The element +/// in the output vector will be `0xffffffff` if the input elements in `a` and +/// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_ps)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cmpordps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
+    unsafe { cmpps(b, a, 7) }
+}
+
+/// Compares each of the four floats in `a` to the corresponding element in `b`.
+/// Returns four floats that have one of two possible bit patterns. The element
+/// in the output vector will be `0xffffffff` if the input elements in `a` and
+/// `b` are unordered (i.e., at least one of them is a NaN), or 0 otherwise.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_ps)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cmpunordps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
+    unsafe { cmpps(b, a, 3) }
+}
+
+/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns
+/// `1` if they are equal, or `0` otherwise.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_ss)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(comiss))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
+    unsafe { comieq_ss(a, b) }
+}
+
+/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns
+/// `1` if the value from `a` is less than the one from `b`, or `0` otherwise.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { + unsafe { comilt_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is less than or equal to the one from `b`, or `0` +/// otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { + unsafe { comile_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is greater than the one from `b`, or `0` +/// otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { + unsafe { comigt_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is greater than or equal to the one from `b`, or +/// `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { + unsafe { comige_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are **not** equal, or `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(comiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { + unsafe { comineq_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are equal, or `0` otherwise. This instruction will not signal +/// an exception if either argument is a quiet NaN. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomieq_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. +/// This instruction will not signal an exception if either argument is a quiet +/// NaN. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomilt_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is less than or equal to the one from `b`, or `0` +/// otherwise. This instruction will not signal an exception if either argument +/// is a quiet NaN. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomile_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is greater than the one from `b`, or `0` +/// otherwise. This instruction will not signal an exception if either argument +/// is a quiet NaN. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomigt_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if the value from `a` is greater than or equal to the one from `b`, or +/// `0` otherwise. This instruction will not signal an exception if either +/// argument is a quiet NaN. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomige_ss(a, b) } +} + +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are **not** equal, or `0` otherwise. This instruction will not +/// signal an exception if either argument is a quiet NaN. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ucomiss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomineq_ss(a, b) } +} + +/// Converts the lowest 32 bit float in the input vector to a 32 bit integer. +/// +/// The result is rounded according to the current rounding mode. If the result +/// cannot be represented as a 32 bit integer the result will be `0x8000_0000` +/// (`i32::MIN`). +/// +/// This corresponds to the `CVTSS2SI` instruction (with 32 bit output). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si32) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtss_si32(a: __m128) -> i32 { + unsafe { cvtss2si(a) } +} + +/// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_ss2si) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvt_ss2si(a: __m128) -> i32 { + _mm_cvtss_si32(a) +} + +/// Converts the lowest 32 bit float in the input vector to a 32 bit integer +/// with +/// truncation. +/// +/// The result is rounded always using truncation (round towards zero). If the +/// result cannot be represented as a 32 bit integer the result will be +/// `0x8000_0000` (`i32::MIN`). +/// +/// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si32) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttss_si32(a: __m128) -> i32 { + unsafe { cvttss2si(a) } +} + +/// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_ss2si) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtt_ss2si(a: __m128) -> i32 { + _mm_cvttss_si32(a) +} + +/// Extracts the lowest 32 bit float from the input vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_f32) +#[inline] +#[target_feature(enable = "sse")] +// No point in using assert_instrs. In Unix x86_64 calling convention this is a +// no-op, and on msvc it's just a `mov`. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtss_f32(a: __m128) -> f32 { + unsafe { simd_extract!(a, 0) } +} + +/// Converts a 32 bit integer to a 32 bit float. 
The result vector is the input
+/// vector `a` with the lowest 32 bit float replaced by the converted integer.
+///
+/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit
+/// input).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_ss)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cvtsi2ss))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
+    unsafe { cvtsi2ss(a, b) }
+}
+
+/// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_si2ss)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cvtsi2ss))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
+    _mm_cvtsi32_ss(a, b)
+}
+
+/// Construct a `__m128` with the lowest element set to `a` and the rest set to
+/// zero.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ss)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(movss))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_set_ss(a: f32) -> __m128 {
+    __m128([a, 0.0, 0.0, 0.0])
+}
+
+/// Construct a `__m128` with all elements set to `a`.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(shufps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set1_ps(a: f32) -> __m128 { + __m128([a, a, a, a]) +} + +/// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps1) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(shufps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_ps1(a: f32) -> __m128 { + _mm_set1_ps(a) +} + +/// Construct a `__m128` from four floating point values highest to lowest. +/// +/// Note that `a` will be the highest 32 bits of the result, and `d` the +/// lowest. This matches the standard way of writing bit patterns on x86: +/// +/// ```text +/// bit 127 .. 96 95 .. 64 63 .. 32 31 .. 0 +/// +---------+---------+---------+---------+ +/// | a | b | c | d | result +/// +---------+---------+---------+---------+ +/// ``` +/// +/// Alternatively: +/// +/// ```text +/// let v = _mm_set_ps(d, c, b, a); +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(unpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { + __m128([d, c, b, a]) +} + +/// Construct a `__m128` from four floating point values lowest to highest. +/// +/// This matches the memory order of `__m128`, i.e., `a` will be the lowest 32 +/// bits of the result, and `d` the highest. 
+/// +/// ```text +/// assert_eq!(__m128::new(a, b, c, d), _mm_setr_ps(a, b, c, d)); +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr( + all(test, any(target_env = "msvc", target_arch = "x86_64")), + assert_instr(unpcklps) +)] +// On a 32-bit architecture on non-msvc it just copies the operands from the stack. +#[cfg_attr( + all(test, all(not(target_env = "msvc"), target_arch = "x86")), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { + __m128([a, b, c, d]) +} + +/// Construct a `__m128` with all elements initialized to zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setzero_ps() -> __m128 { + const { unsafe { mem::zeroed() } } +} + +/// A utility function for creating masks to use with Intel shuffle and +/// permute intrinsics. +#[inline] +#[allow(non_snake_case)] +#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")] +pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { + ((z << 6) | (y << 4) | (x << 2) | w) as i32 +} + +/// Shuffles packed single-precision (32-bit) floating-point elements in `a` and +/// `b` using `MASK`. +/// +/// The lower half of result takes values from `a` and the higher half from +/// `b`. Mask is split to 2 control bits each to index the element from inputs. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_ps) +/// +/// Note that there appears to be a mistake within Intel's Intrinsics Guide. 
+/// `_mm_shuffle_ps` is supposed to take an `i32` instead of a `u32` +/// as is the case for [other shuffle intrinsics](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_). +/// Performing an implicit type conversion between an unsigned integer and a signed integer +/// does not cause a problem in C, however Rust's commitment to strong typing does not allow this. +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(shufps, MASK = 3))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(MASK, 8); + unsafe { + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ) + } +} + +/// Unpacks and interleave single-precision (32-bit) floating-point elements +/// from the higher half of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(unpckhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } +} + +/// Unpacks and interleave single-precision (32-bit) floating-point elements +/// from the lower half of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(unpcklps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +} + +/// Combine higher half of `a` and `b`. The higher half of `b` occupies the +/// lower half of result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehl_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movhlps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { + // TODO; figure why this is a different instruction on msvc? + unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) } +} + +/// Combine lower half of `a` and `b`. The lower half of `b` occupies the +/// higher half of result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movelh_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movlhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [0, 1, 4, 5]) } +} + +/// Returns a mask of the most significant bit of each element in `a`. +/// +/// The mask is stored in the 4 least significant bits of the return value. +/// All other bits are set to `0`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movmskps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_movemask_ps(a: __m128) -> i32 { + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + unsafe { + let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO); + simd_bitmask::(mask).into() + } +} + +/// Construct a `__m128` with the lowest element read from `p` and the other +/// elements set to zero. +/// +/// This corresponds to instructions `VMOVSS` / `MOVSS`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 { + __m128([*p, 0.0, 0.0, 0.0]) +} + +/// Construct a `__m128` by duplicating the value read from `p` into all +/// elements. +/// +/// This corresponds to instructions `VMOVSS` / `MOVSS` followed by some +/// shuffling. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 { + let a = *p; + __m128([a, a, a, a]) +} + +/// Alias for [`_mm_load1_ps`](fn._mm_load1_ps.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ps1) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { + _mm_load1_ps(p) +} + +/// Loads four `f32` values from *aligned* memory into a `__m128`. If the +/// pointer is not aligned to a 128-bit boundary (16 bytes) a general +/// protection fault will be triggered (fatal program crash). +/// +/// Use [`_mm_loadu_ps`](fn._mm_loadu_ps.html) for potentially unaligned +/// memory. +/// +/// This corresponds to instructions `VMOVAPS` / `MOVAPS`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ps) +#[inline] +#[target_feature(enable = "sse")] +// FIXME: Rust doesn't emit alignment attributes for MSVC x86-32. 
Ref https://github.com/rust-lang/rust/pull/139261 +// All aligned load/store intrinsics are affected +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { + *(p as *const __m128) +} + +/// Loads four `f32` values from memory into a `__m128`. There are no +/// restrictions +/// on memory alignment. For aligned memory +/// [`_mm_load_ps`](fn._mm_load_ps.html) +/// may be faster. +/// +/// This corresponds to instructions `VMOVUPS` / `MOVUPS`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { + // Note: Using `*p` would require `f32` alignment, but `movups` has no + // alignment restrictions. + let mut dst = _mm_undefined_ps(); + ptr::copy_nonoverlapping( + p as *const u8, + ptr::addr_of_mut!(dst) as *mut u8, + mem::size_of::<__m128>(), + ); + dst +} + +/// Loads four `f32` values from aligned memory into a `__m128` in reverse +/// order. +/// +/// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general +/// protection fault will be triggered (fatal program crash). +/// +/// Functionally equivalent to the following code sequence (assuming `p` +/// satisfies the alignment restrictions): +/// +/// ```text +/// let a0 = *p; +/// let a1 = *p.add(1); +/// let a2 = *p.add(2); +/// let a3 = *p.add(3); +/// __m128::new(a3, a2, a1, a0) +/// ``` +/// +/// This corresponds to instructions `VMOVAPS` / `MOVAPS` followed by some +/// shuffling. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 { + let a = _mm_load_ps(p); + simd_shuffle!(a, a, [3, 2, 1, 0]) +} + +/// Stores the lowest 32 bit float of `a` into memory. +/// +/// This intrinsic corresponds to the `MOVSS` instruction. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { + *p = simd_extract!(a, 0); +} + +/// Stores the lowest 32 bit float of `a` repeated four times into *aligned* +/// memory. +/// +/// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general +/// protection fault will be triggered (fatal program crash). 
+/// +/// Functionally equivalent to the following code sequence (assuming `p` +/// satisfies the alignment restrictions): +/// +/// ```text +/// let x = a.extract(0); +/// *p = x; +/// *p.add(1) = x; +/// *p.add(2) = x; +/// *p.add(3) = x; +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) { + let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]); + *(p as *mut __m128) = b; +} + +/// Alias for [`_mm_store1_ps`](fn._mm_store1_ps.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ps1) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { + _mm_store1_ps(p, a); +} + +/// Stores four 32-bit floats into *aligned* memory. +/// +/// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general +/// protection fault will be triggered (fatal program crash). +/// +/// Use [`_mm_storeu_ps`](fn._mm_storeu_ps.html) for potentially unaligned +/// memory. +/// +/// This corresponds to instructions `VMOVAPS` / `MOVAPS`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { + *(p as *mut __m128) = a; +} + +/// Stores four 32-bit floats into memory. There are no restrictions on memory +/// alignment. For aligned memory [`_mm_store_ps`](fn._mm_store_ps.html) may be +/// faster. +/// +/// This corresponds to instructions `VMOVUPS` / `MOVUPS`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { + ptr::copy_nonoverlapping( + ptr::addr_of!(a) as *const u8, + p as *mut u8, + mem::size_of::<__m128>(), + ); +} + +/// Stores four 32-bit floats into *aligned* memory in reverse order. +/// +/// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general +/// protection fault will be triggered (fatal program crash). 
+/// +/// Functionally equivalent to the following code sequence (assuming `p` +/// satisfies the alignment restrictions): +/// +/// ```text +/// *p = a.extract(3); +/// *p.add(1) = a.extract(2); +/// *p.add(2) = a.extract(1); +/// *p.add(3) = a.extract(0); +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_ps) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { + let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]); + *(p as *mut __m128) = b; +} + +/// Returns a `__m128` with the first component from `b` and the remaining +/// components from `a`. +/// +/// In other words for any `a` and `b`: +/// ```text +/// _mm_move_ss(a, b) == a.replace(0, b.extract(0)) +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) } +} + +/// Performs a serializing operation on all non-temporal ("streaming") store instructions that +/// were issued by the current thread prior to this instruction. +/// +/// Guarantees that every non-temporal store instruction that precedes this fence, in program order, is +/// ordered before any load or store instruction which follows the fence in +/// synchronization order. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sfence)
+/// (but note that Intel is only documenting the hardware-level concerns related to this
+/// instruction; the Intel documentation does not take into account the extra concerns that arise
+/// because the Rust memory model is different from the x86 memory model.)
+///
+/// # Safety of non-temporal stores
+///
+/// After using any non-temporal store intrinsic, but before any other access to the memory that the
+/// intrinsic mutates, a call to `_mm_sfence` must be performed on the thread that used the
+/// intrinsic.
+///
+/// Non-temporal stores behave very differently from regular stores. For the purpose of the Rust
+/// memory model, these stores are happening asynchronously in a background thread. This means a
+/// non-temporal store can cause data races with other accesses, even other accesses on the same
+/// thread. It also means that cross-thread synchronization does not work as expected: let's say the
+/// intrinsic is called on thread T1, and T1 performs synchronization with some other thread T2. The
+/// non-temporal store acts as if it happened not in T1 but in a different thread T3, and T2 has not
+/// synchronized with T3! Calling `_mm_sfence` makes the current thread wait for and synchronize
+/// with all the non-temporal stores previously started on this thread, which means in particular
+/// that subsequent synchronization with other threads will then work as intended again.
+///
+/// The general pattern to use non-temporal stores correctly is to call `_mm_sfence` before your
+/// code jumps back to code outside your library. This ensures all stores inside your function
+/// are synchronized-before the return, and thus transitively synchronized-before everything
+/// the caller does after your function returns.
+// +// The following is not a doc comment since it's not clear whether we want to put this into the +// docs, but it should be written out somewhere. +// +// Formally, we consider non-temporal stores and sfences to be opaque blobs that the compiler cannot +// inspect, and that behave like the following functions. This explains where the docs above come +// from. +// ``` +// #[thread_local] +// static mut PENDING_NONTEMP_WRITES = AtomicUsize::new(0); +// +// pub unsafe fn nontemporal_store(ptr: *mut T, val: T) { +// PENDING_NONTEMP_WRITES.fetch_add(1, Relaxed); +// // Spawn a thread that will eventually do our write. +// // We need to fetch a pointer to this thread's pending-write +// // counter, so that we can access it from the background thread. +// let pending_writes = addr_of!(PENDING_NONTEMP_WRITES); +// // If this was actual Rust code we'd have to do some extra work +// // because `ptr`, `val`, `pending_writes` are all `!Send`. We skip that here. +// std::thread::spawn(move || { +// // Do the write in the background thread. +// ptr.write(val); +// // Register the write as done. Crucially, this is `Release`, so it +// // syncs-with the `Acquire in `sfence`. +// (&*pending_writes).fetch_sub(1, Release); +// }); +// } +// +// pub fn sfence() { +// unsafe { +// // Wait until there are no more pending writes. +// while PENDING_NONTEMP_WRITES.load(Acquire) > 0 {} +// } +// } +// ``` +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(sfence))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_sfence() { + sfence() +} + +/// Gets the unsigned 32-bit value of the MXCSR control and status register. 
+/// +/// Note that Rust makes no guarantees whatsoever about the contents of this register: Rust +/// floating-point operations may or may not result in this register getting updated with exception +/// state, and the register can change between two invocations of this function even when no +/// floating-point operations appear in the source code (since floating-point operations appearing +/// earlier or later can be reordered). +/// +/// If you need to perform some floating-point operations and check whether they raised an +/// exception, use an inline assembly block for the entire sequence of operations. +/// +/// For more info see [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getcsr) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(stmxcsr))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_getcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _mm_getcsr() -> u32 { + unsafe { + let mut result = 0_i32; + stmxcsr(ptr::addr_of_mut!(result) as *mut i8); + result as u32 + } +} + +/// Sets the MXCSR register with the 32-bit unsigned integer value. +/// +/// This register controls how SIMD instructions handle floating point +/// operations. Modifying this register only affects the current thread. +/// +/// It contains several groups of flags: +/// +/// * *Exception flags* report which exceptions occurred since last they were reset. +/// +/// * *Masking flags* can be used to mask (ignore) certain exceptions. By default +/// these flags are all set to 1, so all exceptions are masked. When +/// an exception is masked, the processor simply sets the exception flag and +/// continues the operation. If the exception is unmasked, the flag is also set +/// but additionally an exception handler is invoked. 
+/// +/// * *Rounding mode flags* control the rounding mode of floating point +/// instructions. +/// +/// * The *denormals-are-zero mode flag* turns all numbers which would be +/// denormalized (exponent bits are all zeros) into zeros. +/// +/// Note that modifying the masking flags, rounding mode, or denormals-are-zero mode flags leads to +/// **immediate Undefined Behavior**: Rust assumes that these are always in their default state and +/// will optimize accordingly. This even applies when the register is altered and later reset to its +/// original value without any floating-point operations appearing in the source code between those +/// operations (since floating-point operations appearing earlier or later can be reordered). +/// +/// If you need to perform some floating-point operations under a different masking flags, rounding +/// mode, or denormals-are-zero mode, use an inline assembly block and make sure to restore the +/// original MXCSR register state before the end of the block. +/// +/// ## Exception Flags +/// +/// * `_MM_EXCEPT_INVALID`: An invalid operation was performed (e.g., dividing +/// Infinity by Infinity). +/// +/// * `_MM_EXCEPT_DENORM`: An operation attempted to operate on a denormalized +/// number. Mainly this can cause loss of precision. +/// +/// * `_MM_EXCEPT_DIV_ZERO`: Division by zero occurred. +/// +/// * `_MM_EXCEPT_OVERFLOW`: A numeric overflow exception occurred, i.e., a +/// result was too large to be represented (e.g., an `f32` with absolute +/// value greater than `2^128`). +/// +/// * `_MM_EXCEPT_UNDERFLOW`: A numeric underflow exception occurred, i.e., a +/// result was too small to be represented in a normalized way (e.g., an +/// `f32` with absolute value smaller than `2^-126`.) +/// +/// * `_MM_EXCEPT_INEXACT`: An inexact-result exception occurred (a.k.a. +/// precision exception). This means some precision was lost due to rounding. 
+/// For example, the fraction `1/3` cannot be represented accurately in a +/// 32 or 64 bit float and computing it would cause this exception to be +/// raised. Precision exceptions are very common, so they are usually masked. +/// +/// Exception flags can be read and set using the convenience functions +/// `_MM_GET_EXCEPTION_STATE` and `_MM_SET_EXCEPTION_STATE`. For example, to +/// check if an operation caused some overflow: +/// +/// ```rust,ignore +/// _MM_SET_EXCEPTION_STATE(0); // clear all exception flags +/// // perform calculations +/// if _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_OVERFLOW != 0 { +/// // handle overflow +/// } +/// ``` +/// +/// ## Masking Flags +/// +/// There is one masking flag for each exception flag: `_MM_MASK_INVALID`, +/// `_MM_MASK_DENORM`, `_MM_MASK_DIV_ZERO`, `_MM_MASK_OVERFLOW`, +/// `_MM_MASK_UNDERFLOW`, `_MM_MASK_INEXACT`. +/// +/// A single masking bit can be set via +/// +/// ```rust,ignore +/// _MM_SET_EXCEPTION_MASK(_MM_MASK_UNDERFLOW); +/// ``` +/// +/// However, since mask bits are by default all set to 1, it is more common to +/// want to *disable* certain bits. For example, to unmask the underflow +/// exception, use: +/// +/// ```rust,ignore +/// _mm_setcsr(_mm_getcsr() & !_MM_MASK_UNDERFLOW); // unmask underflow +/// exception +/// ``` +/// +/// Warning: an unmasked exception will cause an exception handler to be +/// called. +/// The standard handler will simply terminate the process. So, in this case +/// any underflow exception would terminate the current process with something +/// like `signal: 8, SIGFPE: erroneous arithmetic operation`. +/// +/// ## Rounding Mode +/// +/// The rounding mode is describe using two bits. It can be read and set using +/// the convenience wrappers `_MM_GET_ROUNDING_MODE()` and +/// `_MM_SET_ROUNDING_MODE(mode)`. +/// +/// The rounding modes are: +/// +/// * `_MM_ROUND_NEAREST`: (default) Round to closest to the infinite precision +/// value. 
If two values are equally close, round to even (i.e., least +/// significant bit will be zero). +/// +/// * `_MM_ROUND_DOWN`: Round toward negative Infinity. +/// +/// * `_MM_ROUND_UP`: Round toward positive Infinity. +/// +/// * `_MM_ROUND_TOWARD_ZERO`: Round towards zero (truncate). +/// +/// Example: +/// +/// ```rust,ignore +/// _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN) +/// ``` +/// +/// ## Denormals-are-zero/Flush-to-zero Mode +/// +/// If this bit is set, values that would be denormalized will be set to zero +/// instead. This is turned off by default. +/// +/// You can read and enable/disable this mode via the helper functions +/// `_MM_GET_FLUSH_ZERO_MODE()` and `_MM_SET_FLUSH_ZERO_MODE()`: +/// +/// ```rust,ignore +/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); // turn off (default) +/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // turn on +/// ``` +/// +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(ldmxcsr))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_setcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _mm_setcsr(val: u32) { + ldmxcsr(ptr::addr_of!(val) as *const i8); +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_EXCEPT_INVALID: u32 = 0x0001; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_EXCEPT_DENORM: u32 = 0x0002; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub 
const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_EXCEPT_INEXACT: u32 = 0x0020; +/// See [`_MM_GET_EXCEPTION_STATE`](fn._MM_GET_EXCEPTION_STATE.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_EXCEPT_MASK: u32 = 0x003f; + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_INVALID: u32 = 0x0080; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_DENORM: u32 = 0x0100; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_DIV_ZERO: u32 = 0x0200; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_OVERFLOW: u32 = 0x0400; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_UNDERFLOW: u32 = 0x0800; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_INEXACT: u32 = 0x1000; +/// See [`_MM_GET_EXCEPTION_MASK`](fn._MM_GET_EXCEPTION_MASK.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_MASK_MASK: u32 = 0x1f80; + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_ROUND_NEAREST: u32 = 0x0000; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_ROUND_DOWN: u32 = 0x2000; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_ROUND_UP: u32 = 0x4000; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000; + +/// See [`_MM_GET_ROUNDING_MODE`](fn._MM_GET_ROUNDING_MODE.html) +#[stable(feature = "simd_x86", since = 
"1.27.0")] +pub const _MM_ROUND_MASK: u32 = 0x6000; + +/// See [`_MM_GET_FLUSH_ZERO_MODE`](fn._MM_GET_FLUSH_ZERO_MODE.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000; +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000; + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_EXCEPTION_MASK) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_getcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 { + _mm_getcsr() & _MM_MASK_MASK +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_EXCEPTION_STATE) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_getcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 { + _mm_getcsr() & _MM_EXCEPT_MASK +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_FLUSH_ZERO_MODE) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] 
+#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_getcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 { + _mm_getcsr() & _MM_FLUSH_ZERO_MASK +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_ROUNDING_MODE) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_getcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 { + _mm_getcsr() & _MM_ROUND_MASK +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_EXCEPTION_MASK) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_setcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) { + _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | (x & _MM_MASK_MASK)) +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_EXCEPTION_STATE) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_setcsr` documentation - use inline assembly instead" +)] 
+pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) { + _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | (x & _MM_EXCEPT_MASK)) +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_FLUSH_ZERO_MODE) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_setcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) { + _mm_setcsr((_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | (x & _MM_FLUSH_ZERO_MASK)) +} + +/// See [`_mm_setcsr`](fn._mm_setcsr.html) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE) +#[inline] +#[allow(deprecated)] // Deprecated function implemented on top of deprecated function +#[allow(non_snake_case)] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[deprecated( + since = "1.75.0", + note = "see `_mm_setcsr` documentation - use inline assembly instead" +)] +pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) { + _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | (x & _MM_ROUND_MASK)) +} + +/// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_HINT_T0: i32 = 3; + +/// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_HINT_T1: i32 = 2; + +/// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_HINT_T2: i32 = 1; + +/// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_HINT_NTA: i32 = 0; + +/// See [`_mm_prefetch`](fn._mm_prefetch.html). 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_HINT_ET0: i32 = 7; + +/// See [`_mm_prefetch`](fn._mm_prefetch.html). +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_HINT_ET1: i32 = 6; + +/// Fetch the cache line that contains address `p` using the given `STRATEGY`. +/// +/// The `STRATEGY` must be one of: +/// +/// * [`_MM_HINT_T0`](constant._MM_HINT_T0.html): Fetch into all levels of the +/// cache hierarchy. +/// +/// * [`_MM_HINT_T1`](constant._MM_HINT_T1.html): Fetch into L2 and higher. +/// +/// * [`_MM_HINT_T2`](constant._MM_HINT_T2.html): Fetch into L3 and higher or +/// an implementation-specific choice (e.g., L2 if there is no L3). +/// +/// * [`_MM_HINT_NTA`](constant._MM_HINT_NTA.html): Fetch data using the +/// non-temporal access (NTA) hint. It may be a place closer than main memory +/// but outside of the cache hierarchy. This is used to reduce access latency +/// without polluting the cache. +/// +/// * [`_MM_HINT_ET0`](constant._MM_HINT_ET0.html) and +/// [`_MM_HINT_ET1`](constant._MM_HINT_ET1.html) are similar to `_MM_HINT_T0` +/// and `_MM_HINT_T1` but indicate an anticipation to write to the address. +/// +/// The actual implementation depends on the particular CPU. This instruction +/// is considered a hint, so the CPU is also free to simply ignore the request. +/// +/// The amount of prefetched data depends on the cache line size of the +/// specific CPU, but it will be at least 32 bytes. +/// +/// Common caveats: +/// +/// * Most modern CPUs already automatically prefetch data based on predicted +/// access patterns. +/// +/// * Data is usually not fetched if this would cause a TLB miss or a page +/// fault. +/// +/// * Too much prefetching can cause unnecessary cache evictions. +/// +/// * Prefetching may also fail if there are not enough memory-subsystem +/// resources (e.g., request buffers). 
+/// +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_prefetch) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))] +#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))] +#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))] +#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_prefetch(p: *const i8) { + static_assert_uimm_bits!(STRATEGY, 3); + // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache). + // `locality` and `rw` are based on our `STRATEGY`. + prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1); +} + +/// Returns vector of type __m128 with indeterminate elements.with indetermination elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_ps) +#[inline] +#[target_feature(enable = "sse")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_undefined_ps() -> __m128 { + const { unsafe { mem::zeroed() } } +} + +/// Transpose the 4x4 matrix formed by 4 rows of __m128 in place. 
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_TRANSPOSE4_PS)
#[inline]
#[allow(non_snake_case)]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _MM_TRANSPOSE4_PS(
    row0: &mut __m128,
    row1: &mut __m128,
    row2: &mut __m128,
    row3: &mut __m128,
) {
    // Standard SSE 4x4 transpose sequence: interleave the low/high halves of
    // each row pair, then recombine the 64-bit halves with movelh/movehl.
    let tmp0 = _mm_unpacklo_ps(*row0, *row1);
    let tmp2 = _mm_unpacklo_ps(*row2, *row3);
    let tmp1 = _mm_unpackhi_ps(*row0, *row1);
    let tmp3 = _mm_unpackhi_ps(*row2, *row3);

    *row0 = _mm_movelh_ps(tmp0, tmp2);
    *row1 = _mm_movehl_ps(tmp2, tmp0);
    *row2 = _mm_movelh_ps(tmp1, tmp3);
    *row3 = _mm_movehl_ps(tmp3, tmp1);
}

// Raw LLVM/x86 SSE intrinsics backing the public wrappers in this module.
// The exact `link_name` strings are an ABI contract with LLVM; do not edit.
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtsi2ss"]
    fn cvtsi2ss(a: __m128, b: i32) -> __m128;
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
}

/// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint.
///
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception _may_ be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_ps)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
+#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(movntps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { + crate::arch::asm!( + vps!("movntps", ",{a}"), + p = in(reg) mem_addr, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); +} + +#[cfg(test)] +mod tests { + use crate::{hint::black_box, mem::transmute, ptr}; + use std::boxed; + use stdarch_test::simd_test; + + use crate::core_arch::{simd::*, x86::*}; + + const NAN: f32 = f32::NAN; + + #[simd_test(enable = "sse")] + unsafe fn test_mm_add_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_add_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_add_ss() { + let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_add_ss(a, b); + assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_sub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_sub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_sub_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_sub_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_mul_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_mul_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_mul_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = 
_mm_mul_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_div_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0); + let r = _mm_div_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_div_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_div_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_sqrt_ss() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_sqrt_ss(a); + let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_sqrt_ps() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_sqrt_ps(a); + let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_rcp_ss() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rcp_ss(a); + let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0); + let rel_err = 0.00048828125; + assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err); + for i in 1..4 { + assert_eq!(get_m128(r, i), get_m128(e, i)); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_rcp_ps() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rcp_ps(a); + let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215); + let rel_err = 0.00048828125; + for i in 0..4 { + assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_rsqrt_ss() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rsqrt_ss(a); + let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0); + let rel_err = 0.00048828125; + for i in 0..4 { + assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. 
* rel_err); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_rsqrt_ps() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rsqrt_ps(a); + let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845); + let rel_err = 0.00048828125; + for i in 0..4 { + assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_min_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_min_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_min_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_min_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); + + // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic. `simd_min` + // is lowered by the llvm codegen backend to `llvm.minnum.v*` llvm intrinsic. This intrinsic + // doesn't specify how -0.0 is handled. Unfortunately it happens to behave different from + // the `minps` x86 instruction on x86. The `llvm.minnum.v*` llvm intrinsic equals + // `r1` to `a` and `r2` to `b`. 
+ let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); + let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_min_ps(a, b)); + let r2: [u8; 16] = transmute(_mm_min_ps(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_max_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_max_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_max_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_max_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0)); + + // Check SSE-specific semantics for -0.0 handling. + let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); + let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_max_ps(a, b)); + let r2: [u8; 16] = transmute(_mm_max_ps(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_and_ps() { + let a = transmute(u32x4::splat(0b0011)); + let b = transmute(u32x4::splat(0b0101)); + let r = _mm_and_ps(*black_box(&a), *black_box(&b)); + let e = transmute(u32x4::splat(0b0001)); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_andnot_ps() { + let a = transmute(u32x4::splat(0b0011)); + let b = transmute(u32x4::splat(0b0101)); + let r = _mm_andnot_ps(*black_box(&a), *black_box(&b)); + let e = transmute(u32x4::splat(0b0100)); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_or_ps() { + let a = transmute(u32x4::splat(0b0011)); + let b = transmute(u32x4::splat(0b0101)); + let r = 
_mm_or_ps(*black_box(&a), *black_box(&b)); + let e = transmute(u32x4::splat(0b0111)); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_xor_ps() { + let a = transmute(u32x4::splat(0b0011)); + let b = transmute(u32x4::splat(0b0101)); + let r = _mm_xor_ps(*black_box(&a), *black_box(&b)); + let e = transmute(u32x4::splat(0b0110)); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpeq_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0); + let r: u32x4 = transmute(_mm_cmpeq_ss(a, b)); + let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0)); + assert_eq!(r, e); + + let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2)); + let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0)); + assert_eq!(r2, e2); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmplt_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) < b.extract(0) + let c1 = 0u32; // a.extract(0) < c.extract(0) + let d1 = !0u32; // a.extract(0) < d.extract(0) + + let rb: u32x4 = transmute(_mm_cmplt_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmplt_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmplt_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmple_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // 
a.extract(0) <= b.extract(0) + let c1 = !0u32; // a.extract(0) <= c.extract(0) + let d1 = !0u32; // a.extract(0) <= d.extract(0) + + let rb: u32x4 = transmute(_mm_cmple_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmple_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmple_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpgt_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) > b.extract(0) + let c1 = 0u32; // a.extract(0) > c.extract(0) + let d1 = 0u32; // a.extract(0) > d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpge_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) >= b.extract(0) + let c1 = !0u32; // a.extract(0) >= c.extract(0) + let d1 = 0u32; // a.extract(0) >= d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpge_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = 
transmute(_mm_cmpge_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpge_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpneq_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) != b.extract(0) + let c1 = 0u32; // a.extract(0) != c.extract(0) + let d1 = !0u32; // a.extract(0) != d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpnlt_ss() { + // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence of NaNs (signaling or quiet). If so, we should add tests + // for those. 
+ + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) >= b.extract(0) + let c1 = !0u32; // a.extract(0) >= c.extract(0) + let d1 = 0u32; // a.extract(0) >= d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpnle_ss() { + // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence + // of NaNs (signaling or quiet). If so, we should add tests for those. 
+ + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) > b.extract(0) + let c1 = 0u32; // a.extract(0) > c.extract(0) + let d1 = 0u32; // a.extract(0) > d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpngt_ss() { + // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence of NaNs (signaling or quiet). If so, we should add tests + // for those. 
+ + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) <= b.extract(0) + let c1 = !0u32; // a.extract(0) <= c.extract(0) + let d1 = !0u32; // a.extract(0) <= d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpnge_ss() { + // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence of NaNs (signaling or quiet). If so, we should add tests + // for those. 
+ + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) < b.extract(0) + let c1 = 0u32; // a.extract(0) < c.extract(0) + let d1 = !0u32; // a.extract(0) < d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpord_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) ord b.extract(0) + let c1 = 0u32; // a.extract(0) ord c.extract(0) + let d1 = !0u32; // a.extract(0) ord d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpord_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpord_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpord_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpunord_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) unord b.extract(0) + let c1 = !0u32; // a.extract(0) unord c.extract(0) + let d1 = 0u32; // 
a.extract(0) unord d.extract(0) + + let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b)); + let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c)); + let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d)); + let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpeq_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(fls, fls, tru, fls); + let r: u32x4 = transmute(_mm_cmpeq_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmplt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(tru, fls, fls, fls); + let r: u32x4 = transmute(_mm_cmplt_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmple_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(tru, fls, tru, fls); + let r: u32x4 = transmute(_mm_cmple_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpgt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(fls, tru, fls, fls); + let r: u32x4 = transmute(_mm_cmpgt_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpge_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(fls, tru, tru, fls); + let r: u32x4 = 
transmute(_mm_cmpge_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpneq_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(tru, tru, fls, tru); + let r: u32x4 = transmute(_mm_cmpneq_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpnlt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(fls, tru, tru, tru); + let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpnle_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(fls, tru, fls, tru); + let r: u32x4 = transmute(_mm_cmpnle_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpngt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(tru, fls, tru, tru); + let r: u32x4 = transmute(_mm_cmpngt_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpnge_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(tru, fls, fls, tru); + let r: u32x4 = transmute(_mm_cmpnge_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpord_ps() { + let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); + let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(tru, fls, fls, fls); + let r: u32x4 = transmute(_mm_cmpord_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cmpunord_ps() { + let a = 
_mm_setr_ps(10.0, 50.0, NAN, NAN); + let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = u32x4::new(fls, tru, tru, tru); + let r: u32x4 = transmute(_mm_cmpunord_ps(a, b)); + assert_eq!(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_comieq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[1i32, 0, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_comieq_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_comilt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[0i32, 1, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_comilt_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_comile_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[1i32, 1, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_comile_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_comigt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[1i32, 0, 1, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_comige_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + 
#[simd_test(enable = "sse")] + unsafe fn test_mm_comineq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[0i32, 1, 1, 1]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_comineq_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_ucomieq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[1i32, 0, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_ucomieq_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_ucomilt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[0i32, 1, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_ucomilt_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_ucomile_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[1i32, 1, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_ucomile_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_ucomigt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[0i32, 0, 1, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let 
b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_ucomigt_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_ucomige_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[1i32, 0, 1, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_ucomige_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_ucomineq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; + + let ee = &[0i32, 1, 1, 1]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + + let r = _mm_ucomineq_ss(a, b); + + assert_eq!( + ee[i], r, + "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtss_si32() { + let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; + let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520]; + for i in 0..inputs.len() { + let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); + let e = result[i]; + let r = _mm_cvtss_si32(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvttss_si32() { + let inputs = &[ + (42.0f32, 42i32), + (-31.4, -31), + (-33.5, -33), + (-34.5, -34), + (10.999, 10), + (-5.99, -5), + (4.0e10, i32::MIN), + (4.0e-10, 0), + (NAN, i32::MIN), + (2147483500.1, 2147483520), + ]; + for (i, &(xi, e)) in inputs.iter().enumerate() { + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvttss_si32(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", + i, x, 
r, e + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtsi32_ss() { + let inputs = &[ + (4555i32, 4555.0f32), + (322223333, 322223330.0), + (-432, -432.0), + (-322223333, -322223330.0), + ]; + + for &(x, f) in inputs.iter() { + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsi32_ss(a, x); + let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtss_f32() { + let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0); + assert_eq!(_mm_cvtss_f32(a), 312.0134); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_set_ss() { + let r = _mm_set_ss(black_box(4.25)); + assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_set1_ps() { + let r1 = _mm_set1_ps(black_box(4.25)); + let r2 = _mm_set_ps1(black_box(4.25)); + assert_eq!(get_m128(r1, 0), 4.25); + assert_eq!(get_m128(r1, 1), 4.25); + assert_eq!(get_m128(r1, 2), 4.25); + assert_eq!(get_m128(r1, 3), 4.25); + assert_eq!(get_m128(r2, 0), 4.25); + assert_eq!(get_m128(r2, 1), 4.25); + assert_eq!(get_m128(r2, 2), 4.25); + assert_eq!(get_m128(r2, 3), 4.25); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_set_ps() { + let r = _mm_set_ps( + black_box(1.0), + black_box(2.0), + black_box(3.0), + black_box(4.0), + ); + assert_eq!(get_m128(r, 0), 4.0); + assert_eq!(get_m128(r, 1), 3.0); + assert_eq!(get_m128(r, 2), 2.0); + assert_eq!(get_m128(r, 3), 1.0); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_setr_ps() { + let r = _mm_setr_ps( + black_box(1.0), + black_box(2.0), + black_box(3.0), + black_box(4.0), + ); + assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_setzero_ps() { + let r = *black_box(&_mm_setzero_ps()); + assert_eq_m128(r, _mm_set1_ps(0.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_MM_SHUFFLE() { + assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11); + assert_eq!(_MM_SHUFFLE(3, 1, 1, 
0), 0b11_01_01_00); + assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_shuffle_ps() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b); + assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_unpackhi_ps() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_unpackhi_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_unpacklo_ps() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_unpacklo_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_movehl_ps() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_movehl_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_movelh_ps() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_movelh_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_load_ss() { + let a = 42.0f32; + let r = _mm_load_ss(ptr::addr_of!(a)); + assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_load1_ps() { + let a = 42.0f32; + let r = _mm_load1_ps(ptr::addr_of!(a)); + assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_load_ps() { + let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + + let mut p = vals.as_ptr(); + let mut fixup = 0.0f32; + + // Make sure p is aligned, otherwise we might get a + // (signal: 11, SIGSEGV: invalid memory reference) + + let unalignment = 
(p as usize) & 0xf; + if unalignment != 0 { + let delta = (16 - unalignment) >> 2; + fixup = delta as f32; + p = p.add(delta); + } + + let r = _mm_load_ps(p); + let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup)); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_loadu_ps() { + let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let p = vals.as_ptr().add(3); + let r = _mm_loadu_ps(black_box(p)); + assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0)); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_loadr_ps() { + let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + + let mut p = vals.as_ptr(); + let mut fixup = 0.0f32; + + // Make sure p is aligned, otherwise we might get a + // (signal: 11, SIGSEGV: invalid memory reference) + + let unalignment = (p as usize) & 0xf; + if unalignment != 0 { + let delta = (16 - unalignment) >> 2; + fixup = delta as f32; + p = p.add(delta); + } + + let r = _mm_loadr_ps(p); + let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup)); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_store_ss() { + let mut vals = [0.0f32; 8]; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + _mm_store_ss(vals.as_mut_ptr().add(1), a); + + assert_eq!(vals[0], 0.0); + assert_eq!(vals[1], 1.0); + assert_eq!(vals[2], 0.0); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_store1_ps() { + let mut vals = [0.0f32; 8]; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + + let mut ofs = 0; + let mut p = vals.as_mut_ptr(); + + if (p as usize) & 0xf != 0 { + ofs = (16 - ((p as usize) & 0xf)) >> 2; + p = p.add(ofs); + } + + _mm_store1_ps(p, *black_box(&a)); + + if ofs > 0 { + assert_eq!(vals[ofs - 1], 0.0); + } + assert_eq!(vals[ofs + 0], 1.0); + assert_eq!(vals[ofs + 1], 1.0); + assert_eq!(vals[ofs + 2], 1.0); + assert_eq!(vals[ofs + 3], 1.0); + assert_eq!(vals[ofs + 4], 0.0); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_store_ps() { + let mut 
vals = [0.0f32; 8]; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + + let mut ofs = 0; + let mut p = vals.as_mut_ptr(); + + // Align p to 16-byte boundary + if (p as usize) & 0xf != 0 { + ofs = (16 - ((p as usize) & 0xf)) >> 2; + p = p.add(ofs); + } + + _mm_store_ps(p, *black_box(&a)); + + if ofs > 0 { + assert_eq!(vals[ofs - 1], 0.0); + } + assert_eq!(vals[ofs + 0], 1.0); + assert_eq!(vals[ofs + 1], 2.0); + assert_eq!(vals[ofs + 2], 3.0); + assert_eq!(vals[ofs + 3], 4.0); + assert_eq!(vals[ofs + 4], 0.0); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_storer_ps() { + let mut vals = [0.0f32; 8]; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + + let mut ofs = 0; + let mut p = vals.as_mut_ptr(); + + // Align p to 16-byte boundary + if (p as usize) & 0xf != 0 { + ofs = (16 - ((p as usize) & 0xf)) >> 2; + p = p.add(ofs); + } + + _mm_storer_ps(p, *black_box(&a)); + + if ofs > 0 { + assert_eq!(vals[ofs - 1], 0.0); + } + assert_eq!(vals[ofs + 0], 4.0); + assert_eq!(vals[ofs + 1], 3.0); + assert_eq!(vals[ofs + 2], 2.0); + assert_eq!(vals[ofs + 3], 1.0); + assert_eq!(vals[ofs + 4], 0.0); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_storeu_ps() { + let mut vals = [0.0f32; 8]; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + + let mut ofs = 0; + let mut p = vals.as_mut_ptr(); + + // Make sure p is **not** aligned to 16-byte boundary + if (p as usize) & 0xf == 0 { + ofs = 1; + p = p.add(1); + } + + _mm_storeu_ps(p, *black_box(&a)); + + if ofs > 0 { + assert_eq!(vals[ofs - 1], 0.0); + } + assert_eq!(vals[ofs + 0], 1.0); + assert_eq!(vals[ofs + 1], 2.0); + assert_eq!(vals[ofs + 2], 3.0); + assert_eq!(vals[ofs + 3], 4.0); + assert_eq!(vals[ofs + 4], 0.0); + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_move_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + + let r = _mm_move_ss(a, b); + let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0); + assert_eq_m128(e, r); + } + + #[simd_test(enable = "sse")] + unsafe fn 
test_mm_movemask_ps() { + let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0)); + assert_eq!(r, 0b0101); + + let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0)); + assert_eq!(r, 0b0111); + } + + #[simd_test(enable = "sse")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_sfence() { + _mm_sfence(); + } + + #[simd_test(enable = "sse")] + unsafe fn test_MM_TRANSPOSE4_PS() { + let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0); + let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0); + + _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d); + + assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0)); + assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0)); + assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0)); + assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0)); + } + + #[repr(align(16))] + struct Memory { + pub data: [f32; 4], + } + + #[simd_test(enable = "sse")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_ps() { + let a = _mm_set1_ps(7.0); + let mut mem = Memory { data: [-1.0; 4] }; + + _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a); + for i in 0..4 { + assert_eq!(mem.data[i], get_m128(a, i)); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs new file mode 100644 index 000000000000..3dabcde18ce9 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -0,0 +1,5253 @@ +//! Streaming SIMD Extensions 2 (SSE2) + +#[cfg(test)] +use stdarch_test::assert_instr; + +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, + intrinsics::sqrtf64, + mem, ptr, +}; + +/// Provides a hint to the processor that the code sequence is a spin-wait loop. 
+/// +/// This can help improve the performance and power consumption of spin-wait +/// loops. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause) +#[inline] +#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_pause() { + // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without + // the SSE2 target-feature - therefore it does not require any target features + pause() +} + +/// Invalidates and flushes the cache line that contains `p` from all levels of +/// the cache hierarchy. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(clflush))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_clflush(p: *const u8) { + clflush(p) +} + +/// Performs a serializing operation on all load-from-memory instructions +/// that were issued prior to this instruction. +/// +/// Guarantees that every load instruction that precedes, in program order, is +/// globally visible before any load instruction which follows the fence in +/// program order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(lfence))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_lfence() { + lfence() +} + +/// Performs a serializing operation on all load-from-memory and store-to-memory +/// instructions that were issued prior to this instruction. +/// +/// Guarantees that every memory access that precedes, in program order, the +/// memory fence instruction is globally visible before any memory instruction +/// which follows the fence in program order. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(mfence))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_mfence() { + mfence() +} + +/// Adds packed 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } +} + +/// Adds packed 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } +} + +/// Adds packed 32-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } +} + +/// Adds packed 64-bit integers in `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } +} + +/// Adds packed 8-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) } +} + +/// Adds packed 16-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) } +} + +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddusb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) } +} + +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(paddusw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) } +} + +/// Averages packed unsigned 8-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pavgb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, u16x16>(a.as_u8x16()); + let b = simd_cast::<_, u16x16>(b.as_u8x16()); + let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1)); + transmute(simd_cast::<_, u8x16>(r)) + } +} + +/// Averages packed unsigned 16-bit integers in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pavgw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, u32x8>(a.as_u16x8()); + let b = simd_cast::<_, u32x8>(b.as_u16x8()); + let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1)); + transmute(simd_cast::<_, u16x8>(r)) + } +} + +/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`. +/// +/// Multiplies packed signed 16-bit integers in `a` and `b`, producing +/// intermediate signed 32-bit integers. Horizontally add adjacent pairs of +/// intermediate 32-bit integers. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmaddwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } +} + +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed +/// maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmaxsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the +/// packed maximum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmaxub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let b = b.as_u8x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed +/// minimum values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pminsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the +/// packed minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pminub))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let b = b.as_u8x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Multiplies the packed 16-bit integers in `a` and `b`. +/// +/// The multiplication produces intermediate 32-bit integers, and returns the +/// high 16 bits of the intermediate integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmulhw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, i32x8>(a.as_i16x8()); + let b = simd_cast::<_, i32x8>(b.as_i16x8()); + let r = simd_shr(simd_mul(a, b), i32x8::splat(16)); + transmute(simd_cast::(r)) + } +} + +/// Multiplies the packed unsigned 16-bit integers in `a` and `b`. +/// +/// The multiplication produces intermediate 32-bit integers, and returns the +/// high 16 bits of the intermediate integers. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmulhuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, u32x8>(a.as_u16x8()); + let b = simd_cast::<_, u32x8>(b.as_u16x8()); + let r = simd_shr(simd_mul(a, b), u32x8::splat(16)); + transmute(simd_cast::(r)) + } +} + +/// Multiplies the packed 16-bit integers in `a` and `b`. +/// +/// The multiplication produces intermediate 32-bit integers, and returns the +/// low 16 bits of the intermediate integers. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmullw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } +} + +/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element +/// in `a` and `b`. +/// +/// Returns the unsigned 64-bit results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmuludq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u64x2(); + let b = b.as_u64x2(); + let mask = u64x2::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) + } +} + +/// Sum the absolute differences of packed unsigned 8-bit integers. 
+/// +/// Computes the absolute differences of packed unsigned 8-bit integers in `a` +/// and `b`, then horizontally sum each consecutive 8 differences to produce +/// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in +/// the low 16 bits of 64-bit elements returned. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psadbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } +} + +/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psubb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } +} + +/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } +} + +/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } +} + +/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psubq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } +} + +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` +/// using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psubsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) } +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` +/// using saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(psubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) } +} + +/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit +/// integers in `a` using saturation. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psubusb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
+}
+
+/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
+/// integers in `a` using saturation.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psubusw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
+}
+
+/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
+}
+
+/// Implementation detail: converts the immediate argument of the
+/// `_mm_slli_si128` intrinsic into a compile-time constant.
+#[inline]
+#[target_feature(enable = "sse2")]
+unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
+    const fn mask(shift: i32, i: u32) -> u32 {
+        let shift = shift as u32 & 0xff;
+        if shift > 15 { i } else { 16 - shift + i }
+    }
+    transmute::<i8x16, _>(simd_shuffle!(
+        i8x16::ZERO,
+        a.as_i8x16(),
+        [
+            mask(IMM8, 0),
+            mask(IMM8, 1),
+            mask(IMM8, 2),
+            mask(IMM8, 3),
+            mask(IMM8, 4),
+            mask(IMM8, 5),
+            mask(IMM8, 6),
+            mask(IMM8, 7),
+            mask(IMM8, 8),
+            mask(IMM8, 9),
+            mask(IMM8, 10),
+            mask(IMM8, 11),
+            mask(IMM8, 12),
+            mask(IMM8, 13),
+            mask(IMM8, 14),
+            mask(IMM8, 15),
+        ],
+    ))
+}
+
+/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        _mm_slli_si128_impl::<IMM8>(a)
+    }
+}
+
+/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+    unsafe {
+        static_assert_uimm_bits!(IMM8, 8);
+        _mm_srli_si128_impl::<IMM8>(a)
+    }
+}
+
+/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe {
+        if IMM8 >= 16 {
+            _mm_setzero_si128()
+        } else {
+            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
+        }
+    }
+}
+
+/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psllw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
+}
+
+/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe {
+        if IMM8 >= 32 {
+            _mm_setzero_si128()
+        } else {
+            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
+        }
+    }
+}
+
+/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pslld))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
+}
+
+/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe {
+        if IMM8 >= 64 {
+            _mm_setzero_si128()
+        } else {
+            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
+        }
+    }
+}
+
+/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psllq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
+}
+
+/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
+/// bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
+}
+
+/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
+/// bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psraw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
+/// bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
+/// bits.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrad))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
+}
+
+/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
+}
+
+/// Implementation detail: converts the immediate argument of the
+/// `_mm_srli_si128` intrinsic into a compile-time constant.
+#[inline]
+#[target_feature(enable = "sse2")]
+unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
+    const fn mask(shift: i32, i: u32) -> u32 {
+        if (shift as u32) > 15 {
+            i + 16
+        } else {
+            i + (shift as u32)
+        }
+    }
+    let x: i8x16 = simd_shuffle!(
+        a.as_i8x16(),
+        i8x16::ZERO,
+        [
+            mask(IMM8, 0),
+            mask(IMM8, 1),
+            mask(IMM8, 2),
+            mask(IMM8, 3),
+            mask(IMM8, 4),
+            mask(IMM8, 5),
+            mask(IMM8, 6),
+            mask(IMM8, 7),
+            mask(IMM8, 8),
+            mask(IMM8, 9),
+            mask(IMM8, 10),
+            mask(IMM8, 11),
+            mask(IMM8, 12),
+            mask(IMM8, 13),
+            mask(IMM8, 14),
+            mask(IMM8, 15),
+        ],
+    );
+    transmute(x)
+}
+
+/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe {
+        if IMM8 >= 16 {
+            _mm_setzero_si128()
+        } else {
+            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
+        }
+    }
+}
+
+/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrlw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
+}
+
+/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe {
+        if IMM8 >= 32 {
+            _mm_setzero_si128()
+        } else {
+            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
+        }
+    }
+}
+
+/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrld))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
+}
+
+/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe {
+        if IMM8 >= 64 {
+            _mm_setzero_si128()
+        } else {
+            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
+        }
+    }
+}
+
+/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
+/// zeros.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(psrlq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
+    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
+}
+
+/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
+/// `b`.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(andps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_and(a, b) } +} + +/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and +/// then AND with `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(andnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } +} + +/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(orps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_or(a, b) } +} + +/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and +/// `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_xor(a, b) } +} + +/// Compares packed 8-bit integers in `a` and `b` for equality. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpeqb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
+}
+
+/// Compares packed 16-bit integers in `a` and `b` for equality.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpeqw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
+}
+
+/// Compares packed 32-bit integers in `a` and `b` for equality.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpeqd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Compares packed 8-bit integers in `a` and `b` for greater-than.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpgtb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
+}
+
+/// Compares packed 16-bit integers in `a` and `b` for greater-than.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpgtw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
+}
+
+/// Compares packed 32-bit integers in `a` and `b` for greater-than.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpgtd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Compares packed 8-bit integers in `a` and `b` for less-than.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpgtb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
+}
+
+/// Compares packed 16-bit integers in `a` and `b` for less-than.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpgtw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
+}
+
+/// Compares packed 32-bit integers in `a` and `b` for less-than.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(pcmpgtd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
+}
+
+/// Converts the lower two packed 32-bit integers in `a` to packed
+/// double-precision (64-bit) floating-point elements.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtdq2pd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
+    unsafe {
+        let a = a.as_i32x4();
+        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
+    }
+}
+
+/// Returns `a` with its lower element replaced by `b` after converting it to
+/// an `f64`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsi2sd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
+    unsafe { simd_insert!(a, 0, b as f64) }
+}
+
+/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
+/// floating-point elements.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtdq2ps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
+    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
+}
+
+/// Converts packed single-precision (32-bit) floating-point elements in `a`
+/// to packed 32-bit integers.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtps_epi32(a: __m128) -> __m128i { + unsafe { transmute(cvtps2dq(a)) } +} + +/// Returns a vector whose lowest element is `a` and all higher elements are +/// `0`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi32_si128(a: i32) -> __m128i { + unsafe { transmute(i32x4::new(a, 0, 0, 0)) } +} + +/// Returns the lowest element of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 { + unsafe { simd_extract!(a.as_i32x4(), 0) } +} + +/// Sets packed 64-bit integers with the supplied values, from highest to +/// lowest. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { + unsafe { transmute(i64x2::new(e0, e1)) } +} + +/// Sets packed 32-bit integers with the supplied values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { + unsafe { transmute(i32x4::new(e0, e1, e2, e3)) } +} + +/// Sets packed 16-bit integers with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_epi16( + e7: i16, + e6: i16, + e5: i16, + e4: i16, + e3: i16, + e2: i16, + e1: i16, + e0: i16, +) -> __m128i { + unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } +} + +/// Sets packed 8-bit integers with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_epi8( + e15: i8, + e14: i8, + e13: i8, + e12: i8, + e11: i8, + e10: i8, + e9: i8, + e8: i8, + e7: i8, + e6: i8, + e5: i8, + e4: i8, + e3: i8, + e2: i8, + e1: i8, + e0: i8, +) -> __m128i { + unsafe { + #[rustfmt::skip] + transmute(i8x16::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + )) + } +} + +/// Broadcasts 64-bit integer `a` to all elements. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set1_epi64x(a: i64) -> __m128i { + _mm_set_epi64x(a, a) +} + +/// Broadcasts 32-bit integer `a` to all elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set1_epi32(a: i32) -> __m128i { + _mm_set_epi32(a, a, a, a) +} + +/// Broadcasts 16-bit integer `a` to all elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set1_epi16(a: i16) -> __m128i { + _mm_set_epi16(a, a, a, a, a, a, a, a) +} + +/// Broadcasts 8-bit integer `a` to all elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set1_epi8(a: i8) -> __m128i { + _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +} + +/// Sets packed 32-bit integers with the supplied values in reverse order. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { + _mm_set_epi32(e0, e1, e2, e3) +} + +/// Sets packed 16-bit integers with the supplied values in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setr_epi16( + e7: i16, + e6: i16, + e5: i16, + e4: i16, + e3: i16, + e2: i16, + e1: i16, + e0: i16, +) -> __m128i { + _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) +} + +/// Sets packed 8-bit integers with the supplied values in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8) +#[inline] +#[target_feature(enable = "sse2")] +// no particular instruction to test +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setr_epi8( + e15: i8, + e14: i8, + e13: i8, + e12: i8, + e11: i8, + e10: i8, + e9: i8, + e8: i8, + e7: i8, + e6: i8, + e5: i8, + e4: i8, + e3: i8, + e2: i8, + e1: i8, + e0: i8, +) -> __m128i { + #[rustfmt::skip] + _mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ) +} + +/// Returns a vector with all elements set to zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setzero_si128() -> __m128i { + const { unsafe { mem::zeroed() } } +} + +/// Loads 64-bit integer from memory into first element of returned vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { + _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64)) +} + +/// Loads 128-bits of integer data from memory into a new vector. +/// +/// `mem_addr` must be aligned on a 16-byte boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { + *mem_addr +} + +/// Loads 128-bits of integer data from memory into a new vector. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { + let mut dst: __m128i = _mm_undefined_si128(); + ptr::copy_nonoverlapping( + mem_addr as *const u8, + ptr::addr_of_mut!(dst) as *mut u8, + mem::size_of::<__m128i>(), + ); + dst +} + +/// Conditionally store 8-bit integer elements from `a` into memory using +/// `mask`. +/// +/// Elements are not stored when the highest bit is not set in the +/// corresponding element. +/// +/// `mem_addr` should correspond to a 128-bit memory location and does not need +/// to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(maskmovdqu))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { + maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) +} + +/// Stores 128-bits of integer data from `a` into memory. +/// +/// `mem_addr` must be aligned on a 16-byte boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { + *mem_addr = a; +} + +/// Stores 128-bits of integer data from `a` into memory. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { + mem_addr.write_unaligned(a); +} + +/// Stores the lower 64-bit integer `a` to a memory location. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { + ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8); +} + +/// Stores a 128-bit integer vector to a 128-bit aligned memory location. +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. 
+#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movntdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { + crate::arch::asm!( + vps!("movntdq", ",{a}"), + p = in(reg) mem_addr, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Stores a 32-bit integer value in the specified memory location. +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movnti))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { + crate::arch::asm!( + vps!("movnti", ",{a:e}"), // `:e` for 32bit value + p = in(reg) mem_addr, + a = in(reg) a, + options(nostack, preserves_flags), + ); +} + +/// Returns a vector where the low element is extracted from `a` and its upper +/// element is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) +#[inline] +#[target_feature(enable = "sse2")] +// FIXME movd on msvc, movd on i686 +#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_move_epi64(a: __m128i) -> __m128i { + unsafe { + let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); + transmute(r) + } +} + +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// using signed saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(packsswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } +} + +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// using signed saturation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(packssdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } +} + +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// using unsigned saturation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(packuswb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } +} + +/// Returns the `imm8` element of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_extract_epi16(a: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 3); + unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } +} + +/// Returns a new vector where the `imm8` element of `a` is replaced with `i`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i { + static_assert_uimm_bits!(IMM8, 3); + unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) } +} + +/// Returns a mask of the most significant bit of each element in `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pmovmskb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_movemask_epi8(a: __m128i) -> i32 { + unsafe { + let z = i8x16::ZERO; + let m: i8x16 = simd_lt(a.as_i8x16(), z); + simd_bitmask::<_, u16>(m) as u32 as i32 + } +} + +/// Shuffles 32-bit integers in `a` using the control in `IMM8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_shuffle_epi32(a: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i32x4(); + let x: i32x4 = simd_shuffle!( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ); + transmute(x) + } +} + +/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in +/// `IMM8`. +/// +/// Put the results in the high 64 bits of the returned vector, with the low 64 +/// bits being copied from `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i16x8(); + let x: i16x8 = simd_shuffle!( + a, + a, + [ + 0, + 1, + 2, + 3, + (IMM8 as u32 & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + ], + ); + transmute(x) + } +} + +/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in +/// `IMM8`. +/// +/// Put the results in the low 64 bits of the returned vector, with the high 64 +/// bits being copied from `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + let a = a.as_i16x8(); + let x: i16x8 = simd_shuffle!( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + 4, + 5, + 6, + 7, + ], + ); + transmute(x) + } +} + +/// Unpacks and interleave 8-bit integers from the high half of `a` and `b`. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(punpckhbw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        // Interleave the high 8 lanes of `a` and `b`; the turbofish pins the
+        // shuffle's output element type (i8x16) so the transmute is unambiguous.
+        transmute::<i8x16, _>(simd_shuffle!(
+            a.as_i8x16(),
+            b.as_i8x16(),
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
+        ))
+    }
+}
+
+/// Unpacks and interleave 16-bit integers from the high half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(punpckhwd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
+        transmute::<i16x8, _>(x)
+    }
+}
+
+/// Unpacks and interleave 32-bit integers from the high half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(unpckhps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
+}
+
+/// Unpacks and interleave 64-bit integers from the high half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(unpckhpd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
+}
+
+/// Unpacks and interleave 8-bit integers from the low half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(punpcklbw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        // Interleave the low 8 lanes of `a` and `b`; turbofish pins the
+        // intermediate vector type for the transmute.
+        transmute::<i8x16, _>(simd_shuffle!(
+            a.as_i8x16(),
+            b.as_i8x16(),
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
+        ))
+    }
+}
+
+/// Unpacks and interleave 16-bit integers from the low half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(punpcklwd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
+        transmute::<i16x8, _>(x)
+    }
+}
+
+/// Unpacks and interleave 32-bit integers from the low half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(unpcklps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
+}
+
+/// Unpacks and interleave 64-bit integers from the low half of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(movlhps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
+}
+
+/// Returns a new vector with the low element of `a` replaced by the sum of the
+/// low elements of `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(addsd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
+    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
+}
+
+/// Adds packed double-precision (64-bit) floating-point elements in `a` and
+/// `b`.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(addpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_add(a, b) } +} + +/// Returns a new vector with the low element of `a` replaced by the result of +/// diving the lower element of `a` by the lower element of `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(divsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } +} + +/// Divide packed double-precision (64-bit) floating-point elements in `a` by +/// packed elements in `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(divpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_div(a, b) } +} + +/// Returns a new vector with the low element of `a` replaced by the maximum +/// of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(maxsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { maxsd(a, b) } +} + +/// Returns a new vector with the maximum values from corresponding elements in +/// `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(maxpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { maxpd(a, b) } +} + +/// Returns a new vector with the low element of `a` replaced by the minimum +/// of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(minsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { minsd(a, b) } +} + +/// Returns a new vector with the minimum values from corresponding elements in +/// `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(minpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { minpd(a, b) } +} + +/// Returns a new vector with the low element of `a` replaced by multiplying the +/// low elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(mulsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } +} + +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` +/// and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(mulpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_mul(a, b) } +} + +/// Returns a new vector with the low element of `a` replaced by the square +/// root of the lower element `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(sqrtsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) } +} + +/// Returns a new vector with the square root of each of the values in `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(sqrtpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sqrt_pd(a: __m128d) -> __m128d { + unsafe { simd_fsqrt(a) } +} + +/// Returns a new vector with the low element of `a` replaced by subtracting the +/// low element by `b` from the low element of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(subsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } +} + +/// Subtract packed double-precision (64-bit) floating-point elements in `b` +/// from `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(subpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_sub(a, b) } +} + +/// Computes the bitwise AND of packed double-precision (64-bit) floating-point +/// elements in `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(andps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_and_si128(a, b)) + } +} + +/// Computes the bitwise NOT of `a` and then AND with `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(andnps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_andnot_si128(a, b)) + } +} + +/// Computes the bitwise OR of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(orps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_or_si128(a, b)) + } +} + +/// Computes the bitwise XOR of `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(xorps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_xor_si128(a, b)) + } +} + +/// Returns a new vector with the low element of `a` replaced by the equality +/// comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpeqsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 0) } +} + +/// Returns a new vector with the low element of `a` replaced by the less-than +/// comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 1) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// less-than-or-equal comparison of the lower elements of `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmplesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 2) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// greater-than comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// greater-than-or-equal comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmplesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } +} + +/// Returns a new vector with the low element of `a` replaced by the result +/// of comparing both of the lower elements of `a` and `b` to `NaN`. If +/// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` +/// otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpordsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 7) } +} + +/// Returns a new vector with the low element of `a` replaced by the result of +/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is +/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpunordsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 3) } +} + +/// Returns a new vector with the low element of `a` replaced by the not-equal +/// comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpneqsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 4) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// not-less-than comparison of the lower elements of `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 5) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// not-less-than-or-equal comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnlesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 6) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// not-greater-than comparison of the lower elements of `a` and `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnltsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } +} + +/// Returns a new vector with the low element of `a` replaced by the +/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnlesd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } +} + +/// Compares corresponding elements in `a` and `b` for equality. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpeqpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 0) } +} + +/// Compares corresponding elements in `a` and `b` for less-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 1) } +} + +/// Compares corresponding elements in `a` and `b` for less-than-or-equal +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmplepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 2) } +} + +/// Compares corresponding elements in `a` and `b` for greater-than. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { + _mm_cmplt_pd(b, a) +} + +/// Compares corresponding elements in `a` and `b` for greater-than-or-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmplepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { + _mm_cmple_pd(b, a) +} + +/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpordpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 7) } +} + +/// Compares corresponding elements in `a` and `b` to see if either is `NaN`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpunordpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 3) } +} + +/// Compares corresponding elements in `a` and `b` for not-equal. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpneqpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 4) } +} + +/// Compares corresponding elements in `a` and `b` for not-less-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 5) } +} + +/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnlepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 6) } +} + +/// Compares corresponding elements in `a` and `b` for not-greater-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnltpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { + _mm_cmpnlt_pd(b, a) +} + +/// Compares corresponding elements in `a` and `b` for +/// not-greater-than-or-equal. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cmpnlepd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { + _mm_cmpnle_pd(b, a) +} + +/// Compares the lower element of `a` and `b` for equality. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comieqsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for less-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comiltsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for less-than-or-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comilesd(a, b) } +} + +/// Compares the lower element of `a` and `b` for greater-than. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comigtsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for greater-than-or-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comigesd(a, b) } +} + +/// Compares the lower element of `a` and `b` for not-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(comisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comineqsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for equality. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomieqsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for less-than. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomiltsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for less-than-or-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomilesd(a, b) } +} + +/// Compares the lower element of `a` and `b` for greater-than. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomigtsd(a, b) } +} + +/// Compares the lower element of `a` and `b` for greater-than-or-equal. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(ucomisd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomigesd(a, b) } +} + +/// Compares the lower element of `a` and `b` for not-equal. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(ucomisd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
+    unsafe { ucomineqsd(a, b) }
+}
+
+/// Converts packed double-precision (64-bit) floating-point elements in `a` to
+/// packed single-precision (32-bit) floating-point elements
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtpd2ps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
+    unsafe {
+        let r = simd_cast::<_, f32x2>(a.as_f64x2());
+        let zero = f32x2::ZERO;
+        // Widen the 2-lane result back to 4 lanes; the upper two lanes are zero.
+        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
+    }
+}
+
+/// Converts packed single-precision (32-bit) floating-point elements in `a` to
+/// packed
+/// double-precision (64-bit) floating-point elements.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtps2pd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
+    unsafe {
+        let a = a.as_f32x4();
+        // Only the low two `f32` lanes participate in the widening conversion.
+        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
+    }
+}
+
+/// Converts packed double-precision (64-bit) floating-point elements in `a` to
+/// packed 32-bit integers.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { + unsafe { transmute(cvtpd2dq(a)) } +} + +/// Converts the lower double-precision (64-bit) floating-point element in a to +/// a 32-bit integer. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsd_si32(a: __m128d) -> i32 { + unsafe { cvtsd2si(a) } +} + +/// Converts the lower double-precision (64-bit) floating-point element in `b` +/// to a single-precision (32-bit) floating-point element, store the result in +/// the lower element of the return value, and copies the upper element from `a` +/// to the upper element the return value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsd2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { + unsafe { cvtsd2ss(a, b) } +} + +/// Returns the lower double-precision (64-bit) floating-point element of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { + unsafe { simd_extract!(a, 0) } +} + +/// Converts the lower single-precision (32-bit) floating-point element in `b` +/// to a double-precision (64-bit) floating-point element, store the result in +/// the lower element of the return value, and copies the upper element from `a` +/// to the upper element the return value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtss2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { + unsafe { cvtss2sd(a, b) } +} + +/// Converts packed double-precision (64-bit) floating-point elements in `a` to +/// packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttpd2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { + unsafe { transmute(cvttpd2dq(a)) } +} + +/// Converts the lower double-precision (64-bit) floating-point element in `a` +/// to a 32-bit integer with truncation. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttsd_si32(a: __m128d) -> i32 { + unsafe { cvttsd2si(a) } +} + +/// Converts packed single-precision (32-bit) floating-point elements in `a` to +/// packed 32-bit integers with truncation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttps2dq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttps_epi32(a: __m128) -> __m128i { + unsafe { transmute(cvttps2dq(a)) } +} + +/// Copies double-precision (64-bit) floating-point element `a` to the lower +/// element of the packed 64-bit return value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_sd(a: f64) -> __m128d { + _mm_set_pd(0.0, a) +} + +/// Broadcasts double-precision (64-bit) floating-point value a to all elements +/// of the return value. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set1_pd(a: f64) -> __m128d { + _mm_set_pd(a, a) +} + +/// Broadcasts double-precision (64-bit) floating-point value a to all elements +/// of the return value. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_pd1(a: f64) -> __m128d { + _mm_set_pd(a, a) +} + +/// Sets packed double-precision (64-bit) floating-point elements in the return +/// value with the supplied values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_set_pd(a: f64, b: f64) -> __m128d { + __m128d([b, a]) +} + +/// Sets packed double-precision (64-bit) floating-point elements in the return +/// value with the supplied values in reverse order. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d { + _mm_set_pd(b, a) +} + +/// Returns packed double-precision (64-bit) floating-point elements with all +/// zeros. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(xorp))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_setzero_pd() -> __m128d { + const { unsafe { mem::zeroed() } } +} + +/// Returns a mask of the most significant bit of each element in `a`. +/// +/// The mask is stored in the 2 least significant bits of the return value. +/// All other bits are set to `0`. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(movmskpd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_movemask_pd(a: __m128d) -> i32 {
+    // Propagate the highest bit to the rest, because simd_bitmask
+    // requires all-1 or all-0.
+    unsafe {
+        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
+        simd_bitmask::<i64x2, u8>(mask).into()
+    }
+}
+
+/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
+/// floating-point elements) from memory into the returned vector.
+/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
+/// exception may be generated.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(
+    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
+    assert_instr(movaps)
+)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+#[allow(clippy::cast_ptr_alignment)]
+pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
+    *(mem_addr as *const __m128d)
+}
+
+/// Loads a 64-bit double-precision value to the low element of a
+/// 128-bit integer vector and clears the upper element.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(movsd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
+    _mm_setr_pd(*mem_addr, 0.)
+}
+
+/// Loads a double-precision value into the high-order bits of a 128-bit
+/// vector of `[2 x double]`. The low-order bits are copied from the low-order
+/// bits of the first operand.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { + _mm_setr_pd(simd_extract!(a, 0), *mem_addr) +} + +/// Loads a double-precision value into the low-order bits of a 128-bit +/// vector of `[2 x double]`. The high-order bits are copied from the +/// high-order bits of the first operand. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movlps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { + _mm_setr_pd(*mem_addr, simd_extract!(a, 1)) +} + +/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit +/// aligned memory location. +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. 
+#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movntpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { + crate::arch::asm!( + vps!("movntpd", ",{a}"), + p = in(reg) mem_addr, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); +} + +/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a +/// memory location. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movlps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { + *mem_addr = simd_extract!(a, 0) +} + +/// Stores 128-bits (composed of 2 packed double-precision (64-bit) +/// floating-point elements) from `a` into memory. `mem_addr` must be aligned +/// on a 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { + *(mem_addr as *mut __m128d) = a; +} + +/// Stores 128-bits (composed of 2 packed double-precision (64-bit) +/// floating-point elements) from `a` into memory. +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { + mem_addr.cast::<__m128d>().write_unaligned(a); +} + +/// Store 16-bit integer from the first element of a into memory. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0)) +} + +/// Store 32-bit integer from the first element of a into memory. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0)) +} + +/// Store 64-bit integer from the first element of a into memory. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) { + ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0)) +} + +/// Stores the lower double-precision (64-bit) floating-point element from `a` +/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a +/// 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { + let b: __m128d = simd_shuffle!(a, a, [0, 0]); + *(mem_addr as *mut __m128d) = b; +} + +/// Stores the lower double-precision (64-bit) floating-point element from `a` +/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a +/// 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { + let b: __m128d = simd_shuffle!(a, a, [0, 0]); + *(mem_addr as *mut __m128d) = b; +} + +/// Stores 2 double-precision (64-bit) floating-point elements from `a` into +/// memory in reverse order. +/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection +/// exception may be generated. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { + let b: __m128d = simd_shuffle!(a, a, [1, 0]); + *(mem_addr as *mut __m128d) = b; +} + +/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a +/// memory location. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { + *mem_addr = simd_extract!(a, 1); +} + +/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a +/// memory location. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movlps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { + *mem_addr = simd_extract!(a, 0); +} + +/// Loads a double-precision (64-bit) floating-point element from memory +/// into both elements of returned vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd) +#[inline] +#[target_feature(enable = "sse2")] +// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { + let d = *mem_addr; + _mm_setr_pd(d, d) +} + +/// Loads a double-precision (64-bit) floating-point element from memory +/// into both elements of returned vector. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1) +#[inline] +#[target_feature(enable = "sse2")] +// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { + _mm_load1_pd(mem_addr) +} + +/// Loads 2 double-precision (64-bit) floating-point elements from memory into +/// the returned vector in reverse order. `mem_addr` must be aligned on a +/// 16-byte boundary or a general-protection exception may be generated. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr( + all(test, not(all(target_arch = "x86", target_env = "msvc"))), + assert_instr(movaps) +)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { + let a = _mm_load_pd(mem_addr); + simd_shuffle!(a, a, [1, 0]) +} + +/// Loads 128-bits (composed of 2 packed double-precision (64-bit) +/// floating-point elements) from memory into the returned vector. +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movups))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { + let mut dst = _mm_undefined_pd(); + ptr::copy_nonoverlapping( + mem_addr as *const u8, + ptr::addr_of_mut!(dst) as *mut u8, + mem::size_of::<__m128d>(), + ); + dst +} + +/// Loads unaligned 16-bits of integer data from memory into new vector. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i { + transmute(i16x8::new( + ptr::read_unaligned(mem_addr as *const i16), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + )) +} + +/// Loads unaligned 32-bits of integer data from memory into new vector. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i { + transmute(i32x4::new( + ptr::read_unaligned(mem_addr as *const i32), + 0, + 0, + 0, + )) +} + +/// Loads unaligned 64-bits of integer data from memory into new vector. +/// +/// `mem_addr` does not need to be aligned on any particular boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")] +pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i { + transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0)) +} + +/// Constructs a 128-bit floating-point vector of `[2 x double]` from two +/// 128-bit vector parameters of `[2 x double]`, using the immediate-value +/// parameter as a specifier. 
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
+    static_assert_uimm_bits!(MASK, 8);
+    // Bit 0 of MASK selects the lane taken from `a`, bit 1 the lane taken from `b`.
+    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
+}
+
+/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
+/// 64 bits are set to the lower 64 bits of the second parameter. The upper
+/// 64 bits are set to the upper 64 bits of the first parameter.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(movsd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
+    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
+}
+
+/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
+/// floating-point vector of `[4 x float]`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
+    unsafe { transmute(a) }
+}
+
+/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
+/// integer vector.
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_castpd_si128(a: __m128d) -> __m128i { + unsafe { transmute(a) } +} + +/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit +/// floating-point vector of `[2 x double]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_castps_pd(a: __m128) -> __m128d { + unsafe { transmute(a) } +} + +/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit +/// integer vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_castps_si128(a: __m128) -> __m128i { + unsafe { transmute(a) } +} + +/// Casts a 128-bit integer vector into a 128-bit floating-point vector +/// of `[2 x double]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_castsi128_pd(a: __m128i) -> __m128d { + unsafe { transmute(a) } +} + +/// Casts a 128-bit integer vector into a 128-bit floating-point vector +/// of `[4 x float]`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_castsi128_ps(a: __m128i) -> __m128 { + unsafe { transmute(a) } +} + +/// Returns vector of type __m128d with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_undefined_pd() -> __m128d { + const { unsafe { mem::zeroed() } } +} + +/// Returns vector of type __m128i with indeterminate elements. +/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically +/// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. +/// In practice, this is typically equivalent to [`mem::zeroed`].
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_undefined_si128() -> __m128i { + const { unsafe { mem::zeroed() } } +} + +/// The resulting `__m128d` element is composed by the high-order values of +/// the two `__m128d` interleaved input elements, i.e.: +/// +/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input +/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(unpckhpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, b, [1, 3]) } +} + +/// The resulting `__m128d` element is composed by the low-order values of +/// the two `__m128d` interleaved input elements, i.e.: +/// +/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input +/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movlhps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, b, [0, 2]) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse2.pause"] + fn pause(); + #[link_name = "llvm.x86.sse2.clflush"] + fn clflush(p: *const u8); + #[link_name = "llvm.x86.sse2.lfence"] + fn lfence(); + #[link_name = "llvm.x86.sse2.mfence"] + fn mfence(); + #[link_name = "llvm.x86.sse2.pmadd.wd"] + fn 
pmaddwd(a: i16x8, b: i16x8) -> i32x4; + #[link_name = "llvm.x86.sse2.psad.bw"] + fn psadbw(a: u8x16, b: u8x16) -> u64x2; + #[link_name = "llvm.x86.sse2.psll.w"] + fn psllw(a: i16x8, count: i16x8) -> i16x8; + #[link_name = "llvm.x86.sse2.psll.d"] + fn pslld(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.sse2.psll.q"] + fn psllq(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.sse2.psra.w"] + fn psraw(a: i16x8, count: i16x8) -> i16x8; + #[link_name = "llvm.x86.sse2.psra.d"] + fn psrad(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.sse2.psrl.w"] + fn psrlw(a: i16x8, count: i16x8) -> i16x8; + #[link_name = "llvm.x86.sse2.psrl.d"] + fn psrld(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.sse2.psrl.q"] + fn psrlq(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.sse2.cvtps2dq"] + fn cvtps2dq(a: __m128) -> i32x4; + #[link_name = "llvm.x86.sse2.maskmov.dqu"] + fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); + #[link_name = "llvm.x86.sse2.packsswb.128"] + fn packsswb(a: i16x8, b: i16x8) -> i8x16; + #[link_name = "llvm.x86.sse2.packssdw.128"] + fn packssdw(a: i32x4, b: i32x4) -> i16x8; + #[link_name = "llvm.x86.sse2.packuswb.128"] + fn packuswb(a: i16x8, b: i16x8) -> u8x16; + #[link_name = "llvm.x86.sse2.max.sd"] + fn maxsd(a: __m128d, b: __m128d) -> __m128d; + #[link_name = "llvm.x86.sse2.max.pd"] + fn maxpd(a: __m128d, b: __m128d) -> __m128d; + #[link_name = "llvm.x86.sse2.min.sd"] + fn minsd(a: __m128d, b: __m128d) -> __m128d; + #[link_name = "llvm.x86.sse2.min.pd"] + fn minpd(a: __m128d, b: __m128d) -> __m128d; + #[link_name = "llvm.x86.sse2.cmp.sd"] + fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; + #[link_name = "llvm.x86.sse2.cmp.pd"] + fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; + #[link_name = "llvm.x86.sse2.comieq.sd"] + fn comieqsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.comilt.sd"] + fn comiltsd(a: __m128d, b: __m128d) -> i32; + #[link_name = 
"llvm.x86.sse2.comile.sd"] + fn comilesd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.comigt.sd"] + fn comigtsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.comige.sd"] + fn comigesd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.comineq.sd"] + fn comineqsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.ucomieq.sd"] + fn ucomieqsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.ucomilt.sd"] + fn ucomiltsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.ucomile.sd"] + fn ucomilesd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.ucomigt.sd"] + fn ucomigtsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.ucomige.sd"] + fn ucomigesd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.ucomineq.sd"] + fn ucomineqsd(a: __m128d, b: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.cvtpd2dq"] + fn cvtpd2dq(a: __m128d) -> i32x4; + #[link_name = "llvm.x86.sse2.cvtsd2si"] + fn cvtsd2si(a: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.cvtsd2ss"] + fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; + #[link_name = "llvm.x86.sse2.cvtss2sd"] + fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; + #[link_name = "llvm.x86.sse2.cvttpd2dq"] + fn cvttpd2dq(a: __m128d) -> i32x4; + #[link_name = "llvm.x86.sse2.cvttsd2si"] + fn cvttsd2si(a: __m128d) -> i32; + #[link_name = "llvm.x86.sse2.cvttps2dq"] + fn cvttps2dq(a: __m128) -> i32x4; +} + +#[cfg(test)] +mod tests { + use crate::{ + core_arch::{simd::*, x86::*}, + hint::black_box, + }; + use std::{ + boxed, f32, f64, + mem::{self, transmute}, + ptr, + }; + use stdarch_test::simd_test; + + const NAN: f64 = f64::NAN; + + #[test] + fn test_mm_pause() { + unsafe { _mm_pause() } + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_clflush() { + let x = 0_u8; + _mm_clflush(ptr::addr_of!(x)); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model 
+ #[cfg_attr(miri, ignore)] + unsafe fn test_mm_lfence() { + _mm_lfence(); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_mfence() { + _mm_mfence(); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_epi8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_add_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_epi8_overflow() { + let a = _mm_set1_epi8(0x7F); + let b = _mm_set1_epi8(1); + let r = _mm_add_epi8(a, b); + assert_eq_m128i(r, _mm_set1_epi8(-128)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_add_epi16(a, b); + let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let b = _mm_setr_epi32(4, 5, 6, 7); + let r = _mm_add_epi32(a, b); + let e = _mm_setr_epi32(4, 6, 8, 10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = _mm_setr_epi64x(2, 3); + let r = _mm_add_epi64(a, b); + let e = _mm_setr_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epi8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_adds_epi8(a, b); + #[rustfmt::skip] + 
let e = _mm_setr_epi8( + 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epi8_saturate_positive() { + let a = _mm_set1_epi8(0x7F); + let b = _mm_set1_epi8(1); + let r = _mm_adds_epi8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epi8_saturate_negative() { + let a = _mm_set1_epi8(-0x80); + let b = _mm_set1_epi8(-1); + let r = _mm_adds_epi8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_adds_epi16(a, b); + let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epi16_saturate_positive() { + let a = _mm_set1_epi16(0x7FFF); + let b = _mm_set1_epi16(1); + let r = _mm_adds_epi16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epi16_saturate_negative() { + let a = _mm_set1_epi16(-0x8000); + let b = _mm_set1_epi16(-1); + let r = _mm_adds_epi16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epu8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_adds_epu8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epu8_saturate() { + let a = _mm_set1_epi8(!0); + let b = _mm_set1_epi8(1); + let r = _mm_adds_epu8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epu16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 
7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_adds_epu16(a, b); + let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_adds_epu16_saturate() { + let a = _mm_set1_epi16(!0); + let b = _mm_set1_epi16(1); + let r = _mm_adds_epu16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_avg_epu8() { + let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); + let r = _mm_avg_epu8(a, b); + assert_eq_m128i(r, _mm_set1_epi8(6)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_avg_epu16() { + let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); + let r = _mm_avg_epu16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(6)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_madd_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm_madd_epi16(a, b); + let e = _mm_setr_epi32(29, 81, 149, 233); + assert_eq_m128i(r, e); + + // Test large values. + // MIN*MIN+MIN*MIN will overflow into i32::MIN. 
+ let a = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MAX, + 0, + 0, + ); + let b = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MAX, + i16::MIN, + 0, + 0, + ); + let r = _mm_madd_epi16(a, b); + let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_max_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(-1); + let r = _mm_max_epi16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_max_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(!0); + let r = _mm_max_epu8(a, b); + assert_eq_m128i(r, b); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_min_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(-1); + let r = _mm_min_epi16(a, b); + assert_eq_m128i(r, b); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_min_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(!0); + let r = _mm_min_epu8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_mulhi_epi16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); + let r = _mm_mulhi_epi16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(-16)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_mulhi_epu16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); + let r = _mm_mulhi_epu16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(15)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_mullo_epi16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); + let r = _mm_mullo_epi16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(-17960)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_mul_epu32() { + let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); + let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); + let r = _mm_mul_epu32(a, b); + let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sad_epu8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, + 1, 2, 3, 4, + 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, + 1, 2, 3, 4, + ); + let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); + let r = _mm_sad_epu8(a, b); + let e = _mm_setr_epi64x(1020, 614); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sub_epi8() { + let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); + let r = _mm_sub_epi8(a, b); + assert_eq_m128i(r, _mm_set1_epi8(-1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sub_epi16() { + let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); + let r = _mm_sub_epi16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(-1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sub_epi32() { + let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); + let r = _mm_sub_epi32(a, b); + assert_eq_m128i(r, _mm_set1_epi32(-1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sub_epi64() { + let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); + let r = _mm_sub_epi64(a, b); + assert_eq_m128i(r, _mm_set1_epi64x(-1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epi8() { + let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); + let r = _mm_subs_epi8(a, b); + assert_eq_m128i(r, _mm_set1_epi8(3)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epi8_saturate_positive() { + let a = _mm_set1_epi8(0x7F); + let b = _mm_set1_epi8(-1); + let r = _mm_subs_epi8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epi8_saturate_negative() { + let a = _mm_set1_epi8(-0x80); + let b = _mm_set1_epi8(1); + let r = _mm_subs_epi8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epi16() { + let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); + let r = 
_mm_subs_epi16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(3)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epi16_saturate_positive() { + let a = _mm_set1_epi16(0x7FFF); + let b = _mm_set1_epi16(-1); + let r = _mm_subs_epi16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epi16_saturate_negative() { + let a = _mm_set1_epi16(-0x8000); + let b = _mm_set1_epi16(1); + let r = _mm_subs_epi16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epu8() { + let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); + let r = _mm_subs_epu8(a, b); + assert_eq_m128i(r, _mm_set1_epi8(3)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epu8_saturate() { + let a = _mm_set1_epi8(0); + let b = _mm_set1_epi8(1); + let r = _mm_subs_epu8(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epu16() { + let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); + let r = _mm_subs_epu16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(3)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_subs_epu16_saturate() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let r = _mm_subs_epu16(a, b); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_slli_si128() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128::<1>(a); + let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128::<15>(a); + let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128::<16>(a); + assert_eq_m128i(r, 
_mm_set1_epi8(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_slli_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_slli_epi16::<4>(a); + assert_eq_m128i( + r, + _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), + ); + let r = _mm_slli_epi16::<16>(a); + assert_eq_m128i(r, _mm_set1_epi16(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sll_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i( + r, + _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), + ); + let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_slli_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_slli_epi32::<4>(a); + assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); + let r = _mm_slli_epi32::<32>(a); + assert_eq_m128i(r, _mm_set1_epi32(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sll_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); + let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_slli_epi64() { + let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); + let r = _mm_slli_epi64::<4>(a); + assert_eq_m128i(r, 
_mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); + let r = _mm_slli_epi64::<64>(a); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sll_epi64() { + let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); + let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); + let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srai_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_srai_epi16::<4>(a); + assert_eq_m128i( + r, + _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), + ); + let r = _mm_srai_epi16::<16>(a); + assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sra_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i( + r, + _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), + ); + let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); + assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); + let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srai_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_srai_epi32::<4>(a); + assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); + let r = _mm_srai_epi32::<32>(a); + assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn 
test_mm_sra_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); + let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); + assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); + let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srli_si128() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_srli_si128::<1>(a); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, + ); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_srli_si128::<15>(a); + let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_srli_si128::<16>(a); + assert_eq_m128i(r, _mm_set1_epi8(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srli_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_srli_epi16::<4>(a); + assert_eq_m128i( + r, + _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), + ); + let r = _mm_srli_epi16::<16>(a); + assert_eq_m128i(r, _mm_set1_epi16(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srl_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i( + r, + _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), + ); + let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = 
_mm_srl_epi16(a, _mm_set_epi64x(0, 16)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srli_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_srli_epi32::<4>(a); + assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); + let r = _mm_srli_epi32::<32>(a); + assert_eq_m128i(r, _mm_set1_epi32(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srl_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); + let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srli_epi64() { + let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); + let r = _mm_srli_epi64::<4>(a); + assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); + let r = _mm_srli_epi64::<64>(a); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_srl_epi64() { + let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); + let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); + let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_and_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_and_si128(a, b); + 
assert_eq_m128i(r, _mm_set1_epi8(1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_andnot_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_andnot_si128(a, b); + assert_eq_m128i(r, _mm_set1_epi8(2)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_or_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_or_si128(a, b); + assert_eq_m128i(r, _mm_set1_epi8(7)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_xor_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_xor_si128(a, b); + assert_eq_m128i(r, _mm_set1_epi8(6)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpeq_epi8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_cmpeq_epi8(a, b); + #[rustfmt::skip] + assert_eq_m128i( + r, + _mm_setr_epi8( + 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + ) + ); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpeq_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); + let r = _mm_cmpeq_epi16(a, b); + assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpeq_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let b = _mm_setr_epi32(3, 2, 2, 0); + let r = _mm_cmpeq_epi32(a, b); + assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpgt_epi8() { + let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let b = _mm_set1_epi8(0); + let r = _mm_cmpgt_epi8(a, b); + let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpgt_epi16() { + let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); + let b = _mm_set1_epi16(0); + let r = 
_mm_cmpgt_epi16(a, b); + let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpgt_epi32() { + let a = _mm_set_epi32(5, 0, 0, 0); + let b = _mm_set1_epi32(0); + let r = _mm_cmpgt_epi32(a, b); + assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmplt_epi8() { + let a = _mm_set1_epi8(0); + let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_cmplt_epi8(a, b); + let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmplt_epi16() { + let a = _mm_set1_epi16(0); + let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_cmplt_epi16(a, b); + let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmplt_epi32() { + let a = _mm_set1_epi32(0); + let b = _mm_set_epi32(5, 0, 0, 0); + let r = _mm_cmplt_epi32(a, b); + assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtepi32_pd() { + let a = _mm_set_epi32(35, 25, 15, 5); + let r = _mm_cvtepi32_pd(a); + assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsi32_sd() { + let a = _mm_set1_pd(3.5); + let r = _mm_cvtsi32_sd(a, 5); + assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtepi32_ps() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_cvtepi32_ps(a); + assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtps_epi32() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtps_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsi32_si128() { + let r = _mm_cvtsi32_si128(5); + assert_eq_m128i(r, 
_mm_setr_epi32(5, 0, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsi128_si32() { + let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); + assert_eq!(r, 5); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_epi64x() { + let r = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_epi32() { + let r = _mm_set_epi32(0, 1, 2, 3); + assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_epi16() { + let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_epi8() { + #[rustfmt::skip] + let r = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set1_epi64x() { + let r = _mm_set1_epi64x(1); + assert_eq_m128i(r, _mm_set1_epi64x(1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set1_epi32() { + let r = _mm_set1_epi32(1); + assert_eq_m128i(r, _mm_set1_epi32(1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set1_epi16() { + let r = _mm_set1_epi16(1); + assert_eq_m128i(r, _mm_set1_epi16(1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set1_epi8() { + let r = _mm_set1_epi8(1); + assert_eq_m128i(r, _mm_set1_epi8(1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_setr_epi32() { + let r = _mm_setr_epi32(0, 1, 2, 3); + assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_setr_epi16() { + let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_setr_epi8() { + #[rustfmt::skip] + let r = 
_mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_setzero_si128() { + let r = _mm_setzero_si128(); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadl_epi64() { + let a = _mm_setr_epi64x(6, 5); + let r = _mm_loadl_epi64(ptr::addr_of!(a)); + assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_load_si128() { + let a = _mm_set_epi64x(5, 6); + let r = _mm_load_si128(ptr::addr_of!(a) as *const _); + assert_eq_m128i(a, r); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadu_si128() { + let a = _mm_set_epi64x(5, 6); + let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _); + assert_eq_m128i(a, r); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_maskmoveu_si128() { + let a = _mm_set1_epi8(9); + #[rustfmt::skip] + let mask = _mm_set_epi8( + 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ); + let mut r = _mm_set1_epi8(0); + _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); + let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_store_si128() { + let a = _mm_set1_epi8(9); + let mut r = _mm_set1_epi8(0); + _mm_store_si128(&mut r, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storeu_si128() { + let a = _mm_set1_epi8(9); + let mut r = _mm_set1_epi8(0); + _mm_storeu_si128(&mut r, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storel_epi64() { + let a = _mm_setr_epi64x(2, 9); + let mut r = 
_mm_set1_epi8(0); + _mm_storel_epi64(&mut r, a); + assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_si128() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let mut r = _mm_undefined_si128(); + _mm_stream_si128(ptr::addr_of_mut!(r), a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_si32() { + let a: i32 = 7; + let mut mem = boxed::Box::::new(-1); + _mm_stream_si32(ptr::addr_of_mut!(*mem), a); + assert_eq!(a, *mem); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_move_epi64() { + let a = _mm_setr_epi64x(5, 6); + let r = _mm_move_epi64(a); + assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_packs_epi16() { + let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); + let r = _mm_packs_epi16(a, b); + #[rustfmt::skip] + assert_eq_m128i( + r, + _mm_setr_epi8( + 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F + ) + ); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_packs_epi32() { + let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); + let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); + let r = _mm_packs_epi32(a, b); + assert_eq_m128i( + r, + _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), + ); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_packus_epi16() { + let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); + let r = _mm_packus_epi16(a, b); + assert_eq_m128i( + r, + _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), + ); + } + + #[simd_test(enable = "sse2")] + unsafe fn 
test_mm_extract_epi16() { + let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); + let r1 = _mm_extract_epi16::<0>(a); + let r2 = _mm_extract_epi16::<3>(a); + assert_eq!(r1, 0xFFFF); + assert_eq!(r2, 3); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_insert_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_insert_epi16::<0>(a, 9); + let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_movemask_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, + 0b0101, 0b1111_0000u8 as i8, 0, 0, + 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, + 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, + ); + let r = _mm_movemask_epi8(a); + assert_eq!(r, 0b10100110_00100101); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_shuffle_epi32() { + let a = _mm_setr_epi32(5, 10, 15, 20); + let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); + let e = _mm_setr_epi32(20, 10, 10, 5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_shufflehi_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); + let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); + let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_shufflelo_epi16() { + let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); + let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); + let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpackhi_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_unpackhi_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 8, 24, 9, 25, 10, 26, 11, 27, 
12, 28, 13, 29, 14, 30, 15, 31, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpackhi_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_unpackhi_epi16(a, b); + let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpackhi_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let b = _mm_setr_epi32(4, 5, 6, 7); + let r = _mm_unpackhi_epi32(a, b); + let e = _mm_setr_epi32(2, 6, 3, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpackhi_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = _mm_setr_epi64x(2, 3); + let r = _mm_unpackhi_epi64(a, b); + let e = _mm_setr_epi64x(1, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpacklo_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_unpacklo_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpacklo_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_unpacklo_epi16(a, b); + let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpacklo_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let b = _mm_setr_epi32(4, 5, 6, 7); + let r = _mm_unpacklo_epi32(a, b); + let e = _mm_setr_epi32(0, 4, 1, 5); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpacklo_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = 
_mm_setr_epi64x(2, 3); + let r = _mm_unpacklo_epi64(a, b); + let e = _mm_setr_epi64x(0, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_add_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_div_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_div_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_div_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_div_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_max_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_max_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_max_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_max_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); + + // Check SSE(2)-specific semantics for -0.0 handling. 
+ let a = _mm_setr_pd(-0.0, 0.0); + let b = _mm_setr_pd(0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); + let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_min_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_min_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_min_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_min_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); + + // Check SSE(2)-specific semantics for -0.0 handling. + let a = _mm_setr_pd(-0.0, 0.0); + let b = _mm_setr_pd(0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); + let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_mul_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_mul_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_mul_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_mul_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sqrt_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_sqrt_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sqrt_pd() { + let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); + assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); + } + + 
#[simd_test(enable = "sse2")] + unsafe fn test_mm_sub_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_sub_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_sub_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_sub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_and_pd() { + let a = transmute(u64x2::splat(5)); + let b = transmute(u64x2::splat(3)); + let r = _mm_and_pd(a, b); + let e = transmute(u64x2::splat(1)); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_andnot_pd() { + let a = transmute(u64x2::splat(5)); + let b = transmute(u64x2::splat(3)); + let r = _mm_andnot_pd(a, b); + let e = transmute(u64x2::splat(2)); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_or_pd() { + let a = transmute(u64x2::splat(5)); + let b = transmute(u64x2::splat(3)); + let r = _mm_or_pd(a, b); + let e = transmute(u64x2::splat(7)); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_xor_pd() { + let a = transmute(u64x2::splat(5)); + let b = transmute(u64x2::splat(3)); + let r = _mm_xor_pd(a, b); + let e = transmute(u64x2::splat(6)); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpeq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmplt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmple_sd() { + let (a, b) = 
(_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpgt_sd() { + let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpge_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpord_sd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpunord_sd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpneq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpnlt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpnle_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as 
i64); + let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpngt_sd() { + let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpnge_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); + let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpeq_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 0); + let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmplt_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmple_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, !0); + let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpgt_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 0); + let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpge_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 0); + let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpord_pd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 
3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpunord_pd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, 0); + let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpneq_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, !0); + let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpnlt_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, 0); + let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpnle_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 0); + let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpngt_pd() { + let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cmpnge_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_comieq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comieq_sd(a, b) != 0); + + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comieq_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_comilt_sd() { + let (a, b) = 
(_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comilt_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_comile_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comile_sd(a, b) != 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_comigt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comigt_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_comige_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comige_sd(a, b) != 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_comineq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comineq_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_ucomieq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomieq_sd(a, b) != 0); + + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); + assert!(_mm_ucomieq_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_ucomilt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomilt_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_ucomile_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomile_sd(a, b) != 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_ucomigt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomigt_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_ucomige_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomige_sd(a, b) != 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_ucomineq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomineq_sd(a, b) == 0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_movemask_pd() { + 
let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); + assert_eq!(r, 0b01); + + let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); + assert_eq!(r, 0b11); + } + + #[repr(align(16))] + struct Memory { + data: [f64; 4], + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_load_pd() { + let mem = Memory { + data: [1.0f64, 2.0, 3.0, 4.0], + }; + let vals = &mem.data; + let d = vals.as_ptr(); + + let r = _mm_load_pd(d); + assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_load_sd() { + let a = 1.; + let expected = _mm_setr_pd(a, 0.); + let r = _mm_load_sd(&a); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadh_pd() { + let a = _mm_setr_pd(1., 2.); + let b = 3.; + let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); + let r = _mm_loadh_pd(a, &b); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadl_pd() { + let a = _mm_setr_pd(1., 2.); + let b = 3.; + let expected = _mm_setr_pd(3., get_m128d(a, 1)); + let r = _mm_loadl_pd(a, &b); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_pd() { + #[repr(align(128))] + struct Memory { + pub data: [f64; 2], + } + let a = _mm_set1_pd(7.0); + let mut mem = Memory { data: [-1.0; 2] }; + + _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); + for i in 0..2 { + assert_eq!(mem.data[i], get_m128d(a, i)); + } + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_store_sd() { + let mut dest = 0.; + let a = _mm_setr_pd(1., 2.); + _mm_store_sd(&mut dest, a); + assert_eq!(dest, _mm_cvtsd_f64(a)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_store_pd() { + let mut mem = Memory { data: [0.0f64; 4] }; + let vals = &mut mem.data; + let a = _mm_setr_pd(1.0, 2.0); + let d = vals.as_mut_ptr(); + + _mm_store_pd(d, 
*black_box(&a)); + assert_eq!(vals[0], 1.0); + assert_eq!(vals[1], 2.0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storeu_pd() { + let mut mem = Memory { data: [0.0f64; 4] }; + let vals = &mut mem.data; + let a = _mm_setr_pd(1.0, 2.0); + + let mut ofs = 0; + let mut p = vals.as_mut_ptr(); + + // Make sure p is **not** aligned to 16-byte boundary + if (p as usize) & 0xf == 0 { + ofs = 1; + p = p.add(1); + } + + _mm_storeu_pd(p, *black_box(&a)); + + if ofs > 0 { + assert_eq!(vals[ofs - 1], 0.0); + } + assert_eq!(vals[ofs + 0], 1.0); + assert_eq!(vals[ofs + 1], 2.0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storeu_si16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); + _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a); + let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storeu_si32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let mut r = _mm_setr_epi32(5, 6, 7, 8); + _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a); + let e = _mm_setr_epi32(1, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storeu_si64() { + let a = _mm_setr_epi64x(1, 2); + let mut r = _mm_setr_epi64x(3, 4); + _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a); + let e = _mm_setr_epi64x(1, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_store1_pd() { + let mut mem = Memory { data: [0.0f64; 4] }; + let vals = &mut mem.data; + let a = _mm_setr_pd(1.0, 2.0); + let d = vals.as_mut_ptr(); + + _mm_store1_pd(d, *black_box(&a)); + assert_eq!(vals[0], 1.0); + assert_eq!(vals[1], 1.0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_store_pd1() { + let mut mem = Memory { data: [0.0f64; 4] }; + let vals = &mut mem.data; + let a = _mm_setr_pd(1.0, 2.0); + let d = vals.as_mut_ptr(); + + _mm_store_pd1(d, *black_box(&a)); + assert_eq!(vals[0], 
1.0); + assert_eq!(vals[1], 1.0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storer_pd() { + let mut mem = Memory { data: [0.0f64; 4] }; + let vals = &mut mem.data; + let a = _mm_setr_pd(1.0, 2.0); + let d = vals.as_mut_ptr(); + + _mm_storer_pd(d, *black_box(&a)); + assert_eq!(vals[0], 2.0); + assert_eq!(vals[1], 1.0); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storeh_pd() { + let mut dest = 0.; + let a = _mm_setr_pd(1., 2.); + _mm_storeh_pd(&mut dest, a); + assert_eq!(dest, get_m128d(a, 1)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_storel_pd() { + let mut dest = 0.; + let a = _mm_setr_pd(1., 2.); + _mm_storel_pd(&mut dest, a); + assert_eq!(dest, _mm_cvtsd_f64(a)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadr_pd() { + let mut mem = Memory { + data: [1.0f64, 2.0, 3.0, 4.0], + }; + let vals = &mut mem.data; + let d = vals.as_ptr(); + + let r = _mm_loadr_pd(d); + assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadu_pd() { + let mut mem = Memory { + data: [1.0f64, 2.0, 3.0, 4.0], + }; + let vals = &mut mem.data; + let mut d = vals.as_ptr(); + + // make sure d is not aligned to 16-byte boundary + let mut offset = 0; + if (d as usize) & 0xf == 0 { + offset = 1; + d = d.add(offset); + } + + let r = _mm_loadu_pd(d); + let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadu_si16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _); + assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadu_si32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _); + assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_loadu_si64() { + let a = 
_mm_setr_epi64x(5, 6); + let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _); + assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtpd_ps() { + let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); + assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); + + let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); + assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); + + let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); + + let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); + assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtps_pd() { + let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); + assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); + + let r = _mm_cvtps_pd(_mm_setr_ps( + f32::MAX, + f32::INFINITY, + f32::NEG_INFINITY, + f32::MIN, + )); + assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtpd_epi32() { + let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); + assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); + + let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); + assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); + + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsd_si32() { + let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); + assert_eq!(r, -2); + + let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq!(r, i32::MIN); + + let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); + 
assert_eq!(r, i32::MIN); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsd_ss() { + let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); + let b = _mm_setr_pd(2.0, -5.0); + + let r = _mm_cvtsd_ss(a, b); + + assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); + + let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); + let b = _mm_setr_pd(f64::INFINITY, -5.0); + + let r = _mm_cvtsd_ss(a, b); + + assert_eq_m128( + r, + _mm_setr_ps( + f32::INFINITY, + f32::NEG_INFINITY, + f32::MAX, + f32::NEG_INFINITY, + ), + ); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsd_f64() { + let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); + assert_eq!(r, -1.1); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtss_sd() { + let a = _mm_setr_pd(-1.1, 2.2); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + + let r = _mm_cvtss_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); + + let a = _mm_setr_pd(-1.1, f64::INFINITY); + let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); + + let r = _mm_cvtss_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvttpd_epi32() { + let a = _mm_setr_pd(-1.1, 2.2); + let r = _mm_cvttpd_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); + + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttpd_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvttsd_si32() { + let a = _mm_setr_pd(-1.1, 2.2); + let r = _mm_cvttsd_si32(a); + assert_eq!(r, -1); + + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttsd_si32(a); + assert_eq!(r, i32::MIN); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvttps_epi32() { + let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); + let r = _mm_cvttps_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); + + let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, 
f32::MAX); + let r = _mm_cvttps_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_sd() { + let r = _mm_set_sd(-1.0_f64); + assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set1_pd() { + let r = _mm_set1_pd(-1.0_f64); + assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_pd1() { + let r = _mm_set_pd1(-2.0_f64); + assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_set_pd() { + let r = _mm_set_pd(1.0_f64, 5.0_f64); + assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_setr_pd() { + let r = _mm_setr_pd(1.0_f64, -5.0_f64); + assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_setzero_pd() { + let r = _mm_setzero_pd(); + assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_load1_pd() { + let d = -5.0; + let r = _mm_load1_pd(&d); + assert_eq_m128d(r, _mm_setr_pd(d, d)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_load_pd1() { + let d = -5.0; + let r = _mm_load_pd1(&d); + assert_eq_m128d(r, _mm_setr_pd(d, d)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpackhi_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(3.0, 4.0); + let r = _mm_unpackhi_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_unpacklo_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(3.0, 4.0); + let r = _mm_unpacklo_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_shuffle_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(3., 4.); + let expected = _mm_setr_pd(1., 3.); + let r = 
_mm_shuffle_pd::<0b00_00_00_00>(a, b); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_move_sd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(3., 4.); + let expected = _mm_setr_pd(3., 2.); + let r = _mm_move_sd(a, b); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_castpd_ps() { + let a = _mm_set1_pd(0.); + let expected = _mm_set1_ps(0.); + let r = _mm_castpd_ps(a); + assert_eq_m128(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_castpd_si128() { + let a = _mm_set1_pd(0.); + let expected = _mm_set1_epi64x(0); + let r = _mm_castpd_si128(a); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_castps_pd() { + let a = _mm_set1_ps(0.); + let expected = _mm_set1_pd(0.); + let r = _mm_castps_pd(a); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_castps_si128() { + let a = _mm_set1_ps(0.); + let expected = _mm_set1_epi32(0); + let r = _mm_castps_si128(a); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_castsi128_pd() { + let a = _mm_set1_epi64x(0); + let expected = _mm_set1_pd(0.); + let r = _mm_castsi128_pd(a); + assert_eq_m128d(r, expected); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_castsi128_ps() { + let a = _mm_set1_epi32(0); + let expected = _mm_set1_ps(0.); + let r = _mm_castsi128_ps(a); + assert_eq_m128(r, expected); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs new file mode 100644 index 000000000000..7a32cfe472d4 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs @@ -0,0 +1,262 @@ +//! 
Streaming SIMD Extensions 3 (SSE3) + +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Alternatively add and subtract packed single-precision (32-bit) +/// floating-point elements in `a` to/from packed elements in `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_ps) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(addsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } +} + +/// Alternatively add and subtract packed double-precision (64-bit) +/// floating-point elements in `a` to/from packed elements in `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(addsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [2, 1]) + } +} + +/// Horizontally adds adjacent pairs of double-precision (64-bit) +/// floating-point elements in `a` and `b`, and pack the results. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(haddpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { haddpd(a, b) } +} + +/// Horizontally adds adjacent pairs of single-precision (32-bit) +/// floating-point elements in `a` and `b`, and pack the results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(haddps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { + unsafe { haddps(a, b) } +} + +/// Horizontally subtract adjacent pairs of double-precision (64-bit) +/// floating-point elements in `a` and `b`, and pack the results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(hsubpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { hsubpd(a, b) } +} + +/// Horizontally adds adjacent pairs of single-precision (32-bit) +/// floating-point elements in `a` and `b`, and pack the results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(hsubps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { + unsafe { hsubps(a, b) } +} + +/// Loads 128-bits of integer data from unaligned memory. +/// This intrinsic may perform better than `_mm_loadu_si128` +/// when the data crosses a cache line boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(lddqu))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i { + transmute(lddqu(mem_addr as *const _)) +} + +/// Duplicate the low double-precision (64-bit) floating-point element +/// from `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(movddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_movedup_pd(a: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, a, [0, 0]) } +} + +/// Loads a double-precision (64-bit) floating-point element from memory +/// into both elements of return vector. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(movddup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d { + _mm_load1_pd(mem_addr) +} + +/// Duplicate odd-indexed single-precision (32-bit) floating-point elements +/// from `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(movshdup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_movehdup_ps(a: __m128) -> __m128 { + unsafe { simd_shuffle!(a, a, [1, 1, 3, 3]) } +} + +/// Duplicate even-indexed single-precision (32-bit) floating-point elements +/// from `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps) +#[inline] +#[target_feature(enable = "sse3")] +#[cfg_attr(test, assert_instr(movsldup))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_moveldup_ps(a: __m128) -> __m128 { + unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse3.hadd.pd"] + fn haddpd(a: __m128d, b: __m128d) -> __m128d; + #[link_name = "llvm.x86.sse3.hadd.ps"] + fn haddps(a: __m128, b: __m128) -> __m128; + #[link_name = "llvm.x86.sse3.hsub.pd"] + fn hsubpd(a: __m128d, b: __m128d) -> __m128d; + #[link_name = "llvm.x86.sse3.hsub.ps"] + fn hsubps(a: __m128, b: __m128) -> __m128; + #[link_name = "llvm.x86.sse3.ldu.dq"] + fn lddqu(mem_addr: *const i8) -> i8x16; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_addsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_addsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_addsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_addsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_hadd_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hadd_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_hadd_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hadd_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_hsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b 
= _mm_setr_pd(-100.0, 20.0); + let r = _mm_hsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_hsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_lddqu_si128() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + let r = _mm_lddqu_si128(&a); + assert_eq_m128i(a, r); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_movedup_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let r = _mm_movedup_pd(a); + assert_eq_m128d(r, _mm_setr_pd(-1.0, -1.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_movehdup_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let r = _mm_movehdup_ps(a); + assert_eq_m128(r, _mm_setr_ps(5.0, 5.0, -10.0, -10.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_moveldup_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let r = _mm_moveldup_ps(a); + assert_eq_m128(r, _mm_setr_ps(-1.0, -1.0, 0.0, 0.0)); + } + + #[simd_test(enable = "sse3")] + unsafe fn test_mm_loaddup_pd() { + let d = -5.0; + let r = _mm_loaddup_pd(&d); + assert_eq_m128d(r, _mm_setr_pd(d, d)); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs new file mode 100644 index 000000000000..9aa200dfc07a --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -0,0 +1,1941 @@ +//! 
Streaming SIMD Extensions 4.1 (SSE4.1) + +use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +// SSE4 rounding constants +/// round to nearest +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00; +/// round down +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01; +/// round up +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_TO_POS_INF: i32 = 0x02; +/// truncate +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_TO_ZERO: i32 = 0x03; +/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE` +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04; +/// do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_RAISE_EXC: i32 = 0x00; +/// suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_NO_EXC: i32 = 0x08; +/// round to nearest and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_NINT: i32 = 0x00; +/// round down and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF; +/// round up and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF; +/// truncate and do not suppress exceptions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO; +/// use MXCSR.RC and do not suppress exceptions; see +/// `vendor::_MM_SET_ROUNDING_MODE` +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION; +/// use MXCSR.RC and suppress exceptions; see 
`vendor::_MM_SET_ROUNDING_MODE` +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION; + +/// Blend packed 8-bit integers from `a` and `b` using `mask` +/// +/// The high bit of each corresponding mask byte determines the selection. +/// If the high bit is set, the element of `b` is selected. +/// Otherwise, the element of `a` is selected. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pblendvb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { + unsafe { + let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO); + transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16())) + } +} + +/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`. +/// +/// The mask bits determine the selection. A clear bit selects the +/// corresponding element of `a`, and a set bit the corresponding +/// element of `b`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blend_epi16(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { + transmute::(simd_shuffle!( + a.as_i16x8(), + b.as_i16x8(), + [ + [0, 8][IMM8 as usize & 1], + [1, 9][(IMM8 >> 1) as usize & 1], + [2, 10][(IMM8 >> 2) as usize & 1], + [3, 11][(IMM8 >> 3) as usize & 1], + [4, 12][(IMM8 >> 4) as usize & 1], + [5, 13][(IMM8 >> 5) as usize & 1], + [6, 14][(IMM8 >> 6) as usize & 1], + [7, 15][(IMM8 >> 7) as usize & 1], + ] + )) + } +} + +/// Blend packed double-precision (64-bit) floating-point elements from `a` +/// and `b` using `mask` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(blendvpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { + unsafe { + let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO); + transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2())) + } +} + +/// Blend packed single-precision (32-bit) floating-point elements from `a` +/// and `b` using `mask` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(blendvps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { + unsafe { + let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO); + transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4())) + } +} + +/// Blend 
packed double-precision (64-bit) floating-point elements from `a` +/// and `b` using control mask `IMM2` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd) +#[inline] +#[target_feature(enable = "sse4.1")] +// Note: LLVM7 prefers the single-precision floating-point domain when possible +// see https://bugs.llvm.org/show_bug.cgi?id=38195 +// #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))] +#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blend_pd(a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(IMM2, 2); + unsafe { + transmute::(simd_shuffle!( + a.as_f64x2(), + b.as_f64x2(), + [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]] + )) + } +} + +/// Blend packed single-precision (32-bit) floating-point elements from `a` +/// and `b` using mask `IMM4` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_blend_ps(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM4, 4); + unsafe { + transmute::(simd_shuffle!( + a.as_f32x4(), + b.as_f32x4(), + [ + [0, 4][IMM4 as usize & 1], + [1, 5][(IMM4 >> 1) as usize & 1], + [2, 6][(IMM4 >> 2) as usize & 1], + [3, 7][(IMM4 >> 3) as usize & 1], + ] + )) + } +} + +/// Extracts a single-precision (32-bit) floating-point element from `a`, +/// selected with `IMM8`. The returned `i32` stores the float's bit-pattern, +/// and may be converted back to a floating point number via casting. 
+/// +/// # Example +/// ```rust +/// # #[cfg(target_arch = "x86")] +/// # use std::arch::x86::*; +/// # #[cfg(target_arch = "x86_64")] +/// # use std::arch::x86_64::*; +/// # fn main() { +/// # if is_x86_feature_detected!("sse4.1") { +/// # #[target_feature(enable = "sse4.1")] +/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc +/// # unsafe fn worker() { unsafe { +/// let mut float_store = vec![1.0, 1.0, 2.0, 3.0]; +/// let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0); +/// let x: i32 = _mm_extract_ps::<2>(simd_floats); +/// float_store.push(f32::from_bits(x as u32)); +/// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]); +/// # }} +/// # unsafe { worker() } +/// # } +/// # } +/// ``` +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(extractps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_extract_ps(a: __m128) -> i32 { + static_assert_uimm_bits!(IMM8, 2); + unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 } +} + +/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit +/// integer containing the zero-extended integer data. +/// +/// See [LLVM commit D20468](https://reviews.llvm.org/D20468). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_extract_epi8(a: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 4); + unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 } +} + +/// Extracts an 32-bit integer from `a` selected with `IMM8` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(extractps, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_extract_epi32(a: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 2); + unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) } +} + +/// Select a single value in `b` to store at some position in `a`, +/// Then zero elements according to `IMM8`. +/// +/// `IMM8` specifies which bits from operand `b` will be copied, which bits in +/// the result they will be copied to, and which bits in the result will be +/// cleared. The following assignments are made: +/// +/// * Bits `[7:6]` specify the bits to copy from operand `b`: +/// - `00`: Selects bits `[31:0]` from operand `b`. +/// - `01`: Selects bits `[63:32]` from operand `b`. +/// - `10`: Selects bits `[95:64]` from operand `b`. +/// - `11`: Selects bits `[127:96]` from operand `b`. +/// +/// * Bits `[5:4]` specify the bits in the result to which the selected bits +/// from operand `b` are copied: +/// - `00`: Copies the selected bits from `b` to result bits `[31:0]`. +/// - `01`: Copies the selected bits from `b` to result bits `[63:32]`. +/// - `10`: Copies the selected bits from `b` to result bits `[95:64]`. 
+/// - `11`: Copies the selected bits from `b` to result bits `[127:96]`. +/// +/// * Bits `[3:0]`: If any of these bits are set, the corresponding result +/// element is cleared. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_insert_ps(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { insertps(a, b, IMM8 as u8) } +} + +/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a +/// location specified by `IMM8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_insert_epi8(a: __m128i, i: i32) -> __m128i { + static_assert_uimm_bits!(IMM8, 4); + unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) } +} + +/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a +/// location specified by `IMM8`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_insert_epi32(a: __m128i, i: i32) -> __m128i { + static_assert_uimm_bits!(IMM8, 2); + unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) } +} + +/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum +/// values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmaxsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed +/// maximum. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmaxuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum +/// values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmaxsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed +/// maximum values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmaxud))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } +} + +/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum +/// values in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pminsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed +/// minimum. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pminuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum +/// values. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pminsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed +/// minimum values. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pminud))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } +} + +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// using unsigned saturation +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(packusdw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } +} + +/// Compares packed 64-bit integers in `a` and `b` for equality +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pcmpeqq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } +} + +/// Sign extend packed 
8-bit integers in `a` to packed 16-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovsxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute(simd_cast::<_, i16x8>(a)) + } +} + +/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovsxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } +} + +/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed +/// 64-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovsxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let a: i8x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } +} + +/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovsxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { + 
unsafe { + let a = a.as_i16x8(); + let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } +} + +/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovsxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let a: i16x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } +} + +/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovsxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let a: i32x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } +} + +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovzxbw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute(simd_cast::<_, i16x8>(a)) + } +} + +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32) +#[inline] +#[target_feature(enable = 
"sse4.1")] +#[cfg_attr(test, assert_instr(pmovzxbd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } +} + +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovzxbq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let a: u8x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } +} + +/// Zeroes extend packed unsigned 16-bit integers in `a` +/// to packed 32-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovzxwd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } +} + +/// Zeroes extend packed unsigned 16-bit integers in `a` +/// to packed 64-bit integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovzxwq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let a: u16x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } +} + +/// Zeroes extend packed unsigned 32-bit integers in `a` +/// to packed 64-bit 
integers +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmovzxdq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let a: u32x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } +} + +/// Returns the dot product of two __m128d vectors. +/// +/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask. +/// If a condition mask bit is zero, the corresponding multiplication is +/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of +/// the dot product will be stored in the return value component. Otherwise if +/// the broadcast mask bit is zero then the return component will be zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_dp_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + dppd(a, b, IMM8 as u8) + } +} + +/// Returns the dot product of two __m128 vectors. +/// +/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask. +/// If a condition mask bit is zero, the corresponding multiplication is +/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of +/// the dot product will be stored in the return value component. Otherwise if +/// the broadcast mask bit is zero then the return component will be zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_dp_ps(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { dpps(a, b, IMM8 as u8) } +} + +/// Round the packed double-precision (64-bit) floating-point elements in `a` +/// down to an integer value, and stores the results as packed double-precision +/// floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_floor_pd(a: __m128d) -> __m128d { + unsafe { simd_floor(a) } +} + +/// Round the packed single-precision (32-bit) floating-point elements in `a` +/// down to an integer value, and stores the results as packed single-precision +/// floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_floor_ps(a: __m128) -> __m128 { + unsafe { simd_floor(a) } +} + +/// Round the lower double-precision (64-bit) floating-point element in `b` +/// down to an integer value, store the result as a double-precision +/// floating-point element in the lower element of the intrinsic result, +/// and copies the upper element from `a` to the upper element of the intrinsic +/// result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { roundsd(a, b, _MM_FROUND_FLOOR) } +} + +/// Round the lower single-precision (32-bit) floating-point element in `b` +/// down to an integer value, store the result as a single-precision +/// floating-point element in the lower element of the intrinsic result, +/// and copies the upper 3 packed elements from `a` to the upper elements +/// of the intrinsic result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { + unsafe { roundss(a, b, _MM_FROUND_FLOOR) } +} + +/// Round the packed double-precision (64-bit) floating-point elements in `a` +/// up to an integer value, and stores the results as packed double-precision +/// floating-point elements. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundpd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ceil_pd(a: __m128d) -> __m128d { + unsafe { simd_ceil(a) } +} + +/// Round the packed single-precision (32-bit) floating-point elements in `a` +/// up to an integer value, and stores the results as packed single-precision +/// floating-point elements. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundps))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ceil_ps(a: __m128) -> __m128 { + unsafe { simd_ceil(a) } +} + +/// Round the lower double-precision (64-bit) floating-point element in `b` +/// up to an integer value, store the result as a double-precision +/// floating-point element in the lower element of the intrinsic result, +/// and copies the upper element from `a` to the upper element +/// of the intrinsic result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { roundsd(a, b, _MM_FROUND_CEIL) } +} + +/// Round the lower single-precision (32-bit) floating-point element in `b` +/// up to an integer value, store the result as a single-precision +/// floating-point element in the lower element of the intrinsic result, +/// and copies the upper 3 packed elements from `a` to the upper elements +/// of the intrinsic result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { + unsafe { roundss(a, b, _MM_FROUND_CEIL) } +} + +/// Round the packed double-precision (64-bit) floating-point elements in `a` +/// using the `ROUNDING` parameter, and stores the results as packed +/// double-precision floating-point elements. 
+/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_round_pd(a: __m128d) -> __m128d { + static_assert_uimm_bits!(ROUNDING, 4); + unsafe { roundpd(a, ROUNDING) } +} + +/// Round the packed single-precision (32-bit) floating-point elements in `a` +/// using the `ROUNDING` parameter, and stores the results as packed +/// single-precision floating-point elements. 
+/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ps) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_round_ps(a: __m128) -> __m128 { + static_assert_uimm_bits!(ROUNDING, 4); + unsafe { roundps(a, ROUNDING) } +} + +/// Round the lower double-precision (64-bit) floating-point element in `b` +/// using the `ROUNDING` parameter, store the result as a double-precision +/// floating-point element in the lower element of the intrinsic result, +/// and copies the upper element from `a` to the upper element of the intrinsic +/// result. 
+/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_uimm_bits!(ROUNDING, 4); + unsafe { roundsd(a, b, ROUNDING) } +} + +/// Round the lower single-precision (32-bit) floating-point element in `b` +/// using the `ROUNDING` parameter, store the result as a single-precision +/// floating-point element in the lower element of the intrinsic result, +/// and copies the upper 3 packed elements from `a` to the upper elements +/// of the intrinsic result. 
+/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_uimm_bits!(ROUNDING, 4); + unsafe { roundss(a, b, ROUNDING) } +} + +/// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector, +/// returning a vector containing its value in its first position, and its +/// index +/// in its second position; all other elements are set to zero. +/// +/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW` +/// instruction. +/// +/// Arguments: +/// +/// * `a` - A 128-bit vector of type `__m128i`. +/// +/// Returns: +/// +/// A 128-bit value where: +/// +/// * bits `[15:0]` - contain the minimum value found in parameter `a`, +/// * bits `[18:16]` - contain the index of the minimum value +/// * remaining bits are set to `0`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(phminposuw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_minpos_epu16(a: __m128i) -> __m128i { + unsafe { transmute(phminposuw(a.as_u16x8())) } +} + +/// Multiplies the low 32-bit integers from each packed 64-bit +/// element in `a` and `b`, and returns the signed 64-bit result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmuldq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2())); + let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2())); + transmute(simd_mul(a, b)) + } +} + +/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate +/// 64-bit integers, and returns the lowest 32-bit, whatever they might be, +/// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), +/// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping +/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would +/// return a negative number. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pmulld))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } +} + +/// Subtracts 8-bit unsigned integer values and computes the absolute +/// values of the differences to the corresponding bits in the destination. 
+/// Then sums of the absolute differences are returned according to the bit +/// fields in the immediate operand. +/// +/// The following algorithm is performed: +/// +/// ```ignore +/// i = IMM8[2] * 4 +/// j = IMM8[1:0] * 4 +/// for k := 0 to 7 +/// d0 = abs(a[i + k + 0] - b[j + 0]) +/// d1 = abs(a[i + k + 1] - b[j + 1]) +/// d2 = abs(a[i + k + 2] - b[j + 2]) +/// d3 = abs(a[i + k + 3] - b[j + 3]) +/// r[k] = d0 + d1 + d2 + d3 +/// ``` +/// +/// Arguments: +/// +/// * `a` - A 128-bit vector of type `__m128i`. +/// * `b` - A 128-bit vector of type `__m128i`. +/// * `IMM8` - An 8-bit immediate operand specifying how the absolute +/// differences are to be calculated +/// * Bit `[2]` specify the offset for operand `a` +/// * Bits `[1:0]` specify the offset for operand `b` +/// +/// Returns: +/// +/// * A `__m128i` vector containing the sums of the sets of absolute +/// differences between both operands. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 3); + unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) } +} + +/// Tests whether the specified bits in a 128-bit integer vector are all +/// zeros. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are all zeros, +/// * `0` - otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { + unsafe { ptestz(a.as_i64x2(), mask.as_i64x2()) } +} + +/// Tests whether the specified bits in a 128-bit integer vector are all +/// ones. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are all ones, +/// * `0` - otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { + unsafe { ptestc(a.as_i64x2(), mask.as_i64x2()) } +} + +/// Tests whether the specified bits in a 128-bit integer vector are +/// neither all zeros nor all ones. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are neither all zeros nor all ones, +/// * `0` - otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { + unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) } +} + +/// Tests whether the specified bits in a 128-bit integer vector are all +/// zeros. +/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are all zeros, +/// * `0` - otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { + _mm_testz_si128(a, mask) +} + +/// Tests whether the specified bits in `a` 128-bit integer vector are all +/// ones. +/// +/// Argument: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// +/// Returns: +/// +/// * `1` - if the bits specified in the operand are all set to 1, +/// * `0` - otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pcmpeqd))] +#[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_test_all_ones(a: __m128i) -> i32 { + _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) +} + +/// Tests whether the specified bits in a 128-bit integer vector are +/// neither all zeros nor all ones. 
+/// +/// Arguments: +/// +/// * `a` - A 128-bit integer vector containing the bits to be tested. +/// * `mask` - A 128-bit integer vector selecting which bits to test in +/// operand `a`. +/// +/// Returns: +/// +/// * `1` - if the specified bits are neither all zeros nor all ones, +/// * `0` - otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_mix_ones_zeros) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(ptest))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { + _mm_testnzc_si128(a, mask) +} + +/// Load 128-bits of integer data from memory into dst. mem_addr must be aligned on a 16-byte +/// boundary or a general-protection exception may be generated. To minimize caching, the data +/// is flagged as non-temporal (unlikely to be used again soon) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_load_si128) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(movntdqa))] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i { + let dst: __m128i; + crate::arch::asm!( + vpl!("movntdqa {a}"), + a = out(xmm_reg) dst, + p = in(reg) mem_addr, + options(pure, readonly, nostack, preserves_flags), + ); + dst +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse41.insertps"] + fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128; + #[link_name = "llvm.x86.sse41.packusdw"] + fn packusdw(a: i32x4, b: i32x4) -> u16x8; + #[link_name = "llvm.x86.sse41.dppd"] + fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; + #[link_name = "llvm.x86.sse41.dpps"] + fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128; + #[link_name = "llvm.x86.sse41.round.pd"] + fn roundpd(a: __m128d, rounding: i32) -> 
__m128d; + #[link_name = "llvm.x86.sse41.round.ps"] + fn roundps(a: __m128, rounding: i32) -> __m128; + #[link_name = "llvm.x86.sse41.round.sd"] + fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d; + #[link_name = "llvm.x86.sse41.round.ss"] + fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128; + #[link_name = "llvm.x86.sse41.phminposuw"] + fn phminposuw(a: u16x8) -> u16x8; + #[link_name = "llvm.x86.sse41.mpsadbw"] + fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8; + #[link_name = "llvm.x86.sse41.ptestz"] + fn ptestz(a: i64x2, mask: i64x2) -> i32; + #[link_name = "llvm.x86.sse41.ptestc"] + fn ptestc(a: i64x2, mask: i64x2) -> i32; + #[link_name = "llvm.x86.sse41.ptestnzc"] + fn ptestnzc(a: i64x2, mask: i64x2) -> i32; +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + use std::mem; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_blendv_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + #[rustfmt::skip] + let mask = _mm_setr_epi8( + 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, + ); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31, + ); + assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_blendv_pd() { + let a = _mm_set1_pd(0.0); + let b = _mm_set1_pd(1.0); + let mask = transmute(_mm_setr_epi64x(0, -1)); + let r = _mm_blendv_pd(a, b, mask); + let e = _mm_setr_pd(0.0, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_blendv_ps() { + let a = _mm_set1_ps(0.0); + let b = _mm_set1_ps(1.0); + let mask = transmute(_mm_setr_epi32(0, -1, 0, -1)); + let r = _mm_blendv_ps(a, b, mask); + let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); + assert_eq_m128(r, e); + } + + 
#[simd_test(enable = "sse4.1")] + unsafe fn test_mm_blend_pd() { + let a = _mm_set1_pd(0.0); + let b = _mm_set1_pd(1.0); + let r = _mm_blend_pd::<0b10>(a, b); + let e = _mm_setr_pd(0.0, 1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_blend_ps() { + let a = _mm_set1_ps(0.0); + let b = _mm_set1_ps(1.0); + let r = _mm_blend_ps::<0b1010>(a, b); + let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_blend_epi16() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let r = _mm_blend_epi16::<0b1010_1100>(a, b); + let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_extract_ps() { + let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0); + let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32); + assert_eq!(r, 1.0); + let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32); + assert_eq!(r, 3.0); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_extract_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + -1, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ); + let r1 = _mm_extract_epi8::<0>(a); + let r2 = _mm_extract_epi8::<3>(a); + assert_eq!(r1, 0xFF); + assert_eq!(r2, 3); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_extract_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let r = _mm_extract_epi32::<1>(a); + assert_eq!(r, 1); + let r = _mm_extract_epi32::<3>(a); + assert_eq!(r, 3); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_insert_ps() { + let a = _mm_set1_ps(1.0); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_insert_ps::<0b11_00_1100>(a, b); + let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); + assert_eq_m128(r, e); + + // Zeroing takes precedence over copied value + let a = _mm_set1_ps(1.0); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_insert_ps::<0b11_00_0001>(a, b); + let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0); 
+ assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_insert_epi8() { + let a = _mm_set1_epi8(0); + let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_insert_epi8::<1>(a, 32); + assert_eq_m128i(r, e); + let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0); + let r = _mm_insert_epi8::<14>(a, 32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_insert_epi32() { + let a = _mm_set1_epi32(0); + let e = _mm_setr_epi32(0, 32, 0, 0); + let r = _mm_insert_epi32::<1>(a, 32); + assert_eq_m128i(r, e); + let e = _mm_setr_epi32(0, 0, 0, 32); + let r = _mm_insert_epi32::<3>(a, 32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_max_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 4, 5, 8, 9, 12, 13, 16, + 17, 20, 21, 24, 25, 28, 29, 32, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31, + ); + let r = _mm_max_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 2, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 22, 24, 26, 28, 30, 32, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_max_epu16() { + let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm_max_epu16(a, b); + let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_max_epi32() { + let a = _mm_setr_epi32(1, 4, 5, 8); + let b = _mm_setr_epi32(2, 3, 6, 7); + let r = _mm_max_epi32(a, b); + let e = _mm_setr_epi32(2, 4, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_max_epu32() { + let a = _mm_setr_epi32(1, 4, 5, 8); + let b = _mm_setr_epi32(2, 3, 6, 7); + let r = _mm_max_epu32(a, b); + let e = _mm_setr_epi32(2, 4, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"sse4.1")] + unsafe fn test_mm_min_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 4, 5, 8, 9, 12, 13, 16, + 17, 20, 21, 24, 25, 28, 29, 32, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31, + ); + let r = _mm_min_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31, + ); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, -4, -5, 8, -9, -12, 13, -16, + 17, 20, 21, 24, 25, 28, 29, 32, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 2, -3, -6, 7, -10, -11, 14, -15, + 18, 19, 22, 23, 26, 27, 30, 31, + ); + let r = _mm_min_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 1, -4, -6, 7, -10, -12, 13, -16, + 17, 19, 21, 23, 25, 27, 29, 31, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_min_epu16() { + let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm_min_epu16(a, b); + let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_min_epi32() { + let a = _mm_setr_epi32(1, 4, 5, 8); + let b = _mm_setr_epi32(2, 3, 6, 7); + let r = _mm_min_epi32(a, b); + let e = _mm_setr_epi32(1, 3, 5, 7); + assert_eq_m128i(r, e); + + let a = _mm_setr_epi32(-1, 4, 5, -7); + let b = _mm_setr_epi32(-2, 3, -6, 8); + let r = _mm_min_epi32(a, b); + let e = _mm_setr_epi32(-2, 3, -6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_min_epu32() { + let a = _mm_setr_epi32(1, 4, 5, 8); + let b = _mm_setr_epi32(2, 3, 6, 7); + let r = _mm_min_epu32(a, b); + let e = _mm_setr_epi32(1, 3, 5, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_packus_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let b = _mm_setr_epi32(-1, -2, -3, -4); + let r = _mm_packus_epi32(a, b); + let 
e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cmpeq_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = _mm_setr_epi64x(0, 0); + let r = _mm_cmpeq_epi64(a, b); + let e = _mm_setr_epi64x(-1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepi8_epi16() { + let a = _mm_set1_epi8(10); + let r = _mm_cvtepi8_epi16(a); + let e = _mm_set1_epi16(10); + assert_eq_m128i(r, e); + let a = _mm_set1_epi8(-10); + let r = _mm_cvtepi8_epi16(a); + let e = _mm_set1_epi16(-10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepi8_epi32() { + let a = _mm_set1_epi8(10); + let r = _mm_cvtepi8_epi32(a); + let e = _mm_set1_epi32(10); + assert_eq_m128i(r, e); + let a = _mm_set1_epi8(-10); + let r = _mm_cvtepi8_epi32(a); + let e = _mm_set1_epi32(-10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepi8_epi64() { + let a = _mm_set1_epi8(10); + let r = _mm_cvtepi8_epi64(a); + let e = _mm_set1_epi64x(10); + assert_eq_m128i(r, e); + let a = _mm_set1_epi8(-10); + let r = _mm_cvtepi8_epi64(a); + let e = _mm_set1_epi64x(-10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepi16_epi32() { + let a = _mm_set1_epi16(10); + let r = _mm_cvtepi16_epi32(a); + let e = _mm_set1_epi32(10); + assert_eq_m128i(r, e); + let a = _mm_set1_epi16(-10); + let r = _mm_cvtepi16_epi32(a); + let e = _mm_set1_epi32(-10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepi16_epi64() { + let a = _mm_set1_epi16(10); + let r = _mm_cvtepi16_epi64(a); + let e = _mm_set1_epi64x(10); + assert_eq_m128i(r, e); + let a = _mm_set1_epi16(-10); + let r = _mm_cvtepi16_epi64(a); + let e = _mm_set1_epi64x(-10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepi32_epi64() { + let a = _mm_set1_epi32(10); + let r = 
_mm_cvtepi32_epi64(a); + let e = _mm_set1_epi64x(10); + assert_eq_m128i(r, e); + let a = _mm_set1_epi32(-10); + let r = _mm_cvtepi32_epi64(a); + let e = _mm_set1_epi64x(-10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepu8_epi16() { + let a = _mm_set1_epi8(10); + let r = _mm_cvtepu8_epi16(a); + let e = _mm_set1_epi16(10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepu8_epi32() { + let a = _mm_set1_epi8(10); + let r = _mm_cvtepu8_epi32(a); + let e = _mm_set1_epi32(10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepu8_epi64() { + let a = _mm_set1_epi8(10); + let r = _mm_cvtepu8_epi64(a); + let e = _mm_set1_epi64x(10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepu16_epi32() { + let a = _mm_set1_epi16(10); + let r = _mm_cvtepu16_epi32(a); + let e = _mm_set1_epi32(10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepu16_epi64() { + let a = _mm_set1_epi16(10); + let r = _mm_cvtepu16_epi64(a); + let e = _mm_set1_epi64x(10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_cvtepu32_epi64() { + let a = _mm_set1_epi32(10); + let r = _mm_cvtepu32_epi64(a); + let e = _mm_set1_epi64x(10); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_dp_pd() { + let a = _mm_setr_pd(2.0, 3.0); + let b = _mm_setr_pd(1.0, 4.0); + let e = _mm_setr_pd(14.0, 0.0); + assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_dp_ps() { + let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0); + let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0); + let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0); + assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_floor_pd() { + let a = _mm_setr_pd(2.5, 4.5); + let r = _mm_floor_pd(a); + let e = 
_mm_setr_pd(2.0, 4.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_floor_ps() { + let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); + let r = _mm_floor_ps(a); + let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_floor_sd() { + let a = _mm_setr_pd(2.5, 4.5); + let b = _mm_setr_pd(-1.5, -3.5); + let r = _mm_floor_sd(a, b); + let e = _mm_setr_pd(-2.0, 4.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_floor_ss() { + let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); + let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5); + let r = _mm_floor_ss(a, b); + let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_ceil_pd() { + let a = _mm_setr_pd(1.5, 3.5); + let r = _mm_ceil_pd(a); + let e = _mm_setr_pd(2.0, 4.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_ceil_ps() { + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let r = _mm_ceil_ps(a); + let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_ceil_sd() { + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_ceil_sd(a, b); + let e = _mm_setr_pd(-2.0, 3.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_ceil_ss() { + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5); + let r = _mm_ceil_ss(a, b); + let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_round_pd() { + let a = _mm_setr_pd(1.25, 3.75); + let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); + let e = _mm_setr_pd(1.0, 4.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_round_ps() { + let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25); + let r = 
_mm_round_ps::<_MM_FROUND_TO_ZERO>(a); + let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_round_sd() { + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b); + let e = _mm_setr_pd(-2.0, 3.5); + assert_eq_m128d(r, e); + + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b); + let e = _mm_setr_pd(-3.0, 3.5); + assert_eq_m128d(r, e); + + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b); + let e = _mm_setr_pd(-2.0, 3.5); + assert_eq_m128d(r, e); + + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b); + let e = _mm_setr_pd(-2.0, 3.5); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_round_ss() { + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b); + let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b); + let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b); + let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b); + let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_minpos_epu16_1() { + let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 
66); + let r = _mm_minpos_epu16(a); + let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_minpos_epu16_2() { + let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66); + let r = _mm_minpos_epu16(a); + let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_minpos_epu16_3() { + // Case where the minimum value is repeated + let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13); + let r = _mm_minpos_epu16(a); + let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_mul_epi32() { + { + let a = _mm_setr_epi32(1, 1, 1, 1); + let b = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_mul_epi32(a, b); + let e = _mm_setr_epi64x(1, 3); + assert_eq_m128i(r, e); + } + { + let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */); + let b = _mm_setr_epi32( + -20, -256, /* ignored */ + 666666, 666666, /* ignored */ + ); + let r = _mm_mul_epi32(a, b); + let e = _mm_setr_epi64x(-300, 823043843622); + assert_eq_m128i(r, e); + } + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_mullo_epi32() { + { + let a = _mm_setr_epi32(1, 1, 1, 1); + let b = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_mullo_epi32(a, b); + let e = _mm_setr_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + { + let a = _mm_setr_epi32(15, -2, 1234567, 99999); + let b = _mm_setr_epi32(-20, -256, 666666, -99999); + let r = _mm_mullo_epi32(a, b); + // Attention, most significant bit in r[2] is treated + // as a sign bit: + // 1234567 * 666666 = -1589877210 + let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409); + assert_eq_m128i(r, e); + } + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_minpos_epu16() { + let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3); + let r = _mm_minpos_epu16(a); + let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + 
} + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_mpsadbw_epu8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + + let r = _mm_mpsadbw_epu8::<0b000>(a, a); + let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); + assert_eq_m128i(r, e); + + let r = _mm_mpsadbw_epu8::<0b001>(a, a); + let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12); + assert_eq_m128i(r, e); + + let r = _mm_mpsadbw_epu8::<0b100>(a, a); + let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44); + assert_eq_m128i(r, e); + + let r = _mm_mpsadbw_epu8::<0b101>(a, a); + let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); + assert_eq_m128i(r, e); + + let r = _mm_mpsadbw_epu8::<0b111>(a, a); + let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_testz_si128() { + let a = _mm_set1_epi8(1); + let mask = _mm_set1_epi8(0); + let r = _mm_testz_si128(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testz_si128(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b011); + let mask = _mm_set1_epi8(0b100); + let r = _mm_testz_si128(a, mask); + assert_eq!(r, 1); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_testc_si128() { + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_testc_si128(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testc_si128(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b100); + let r = _mm_testc_si128(a, mask); + assert_eq!(r, 1); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_testnzc_si128() { + let a = _mm_set1_epi8(0); + let mask = _mm_set1_epi8(1); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 0); + 
let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b101); + let r = _mm_testnzc_si128(a, mask); + assert_eq!(r, 0); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_test_all_zeros() { + let a = _mm_set1_epi8(1); + let mask = _mm_set1_epi8(0); + let r = _mm_test_all_zeros(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_test_all_zeros(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b011); + let mask = _mm_set1_epi8(0b100); + let r = _mm_test_all_zeros(a, mask); + assert_eq!(r, 1); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_test_all_ones() { + let a = _mm_set1_epi8(-1); + let r = _mm_test_all_ones(a); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let r = _mm_test_all_ones(a); + assert_eq!(r, 0); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_test_mix_ones_zeros() { + let a = _mm_set1_epi8(0); + let mask = _mm_set1_epi8(1); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 0); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 1); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b101); + let r = _mm_test_mix_ones_zeros(a, mask); + assert_eq!(r, 0); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_stream_load_si128() { + let a = _mm_set_epi64x(5, 6); + let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _); + assert_eq_m128i(a, r); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs new file mode 100644 index 000000000000..83c51f2b70eb --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs @@ -0,0 
+1,798 @@ +//! Streaming SIMD Extensions 4.2 (SSE4.2) +//! +//! Extends SSE4.1 with STTNI (String and Text New Instructions). + +#[cfg(test)] +use stdarch_test::assert_instr; + +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, +}; + +/// String contains unsigned 8-bit characters *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000; +/// String contains unsigned 16-bit characters +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001; +/// String contains signed 8-bit characters +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010; +/// String contains unsigned 16-bit characters +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011; + +/// For each character in `a`, find if it is in `b` *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000; +/// For each character in `a`, determine if +/// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...` +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100; +/// The strings defined by `a` and `b` are equal +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000; +/// Search for the defined substring in the target +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; + +/// Do not negate results *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; +/// Negates results +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; +/// Do not negate results before the end of the string +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; +/// Negates results only before the 
end of the string +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; + +/// **Index only**: return the least significant bit *(Default)* +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000; +/// **Index only**: return the most significant bit +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000; + +/// **Mask only**: return the bit mask +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; +/// **Mask only**: return the byte mask +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8`, and return the generated mask. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrm) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistrm(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)) } +} + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8` and return the generated index. Similar to +/// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the +/// lengths of `a` and `b` to be explicitly specified. +/// +/// # Control modes +/// +/// The control specified by `IMM8` may be one or more of the following. 
+/// +/// ## Data size and signedness +/// +/// - [`_SIDD_UBYTE_OPS`] - Default +/// - [`_SIDD_UWORD_OPS`] +/// - [`_SIDD_SBYTE_OPS`] +/// - [`_SIDD_SWORD_OPS`] +/// +/// ## Comparison options +/// - [`_SIDD_CMP_EQUAL_ANY`] - Default +/// - [`_SIDD_CMP_RANGES`] +/// - [`_SIDD_CMP_EQUAL_EACH`] +/// - [`_SIDD_CMP_EQUAL_ORDERED`] +/// +/// ## Result polarity +/// - [`_SIDD_POSITIVE_POLARITY`] - Default +/// - [`_SIDD_NEGATIVE_POLARITY`] +/// +/// ## Bit returned +/// - [`_SIDD_LEAST_SIGNIFICANT`] - Default +/// - [`_SIDD_MOST_SIGNIFICANT`] +/// +/// # Examples +/// +/// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] +/// +/// ``` +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("sse4.2") { +/// # #[target_feature(enable = "sse4.2")] +/// # unsafe fn worker() { +/// let haystack = b"This is a long string of text data\r\n\tthat extends +/// multiple lines"; +/// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0"; +/// +/// let a = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) }; +/// let hop = 16; +/// let mut indexes = Vec::new(); +/// +/// // Chunk the haystack into 16 byte chunks and find +/// // the first "\r\n\t" in the chunk. +/// for (i, chunk) in haystack.chunks(hop).enumerate() { +/// let b = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const _) }; +/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); +/// if idx != 16 { +/// indexes.push((idx as usize) + (i * hop)); +/// } +/// } +/// assert_eq!(indexes, vec![34]); +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// The `_mm_cmpistri` intrinsic may also be used to find the existence of +/// one or more of a given set of characters in the haystack. 
+/// +/// ``` +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("sse4.2") { +/// # #[target_feature(enable = "sse4.2")] +/// # unsafe fn worker() { +/// // Ensure your input is 16 byte aligned +/// let password = b"hunter2\0\0\0\0\0\0\0\0\0"; +/// let special_chars = b"!@#$%^&*()[]:;<>"; +/// +/// // Load the input +/// let a = unsafe { _mm_loadu_si128(special_chars.as_ptr() as *const _) }; +/// let b = unsafe { _mm_loadu_si128(password.as_ptr() as *const _) }; +/// +/// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b +/// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY); +/// +/// if idx < 16 { +/// println!("Congrats! Your password contains a special character"); +/// # panic!("{:?} does not contain a special character", password); +/// } else { +/// println!("Your password should contain a special character"); +/// } +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// Finds the index of the first character in the haystack that is within a +/// range of characters. +/// +/// ``` +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("sse4.2") { +/// # #[target_feature(enable = "sse4.2")] +/// # unsafe fn worker() { +/// # let b = b":;<=>?@[\\]^_`abc"; +/// # let b = unsafe { _mm_loadu_si128(b.as_ptr() as *const _) }; +/// +/// // Specify the ranges of values to be searched for [A-Za-z0-9]. +/// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0"; +/// let a = unsafe { _mm_loadu_si128(a.as_ptr() as *const _) }; +/// +/// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. +/// // Which in this case will be the first alpha numeric byte found +/// // in the string. 
+/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); +/// +/// if idx < 16 { +/// println!("Found an alpha numeric character"); +/// # assert_eq!(idx, 13); +/// } else { +/// println!("Did not find an alpha numeric character"); +/// } +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// Working with 16-bit characters. +/// +/// ``` +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("sse4.2") { +/// # #[target_feature(enable = "sse4.2")] +/// # unsafe fn worker() { +/// # let mut some_utf16_words = [0u16; 8]; +/// # let mut more_utf16_words = [0u16; 8]; +/// # '❤'.encode_utf16(&mut some_utf16_words); +/// # '𝕊'.encode_utf16(&mut more_utf16_words); +/// // Load the input +/// let a = unsafe { _mm_loadu_si128(some_utf16_words.as_ptr() as *const _) }; +/// let b = unsafe { _mm_loadu_si128(more_utf16_words.as_ptr() as *const _) }; +/// +/// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and +/// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. 
+/// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); +/// +/// if idx == 0 { +/// println!("16-bit unicode strings were equal!"); +/// # panic!("Strings should not be equal!") +/// } else { +/// println!("16-bit unicode strings were not equal!"); +/// } +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistri) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistri(a: __m128i, b: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } +} + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8`, and return `1` if any character in `b` was null. +/// and `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrz) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistrz(a: __m128i, b: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } +} + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8`, and return `1` if the resulting mask was non-zero, +/// and `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrc) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistrc(a: __m128i, b: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } +} + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8`, and returns `1` if any character in `a` was null, +/// and `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrs) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistrs(a: __m128i, b: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } +} + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8`, and return bit `0` of the resulting bit mask. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistro) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistro(a: __m128i, b: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } +} + +/// Compares packed strings with implicit lengths in `a` and `b` using the +/// control in `IMM8`, and return `1` if `b` did not contain a null +/// character and the resulting mask was zero, and `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistra) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpistra(a: __m128i, b: __m128i) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } +} + +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` +/// using the control in `IMM8`, and return the generated mask. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)) } +} + +/// Compares packed strings `a` and `b` with lengths `la` and `lb` using the +/// control in `IMM8` and return the generated index. Similar to +/// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly +/// determines the length of `a` and `b`. +/// +/// # Control modes +/// +/// The control specified by `IMM8` may be one or more of the following. 
+/// +/// ## Data size and signedness +/// +/// - [`_SIDD_UBYTE_OPS`] - Default +/// - [`_SIDD_UWORD_OPS`] +/// - [`_SIDD_SBYTE_OPS`] +/// - [`_SIDD_SWORD_OPS`] +/// +/// ## Comparison options +/// - [`_SIDD_CMP_EQUAL_ANY`] - Default +/// - [`_SIDD_CMP_RANGES`] +/// - [`_SIDD_CMP_EQUAL_EACH`] +/// - [`_SIDD_CMP_EQUAL_ORDERED`] +/// +/// ## Result polarity +/// - [`_SIDD_POSITIVE_POLARITY`] - Default +/// - [`_SIDD_NEGATIVE_POLARITY`] +/// +/// ## Bit returned +/// - [`_SIDD_LEAST_SIGNIFICANT`] - Default +/// - [`_SIDD_MOST_SIGNIFICANT`] +/// +/// # Examples +/// +/// ``` +/// #[cfg(target_arch = "x86")] +/// use std::arch::x86::*; +/// #[cfg(target_arch = "x86_64")] +/// use std::arch::x86_64::*; +/// +/// # fn main() { +/// # if is_x86_feature_detected!("sse4.2") { +/// # #[target_feature(enable = "sse4.2")] +/// # unsafe fn worker() { +/// +/// // The string we want to find a substring in +/// let haystack = b"Split \r\n\t line "; +/// +/// // The string we want to search for with some +/// // extra bytes we do not want to search for. +/// let needle = b"\r\n\t ignore this "; +/// +/// let a = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) }; +/// let b = unsafe { _mm_loadu_si128(haystack.as_ptr() as *const _) }; +/// +/// // Note: We explicitly specify we only want to search `b` for the +/// // first 3 characters of a. 
+/// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); +/// +/// assert_eq!(idx, 6); +/// # } +/// # unsafe { worker(); } +/// # } +/// # } +/// ``` +/// +/// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html +/// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html +/// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html +/// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html +/// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html +/// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html +/// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html +/// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html +/// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html +/// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html +/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html +/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html +/// [`_mm_cmpistri`]: fn._mm_cmpistri.html +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } +} + +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` +/// using the control in `IMM8`, and return `1` if any character in +/// `b` was null, and `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrz) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } +} + +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` +/// using the control in `IMM8`, and return `1` if the resulting mask +/// was non-zero, and `0` otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrc) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } +} + +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` +/// using the control in `IMM8`, and return `1` if any character in +/// a was null, and `0` otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrs) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } +} + +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` +/// using the control in `IMM8`, and return bit `0` of the resulting +/// bit mask. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestro) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } +} + +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` +/// using the control in `IMM8`, and return `1` if `b` did not +/// contain a null character and the resulting mask was zero, and `0` +/// otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestra) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } +} + +/// Starting with the initial value in `crc`, return the accumulated +/// CRC32-C value for unsigned 8-bit integer `v`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u8) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { + unsafe { crc32_32_8(crc, v) } +} + +/// Starting with the initial value in `crc`, return the accumulated +/// CRC32-C value for unsigned 16-bit integer `v`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u16) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { + unsafe { crc32_32_16(crc, v) } +} + +/// Starting with the initial value in `crc`, return the accumulated +/// CRC32-C value for unsigned 32-bit integer `v`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u32) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { + unsafe { crc32_32_32(crc, v) } +} + +/// Compares packed 64-bit integers in `a` and `b` for greater-than, +/// return the results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(pcmpgtq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + // SSE 4.2 string and text comparison ops + #[link_name = "llvm.x86.sse42.pcmpestrm128"] + fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16; + #[link_name = "llvm.x86.sse42.pcmpestri128"] + fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpestriz128"] + fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpestric128"] + fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpestris128"] + fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpestrio128"] + fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpestria128"] + fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpistrm128"] + fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16; + #[link_name = "llvm.x86.sse42.pcmpistri128"] + fn pcmpistri128(a: i8x16, b: i8x16, 
imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpistriz128"] + fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpistric128"] + fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpistris128"] + fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpistrio128"] + fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32; + #[link_name = "llvm.x86.sse42.pcmpistria128"] + fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32; + // SSE 4.2 CRC instructions + #[link_name = "llvm.x86.sse42.crc32.32.8"] + fn crc32_32_8(crc: u32, v: u8) -> u32; + #[link_name = "llvm.x86.sse42.crc32.32.16"] + fn crc32_32_16(crc: u32, v: u16) -> u32; + #[link_name = "llvm.x86.sse42.crc32.32.32"] + fn crc32_32_32(crc: u32, v: u32) -> u32; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use std::ptr; + + // Currently one cannot `load` a &[u8] that is less than 16 + // in length. This makes loading strings less than 16 in length + // a bit difficult. Rather than `load` and mutate the __m128i, + // it is easier to memcpy the given string to a local slice with + // length 16 and `load` the local slice. + #[target_feature(enable = "sse4.2")] + unsafe fn str_to_m128i(s: &[u8]) -> __m128i { + assert!(s.len() <= 16); + let slice = &mut [0u8; 16]; + ptr::copy_nonoverlapping(s.as_ptr(), slice.as_mut_ptr(), s.len()); + _mm_loadu_si128(slice.as_ptr() as *const _) + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistrm() { + let a = str_to_m128i(b"Hello! Good-Bye!"); + let b = str_to_m128i(b"hello! 
good-bye!"); + let i = _mm_cmpistrm::<_SIDD_UNIT_MASK>(a, b); + #[rustfmt::skip] + let res = _mm_setr_epi8( + 0x00, !0, !0, !0, !0, !0, !0, 0x00, + !0, !0, !0, !0, 0x00, !0, !0, !0, + ); + assert_eq_m128i(i, res); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistri() { + let a = str_to_m128i(b"Hello"); + let b = str_to_m128i(b" Hello "); + let i = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(a, b); + assert_eq!(3, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistrz() { + let a = str_to_m128i(b""); + let b = str_to_m128i(b"Hello"); + let i = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(a, b); + assert_eq!(1, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistrc() { + let a = str_to_m128i(b" "); + let b = str_to_m128i(b" ! "); + let i = _mm_cmpistrc::<_SIDD_UNIT_MASK>(a, b); + assert_eq!(1, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistrs() { + let a = str_to_m128i(b"Hello"); + let b = str_to_m128i(b""); + let i = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(a, b); + assert_eq!(1, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistro() { + #[rustfmt::skip] + let a_bytes = _mm_setr_epi8( + 0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, + 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ); + #[rustfmt::skip] + let b_bytes = _mm_setr_epi8( + 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, + 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ); + let a = a_bytes; + let b = b_bytes; + let i = _mm_cmpistro::<{ _SIDD_UWORD_OPS | _SIDD_UNIT_MASK }>(a, b); + assert_eq!(0, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpistra() { + let a = str_to_m128i(b""); + let b = str_to_m128i(b"Hello!!!!!!!!!!!"); + let i = _mm_cmpistra::<_SIDD_UNIT_MASK>(a, b); + assert_eq!(1, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestrm() { + let a = str_to_m128i(b"Hello!"); + let b = str_to_m128i(b"Hello."); + let i = _mm_cmpestrm::<_SIDD_UNIT_MASK>(a, 5, b, 5); + 
#[rustfmt::skip] + let r = _mm_setr_epi8( + !0, !0, !0, !0, !0, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + ); + assert_eq_m128i(i, r); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestri() { + let a = str_to_m128i(b"bar - garbage"); + let b = str_to_m128i(b"foobar"); + let i = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6); + assert_eq!(3, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestrz() { + let a = str_to_m128i(b""); + let b = str_to_m128i(b"Hello"); + let i = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(a, 16, b, 6); + assert_eq!(1, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestrc() { + let va = str_to_m128i(b"!!!!!!!!"); + let vb = str_to_m128i(b" "); + let i = _mm_cmpestrc::<_SIDD_UNIT_MASK>(va, 7, vb, 7); + assert_eq!(0, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestrs() { + #[rustfmt::skip] + let a_bytes = _mm_setr_epi8( + 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, + 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ); + let a = a_bytes; + let b = _mm_set1_epi8(0x00); + let i = _mm_cmpestrs::<_SIDD_UWORD_OPS>(a, 8, b, 0); + assert_eq!(0, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestro() { + let a = str_to_m128i(b"Hello"); + let b = str_to_m128i(b"World"); + let i = _mm_cmpestro::<_SIDD_UBYTE_OPS>(a, 5, b, 5); + assert_eq!(0, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpestra() { + let a = str_to_m128i(b"Cannot match a"); + let b = str_to_m128i(b"Null after 14"); + let i = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(a, 14, b, 16); + assert_eq!(1, i); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_crc32_u8() { + let crc = 0x2aa1e72b; + let v = 0x2a; + let i = _mm_crc32_u8(crc, v); + assert_eq!(i, 0xf24122e4); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_crc32_u16() { + let crc = 0x8ecec3b5; + let v = 0x22b; + let i = _mm_crc32_u16(crc, v); + 
assert_eq!(i, 0x13bb2fb); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_crc32_u32() { + let crc = 0xae2912c8; + let v = 0x845fed; + let i = _mm_crc32_u32(crc, v); + assert_eq!(i, 0xffae2ed1); + } + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_cmpgt_epi64() { + let a = _mm_setr_epi64x(0, 0x2a); + let b = _mm_set1_epi64x(0x00); + let i = _mm_cmpgt_epi64(a, b); + assert_eq_m128i(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64)); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/sse4a.rs b/library/stdarch/crates/core_arch/src/x86/sse4a.rs new file mode 100644 index 000000000000..051b77d02dfe --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/sse4a.rs @@ -0,0 +1,243 @@ +//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`) + +use crate::core_arch::{simd::*, x86::*}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse4a.extrq"] + fn extrq(x: i64x2, y: i8x16) -> i64x2; + #[link_name = "llvm.x86.sse4a.extrqi"] + fn extrqi(x: i64x2, len: u8, idx: u8) -> i64x2; + #[link_name = "llvm.x86.sse4a.insertq"] + fn insertq(x: i64x2, y: i64x2) -> i64x2; + #[link_name = "llvm.x86.sse4a.insertqi"] + fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2; + #[link_name = "llvm.x86.sse4a.movnt.sd"] + fn movntsd(x: *mut f64, y: __m128d); + #[link_name = "llvm.x86.sse4a.movnt.ss"] + fn movntss(x: *mut f32, y: __m128); +} + +/// Extracts the bit range specified by `y` from the lower 64 bits of `x`. +/// +/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The +/// `[5:0]` bits of `y` specify the length of the bit-range to extract. All +/// other bits are ignored. +/// +/// If the length is zero, it is interpreted as `64`. If the length and index +/// are zero, the lower 64 bits of `x` are extracted. +/// +/// If `length == 0 && index > 0` or `length + index > 64` the result is +/// undefined. 
+#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(extrq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { + unsafe { transmute(extrq(x.as_i64x2(), y.as_i8x16())) } +} + +/// Extracts the specified bits from the lower 64 bits of the 128-bit integer vector operand at the +/// index `idx` and of the length `len`. +/// +/// `idx` specifies the index of the LSB. `len` specifies the number of bits to extract. If length +/// and index are both zero, bits `[63:0]` of parameter `x` are extracted. It is a compile-time error +/// for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero. +/// +/// Returns a 128-bit integer vector whose lower 64 bits contain the extracted bits. +#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))] +#[rustc_legacy_const_generics(1, 2)] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub fn _mm_extracti_si64(x: __m128i) -> __m128i { + // LLVM mentions that it is UB if these are not satisfied + static_assert_uimm_bits!(LEN, 6); + static_assert_uimm_bits!(IDX, 6); + static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); + unsafe { transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) } +} + +/// Inserts the `[length:0]` bits of `y` into `x` at `index`. +/// +/// The bits of `y`: +/// +/// - `[69:64]` specify the `length`, +/// - `[77:72]` specify the index. +/// +/// If the `length` is zero it is interpreted as `64`. If `index + length > 64` +/// or `index > 0 && length == 0` the result is undefined. 
+#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(insertq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { + unsafe { transmute(insertq(x.as_i64x2(), y.as_i64x2())) } +} + +/// Inserts the `len` least-significant bits from the lower 64 bits of the 128-bit integer vector operand `y` into +/// the lower 64 bits of the 128-bit integer vector operand `x` at the index `idx` and of the length `len`. +/// +/// `idx` specifies the index of the LSB. `len` specifies the number of bits to insert. If length and index +/// are both zero, bits `[63:0]` of parameter `x` are replaced with bits `[63:0]` of parameter `y`. It is a +/// compile-time error for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero. +#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))] +#[rustc_legacy_const_generics(2, 3)] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub fn _mm_inserti_si64(x: __m128i, y: __m128i) -> __m128i { + // LLVM mentions that it is UB if these are not satisfied + static_assert_uimm_bits!(LEN, 6); + static_assert_uimm_bits!(IDX, 6); + static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); + unsafe { transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) } +} + +/// Non-temporal store of `a.0` into `p`. +/// +/// Writes 64-bit data to a memory location without polluting the caches. +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. 
+#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(movntsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { + movntsd(p, a); +} + +/// Non-temporal store of `a.0` into `p`. +/// +/// Writes 32-bit data to a memory location without polluting the caches. +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(movntss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { + movntss(p, a); +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse4a")] + unsafe fn test_mm_extract_si64() { + let b = 0b0110_0000_0000_i64; + // ^^^^ bit range extracted + let x = _mm_setr_epi64x(b, 0); + let v = 0b001000___00___000100_i64; + // ^idx: 2^3 = 8 ^length = 2^2 = 4 + let y = _mm_setr_epi64x(v, 0); + let e = _mm_setr_epi64x(0b0110_i64, 0); + let r = _mm_extract_si64(x, y); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4a")] + unsafe fn test_mm_extracti_si64() { + let a = _mm_setr_epi64x(0x0123456789abcdef, 0); + let r = _mm_extracti_si64::<8, 8>(a); + let e = _mm_setr_epi64x(0xcd, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "sse4a")] + unsafe fn test_mm_insert_si64() { + let i = 0b0110_i64; + // ^^^^ bit range inserted + let z = 0b1010_1010_1010i64; + // ^^^^ bit range replaced + let e = 0b0110_1010_1010i64; + // ^^^^ replaced 1010 with 0110 + let x = _mm_setr_epi64x(z, 0); + let expected = _mm_setr_epi64x(e, 0); + let v = 
0b001000___00___000100_i64; + // ^idx: 2^3 = 8 ^length = 2^2 = 4 + let y = _mm_setr_epi64x(i, v); + let r = _mm_insert_si64(x, y); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "sse4a")] + unsafe fn test_mm_inserti_si64() { + let a = _mm_setr_epi64x(0x0123456789abcdef, 0); + let b = _mm_setr_epi64x(0x0011223344556677, 0); + let r = _mm_inserti_si64::<8, 8>(a, b); + let e = _mm_setr_epi64x(0x0123456789ab77ef, 0); + assert_eq_m128i(r, e); + } + + #[repr(align(16))] + struct MemoryF64 { + data: [f64; 2], + } + + #[simd_test(enable = "sse4a")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_sd() { + let mut mem = MemoryF64 { + data: [1.0_f64, 2.0], + }; + { + let vals = &mut mem.data; + let d = vals.as_mut_ptr(); + + let x = _mm_setr_pd(3.0, 4.0); + + _mm_stream_sd(d, x); + } + assert_eq!(mem.data[0], 3.0); + assert_eq!(mem.data[1], 2.0); + } + + #[repr(align(16))] + struct MemoryF32 { + data: [f32; 4], + } + + #[simd_test(enable = "sse4a")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_ss() { + let mut mem = MemoryF32 { + data: [1.0_f32, 2.0, 3.0, 4.0], + }; + { + let vals = &mut mem.data; + let d = vals.as_mut_ptr(); + + let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + + _mm_stream_ss(d, x); + } + assert_eq!(mem.data[0], 5.0); + assert_eq!(mem.data[1], 2.0); + assert_eq!(mem.data[2], 3.0); + assert_eq!(mem.data[3], 4.0); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/ssse3.rs b/library/stdarch/crates/core_arch/src/x86/ssse3.rs new file mode 100644 index 000000000000..2be182e88f4b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/ssse3.rs @@ -0,0 +1,656 @@ +//! 
Supplemental Streaming SIMD Extensions 3 (SSSE3) + +use crate::{ + core_arch::{simd::*, x86::*}, + intrinsics::simd::*, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Computes the absolute value of packed 8-bit signed integers in `a` and +/// return the unsigned results. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(pabsb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_abs_epi8(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let zero = i8x16::ZERO; + let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + transmute(r) + } +} + +/// Computes the absolute value of each of the packed 16-bit signed integers in +/// `a` and +/// return the 16-bit unsigned integer +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(pabsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_abs_epi16(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let zero = i16x8::ZERO; + let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + transmute(r) + } +} + +/// Computes the absolute value of each of the packed 32-bit signed integers in +/// `a` and +/// return the 32-bit unsigned integer +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(pabsd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_abs_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let zero = i32x4::ZERO; + let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + transmute(r) + } +} + +/// Shuffles bytes from `a` according to the content of `b`. 
+/// +/// The last 4 bits of each byte of `b` are used as addresses +/// into the 16 bytes of `a`. +/// +/// In addition, if the highest significant bit of a byte of `b` +/// is set, the respective destination byte is set to 0. +/// +/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is +/// logically equivalent to: +/// +/// ``` +/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] { +/// let mut r = [0u8; 16]; +/// for i in 0..16 { +/// // if the most significant bit of b is set, +/// // then the destination byte is set to 0. +/// if b[i] & 0x80 == 0u8 { +/// r[i] = a[(b[i] % 16) as usize]; +/// } +/// } +/// r +/// } +/// ``` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(pshufb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) } +} + +/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result, +/// shift the result right by `n` bytes, and returns the low 16 bytes. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128i { + static_assert_uimm_bits!(IMM8, 8); + // If palignr is shifting the pair of vectors more than the size of two + // lanes, emit zero. + if IMM8 > 32 { + return _mm_setzero_si128(); + } + // If palignr is shifting the pair of input vectors more than one lane, + // but less than two lanes, convert to shifting in zeroes. 
+ let (a, b) = if IMM8 > 16 { + (_mm_setzero_si128(), a) + } else { + (a, b) + }; + const fn mask(shift: u32, i: u32) -> u32 { + if shift > 32 { + // Unused, but needs to be a valid index. + i + } else if shift > 16 { + shift - 16 + i + } else { + shift + i + } + } + unsafe { + let r: i8x16 = simd_shuffle!( + b.as_i8x16(), + a.as_i8x16(), + [ + mask(IMM8 as u32, 0), + mask(IMM8 as u32, 1), + mask(IMM8 as u32, 2), + mask(IMM8 as u32, 3), + mask(IMM8 as u32, 4), + mask(IMM8 as u32, 5), + mask(IMM8 as u32, 6), + mask(IMM8 as u32, 7), + mask(IMM8 as u32, 8), + mask(IMM8 as u32, 9), + mask(IMM8 as u32, 10), + mask(IMM8 as u32, 11), + mask(IMM8 as u32, 12), + mask(IMM8 as u32, 13), + mask(IMM8 as u32, 14), + mask(IMM8 as u32, 15), + ], + ); + transmute(r) + } +} + +/// Horizontally adds the adjacent pairs of values contained in 2 packed +/// 128-bit vectors of `[8 x i16]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(phaddw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } +} + +/// Horizontally adds the adjacent pairs of values contained in 2 packed +/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are +/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(phaddsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) } +} + +/// Horizontally adds the adjacent pairs of values contained in 2 packed +/// 128-bit vectors of `[4 x i32]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(phaddd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) } +} + +/// Horizontally subtract the adjacent pairs of values contained in 2 +/// packed 128-bit vectors of `[8 x i16]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(phsubw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) } +} + +/// Horizontally subtract the adjacent pairs of values contained in 2 +/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than +/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are +/// saturated to 8000h. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(phsubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) } +} + +/// Horizontally subtract the adjacent pairs of values contained in 2 +/// packed 128-bit vectors of `[4 x i32]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(phsubd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } +} + +/// Multiplies corresponding pairs of packed 8-bit unsigned integer +/// values contained in the first source operand and packed 8-bit signed +/// integer values contained in the second source operand, add pairs of +/// contiguous products with signed saturation, and writes the 16-bit sums to +/// the corresponding bits in the destination. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(pmaddubsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) } +} + +/// Multiplies packed 16-bit signed integer values, truncate the 32-bit +/// product to the 18 most significant bits by right-shifting, round the +/// truncated value by adding 1, and write bits `[16:1]` to the destination. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(pmulhrsw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) } +} + +/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit +/// integer in `b` is negative, and returns the result. +/// Elements in result are zeroed out when the corresponding element in `b` +/// is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(psignb))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) } +} + +/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit +/// integer in `b` is negative, and returns the results. +/// Elements in result are zeroed out when the corresponding element in `b` +/// is zero. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(psignw))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) } +} + +/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit +/// integer in `b` is negative, and returns the results. +/// Element in result are zeroed out when the corresponding element in `b` +/// is zero. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32) +#[inline] +#[target_feature(enable = "ssse3")] +#[cfg_attr(test, assert_instr(psignd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.ssse3.pshuf.b.128"] + fn pshufb128(a: u8x16, b: u8x16) -> u8x16; + + #[link_name = "llvm.x86.ssse3.phadd.w.128"] + fn phaddw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.ssse3.phadd.sw.128"] + fn phaddsw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.ssse3.phadd.d.128"] + fn phaddd128(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.ssse3.phsub.w.128"] + fn phsubw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.ssse3.phsub.sw.128"] + fn phsubsw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.ssse3.phsub.d.128"] + fn phsubd128(a: i32x4, b: i32x4) -> i32x4; + + #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"] + fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8; + + #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"] + fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.ssse3.psign.b.128"] + fn psignb128(a: i8x16, b: i8x16) -> i8x16; + + #[link_name = "llvm.x86.ssse3.psign.w.128"] + fn psignw128(a: i16x8, b: i16x8) -> i16x8; + + #[link_name = "llvm.x86.ssse3.psign.d.128"] + fn psignd128(a: i32x4, b: i32x4) -> i32x4; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_abs_epi8() { + let r = _mm_abs_epi8(_mm_set1_epi8(-5)); + assert_eq_m128i(r, _mm_set1_epi8(5)); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_abs_epi16() { + let r = _mm_abs_epi16(_mm_set1_epi16(-5)); + assert_eq_m128i(r, _mm_set1_epi16(5)); + } + + 
#[simd_test(enable = "ssse3")] + unsafe fn test_mm_abs_epi32() { + let r = _mm_abs_epi32(_mm_set1_epi32(-5)); + assert_eq_m128i(r, _mm_set1_epi32(5)); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_shuffle_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 4, 128_u8 as i8, 4, 3, + 24, 12, 6, 19, + 12, 5, 5, 10, + 4, 1, 8, 0, + ); + let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1); + let r = _mm_shuffle_epi8(a, b); + assert_eq_m128i(r, expected); + + // Test indices greater than 15 wrapping around + let b = _mm_add_epi8(b, _mm_set1_epi8(32)); + let r = _mm_shuffle_epi8(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_alignr_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 4, 63, 4, 3, + 24, 12, 6, 19, + 12, 5, 5, 10, + 4, 1, 8, 0, + ); + let r = _mm_alignr_epi8::<33>(a, b); + assert_eq_m128i(r, _mm_set1_epi8(0)); + + let r = _mm_alignr_epi8::<17>(a, b); + #[rustfmt::skip] + let expected = _mm_setr_epi8( + 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 0, + ); + assert_eq_m128i(r, expected); + + let r = _mm_alignr_epi8::<16>(a, b); + assert_eq_m128i(r, a); + + let r = _mm_alignr_epi8::<15>(a, b); + #[rustfmt::skip] + let expected = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + assert_eq_m128i(r, expected); + + let r = _mm_alignr_epi8::<0>(a, b); + assert_eq_m128i(r, b); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_hadd_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25); + let r = _mm_hadd_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = 
_mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4); + let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4); + let expected = _mm_setr_epi16( + i16::MIN, + i16::MIN + 1, + i16::MIN + 2, + i16::MIN + 3, + i16::MAX, + i16::MAX - 1, + i16::MAX - 2, + i16::MAX - 3, + ); + let r = _mm_hadd_epi16(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_hadds_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1); + let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768); + let r = _mm_hadds_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test saturating on overflow + let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4); + let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4); + let expected = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let r = _mm_hadds_epi16(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_hadd_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let b = _mm_setr_epi32(4, 128, 4, 3); + let expected = _mm_setr_epi32(3, 7, 132, 7); + let r = _mm_hadd_epi32(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2); + let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2); + let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1); + let r = _mm_hadd_epi32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_hsub_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13); + let r = _mm_hsub_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = 
_mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4); + let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4); + let expected = _mm_setr_epi16( + i16::MIN, + i16::MIN + 1, + i16::MIN + 2, + i16::MIN + 3, + i16::MAX, + i16::MAX - 1, + i16::MAX - 2, + i16::MAX - 3, + ); + let r = _mm_hsub_epi16(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_hsubs_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1); + let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768); + let r = _mm_hsubs_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test saturating on overflow + let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4); + let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4); + let expected = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let r = _mm_hsubs_epi16(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_hsub_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let b = _mm_setr_epi32(4, 128, 4, 3); + let expected = _mm_setr_epi32(-1, -1, -124, 1); + let r = _mm_hsub_epi32(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2); + let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2); + let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1); + let r = _mm_hsub_epi32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_maddubs_epi16() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 4, 63, 4, 3, + 24, 12, 6, 19, + 12, 5, 5, 10, + 4, 1, 8, 0, + ); + let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 
66, 120); + let r = _mm_maddubs_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test widening and saturation + #[rustfmt::skip] + let a = _mm_setr_epi8( + u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, + 100, 100, 0, 0, + 0, 0, 0, 0, 0, 0, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + i8::MAX, i8::MAX, + i8::MAX, i8::MIN, + i8::MIN, i8::MIN, + 50, 15, 0, 0, 0, + 0, 0, 0, 0, 0, + ); + let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0); + let r = _mm_maddubs_epi16(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_mulhrs_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1); + let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0); + let r = _mm_mulhrs_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test extreme values + let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0); + let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0); + let r = _mm_mulhrs_epi16(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_sign_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, -14, -15, 16, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 4, 63, -4, 3, 24, 12, -6, -19, + 12, 5, -5, 10, 4, 1, -8, 0, + ); + #[rustfmt::skip] + let expected = _mm_setr_epi8( + 1, 2, -3, 4, 5, 6, -7, -8, + 9, 10, -11, 12, 13, -14, 15, 0, + ); + let r = _mm_sign_epi8(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_sign_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8); + let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1); + let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8); + let r = _mm_sign_epi16(a, b); + assert_eq_m128i(r, expected); + } 
+ + #[simd_test(enable = "ssse3")] + unsafe fn test_mm_sign_epi32() { + let a = _mm_setr_epi32(-1, 2, 3, 4); + let b = _mm_setr_epi32(1, -1, 1, 0); + let expected = _mm_setr_epi32(-1, -2, 3, 0); + let r = _mm_sign_epi32(a, b); + assert_eq_m128i(r, expected); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/tbm.rs b/library/stdarch/crates/core_arch/src/x86/tbm.rs new file mode 100644 index 000000000000..a245e693284f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/tbm.rs @@ -0,0 +1,225 @@ +//! Trailing Bit Manipulation (TBM) instruction set. +//! +//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3: +//! General-Purpose and System Instructions][amd64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available +//! instructions. +//! +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "C" { + #[link_name = "llvm.x86.tbm.bextri.u32"] + fn bextri_u32(a: u32, control: u32) -> u32; +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits `[7,0]` of `control` specify the index to the first bit in the range to +/// be extracted, and bits `[15,8]` specify the length of the range. For any bit +/// position in the specified range that lie beyond the MSB of the source operand, +/// zeroes will be written. If the range is empty, the result is zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _bextri_u32(a: u32) -> u32 { + static_assert_uimm_bits!(CONTROL, 16); + unsafe { bextri_u32(a, CONTROL) } +} + +/// Clears all bits below the least significant zero bit of `x`. 
+/// +/// If there is no zero bit in `x`, it returns zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcfill_u32(x: u32) -> u32 { + x & (x.wrapping_add(1)) +} + +/// Sets all bits of `x` to 1 except for the least significant zero bit. +/// +/// If there is no zero bit in `x`, it sets all bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blci))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blci_u32(x: u32) -> u32 { + x | !x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x` and clears all other bits. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcic))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcic_u32(x: u32) -> u32 { + !x & x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x` and clears all bits above +/// that bit. +/// +/// If there is no zero bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcmsk_u32(x: u32) -> u32 { + x ^ x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns `x`. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcs))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcs_u32(x: u32) -> u32 { + x | x.wrapping_add(1) +} + +/// Sets all bits of `x` below the least significant one. +/// +/// If there is no set bit in `x`, it sets all the bits. 
+#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blsfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blsfill_u32(x: u32) -> u32 { + x | x.wrapping_sub(1) +} + +/// Clears least significant bit and sets all other bits. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blsic))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blsic_u32(x: u32) -> u32 { + !x | x.wrapping_sub(1) +} + +/// Clears all bits below the least significant zero of `x` and sets all other +/// bits. +/// +/// If the least significant bit of `x` is `0`, it sets all bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(t1mskc))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _t1mskc_u32(x: u32) -> u32 { + !x | x.wrapping_add(1) +} + +/// Sets all bits below the least significant one of `x` and clears all other +/// bits. +/// +/// If the least significant bit of `x` is 1, it returns zero. 
+#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(tzmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _tzmsk_u32(x: u32) -> u32 { + !x & x.wrapping_sub(1) +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + #[simd_test(enable = "tbm")] + unsafe fn test_bextri_u32() { + assert_eq!(_bextri_u32::<0x0404>(0b0101_0000u32), 0b0000_0101u32); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcfill_u32() { + assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); + assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blci_u32() { + assert_eq!( + _blci_u32(0b0101_0000u32), + 0b1111_1111_1111_1111_1111_1111_1111_1110u32 + ); + assert_eq!( + _blci_u32(0b1111_1111u32), + 0b1111_1111_1111_1111_1111_1110_1111_1111u32 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcic_u32() { + assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32); + assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcmsk_u32() { + assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); + assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcs_u32() { + assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32); + assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blsfill_u32() { + assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); + assert_eq!( + _blsfill_u32(0u32), + 0b1111_1111_1111_1111_1111_1111_1111_1111u32 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blsic_u32() { + assert_eq!( + _blsic_u32(0b0101_0100u32), + 0b1111_1111_1111_1111_1111_1111_1111_1011u32 + ); + assert_eq!( + _blsic_u32(0u32), + 0b1111_1111_1111_1111_1111_1111_1111_1111u32 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_t1mskc_u32() { + assert_eq!( + 
_t1mskc_u32(0b0101_0111u32), + 0b1111_1111_1111_1111_1111_1111_1111_1000u32 + ); + assert_eq!( + _t1mskc_u32(0u32), + 0b1111_1111_1111_1111_1111_1111_1111_1111u32 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_tzmsk_u32() { + assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); + assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/test.rs b/library/stdarch/crates/core_arch/src/x86/test.rs new file mode 100644 index 000000000000..fec25ce2bc7c --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/test.rs @@ -0,0 +1,168 @@ +//! Utilities used in testing the x86 intrinsics + +use crate::core_arch::x86::*; +use std::mem::transmute; + +#[track_caller] +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +} + +#[track_caller] +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "sse2")] +pub unsafe fn get_m128d(a: __m128d, idx: usize) -> f64 { + transmute::<_, [f64; 2]>(a)[idx] +} + +#[track_caller] +#[target_feature(enable = "sse")] +pub unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "sse")] +pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 { + transmute::<_, [f32; 4]>(a)[idx] +} + +#[track_caller] +#[target_feature(enable = "avx512fp16,avx512vl")] +pub unsafe fn assert_eq_m128h(a: __m128h, b: __m128h) { + let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); + if r != 0b1111_1111 { + panic!("{:?} != {:?}", a, b); + } +} + +// not actually an intrinsic but useful in various tests as we ported from +// `i64x2::new` which is backwards from `_mm_set_epi64x` +#[target_feature(enable = "sse2")]
+pub unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { + _mm_set_epi64x(b, a) +} + +#[track_caller] +#[target_feature(enable = "avx")] +pub unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { + assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b)) +} + +#[track_caller] +#[target_feature(enable = "avx")] +pub unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) { + let cmp = _mm256_cmp_pd::<_CMP_EQ_OQ>(a, b); + if _mm256_movemask_pd(cmp) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "avx")] +pub unsafe fn get_m256d(a: __m256d, idx: usize) -> f64 { + transmute::<_, [f64; 4]>(a)[idx] +} + +#[track_caller] +#[target_feature(enable = "avx")] +pub unsafe fn assert_eq_m256(a: __m256, b: __m256) { + let cmp = _mm256_cmp_ps::<_CMP_EQ_OQ>(a, b); + if _mm256_movemask_ps(cmp) != 0b11111111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "avx")] +pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 { + transmute::<_, [f32; 8]>(a)[idx] +} + +#[track_caller] +#[target_feature(enable = "avx512fp16,avx512vl")] +pub unsafe fn assert_eq_m256h(a: __m256h, b: __m256h) { + let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); + if r != 0b11111111_11111111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "avx512f")] +pub unsafe fn get_m512(a: __m512, idx: usize) -> f32 { + transmute::<_, [f32; 16]>(a)[idx] +} + +#[target_feature(enable = "avx512f")] +pub unsafe fn get_m512d(a: __m512d, idx: usize) -> f64 { + transmute::<_, [f64; 8]>(a)[idx] +} + +#[target_feature(enable = "avx512f")] +pub unsafe fn get_m512i(a: __m512i, idx: usize) -> i64 { + transmute::<_, [i64; 8]>(a)[idx] +} + +// These intrinsics don't exist on x86 b/c they require a 64-bit register, +// which doesn't exist on x86!
+#[cfg(target_arch = "x86")] +mod x86_polyfill { + use crate::core_arch::x86::*; + use crate::intrinsics::simd::*; + + #[rustc_legacy_const_generics(2)] + pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64) -> __m128i { + static_assert_uimm_bits!(INDEX, 1); + transmute(simd_insert!(a.as_i64x2(), INDEX as u32, val)) + } + + #[target_feature(enable = "avx2")] + #[rustc_legacy_const_generics(2)] + pub unsafe fn _mm256_insert_epi64(a: __m256i, val: i64) -> __m256i { + static_assert_uimm_bits!(INDEX, 2); + transmute(simd_insert!(a.as_i64x4(), INDEX as u32, val)) + } +} + +#[cfg(target_arch = "x86_64")] +mod x86_polyfill { + pub use crate::core_arch::x86_64::{_mm_insert_epi64, _mm256_insert_epi64}; +} +pub use self::x86_polyfill::*; + +#[track_caller] +pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { + assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b)) +} + +#[track_caller] +pub unsafe fn assert_eq_m512(a: __m512, b: __m512) { + let cmp = _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b); + if cmp != 0b11111111_11111111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) { + let cmp = _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b); + if cmp != 0b11111111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "avx512fp16")] +pub unsafe fn assert_eq_m512h(a: __m512h, b: __m512h) { + let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); + if r != 0b11111111_11111111_11111111_11111111 { + panic!("{:?} != {:?}", a, b); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/vaes.rs b/library/stdarch/crates/core_arch/src/x86/vaes.rs new file mode 100644 index 000000000000..b1fe193e3f5d --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/vaes.rs @@ -0,0 +1,340 @@ +//! Vectorized AES Instructions (VAES) +//! +//! The intrinsics here correspond to those in the `immintrin.h` C header. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! 
Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::x86::__m256i; +use crate::core_arch::x86::__m512i; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.aesni.aesenc.256"] + fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i; + #[link_name = "llvm.x86.aesni.aesenclast.256"] + fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i; + #[link_name = "llvm.x86.aesni.aesdec.256"] + fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i; + #[link_name = "llvm.x86.aesni.aesdeclast.256"] + fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i; + #[link_name = "llvm.x86.aesni.aesenc.512"] + fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i; + #[link_name = "llvm.x86.aesni.aesenclast.512"] + fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i; + #[link_name = "llvm.x86.aesni.aesdec.512"] + fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i; + #[link_name = "llvm.x86.aesni.aesdeclast.512"] + fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i; +} + +/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesenc_epi128) +#[inline] +#[target_feature(enable = "vaes")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesenc))] +pub fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i { + unsafe { aesenc_256(a, round_key) } +} + +/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesenclast_epi128) +#[inline] +#[target_feature(enable = "vaes")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesenclast))] +pub fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i { + unsafe { aesenclast_256(a, round_key) } +} + +/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesdec_epi128) +#[inline] +#[target_feature(enable = "vaes")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesdec))] +pub fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i { + unsafe { aesdec_256(a, round_key) } +} + +/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesdeclast_epi128) +#[inline] +#[target_feature(enable = "vaes")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesdeclast))] +pub fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i { + unsafe { aesdeclast_256(a, round_key) } +} + +/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesenc_epi128) +#[inline] +#[target_feature(enable = "vaes,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesenc))] +pub fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i { + unsafe { aesenc_512(a, round_key) } +} + +/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesenclast_epi128) +#[inline] +#[target_feature(enable = "vaes,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesenclast))] +pub fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i { + unsafe { aesenclast_512(a, round_key) } +} + +/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesdec_epi128) +#[inline] +#[target_feature(enable = "vaes,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesdec))] +pub fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i { + unsafe { aesdec_512(a, round_key) } +} + +/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using +/// the corresponding 128-bit word (key) in `round_key`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesdeclast_epi128) +#[inline] +#[target_feature(enable = "vaes,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vaesdeclast))] +pub fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i { + unsafe { aesdeclast_512(a, round_key) } +} + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __mXXXi happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + // the first parts of these tests are straight ports from the AES-NI tests + // the second parts directly compare the two, for inputs that are different across lanes + // and "more random" than the standard test vectors + // ideally we'd be using quickcheck here instead + + #[target_feature(enable = "avx2")] + unsafe fn helper_for_256_vaes( + linear: unsafe fn(__m128i, __m128i) -> __m128i, + vectorized: unsafe fn(__m256i, __m256i) -> __m256i, + ) { + let a = _mm256_set_epi64x( + 0xDCB4DB3657BF0B7D, + 0x18DB0601068EDD9F, + 0xB76B908233200DC5, + 0xE478235FA8E22D5E, + ); + let k = _mm256_set_epi64x( + 0x672F6F105A94CEA7, + 0x8298B8FFCA5F829C, + 0xA3927047B3FB61D8, + 0x978093862CDE7187, + ); + let mut a_decomp = [_mm_setzero_si128(); 2]; + a_decomp[0] = _mm256_extracti128_si256::<0>(a); + a_decomp[1] = _mm256_extracti128_si256::<1>(a); + let mut k_decomp = [_mm_setzero_si128(); 2]; + k_decomp[0] = _mm256_extracti128_si256::<0>(k); + k_decomp[1] = _mm256_extracti128_si256::<1>(k); + let r = vectorized(a, k); + let mut e_decomp = [_mm_setzero_si128(); 2]; + for i in 0..2 { + e_decomp[i] = linear(a_decomp[i], k_decomp[i]); + } + assert_eq_m128i(_mm256_extracti128_si256::<0>(r), e_decomp[0]); + 
assert_eq_m128i(_mm256_extracti128_si256::<1>(r), e_decomp[1]); + } + + #[target_feature(enable = "sse2")] + unsafe fn setup_state_key(broadcast: unsafe fn(__m128i) -> T) -> (T, T) { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. + let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); + let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); + (broadcast(a), broadcast(k)) + } + + #[target_feature(enable = "avx2")] + unsafe fn setup_state_key_256() -> (__m256i, __m256i) { + setup_state_key(_mm256_broadcastsi128_si256) + } + + #[target_feature(enable = "avx512f")] + unsafe fn setup_state_key_512() -> (__m512i, __m512i) { + setup_state_key(_mm512_broadcast_i32x4) + } + + #[simd_test(enable = "vaes,avx512vl")] + unsafe fn test_mm256_aesdec_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. + let (a, k) = setup_state_key_256(); + let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee); + let e = _mm256_broadcastsi128_si256(e); + let r = _mm256_aesdec_epi128(a, k); + assert_eq_m256i(r, e); + + helper_for_256_vaes(_mm_aesdec_si128, _mm256_aesdec_epi128); + } + + #[simd_test(enable = "vaes,avx512vl")] + unsafe fn test_mm256_aesdeclast_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. + let (a, k) = setup_state_key_256(); + let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493); + let e = _mm256_broadcastsi128_si256(e); + let r = _mm256_aesdeclast_epi128(a, k); + assert_eq_m256i(r, e); + + helper_for_256_vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128); + } + + #[simd_test(enable = "vaes,avx512vl")] + unsafe fn test_mm256_aesenc_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. 
+ // they are repeated appropriately + let (a, k) = setup_state_key_256(); + let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333); + let e = _mm256_broadcastsi128_si256(e); + let r = _mm256_aesenc_epi128(a, k); + assert_eq_m256i(r, e); + + helper_for_256_vaes(_mm_aesenc_si128, _mm256_aesenc_epi128); + } + + #[simd_test(enable = "vaes,avx512vl")] + unsafe fn test_mm256_aesenclast_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. + let (a, k) = setup_state_key_256(); + let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); + let e = _mm256_broadcastsi128_si256(e); + let r = _mm256_aesenclast_epi128(a, k); + assert_eq_m256i(r, e); + + helper_for_256_vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128); + } + + #[target_feature(enable = "avx512f")] + unsafe fn helper_for_512_vaes( + linear: unsafe fn(__m128i, __m128i) -> __m128i, + vectorized: unsafe fn(__m512i, __m512i) -> __m512i, + ) { + let a = _mm512_set_epi64( + 0xDCB4DB3657BF0B7D, + 0x18DB0601068EDD9F, + 0xB76B908233200DC5, + 0xE478235FA8E22D5E, + 0xAB05CFFA2621154C, + 0x1171B47A186174C9, + 0x8C6B6C0E7595CEC9, + 0xBE3E7D4934E961BD, + ); + let k = _mm512_set_epi64( + 0x672F6F105A94CEA7, + 0x8298B8FFCA5F829C, + 0xA3927047B3FB61D8, + 0x978093862CDE7187, + 0xB1927AB22F31D0EC, + 0xA9A5DA619BE4D7AF, + 0xCA2590F56884FDC6, + 0x19BE9F660038BDB5, + ); + let mut a_decomp = [_mm_setzero_si128(); 4]; + a_decomp[0] = _mm512_extracti32x4_epi32::<0>(a); + a_decomp[1] = _mm512_extracti32x4_epi32::<1>(a); + a_decomp[2] = _mm512_extracti32x4_epi32::<2>(a); + a_decomp[3] = _mm512_extracti32x4_epi32::<3>(a); + let mut k_decomp = [_mm_setzero_si128(); 4]; + k_decomp[0] = _mm512_extracti32x4_epi32::<0>(k); + k_decomp[1] = _mm512_extracti32x4_epi32::<1>(k); + k_decomp[2] = _mm512_extracti32x4_epi32::<2>(k); + k_decomp[3] = _mm512_extracti32x4_epi32::<3>(k); + let r = vectorized(a, k); + let mut e_decomp = [_mm_setzero_si128(); 4]; + for i in 0..4 { + e_decomp[i] = 
linear(a_decomp[i], k_decomp[i]); + } + assert_eq_m128i(_mm512_extracti32x4_epi32::<0>(r), e_decomp[0]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<1>(r), e_decomp[1]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<2>(r), e_decomp[2]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<3>(r), e_decomp[3]); + } + + #[simd_test(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesdec_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. + let (a, k) = setup_state_key_512(); + let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee); + let e = _mm512_broadcast_i32x4(e); + let r = _mm512_aesdec_epi128(a, k); + assert_eq_m512i(r, e); + + helper_for_512_vaes(_mm_aesdec_si128, _mm512_aesdec_epi128); + } + + #[simd_test(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesdeclast_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. + let (a, k) = setup_state_key_512(); + let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493); + let e = _mm512_broadcast_i32x4(e); + let r = _mm512_aesdeclast_epi128(a, k); + assert_eq_m512i(r, e); + + helper_for_512_vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128); + } + + #[simd_test(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesenc_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. + let (a, k) = setup_state_key_512(); + let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333); + let e = _mm512_broadcast_i32x4(e); + let r = _mm512_aesenc_epi128(a, k); + assert_eq_m512i(r, e); + + helper_for_512_vaes(_mm_aesenc_si128, _mm512_aesenc_epi128); + } + + #[simd_test(enable = "vaes,avx512f")] + unsafe fn test_mm512_aesenclast_epi128() { + // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. 
+ let (a, k) = setup_state_key_512(); + let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); + let e = _mm512_broadcast_i32x4(e); + let r = _mm512_aesenclast_epi128(a, k); + assert_eq_m512i(r, e); + + helper_for_512_vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs new file mode 100644 index 000000000000..b1f23bd2f45c --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs @@ -0,0 +1,260 @@ +//! Vectorized Carry-less Multiplication (VCLMUL) +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). +//! +//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use crate::core_arch::x86::__m256i; +use crate::core_arch::x86::__m512i; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.pclmulqdq.256"] + fn pclmulqdq_256(a: __m256i, round_key: __m256i, imm8: u8) -> __m256i; + #[link_name = "llvm.x86.pclmulqdq.512"] + fn pclmulqdq_512(a: __m512i, round_key: __m512i, imm8: u8) -> __m512i; +} + +// for some odd reason on x86_64 we generate the correct long name instructions +// but on i686 we generate the short name + imm8 +// so we need to special-case on that... + +/// Performs a carry-less multiplication of two 64-bit polynomials over the +/// finite field GF(2) - in each of the 4 128-bit lanes. +/// +/// The immediate byte is used for determining which halves of each lane `a` and `b` +/// should be used. Immediate bits other than 0 and 4 are ignored. +/// All lanes share immediate byte. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_clmulepi64_epi128) +#[inline] +#[target_feature(enable = "vpclmulqdq,avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +// technically according to Intel's documentation we don't need avx512f here, however LLVM gets confused otherwise +#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm512_clmulepi64_epi128(a: __m512i, b: __m512i) -> __m512i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pclmulqdq_512(a, b, IMM8 as u8) } +} + +/// Performs a carry-less multiplication of two 64-bit polynomials over the +/// finite field GF(2) - in each of the 2 128-bit lanes. +/// +/// The immediate byte is used for determining which halves of each lane `a` and `b` +/// should be used. Immediate bits other than 0 and 4 are ignored. +/// All lanes share immediate byte. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_clmulepi64_epi128) +#[inline] +#[target_feature(enable = "vpclmulqdq")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub fn _mm256_clmulepi64_epi128(a: __m256i, b: __m256i) -> __m256i { + static_assert_uimm_bits!(IMM8, 8); + unsafe { pclmulqdq_256(a, b, IMM8 as u8) } +} + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __mXXXi happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + + macro_rules! 
verify_kat_pclmul { + ($broadcast:ident, $clmul:ident, $assert:ident) => { + // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf + let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); + let a = $broadcast(a); + let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d); + let b = $broadcast(b); + let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451); + let r00 = $broadcast(r00); + let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315); + let r01 = $broadcast(r01); + let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); + let r10 = $broadcast(r10); + let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); + let r11 = $broadcast(r11); + + $assert($clmul::<0x00>(a, b), r00); + $assert($clmul::<0x10>(a, b), r01); + $assert($clmul::<0x01>(a, b), r10); + $assert($clmul::<0x11>(a, b), r11); + + let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); + let a0 = $broadcast(a0); + let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); + let r = $broadcast(r); + $assert($clmul::<0x00>(a0, a0), r); + } + } + + macro_rules! unroll { + ($target:ident[4] = $op:ident::<4>($source:ident);) => { + $target[3] = $op::<3>($source); + $target[2] = $op::<2>($source); + unroll! {$target[2] = $op::<2>($source);} + }; + ($target:ident[2] = $op:ident::<2>($source:ident);) => { + $target[1] = $op::<1>($source); + $target[0] = $op::<0>($source); + }; + (assert_eq_m128i($op:ident::<4>($vec_res:ident),$lin_res:ident[4]);) => { + assert_eq_m128i($op::<3>($vec_res), $lin_res[3]); + assert_eq_m128i($op::<2>($vec_res), $lin_res[2]); + unroll! 
{assert_eq_m128i($op::<2>($vec_res),$lin_res[2]);} + }; + (assert_eq_m128i($op:ident::<2>($vec_res:ident),$lin_res:ident[2]);) => { + assert_eq_m128i($op::<1>($vec_res), $lin_res[1]); + assert_eq_m128i($op::<0>($vec_res), $lin_res[0]); + }; + } + + // this function tests one of the possible 4 instances + // with different inputs across lanes + #[target_feature(enable = "vpclmulqdq,avx512f")] + unsafe fn verify_512_helper( + linear: unsafe fn(__m128i, __m128i) -> __m128i, + vectorized: unsafe fn(__m512i, __m512i) -> __m512i, + ) { + let a = _mm512_set_epi64( + 0xDCB4DB3657BF0B7D, + 0x18DB0601068EDD9F, + 0xB76B908233200DC5, + 0xE478235FA8E22D5E, + 0xAB05CFFA2621154C, + 0x1171B47A186174C9, + 0x8C6B6C0E7595CEC9, + 0xBE3E7D4934E961BD, + ); + let b = _mm512_set_epi64( + 0x672F6F105A94CEA7, + 0x8298B8FFCA5F829C, + 0xA3927047B3FB61D8, + 0x978093862CDE7187, + 0xB1927AB22F31D0EC, + 0xA9A5DA619BE4D7AF, + 0xCA2590F56884FDC6, + 0x19BE9F660038BDB5, + ); + + let mut a_decomp = [_mm_setzero_si128(); 4]; + unroll! {a_decomp[4] = _mm512_extracti32x4_epi32::<4>(a);} + let mut b_decomp = [_mm_setzero_si128(); 4]; + unroll! {b_decomp[4] = _mm512_extracti32x4_epi32::<4>(b);} + + let r = vectorized(a, b); + let mut e_decomp = [_mm_setzero_si128(); 4]; + for i in 0..4 { + e_decomp[i] = linear(a_decomp[i], b_decomp[i]); + } + unroll! 
{assert_eq_m128i(_mm512_extracti32x4_epi32::<4>(r),e_decomp[4]);} + } + + // this function tests one of the possible 4 instances + // with different inputs across lanes for the VL version + #[target_feature(enable = "vpclmulqdq,avx512vl")] + unsafe fn verify_256_helper( + linear: unsafe fn(__m128i, __m128i) -> __m128i, + vectorized: unsafe fn(__m256i, __m256i) -> __m256i, + ) { + let a = _mm512_set_epi64( + 0xDCB4DB3657BF0B7D, + 0x18DB0601068EDD9F, + 0xB76B908233200DC5, + 0xE478235FA8E22D5E, + 0xAB05CFFA2621154C, + 0x1171B47A186174C9, + 0x8C6B6C0E7595CEC9, + 0xBE3E7D4934E961BD, + ); + let b = _mm512_set_epi64( + 0x672F6F105A94CEA7, + 0x8298B8FFCA5F829C, + 0xA3927047B3FB61D8, + 0x978093862CDE7187, + 0xB1927AB22F31D0EC, + 0xA9A5DA619BE4D7AF, + 0xCA2590F56884FDC6, + 0x19BE9F660038BDB5, + ); + + let mut a_decomp = [_mm_setzero_si128(); 2]; + unroll! {a_decomp[2] = _mm512_extracti32x4_epi32::<2>(a);} + let mut b_decomp = [_mm_setzero_si128(); 2]; + unroll! {b_decomp[2] = _mm512_extracti32x4_epi32::<2>(b);} + + let r = vectorized( + _mm512_extracti64x4_epi64::<0>(a), + _mm512_extracti64x4_epi64::<0>(b), + ); + let mut e_decomp = [_mm_setzero_si128(); 2]; + for i in 0..2 { + e_decomp[i] = linear(a_decomp[i], b_decomp[i]); + } + unroll! 
{assert_eq_m128i(_mm256_extracti128_si256::<2>(r),e_decomp[2]);} + } + + #[simd_test(enable = "vpclmulqdq,avx512f")] + unsafe fn test_mm512_clmulepi64_epi128() { + verify_kat_pclmul!( + _mm512_broadcast_i32x4, + _mm512_clmulepi64_epi128, + assert_eq_m512i + ); + + verify_512_helper( + |a, b| _mm_clmulepi64_si128::<0x00>(a, b), + |a, b| _mm512_clmulepi64_epi128::<0x00>(a, b), + ); + verify_512_helper( + |a, b| _mm_clmulepi64_si128::<0x01>(a, b), + |a, b| _mm512_clmulepi64_epi128::<0x01>(a, b), + ); + verify_512_helper( + |a, b| _mm_clmulepi64_si128::<0x10>(a, b), + |a, b| _mm512_clmulepi64_epi128::<0x10>(a, b), + ); + verify_512_helper( + |a, b| _mm_clmulepi64_si128::<0x11>(a, b), + |a, b| _mm512_clmulepi64_epi128::<0x11>(a, b), + ); + } + + #[simd_test(enable = "vpclmulqdq,avx512vl")] + unsafe fn test_mm256_clmulepi64_epi128() { + verify_kat_pclmul!( + _mm256_broadcastsi128_si256, + _mm256_clmulepi64_epi128, + assert_eq_m256i + ); + + verify_256_helper( + |a, b| _mm_clmulepi64_si128::<0x00>(a, b), + |a, b| _mm256_clmulepi64_epi128::<0x00>(a, b), + ); + verify_256_helper( + |a, b| _mm_clmulepi64_si128::<0x01>(a, b), + |a, b| _mm256_clmulepi64_epi128::<0x01>(a, b), + ); + verify_256_helper( + |a, b| _mm_clmulepi64_si128::<0x10>(a, b), + |a, b| _mm256_clmulepi64_epi128::<0x10>(a, b), + ); + verify_256_helper( + |a, b| _mm_clmulepi64_si128::<0x11>(a, b), + |a, b| _mm256_clmulepi64_epi128::<0x11>(a, b), + ); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs new file mode 100644 index 000000000000..10266662e13e --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -0,0 +1,233 @@ +//! 
`i586`'s `xsave` and `xsaveopt` target feature intrinsics +#![allow(clippy::module_name_repetitions)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.xsave"] + fn xsave(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xrstor"] + fn xrstor(p: *const u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsetbv"] + fn xsetbv(v: u32, hi: u32, lo: u32); + #[link_name = "llvm.x86.xgetbv"] + fn xgetbv(v: u32) -> i64; + #[link_name = "llvm.x86.xsaveopt"] + fn xsaveopt(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsavec"] + fn xsavec(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsaves"] + fn xsaves(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xrstors"] + fn xrstors(p: *const u8, hi: u32, lo: u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits `[62:0]` in `save_mask` and XCR0. +/// `mem_addr` must be aligned on a 64-byte boundary. +/// +/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of +/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsave) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xsave))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) { + xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial restore of the enabled processor states using +/// the state information stored in memory at `mem_addr`. +/// +/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstor) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xrstor))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { + xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); +} + +/// `XFEATURE_ENABLED_MASK` for `XCR` +/// +/// This intrinsic maps to `XSETBV` instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; + +/// Copies 64-bits from `val` to the extended control register (`XCR`) specified +/// by `a`. +/// +/// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsetbv) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xsetbv))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsetbv(a: u32, val: u64) { + xsetbv(a, (val >> 32) as u32, val as u32); +} + +/// Reads the contents of the extended control register `XCR` +/// specified in `xcr_no`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xgetbv) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xgetbv))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { + xgetbv(xcr_no) as u64 +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. +/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize +/// the manner in which data is saved. The performance of this instruction will +/// be equal to or better than using the `XSAVE` instruction. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaveopt) +#[inline] +#[target_feature(enable = "xsave,xsaveopt")] +#[cfg_attr(test, assert_instr(xsaveopt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) { + xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial save of the enabled processor states to memory +/// at `mem_addr`. +/// +/// `xsavec` differs from `xsave` in that it uses compaction and that it may +/// use init optimization. State is saved based on bits `[62:0]` in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsavec) +#[inline] +#[target_feature(enable = "xsave,xsavec")] +#[cfg_attr(test, assert_instr(xsavec))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) { + xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr` +/// +/// `xsaves` differs from xsave in that it can save state components +/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the +/// modified optimization. State is saved based on bits `[62:0]` in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaves) +#[inline] +#[target_feature(enable = "xsave,xsaves")] +#[cfg_attr(test, assert_instr(xsaves))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) { + xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial restore of the enabled processor states using the +/// state information stored in memory at `mem_addr`. +/// +/// `xrstors` differs from `xrstor` in that it can restore state components +/// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore +/// from an `xsave` area in which the extended region is in the standard form. +/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstors) +#[inline] +#[target_feature(enable = "xsave,xsaves")] +#[cfg_attr(test, assert_instr(xrstors))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) { + xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); +} + +#[cfg(test)] +mod tests { + use std::{fmt, prelude::v1::*}; + + use crate::core_arch::x86::*; + use stdarch_test::simd_test; + + #[repr(align(64))] + #[derive(Debug)] + struct XsaveArea { + // max size for 256-bit registers is 800 bytes: + // see https://software.intel.com/en-us/node/682996 + // max size for 512-bit registers is 2560 bytes: + // FIXME: add source + data: [u8; 2560], + } + + impl XsaveArea { + fn new() -> XsaveArea { + XsaveArea { data: [0; 2560] } + } + fn ptr(&mut self) -> *mut u8 { + self.data.as_mut_ptr() + } + } + + #[simd_test(enable = "xsave")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + 
unsafe fn test_xsave() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = XsaveArea::new(); + let mut b = XsaveArea::new(); + + _xsave(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsave(b.ptr(), m); + } + + #[simd_test(enable = "xsave")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_xgetbv() { + let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK; + + let xcr: u64 = _xgetbv(xcr_n); + let xcr_cpy: u64 = _xgetbv(xcr_n); + assert_eq!(xcr, xcr_cpy); + } + + #[simd_test(enable = "xsave,xsaveopt")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_xsaveopt() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = XsaveArea::new(); + let mut b = XsaveArea::new(); + + _xsaveopt(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsaveopt(b.ptr(), m); + } + + #[simd_test(enable = "xsave,xsavec")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_xsavec() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = XsaveArea::new(); + let mut b = XsaveArea::new(); + + _xsavec(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsavec(b.ptr(), m); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/abm.rs b/library/stdarch/crates/core_arch/src/x86_64/abm.rs new file mode 100644 index 000000000000..bf59cc463218 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/abm.rs @@ -0,0 +1,62 @@ +//! Advanced Bit Manipulation (ABM) instructions +//! +//! The POPCNT and LZCNT have their own CPUID bits to indicate support. +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +//! System Instructions][amd64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! 
[intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Counts the leading most significant zero bits. +/// +/// When the operand is zero, it returns its size in bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_lzcnt_u64) +#[inline] +#[target_feature(enable = "lzcnt")] +#[cfg_attr(test, assert_instr(lzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _lzcnt_u64(x: u64) -> u64 { + x.leading_zeros() as u64 +} + +/// Counts the bits that are set. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_popcnt64) +#[inline] +#[target_feature(enable = "popcnt")] +#[cfg_attr(test, assert_instr(popcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _popcnt64(x: i64) -> i32 { + x.count_ones() as i32 +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::arch::x86_64::*; + + #[simd_test(enable = "lzcnt")] + unsafe fn test_lzcnt_u64() { + assert_eq!(_lzcnt_u64(0b0101_1010), 57); + } + + #[simd_test(enable = "popcnt")] + unsafe fn test_popcnt64() { + assert_eq!(_popcnt64(0b0101_1010), 4); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs new file mode 100644 index 000000000000..bdc534b5a525 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -0,0 +1,154 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.addcarry.64"] + fn llvm_addcarry_u64(a: u8, b: u64, c: u64) -> 
(u8, u64); + #[link_name = "llvm.x86.addcarryx.u64"] + fn llvm_addcarryx_u64(a: u8, b: u64, c: u64, d: *mut u64) -> u8; + #[link_name = "llvm.x86.subborrow.64"] + fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64); +} + +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and the carry-out +/// is returned (carry or overflow flag). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_addcarry_u64) +#[inline] +#[cfg_attr(test, assert_instr(adc))] +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { + let (a, b) = llvm_addcarry_u64(c_in, a, b); + *out = b; + a +} + +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and +/// the carry-out is returned (carry or overflow flag). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_addcarryx_u64) +#[inline] +#[target_feature(enable = "adx")] +#[cfg_attr(test, assert_instr(adc))] +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { + llvm_addcarryx_u64(c_in, a, b, out as *mut _) +} + +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`. +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and +/// the carry-out is returned (carry or overflow flag). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_subborrow_u64) +#[inline] +#[cfg_attr(test, assert_instr(sbb))] +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { + let (a, b) = llvm_subborrow_u64(c_in, a, b); + *out = b; + a +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86_64::*; + + #[test] + fn test_addcarry_u64() { + unsafe { + let a = u64::MAX; + let mut out = 0; + + let r = _addcarry_u64(0, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u64(0, a, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, a); + + let r = _addcarry_u64(1, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 1); + + let r = _addcarry_u64(1, a, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u64(0, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 7); + + let r = _addcarry_u64(1, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 8); + } + } + + #[simd_test(enable = "adx")] + unsafe fn test_addcarryx_u64() { + let a = u64::MAX; + let mut out = 0; + + let r = _addcarry_u64(0, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u64(0, a, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, a); + + let r = _addcarry_u64(1, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 1); + + let r = _addcarry_u64(1, a, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u64(0, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 7); + + let r = _addcarry_u64(1, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 8); + } + + #[test] + fn test_subborrow_u64() { + unsafe { + let a = u64::MAX; + let mut out = 0; + + let r = _subborrow_u64(0, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u64(0, 0, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 0); + + 
let r = _subborrow_u64(1, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a - 1); + + let r = _subborrow_u64(1, 0, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u64(0, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 4); + + let r = _subborrow_u64(1, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 3); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/amx.rs b/library/stdarch/crates/core_arch/src/x86_64/amx.rs new file mode 100644 index 000000000000..4b33c0ab6c15 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/amx.rs @@ -0,0 +1,622 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Load tile configuration from a 64-byte memory location specified by mem_addr. +/// The tile configuration format is specified below, and includes the tile type pallette, +/// the number of bytes per row, and the number of rows. If the specified pallette_id is zero, +/// that signifies the init state for both the tile config and the tile data, and the tiles are zeroed. +/// Any invalid configurations will result in #GP fault. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_loadconfig&ig_expand=6875) +#[inline] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(ldtilecfg))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_loadconfig(mem_addr: *const u8) { + ldtilecfg(mem_addr); +} + +/// Stores the current tile configuration to a 64-byte memory location specified by mem_addr. +/// The tile configuration format is specified below, and includes the tile type pallette, +/// the number of bytes per row, and the number of rows. If tiles are not configured, all zeroes will be stored to memory. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_storeconfig&ig_expand=6879) +#[inline] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(sttilecfg))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_storeconfig(mem_addr: *mut u8) { + sttilecfg(mem_addr); +} + +/// Load tile rows from memory specifieid by base address and stride into destination tile dst using the tile configuration previously configured via _tile_loadconfig. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_loadd&ig_expand=6877) +#[inline] +#[rustc_legacy_const_generics(0)] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(tileloadd, DST = 0))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_loadd(base: *const u8, stride: usize) { + static_assert_uimm_bits!(DST, 3); + tileloadd64(DST as i8, base, stride); +} + +/// Release the tile configuration to return to the init state, which releases all storage it currently holds. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_release&ig_expand=6878) +#[inline] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(tilerelease))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_release() { + tilerelease(); +} + +/// Store the tile specified by src to memory specifieid by base address and stride using the tile configuration previously configured via _tile_loadconfig. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_stored&ig_expand=6881) +#[inline] +#[rustc_legacy_const_generics(0)] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(tilestored, DST = 0))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_stored(base: *mut u8, stride: usize) { + static_assert_uimm_bits!(DST, 3); + tilestored64(DST as i8, base, stride); +} + +/// Load tile rows from memory specifieid by base address and stride into destination tile dst using the tile configuration +/// previously configured via _tile_loadconfig. This intrinsic provides a hint to the implementation that the data will +/// likely not be reused in the near future and the data caching can be optimized accordingly. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_stream_loadd&ig_expand=6883) +#[inline] +#[rustc_legacy_const_generics(0)] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(tileloaddt1, DST = 0))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_stream_loadd(base: *const u8, stride: usize) { + static_assert_uimm_bits!(DST, 3); + tileloaddt164(DST as i8, base, stride); +} + +/// Zero the tile specified by tdest. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_zero&ig_expand=6885) +#[inline] +#[rustc_legacy_const_generics(0)] +#[target_feature(enable = "amx-tile")] +#[cfg_attr(test, assert_instr(tilezero, DST = 0))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_zero() { + static_assert_uimm_bits!(DST, 3); + tilezero(DST as i8); +} + +/// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in dst, and store the 32-bit result back to tile dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_dpbf16ps&ig_expand=6864) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-bf16")] +#[cfg_attr(test, assert_instr(tdpbf16ps, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_dpbf16ps() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tdpbf16ps(DST as i8, A as i8, B as i8); +} + +/// Compute dot-product of bytes in tiles with a source/destination accumulator. +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding +/// signed 8-bit integers in b, producing 4 intermediate 32-bit results. +/// Sum these 4 results with the corresponding 32-bit integer in dst, and store the 32-bit result back to tile dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_dpbssd&ig_expand=6866) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-int8")] +#[cfg_attr(test, assert_instr(tdpbssd, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_dpbssd() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tdpbssd(DST as i8, A as i8, B as i8); +} + +/// Compute dot-product of bytes in tiles with a source/destination accumulator. +/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding +/// unsigned 8-bit integers in b, producing 4 intermediate 32-bit results. +/// Sum these 4 results with the corresponding 32-bit integer in dst, and store the 32-bit result back to tile dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_dpbsud&ig_expand=6868) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-int8")] +#[cfg_attr(test, assert_instr(tdpbsud, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_dpbsud() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tdpbsud(DST as i8, A as i8, B as i8); +} + +/// Compute dot-product of bytes in tiles with a source/destination accumulator. +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding +/// signed 8-bit integers in b, producing 4 intermediate 32-bit results. +/// Sum these 4 results with the corresponding 32-bit integer in dst, and store the 32-bit result back to tile dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_dpbusd&ig_expand=6870) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-int8")] +#[cfg_attr(test, assert_instr(tdpbusd, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_dpbusd() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tdpbusd(DST as i8, A as i8, B as i8); +} + +/// Compute dot-product of bytes in tiles with a source/destination accumulator. +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding +/// unsigned 8-bit integers in b, producing 4 intermediate 32-bit results. +/// Sum these 4 results with the corresponding 32-bit integer in dst, and store the 32-bit result back to tile dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_dpbuud&ig_expand=6872) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-int8")] +#[cfg_attr(test, assert_instr(tdpbuud, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_dpbuud() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tdpbuud(DST as i8, A as i8, B as i8); +} + +/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles a and b, +/// accumulating the intermediate single-precision (32-bit) floating-point elements +/// with elements in dst, and store the 32-bit result back to tile dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_dpfp16ps&ig_expand=6874) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-fp16")] +#[cfg_attr(test, assert_instr(tdpfp16ps, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_dpfp16ps() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tdpfp16ps(DST as i8, A as i8, B as i8); +} + +/// Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. +/// Each dword element in input tiles a and b is interpreted as a complex number with FP16 real part and FP16 imaginary part. +/// Calculates the imaginary part of the result. For each possible combination of (row of a, column of b), +/// it performs a set of multiplication and accumulations on all corresponding complex numbers (one from a and one from b). +/// The imaginary part of the a element is multiplied with the real part of the corresponding b element, and the real part of +/// the a element is multiplied with the imaginary part of the corresponding b elements. The two accumulated results are added, +/// and then accumulated into the corresponding row and column of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_cmmimfp16ps&ig_expand=6860) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-complex")] +#[cfg_attr(test, assert_instr(tcmmimfp16ps, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_cmmimfp16ps() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tcmmimfp16ps(DST as i8, A as i8, B as i8); +} + +/// Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. +/// Each dword element in input tiles a and b is interpreted as a complex number with FP16 real part and FP16 imaginary part. +/// Calculates the real part of the result. For each possible combination of (row of a, column of b), +/// it performs a set of multiplication and accumulations on all corresponding complex numbers (one from a and one from b). +/// The real part of the a element is multiplied with the real part of the corresponding b element, and the negated imaginary part of +/// the a element is multiplied with the imaginary part of the corresponding b elements. +/// The two accumulated results are added, and then accumulated into the corresponding row and column of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tile_cmmrlfp16ps&ig_expand=6862) +#[inline] +#[rustc_legacy_const_generics(0, 1, 2)] +#[target_feature(enable = "amx-complex")] +#[cfg_attr(test, assert_instr(tcmmrlfp16ps, DST = 0, A = 1, B = 2))] +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub unsafe fn _tile_cmmrlfp16ps() { + static_assert_uimm_bits!(DST, 3); + static_assert_uimm_bits!(A, 3); + static_assert_uimm_bits!(B, 3); + tcmmrlfp16ps(DST as i8, A as i8, B as i8); +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.ldtilecfg"] + fn ldtilecfg(mem_addr: *const u8); + #[link_name = "llvm.x86.sttilecfg"] + fn sttilecfg(mem_addr: *mut u8); + #[link_name = "llvm.x86.tileloadd64"] + fn tileloadd64(dst: i8, base: *const u8, stride: usize); + #[link_name = "llvm.x86.tileloaddt164"] + fn tileloaddt164(dst: i8, base: *const u8, stride: usize); + #[link_name = "llvm.x86.tilerelease"] + fn tilerelease(); + #[link_name = "llvm.x86.tilestored64"] + fn tilestored64(dst: i8, base: *mut u8, stride: usize); + #[link_name = "llvm.x86.tilezero"] + fn tilezero(dst: i8); + #[link_name = "llvm.x86.tdpbf16ps"] + fn tdpbf16ps(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tdpbuud"] + fn tdpbuud(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tdpbusd"] + fn tdpbusd(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tdpbsud"] + fn tdpbsud(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tdpbssd"] + fn tdpbssd(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tdpfp16ps"] + fn tdpfp16ps(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tcmmimfp16ps"] + fn tcmmimfp16ps(dst: i8, a: i8, b: i8); + #[link_name = "llvm.x86.tcmmrlfp16ps"] + fn tcmmrlfp16ps(dst: i8, a: i8, b: i8); +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::_mm_cvtness_sbh; + use crate::core_arch::x86_64::*; + use core::mem::transmute; + use stdarch_test::simd_test; + #[cfg(target_os = "linux")] + 
use syscalls::{Sysno, syscall}; + + #[allow(non_camel_case_types)] + #[repr(packed)] + #[derive(Copy, Clone, Default, Debug, PartialEq)] + struct __tilecfg { + /// 0 `or` 1 + palette: u8, + start_row: u8, + /// reserved, must be zero + reserved_a0: [u8; 14], + /// number of bytes of one row in each tile + colsb: [u16; 8], + /// reserved, must be zero + reserved_b0: [u16; 8], + /// number of rows in each tile + rows: [u8; 8], + /// reserved, must be zero + reserved_c0: [u8; 8], + } + + impl __tilecfg { + fn new(palette: u8, start_row: u8, colsb: [u16; 8], rows: [u8; 8]) -> Self { + Self { + palette, + start_row, + reserved_a0: [0u8; 14], + colsb, + reserved_b0: [0u16; 8], + rows, + reserved_c0: [0u8; 8], + } + } + + const fn as_ptr(&self) -> *const u8 { + self as *const Self as *const u8 + } + + fn as_mut_ptr(&mut self) -> *mut u8 { + self as *mut Self as *mut u8 + } + } + + #[cfg(not(target_os = "linux"))] + #[target_feature(enable = "amx-tile")] + fn _init_amx() {} + + #[cfg(target_os = "linux")] + #[target_feature(enable = "amx-tile")] + #[inline] + unsafe fn _init_amx() { + let mut ret: usize; + let mut xfeatures: usize = 0; + ret = syscall!(Sysno::arch_prctl, 0x1022, &mut xfeatures as *mut usize) + .expect("arch_prctl ARCH_GET_XCOMP_PERM syscall failed"); + if ret != 0 { + panic!("Failed to get XFEATURES"); + } else { + match 0b11 & (xfeatures >> 17) { + 0 => panic!("AMX is not available"), + 1 => { + ret = syscall!(Sysno::arch_prctl, 0x1023, 18) + .expect("arch_prctl ARCH_REQ_XCOMP_PERM syscall failed"); + if ret != 0 { + panic!("Failed to enable AMX"); + } + } + 3 => {} + _ => unreachable!(), + } + } + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_loadconfig() { + let config = __tilecfg::default(); + _tile_loadconfig(config.as_ptr()); + _tile_release(); + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_storeconfig() { + let config = __tilecfg::new(1, 0, [32; 8], [8; 8]); + _tile_loadconfig(config.as_ptr()); + let mut _config = 
__tilecfg::default(); + _tile_storeconfig(_config.as_mut_ptr()); + _tile_release(); + assert_eq!(config, _config); + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_zero() { + _init_amx(); + let mut config = __tilecfg::default(); + config.palette = 1; + config.colsb[0] = 64; + config.rows[0] = 16; + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + let mut out = [[1_i8; 64]; 16]; + _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64); + _tile_release(); + assert_eq!(out, [[0; 64]; 16]); + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_stored() { + _init_amx(); + let mut config = __tilecfg::default(); + config.palette = 1; + config.colsb[0] = 64; + config.rows[0] = 16; + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + let mut out = [[1_i8; 64]; 16]; + _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64); + _tile_release(); + assert_eq!(out, [[0; 64]; 16]); + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_loadd() { + _init_amx(); + let mut config = __tilecfg::default(); + config.palette = 1; + config.colsb[0] = 64; + config.rows[0] = 16; + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + let mat = [1_i8; 1024]; + _tile_loadd::<0>(&mat as *const i8 as *const u8, 64); + let mut out = [[0_i8; 64]; 16]; + _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64); + _tile_release(); + assert_eq!(out, [[1; 64]; 16]); + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_stream_loadd() { + _init_amx(); + let mut config = __tilecfg::default(); + config.palette = 1; + config.colsb[0] = 64; + config.rows[0] = 16; + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + let mat = [1_i8; 1024]; + _tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64); + let mut out = [[0_i8; 64]; 16]; + _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64); + _tile_release(); + assert_eq!(out, [[1; 64]; 16]); + } + + #[simd_test(enable = "amx-tile")] + unsafe fn test_tile_release() { + 
_tile_release(); + } + + #[simd_test(enable = "amx-bf16,avx512f")] + unsafe fn test_tile_dpbf16ps() { + _init_amx(); + let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits(); + let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits(); + let ones: [u8; 1024] = transmute([bf16_1; 512]); + let twos: [u8; 1024] = transmute([bf16_2; 512]); + let mut res = [[0f32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const u8, 64); + _tile_loadd::<2>(&twos as *const u8, 64); + _tile_dpbf16ps::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[64f32; 16]; 16]); + } + + #[simd_test(enable = "amx-int8")] + unsafe fn test_tile_dpbssd() { + _init_amx(); + let ones = [-1_i8; 1024]; + let twos = [-2_i8; 1024]; + let mut res = [[0_i32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const i8 as *const u8, 64); + _tile_loadd::<2>(&twos as *const i8 as *const u8, 64); + _tile_dpbssd::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[128_i32; 16]; 16]); + } + + #[simd_test(enable = "amx-int8")] + unsafe fn test_tile_dpbsud() { + _init_amx(); + let ones = [-1_i8; 1024]; + let twos = [2_u8; 1024]; + let mut res = [[0_i32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const i8 as *const u8, 64); + _tile_loadd::<2>(&twos as *const u8, 64); + _tile_dpbsud::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [i32; 16] 
as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[-128_i32; 16]; 16]); + } + + #[simd_test(enable = "amx-int8")] + unsafe fn test_tile_dpbusd() { + _init_amx(); + let ones = [1_u8; 1024]; + let twos = [-2_i8; 1024]; + let mut res = [[0_i32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const u8, 64); + _tile_loadd::<2>(&twos as *const i8 as *const u8, 64); + _tile_dpbusd::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[-128_i32; 16]; 16]); + } + + #[simd_test(enable = "amx-int8")] + unsafe fn test_tile_dpbuud() { + _init_amx(); + let ones = [1_u8; 1024]; + let twos = [2_u8; 1024]; + let mut res = [[0_i32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const u8, 64); + _tile_loadd::<2>(&twos as *const u8, 64); + _tile_dpbuud::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[128_i32; 16]; 16]); + } + + #[simd_test(enable = "amx-fp16")] + unsafe fn test_tile_dpfp16ps() { + _init_amx(); + let ones = [1f16; 512]; + let twos = [2f16; 512]; + let mut res = [[0f32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const f16 as *const u8, 64); + _tile_loadd::<2>(&twos as *const f16 as *const u8, 64); + _tile_dpfp16ps::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[64f32; 16]; 16]); + } + + #[simd_test(enable 
= "amx-complex")] + unsafe fn test_tile_cmmimfp16ps() { + _init_amx(); + let ones = [1f16; 512]; + let twos = [2f16; 512]; + let mut res = [[0f32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const f16 as *const u8, 64); + _tile_loadd::<2>(&twos as *const f16 as *const u8, 64); + _tile_cmmimfp16ps::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[64f32; 16]; 16]); + } + + #[simd_test(enable = "amx-complex")] + unsafe fn test_tile_cmmrlfp16ps() { + _init_amx(); + let ones = [1f16; 512]; + let twos = [2f16; 512]; + let mut res = [[0f32; 16]; 16]; + let mut config = __tilecfg::default(); + config.palette = 1; + (0..=2).for_each(|i| { + config.colsb[i] = 64; + config.rows[i] = 16; + }); + _tile_loadconfig(config.as_ptr()); + _tile_zero::<0>(); + _tile_loadd::<1>(&ones as *const f16 as *const u8, 64); + _tile_loadd::<2>(&twos as *const f16 as *const u8, 64); + _tile_cmmrlfp16ps::<0, 1, 2>(); + _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64); + _tile_release(); + assert_eq!(res, [[0f32; 16]; 16]); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs new file mode 100644 index 000000000000..b494385e4a61 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs @@ -0,0 +1,65 @@ +//! Advanced Vector Extensions (AVX) +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. - [AMD64 Architecture +//! Programmer's Manual, Volume 3: General-Purpose and System +//! Instructions][amd64_ref]. +//! +//! [Wikipedia][wiki] provides a quick overview of the instructions available. +//! +//! 
[intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions + +use crate::{core_arch::x86::*, mem::transmute}; + +/// Copies `a` to result, and insert the 64-bit integer `i` into result +/// at the location specified by `index`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi64) +#[inline] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_insert_epi64(a: __m256i, i: i64) -> __m256i { + static_assert_uimm_bits!(INDEX, 2); + unsafe { transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) } +} + +/// Extracts a 64-bit integer from `a`, selected with `INDEX`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi64) +#[inline] +#[target_feature(enable = "avx")] +#[rustc_legacy_const_generics(1)] +// This intrinsic has no corresponding instruction. 
+#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm256_extract_epi64(a: __m256i) -> i64 { + static_assert_uimm_bits!(INDEX, 2); + unsafe { simd_extract!(a.as_i64x4(), INDEX as u32) } +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::arch::x86_64::*; + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_insert_epi64() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_insert_epi64::<3>(a, 0); + let e = _mm256_setr_epi64x(1, 2, 3, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi64() { + let a = _mm256_setr_epi64x(0, 1, 2, 3); + let r = _mm256_extract_epi64::<3>(a); + assert_eq!(r, 3); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512bw.rs new file mode 100644 index 000000000000..466c36ef31e5 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/avx512bw.rs @@ -0,0 +1,45 @@ +use crate::core_arch::x86::*; + +/// Convert 64-bit mask a into an integer value, and store the result in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask64_u64) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _cvtmask64_u64(a: __mmask64) -> u64 { + a +} + +/// Convert integer value a into an 64-bit mask, and store the result in k. 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu64_mask64) +#[inline] +#[target_feature(enable = "avx512bw")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub fn _cvtu64_mask64(a: u64) -> __mmask64 { + a +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::{x86::*, x86_64::*}; + + #[simd_test(enable = "avx512bw")] + unsafe fn test_cvtmask64_u64() { + let a: __mmask64 = 0b11001100_00110011_01100110_10011001; + let r = _cvtmask64_u64(a); + let e: u64 = 0b11001100_00110011_01100110_10011001; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512bw")] + unsafe fn test_cvtu64_mask64() { + let a: u64 = 0b11001100_00110011_01100110_10011001; + let r = _cvtu64_mask64(a); + let e: __mmask64 = 0b11001100_00110011_01100110_10011001; + assert_eq!(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs new file mode 100644 index 000000000000..934c9e2812c4 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs @@ -0,0 +1,13014 @@ +use crate::{ + core_arch::{simd::*, x86::*, x86_64::*}, + mem::transmute, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_i64&expand=1792) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2si))] +pub fn _mm_cvtsd_i64(a: __m128d) -> i64 { + _mm_cvtsd_si64(a) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_i64&expand=1894) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2si))] +pub fn _mm_cvtss_i64(a: __m128) -> i64 { + _mm_cvtss_si64(a) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_u64&expand=1902) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2usi))] +pub fn _mm_cvtss_u64(a: __m128) -> u64 { + unsafe { vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_u64&expand=1800) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2usi))] +pub fn _mm_cvtsd_u64(a: __m128d) -> u64 { + unsafe { vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti64_ss&expand=1643) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2ss))] +pub fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } +} + +/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvti64_sd&expand=1644) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2sd))] +pub fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } +} + +/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu64_ss&expand=2035) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2ss))] +pub fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } +} + +/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu64_sd&expand=2034) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2sd))] +pub fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i64&expand=2016) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2si))] +pub fn _mm_cvttsd_i64(a: __m128d) -> i64 { + unsafe { vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u64&expand=2021) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2usi))] +pub fn _mm_cvttsd_u64(a: __m128d) -> u64 { + unsafe { vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm_cvttss_i64&expand=2023) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2si))] +pub fn _mm_cvttss_i64(a: __m128) -> i64 { + unsafe { vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u64&expand=2027) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2usi))] +pub fn _mm_cvttss_u64(a: __m128) -> u64 { + unsafe { vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundi64_sd&expand=1313) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let r = vcvtsi2sd64(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsi64_sd&expand=1367) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let r = vcvtsi2sd64(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundi64_ss&expand=1314) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss64(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundu64_sd&expand=1379) 
+#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let r = vcvtusi2sd64(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsi64_ss&expand=1368) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss64(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, 
which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundu64_ss&expand=1380) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtusi2ss64(a, b, ROUNDING); + transmute(r) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_si64&expand=1360) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, 
assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si64(a, ROUNDING) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_i64&expand=1358) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si64(a, ROUNDING) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate 
and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u64&expand=1365) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2usi64(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundss_si64&expand=1375) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundss_si64(a: __m128) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si64(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which 
can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundss_i64&expand=1370) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundss_i64(a: __m128) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si64(a, ROUNDING) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundss_u64&expand=1377) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] 
+#[rustc_legacy_const_generics(1)] +pub fn _mm_cvt_roundss_u64(a: __m128) -> u64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2usi64(a, ROUNDING) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si64&expand=1931) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si64(a, SAE) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i64&expand=1929) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si64(a, SAE) + } +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_u64&expand=1933) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2usi64(a, SAE) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_i64&expand=1935) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si64(a, SAE) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_si64&expand=1937) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si64(a, SAE) + } +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_u64&expand=1939) +#[inline] +#[target_feature(enable = "avx512f")] +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2usi64(a, SAE) + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512.vcvtss2si64"] + fn vcvtss2si64(a: f32x4, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512.vcvtss2usi64"] + fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64; + #[link_name = "llvm.x86.avx512.vcvtsd2si64"] + fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512.vcvtsd2usi64"] + fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64; + + #[link_name = "llvm.x86.avx512.cvtsi2ss64"] + fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.cvtsi2sd64"] + fn vcvtsi2sd64(a: f64x2, b: i64, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.cvtusi642ss"] + fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4; + #[link_name = 
"llvm.x86.avx512.cvtusi642sd"] + fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2; + + #[link_name = "llvm.x86.avx512.cvttss2si64"] + fn vcvttss2si64(a: f32x4, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512.cvttss2usi64"] + fn vcvttss2usi64(a: f32x4, rounding: i32) -> u64; + #[link_name = "llvm.x86.avx512.cvttsd2si64"] + fn vcvttsd2si64(a: f64x2, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512.cvttsd2usi64"] + fn vcvttsd2usi64(a: f64x2, rounding: i32) -> u64; +} + +#[cfg(test)] +mod tests { + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::core_arch::x86_64::*; + use crate::hint::black_box; + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi64() { + let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let r = _mm512_abs_epi64(a); + let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi64() { + let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let r = _mm512_mask_abs_epi64(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi64(a, 0b11111111, a); + let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi64() { + let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let r = _mm512_maskz_abs_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi64(0b11111111, a); + let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_abs_epi64() { + let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100); + let r = _mm256_abs_epi64(a); + let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_abs_epi64() { + let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100); + let r = _mm256_mask_abs_epi64(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_abs_epi64(a, 0b00001111, a); + let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_abs_epi64() { + let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100); + let r = _mm256_maskz_abs_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_abs_epi64(0b00001111, a); + let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_abs_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let r = _mm_abs_epi64(a); + let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1)); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(100, -100); + let r = _mm_abs_epi64(a); + let e = _mm_set_epi64x(100, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_abs_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let r = _mm_mask_abs_epi64(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_abs_epi64(a, 0b00000011, a); + let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1)); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(100, -100); + let r = _mm_mask_abs_epi64(a, 0b00000011, a); + let e = _mm_set_epi64x(100, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_abs_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let r = _mm_maskz_abs_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_abs_epi64(0b00000011, a); + let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1)); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(100, -100); + let r = 
_mm_maskz_abs_epi64(0b00000011, a); + let e = _mm_set_epi64x(100, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let r = _mm512_abs_pd(a); + let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MAX, 100., 100., 32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let r = _mm512_mask_abs_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_abs_pd(a, 0b00001111, a); + let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MIN, 100., -100., -32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mov_epi64() { + let src = _mm512_set1_epi64(1); + let a = _mm512_set1_epi64(2); + let r = _mm512_mask_mov_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_mov_epi64(src, 0b11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mov_epi64() { + let a = _mm512_set1_epi64(2); + let r = _mm512_maskz_mov_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mov_epi64(0b11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mov_epi64() { + let src = _mm256_set1_epi64x(1); + let a = _mm256_set1_epi64x(2); + let r = _mm256_mask_mov_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_mov_epi64(src, 0b00001111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mov_epi64() { + let a = _mm256_set1_epi64x(2); + let r = _mm256_maskz_mov_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mov_epi64(0b00001111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_mov_epi64() { + let src = _mm_set1_epi64x(1); + let a = _mm_set1_epi64x(2); + let r = _mm_mask_mov_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_mov_epi64(src, 0b00000011, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mov_epi64() { + let a = _mm_set1_epi64x(2); + let r = _mm_maskz_mov_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mov_epi64(0b00000011, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mov_pd() { + let src = _mm512_set1_pd(1.); + let a = _mm512_set1_pd(2.); + let r = _mm512_mask_mov_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_mov_pd(src, 0b11111111, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mov_pd() { + let a = _mm512_set1_pd(2.); + let r = _mm512_maskz_mov_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_mov_pd(0b11111111, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mov_pd() { + let src = _mm256_set1_pd(1.); + let a = _mm256_set1_pd(2.); + let r = _mm256_mask_mov_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_mov_pd(src, 0b00001111, a); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mov_pd() { + let a = _mm256_set1_pd(2.); + let r = _mm256_maskz_mov_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_mov_pd(0b00001111, a); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mov_pd() { + let src = _mm_set1_pd(1.); + let a = _mm_set1_pd(2.); + let r = _mm_mask_mov_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_mov_pd(src, 0b00000011, a); + assert_eq_m128d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mov_pd() { + let a = 
_mm_set1_pd(2.); + let r = _mm_maskz_mov_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_mov_pd(0b00000011, a); + assert_eq_m128d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_add_epi64(a, b); + let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN + 1, 101, -99, -31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_mask_add_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_add_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN, 100, -100, -32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_maskz_add_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_add_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_add_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_mask_add_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_add_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_add_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_maskz_add_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = 
_mm256_maskz_add_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_add_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_mask_add_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_add_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_add_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_maskz_add_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_add_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_add_pd(a, b); + let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN + 1., 101., -99., -31.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_mask_add_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_add_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN, 100., -100., -32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_maskz_add_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_add_pd(0b00001111, a, b); + let e = _mm512_setr_pd(1., 2., 0., f64::MAX, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } 
+ + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_add_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_mask_add_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_add_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_add_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_maskz_add_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_add_pd(0b00001111, a, b); + let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_add_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_mask_add_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_add_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::MAX, f64::MIN + 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_add_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_maskz_add_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_add_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::MAX, f64::MIN + 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_sub_epi64(a, b); + let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MAX, 99, -101, -33); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_mask_sub_epi64(a, 0, 
a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_sub_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MIN, 100, -100, -32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_maskz_sub_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sub_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sub_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_mask_sub_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_sub_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sub_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_maskz_sub_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sub_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sub_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_mask_sub_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_sub_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sub_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_maskz_sub_epi64(0, a, b); + assert_eq_m128i(r, 
_mm_setzero_si128()); + let r = _mm_maskz_sub_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_sub_pd(a, b); + let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 99., -101., -33.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_mask_sub_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_sub_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 100., -100., -32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_maskz_sub_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sub_pd(0b00001111, a, b); + let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sub_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_mask_sub_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_sub_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sub_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_maskz_sub_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_sub_pd(0b00001111, a, b); + let e = 
_mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sub_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_mask_sub_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_sub_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::MAX - 1., f64::MIN); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sub_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_maskz_sub_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_sub_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::MAX - 1., f64::MIN); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mul_epi32(a, b); + let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_mul_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mul_epi32(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, + 7, 5, 3, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_mul_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mul_epi32(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_mask_mul_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mul_epi32(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_mul_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mul_epi32(0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_mask_mul_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mul_epi32(a, 0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_mul_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mul_epi32(0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_epu32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mul_epu32(a, b); + let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_epu32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_mul_epu32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = 
_mm512_mask_mul_epu32(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, + 7, 5, 3, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_epu32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_mul_epu32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mul_epu32(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_epu32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_mask_mul_epu32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mul_epu32(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_epu32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_mul_epu32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mul_epu32(0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_epu32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_mask_mul_epu32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mul_epu32(a, 0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_epu32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_mul_epu32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mul_epu32(0b00000011, a, b); + let e = 
_mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mullox_epi64() { + let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32); + let b = _mm512_set1_epi64(2); + let r = _mm512_mullox_epi64(a, b); + let e = _mm512_setr_epi64(0, 2, -2, 0, -2, 200, -200, -64); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mullox_epi64() { + let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32); + let b = _mm512_set1_epi64(2); + let r = _mm512_mask_mullox_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mullox_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(0, 2, -2, 0, i64::MAX, 100, -100, -32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_set1_pd(2.); + let r = _mm512_mul_pd(a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 2., f64::INFINITY, f64::NEG_INFINITY, + f64::INFINITY, f64::NEG_INFINITY, -200., -64., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_set1_pd(2.); + let r = _mm512_mask_mul_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_mul_pd(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 2., f64::INFINITY, f64::NEG_INFINITY, + f64::MAX, f64::MIN, -100., -32., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_set1_pd(2.); + let r = _mm512_maskz_mul_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_mul_pd(0b00001111, a, b); + let e = 
_mm512_setr_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(2.); + let r = _mm256_mask_mul_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_mul_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(2.); + let r = _mm256_maskz_mul_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_mul_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(2.); + let r = _mm_mask_mul_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_mul_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(2.); + let r = _mm_maskz_mul_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_mul_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.); + let r = _mm512_div_pd(a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0.5, f64::INFINITY, f64::NEG_INFINITY, + f64::INFINITY, f64::NEG_INFINITY, -50., -16., + ); + 
assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.); + let r = _mm512_mask_div_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_div_pd(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0.5, f64::INFINITY, f64::NEG_INFINITY, + f64::MAX, f64::MIN, -100., -32., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.); + let r = _mm512_maskz_div_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_div_pd(0b00001111, a, b); + let e = _mm512_setr_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_div_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set_pd(2., 2., 0., 0.); + let r = _mm256_mask_div_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_div_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_div_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set_pd(2., 2., 0., 0.); + let r = _mm256_maskz_div_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_div_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_div_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set_pd(0., 0.); + let r = _mm_mask_div_pd(a, 0, a, b); + 
assert_eq_m128d(r, a); + let r = _mm_mask_div_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_div_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set_pd(0., 0.); + let r = _mm_maskz_div_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_div_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epi64(a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_max_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_max_epi64(a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epi64() { + let a = 
_mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_max_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_max_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_max_epi64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_max_epi64(a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epi64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_mask_max_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epi64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_maskz_max_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_max_pd(a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let 
b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_max_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_max_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_max_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_max_pd(0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_mask_max_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_max_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(3., 2., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_maskz_max_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_max_pd(0b00001111, a, b); + let e = _mm256_set_pd(3., 2., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_pd() { + let a = _mm_set_pd(2., 3.); + let b = _mm_set_pd(3., 2.); + let r = _mm_mask_max_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_max_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(3., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_pd() { + let a = _mm_set_pd(2., 3.); + let b = _mm_set_pd(3., 2.); + let r = _mm_maskz_max_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_max_pd(0b00000011, a, b); + let e = _mm_set_pd(3., 
3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epu64(a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epu64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epu64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epu64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epu64(0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_max_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_max_epu64(a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_max_epu64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epu64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_max_epu64(0, a, b); + assert_eq_m256i(r, 
_mm256_setzero_si256()); + let r = _mm256_maskz_max_epu64(0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_max_epu64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_max_epu64(a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epu64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_mask_max_epu64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epu64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epu64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_maskz_max_epu64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epu64(0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epi64(a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epi64(0, a, b); + assert_eq_m512i(r, 
_mm512_setzero_si512()); + let r = _mm512_maskz_min_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_min_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_min_epi64(a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_min_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_min_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_min_epi64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(3, 2); + let r = _mm_min_epi64(a, b); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(1, 0); + let r = _mm_min_epi64(a, b); + let e = _mm_set_epi64x(1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epi64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(3, 2); + let r = _mm_mask_min_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epi64() { + let a = 
_mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(3, 2); + let r = _mm_maskz_min_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_min_pd(a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_min_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_min_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_min_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_min_pd(0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_mask_min_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_min_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., 1., 1., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_maskz_min_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = 
_mm256_maskz_min_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., 1., 1., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(1., 0.); + let r = _mm_mask_min_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_min_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(1., 0.); + let r = _mm_maskz_min_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_min_pd(0b00000011, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epu64(a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epu64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epu64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epu64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epu64(0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_min_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 
2, 1, 0); + let r = _mm256_min_epu64(a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_min_epu64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epu64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_min_epu64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epu64(0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_min_epu64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(1, 0); + let r = _mm_min_epu64(a, b); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epu64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(1, 0); + let r = _mm_mask_min_epu64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epu64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epu64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(1, 0); + let r = _mm_maskz_min_epu64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epu64(0b00000011, a, b); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sqrt_pd() { + let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm512_sqrt_pd(a); + let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + 
assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sqrt_pd() { + let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm512_mask_sqrt_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_sqrt_pd(a, 0b00001111, a); + let e = _mm512_setr_pd(0., 1., 2., 3., 16., 25., 36., 49.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sqrt_pd() { + let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm512_maskz_sqrt_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sqrt_pd(0b00001111, a); + let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sqrt_pd() { + let a = _mm256_set_pd(0., 1., 4., 9.); + let r = _mm256_mask_sqrt_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_sqrt_pd(a, 0b00001111, a); + let e = _mm256_set_pd(0., 1., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sqrt_pd() { + let a = _mm256_set_pd(0., 1., 4., 9.); + let r = _mm256_maskz_sqrt_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_sqrt_pd(0b00001111, a); + let e = _mm256_set_pd(0., 1., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sqrt_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_mask_sqrt_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_sqrt_pd(a, 0b00000011, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sqrt_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_maskz_sqrt_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_sqrt_pd(0b00000011, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let r = _mm512_fmadd_pd(a, b, c); + let e = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let r = _mm512_mask_fmadd_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmadd_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(1., 2., 3., 4., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let r = _mm512_maskz_fmadd_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmadd_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(1., 2., 3., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmadd_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmadd_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(1., 2., 3., 4., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmadd_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = 
_mm256_mask_fmadd_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmadd_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmadd_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmadd_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmadd_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmadd_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmadd_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmadd_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmadd_pd(0b00000011, a, b, c); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmadd_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmadd_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmsub_pd(a, b, c); + let e = _mm512_setr_pd(-1., 0., 1., 2., 3., 4., 5., 6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmsub_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsub_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(-1., 0., 1., 2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmsub_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsub_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(-1., 0., 1., 2., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmsub_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsub_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(-1., 0., 1., 2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmsub_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmsub_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(-1., 0., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_maskz_fmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmsub_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmsub_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(-1., 0., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmsub_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmsub_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(-1., 0., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmsub_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmsub_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(-1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmsub_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmsub_pd(0b00000011, a, b, c); + let e = _mm_set_pd(-1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmsub_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmsub_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(-1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = 
_mm512_fmaddsub_pd(a, b, c); + let e = _mm512_setr_pd(-1., 2., 1., 4., 3., 6., 5., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmaddsub_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmaddsub_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(-1., 2., 1., 4., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmaddsub_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmaddsub_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(-1., 2., 1., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(-1., 2., 1., 4., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmaddsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmaddsub_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmaddsub_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(1., 0., 3., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmaddsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + 
let r = _mm256_maskz_fmaddsub_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmaddsub_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(1., 0., 3., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmaddsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmaddsub_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmaddsub_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(1., 0., 3., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmaddsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmaddsub_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmaddsub_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmaddsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmaddsub_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmaddsub_pd(0b00000011, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmaddsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmaddsub_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmaddsub_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmsubadd_pd(a, b, c); + let e = _mm512_setr_pd(1., 0., 3., 2., 5., 4., 7., 6.); + assert_eq_m512d(r, 
e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmsubadd_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsubadd_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(1., 0., 3., 2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmsubadd_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsubadd_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(1., 0., 3., 2., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmsubadd_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsubadd_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(1., 0., 3., 2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmsubadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmsubadd_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmsubadd_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(-1., 2., 1., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmsubadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmsubadd_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = 
_mm256_maskz_fmsubadd_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(-1., 2., 1., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmsubadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmsubadd_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmsubadd_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(-1., 2., 1., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmsubadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmsubadd_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmsubadd_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(-1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmsubadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmsubadd_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmsubadd_pd(0b00000011, a, b, c); + let e = _mm_set_pd(-1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmsubadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmsubadd_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmsubadd_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(-1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fnmadd_pd(a, b, c); + let e = _mm512_setr_pd(1., 0., -1., -2., -3., -4., -5., -6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmadd_pd() { + let a 
= _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fnmadd_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmadd_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(1., 0., -1., -2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fnmadd_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmadd_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(1., 0., -1., -2., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fnmadd_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmadd_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(1., 0., -1., -2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fnmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fnmadd_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fnmadd_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(1., 0., -1., -2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fnmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fnmadd_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fnmadd_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(1., 0., -1., -2.); + assert_eq_m256d(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fnmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fnmadd_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fnmadd_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(1., 0., -1., -2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fnmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fnmadd_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmadd_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fnmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fnmadd_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fnmadd_pd(0b00000011, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fnmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fnmadd_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmadd_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fnmsub_pd(a, b, c); + let e = _mm512_setr_pd(-1., -2., -3., -4., -5., -6., -7., -8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fnmsub_pd(a, 
0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmsub_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(-1., -2., -3., -4., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fnmsub_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmsub_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(-1., -2., -3., -4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fnmsub_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmsub_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(-1., -2., -3., -4., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fnmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fnmsub_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fnmsub_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(-1., -2., -3., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fnmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fnmsub_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fnmsub_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(-1., -2., -3., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fnmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 
1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fnmsub_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fnmsub_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(-1., -2., -3., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fnmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fnmsub_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmsub_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(-1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fnmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fnmsub_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fnmsub_pd(0b00000011, a, b, c); + let e = _mm_set_pd(-1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fnmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fnmsub_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmsub_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(-1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rcp14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_rcp14_pd(a); + let e = _mm512_set1_pd(0.3333320617675781); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rcp14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_rcp14_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_rcp14_pd(a, 0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 3., 3., 3., 3., + 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rcp14_pd() { + 
let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_rcp14_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_rcp14_pd(0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0., 0., 0., + 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rcp14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_rcp14_pd(a); + let e = _mm256_set1_pd(0.3333320617675781); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rcp14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_mask_rcp14_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_rcp14_pd(a, 0b00001111, a); + let e = _mm256_set1_pd(0.3333320617675781); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rcp14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_maskz_rcp14_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_rcp14_pd(0b00001111, a); + let e = _mm256_set1_pd(0.3333320617675781); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rcp14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_rcp14_pd(a); + let e = _mm_set1_pd(0.3333320617675781); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rcp14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_mask_rcp14_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_rcp14_pd(a, 0b00000011, a); + let e = _mm_set1_pd(0.3333320617675781); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rcp14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_maskz_rcp14_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_rcp14_pd(0b00000011, a); + let e = _mm_set1_pd(0.3333320617675781); + assert_eq_m128d(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rsqrt14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_rsqrt14_pd(a); + let e = _mm512_set1_pd(0.5773391723632813); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rsqrt14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_rsqrt14_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_rsqrt14_pd(a, 0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 3., 3., 3., 3., + 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rsqrt14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_rsqrt14_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_rsqrt14_pd(0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0., 0., 0., + 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rsqrt14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_rsqrt14_pd(a); + let e = _mm256_set1_pd(0.5773391723632813); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rsqrt14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_mask_rsqrt14_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_rsqrt14_pd(a, 0b00001111, a); + let e = _mm256_set1_pd(0.5773391723632813); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rsqrt14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_maskz_rsqrt14_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_rsqrt14_pd(0b00001111, a); + let e = _mm256_set1_pd(0.5773391723632813); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rsqrt14_pd() { + let a = 
_mm_set1_pd(3.); + let r = _mm_rsqrt14_pd(a); + let e = _mm_set1_pd(0.5773391723632813); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rsqrt14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_mask_rsqrt14_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_rsqrt14_pd(a, 0b00000011, a); + let e = _mm_set1_pd(0.5773391723632813); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rsqrt14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_maskz_rsqrt14_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_rsqrt14_pd(0b00000011, a); + let e = _mm_set1_pd(0.5773391723632813); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getexp_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_getexp_pd(a); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getexp_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_getexp_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getexp_pd(a, 0b11110000, a); + let e = _mm512_setr_pd(3., 3., 3., 3., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getexp_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_getexp_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getexp_pd(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_getexp_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_getexp_pd(a); + let e = _mm256_set1_pd(1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_getexp_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_mask_getexp_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_getexp_pd(a, 0b00001111, 
a); + let e = _mm256_set1_pd(1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_getexp_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_maskz_getexp_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_getexp_pd(0b00001111, a); + let e = _mm256_set1_pd(1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_getexp_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_getexp_pd(a); + let e = _mm_set1_pd(1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_getexp_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_mask_getexp_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_getexp_pd(a, 0b00000011, a); + let e = _mm_set1_pd(1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_getexp_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_maskz_getexp_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_getexp_pd(0b00000011, a); + let e = _mm_set1_pd(1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_roundscale_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_roundscale_pd::<0b00_00_00_00>(a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_roundscale_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + let e = _mm512_set1_pd(1.1); + assert_eq_m512d(r, e); + let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_roundscale_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = 
_mm512_maskz_roundscale_pd::<0b00_00_00_00>(0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_roundscale_pd() { + let a = _mm256_set1_pd(1.1); + let r = _mm256_roundscale_pd::<0b00_00_00_00>(a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_roundscale_pd() { + let a = _mm256_set1_pd(1.1); + let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00001111, a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_roundscale_pd() { + let a = _mm256_set1_pd(1.1); + let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0b00001111, a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_roundscale_pd() { + let a = _mm_set1_pd(1.1); + let r = _mm_roundscale_pd::<0b00_00_00_00>(a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_roundscale_pd() { + let a = _mm_set1_pd(1.1); + let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + let e = _mm_set1_pd(1.1); + assert_eq_m128d(r, e); + let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00000011, a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_roundscale_pd() { + let a = _mm_set1_pd(1.1); + let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0b00000011, a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_scalef_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_scalef_pd(a, b); + let e = _mm512_set1_pd(8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_scalef_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_mask_scalef_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_scalef_pd(a, 0b11110000, a, b); + let e = _mm512_set_pd(8., 8., 8., 8., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_scalef_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_maskz_scalef_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_scalef_pd(0b11110000, a, b); + let e = _mm512_set_pd(8., 8., 8., 8., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_scalef_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(3.); + let r = _mm256_scalef_pd(a, b); + let e = _mm256_set1_pd(8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_scalef_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(3.); + let r = _mm256_mask_scalef_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_scalef_pd(a, 0b00001111, a, b); + let e = _mm256_set1_pd(8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_scalef_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(3.); + let r = _mm256_maskz_scalef_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_scalef_pd(0b00001111, a, b); + let e = _mm256_set1_pd(8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_scalef_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_scalef_pd(a, b); + let 
e = _mm_set1_pd(8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_scalef_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_mask_scalef_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_scalef_pd(a, 0b00000011, a, b); + let e = _mm_set1_pd(8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_scalef_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_maskz_scalef_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_scalef_pd(0b00000011, a, b); + let e = _mm_set1_pd(8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fixupimm_pd() { + let a = _mm512_set1_pd(f64::NAN); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_fixupimm_pd::<5>(a, b, c); + let e = _mm512_set1_pd(0.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fixupimm_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_mask_fixupimm_pd::<5>(a, 0b11110000, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fixupimm_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_maskz_fixupimm_pd::<5>(0b11110000, a, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_fixupimm_pd() { + let a = _mm256_set1_pd(f64::NAN); + let b = _mm256_set1_pd(f64::MAX); + let c = _mm256_set1_epi64x(i32::MAX as i64); + let r = 
_mm256_fixupimm_pd::<5>(a, b, c); + let e = _mm256_set1_pd(0.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fixupimm_pd() { + let a = _mm256_set1_pd(f64::NAN); + let b = _mm256_set1_pd(f64::MAX); + let c = _mm256_set1_epi64x(i32::MAX as i64); + let r = _mm256_mask_fixupimm_pd::<5>(a, 0b00001111, b, c); + let e = _mm256_set1_pd(0.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fixupimm_pd() { + let a = _mm256_set1_pd(f64::NAN); + let b = _mm256_set1_pd(f64::MAX); + let c = _mm256_set1_epi64x(i32::MAX as i64); + let r = _mm256_maskz_fixupimm_pd::<5>(0b00001111, a, b, c); + let e = _mm256_set1_pd(0.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_fixupimm_pd() { + let a = _mm_set1_pd(f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_fixupimm_pd::<5>(a, b, c); + let e = _mm_set1_pd(0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fixupimm_pd() { + let a = _mm_set1_pd(f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_mask_fixupimm_pd::<5>(a, 0b00000011, b, c); + let e = _mm_set1_pd(0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fixupimm_pd() { + let a = _mm_set1_pd(f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_maskz_fixupimm_pd::<5>(0b00000011, a, b, c); + let e = _mm_set1_pd(0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ternarylogic_epi64() { + let a = _mm512_set1_epi64(1 << 2); + let b = _mm512_set1_epi64(1 << 1); + let c = _mm512_set1_epi64(1 << 0); + let r = _mm512_ternarylogic_epi64::<8>(a, b, c); + let e = _mm512_set1_epi64(0); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ternarylogic_epi64() { + let src = _mm512_set1_epi64(1 << 2); + let a = _mm512_set1_epi64(1 << 1); + let b = _mm512_set1_epi64(1 << 0); + let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0b11111111, a, b); + let e = _mm512_set1_epi64(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ternarylogic_epi64() { + let a = _mm512_set1_epi64(1 << 2); + let b = _mm512_set1_epi64(1 << 1); + let c = _mm512_set1_epi64(1 << 0); + let r = _mm512_maskz_ternarylogic_epi64::<8>(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_ternarylogic_epi64::<8>(0b11111111, a, b, c); + let e = _mm512_set1_epi64(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ternarylogic_epi64() { + let a = _mm256_set1_epi64x(1 << 2); + let b = _mm256_set1_epi64x(1 << 1); + let c = _mm256_set1_epi64x(1 << 0); + let r = _mm256_ternarylogic_epi64::<8>(a, b, c); + let e = _mm256_set1_epi64x(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_ternarylogic_epi64() { + let src = _mm256_set1_epi64x(1 << 2); + let a = _mm256_set1_epi64x(1 << 1); + let b = _mm256_set1_epi64x(1 << 0); + let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0b00001111, a, b); + let e = _mm256_set1_epi64x(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_ternarylogic_epi64() { + let a = _mm256_set1_epi64x(1 << 2); + let b = _mm256_set1_epi64x(1 << 1); + let c = _mm256_set1_epi64x(1 << 0); + let r = _mm256_maskz_ternarylogic_epi64::<9>(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_ternarylogic_epi64::<8>(0b00001111, a, b, c); + let e 
= _mm256_set1_epi64x(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ternarylogic_epi64() { + let a = _mm_set1_epi64x(1 << 2); + let b = _mm_set1_epi64x(1 << 1); + let c = _mm_set1_epi64x(1 << 0); + let r = _mm_ternarylogic_epi64::<8>(a, b, c); + let e = _mm_set1_epi64x(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_ternarylogic_epi64() { + let src = _mm_set1_epi64x(1 << 2); + let a = _mm_set1_epi64x(1 << 1); + let b = _mm_set1_epi64x(1 << 0); + let r = _mm_mask_ternarylogic_epi64::<8>(src, 0, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_ternarylogic_epi64::<8>(src, 0b00000011, a, b); + let e = _mm_set1_epi64x(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_ternarylogic_epi64() { + let a = _mm_set1_epi64x(1 << 2); + let b = _mm_set1_epi64x(1 << 1); + let c = _mm_set1_epi64x(1 << 0); + let r = _mm_maskz_ternarylogic_epi64::<9>(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_ternarylogic_epi64::<8>(0b00000011, a, b, c); + let e = _mm_set1_epi64x(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getmant_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a); + let e = _mm512_set1_pd(1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getmant_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11110000, a); + let e = _mm512_setr_pd(10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getmant_pd() { + let a = _mm512_set1_pd(10.); + let r = 
_mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_getmant_pd() { + let a = _mm256_set1_pd(10.); + let r = _mm256_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a); + let e = _mm256_set1_pd(1.25); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_getmant_pd() { + let a = _mm256_set1_pd(10.); + let r = _mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a); + let e = _mm256_set1_pd(1.25); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_getmant_pd() { + let a = _mm256_set1_pd(10.); + let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a); + let e = _mm256_set1_pd(1.25); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_getmant_pd() { + let a = _mm_set1_pd(10.); + let r = _mm_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a); + let e = _mm_set1_pd(1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_getmant_pd() { + let a = _mm_set1_pd(10.); + let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00000011, a); + let e = _mm_set1_pd(1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_maskz_getmant_pd() { + let a = _mm_set1_pd(10.); + let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00000011, a); + let e = _mm_set1_pd(1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtps_pd(a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm512_set1_pd(0.); + let r = _mm512_mask_cvtps_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtps_pd(src, 0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtps_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvtps_pd(0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpslo_pd() { + let v2 = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100., + ); + let r = _mm512_cvtpslo_pd(v2); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpslo_pd() { + let v2 = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100., + ); + let src = _mm512_set1_pd(0.); + let r = _mm512_mask_cvtpslo_pd(src, 0, v2); + assert_eq_m512d(r, src); + let r = 
_mm512_mask_cvtpslo_pd(src, 0b00001111, v2); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_ps(a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_ps(0.); + let r = _mm512_mask_cvtpd_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm512_mask_cvtpd_ps(src, 0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtpd_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm512_maskz_cvtpd_ps(0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_ps() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_ps(0.); + let r = _mm256_mask_cvtpd_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm256_mask_cvtpd_ps(src, 0b00001111, a); + let e = _mm_set_ps(4., -5.5, 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_ps() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvtpd_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm256_maskz_cvtpd_ps(0b00001111, a); + let e = _mm_set_ps(4., -5.5, 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_ps() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_ps(0.); + let r = 
_mm_mask_cvtpd_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_cvtpd_ps(src, 0b00000011, a); + let e = _mm_set_ps(0., 0., 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_ps() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtpd_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_cvtpd_ps(0b00000011, a); + let e = _mm_set_ps(0., 0., 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_epi32(a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtpd_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtpd_epi32(src, 0b11111111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtpd_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtpd_epi32(0b11111111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epi32() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtpd_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, -6, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epi32() { + let a = _mm256_set_pd(4., -5.5, 6., 
-7.5); + let r = _mm256_maskz_cvtpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtpd_epi32(0b00001111, a); + let e = _mm_set_epi32(4, -6, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtpd_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtpd_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let r = _mm512_cvtpd_epu32(a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtpd_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtpd_epu32(src, 0b11111111, a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let r = _mm512_maskz_cvtpd_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtpd_epu32(0b11111111, a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); 
+ let r = _mm256_cvtpd_epu32(a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtpd_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_maskz_cvtpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtpd_epu32(0b00001111, a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_cvtpd_epu32(a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtpd_epu32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_maskz_cvtpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtpd_epu32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_pslo() { + let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_pslo(v2); + let e = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] 
+ unsafe fn test_mm512_mask_cvtpd_pslo() { + let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm512_set1_ps(0.); + let r = _mm512_mask_cvtpd_pslo(src, 0, v2); + assert_eq_m512(r, src); + let r = _mm512_mask_cvtpd_pslo(src, 0b00001111, v2); + let e = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi8_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi8_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi8_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi8_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi8_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi8_epi64() { + let a = 
_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepi8_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi8_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi8_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepi8_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi8_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu8_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu8_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu8_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_cvtepu8_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu8_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu8_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu8_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu8_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu8_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu8_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi16_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi16_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi16_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi16_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepi16_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi16_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi16_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let 
r = _mm_maskz_cvtepi16_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi16_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu16_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu16_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu16_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu16_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu16_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu16_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu16_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable 
= "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu16_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu16_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu16_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi32_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi32_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi32_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi32_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = 
_mm256_mask_cvtepi32_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(8, 9, 10, 11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let r = _mm256_maskz_cvtepi32_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi32_epi64(0b00001111, a); + let e = _mm256_set_epi64x(8, 9, 10, 11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let src = _mm_set1_epi64x(0); + let r = _mm_mask_cvtepi32_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi32_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(10, 11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let r = _mm_maskz_cvtepi32_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi32_epi64(0b00000011, a); + let e = _mm_set_epi64x(10, 11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu32_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu32_epi64(0, a); 
+ assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu32_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu32_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepu32_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu32_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu32_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepu32_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu32_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 
15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepi32_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepi32_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvtepi32_pd(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_pd(-1.); + let r = _mm256_mask_cvtepi32_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_cvtepi32_pd(src, 0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepi32_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_cvtepi32_pd(0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_pd(-1.); + let r = _mm_mask_cvtepi32_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_cvtepi32_pd(src, 0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepi32_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_cvtepi32_pd(0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepu32_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepu32_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu32_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvtepu32_pd(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_cvtepu32_pd(a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_pd(-1.); + let r = _mm256_mask_cvtepu32_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_cvtepu32_pd(src, 0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepu32_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_cvtepu32_pd(0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_cvtepu32_pd(a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_pd(-1.); + let r = _mm_mask_cvtepu32_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_cvtepu32_pd(src, 0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepu32_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_cvtepu32_pd(0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32lo_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepi32lo_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepi32lo_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32lo_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepu32lo_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepu32lo_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi64_epi32() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi64_epi32(a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_epi32() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi32(-1); + let r = _mm512_mask_cvtepi64_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtepi64_epi32(src, 0b00001111, a); + let e = _mm256_set_epi32(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi64_epi32() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi64_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtepi64_epi32(0b00001111, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi64_epi32() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepi64_epi32(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_epi32() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_epi32() { + 
let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi64_epi32() { + let a = _mm_set_epi64x(3, 4); + let r = _mm_cvtepi64_epi32(a); + let e = _mm_set_epi32(0, 0, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_epi32() { + let a = _mm_set_epi64x(3, 4); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_epi32() { + let a = _mm_set_epi64x(3, 4); + let r = _mm_maskz_cvtepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi64_epi16() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi64_epi16(a); + let e = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_epi16() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi16(-1); + let r = _mm512_mask_cvtepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi64_epi16() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi64_epi16(0, a); + assert_eq_m128i(r, 
_mm_setzero_si128()); + let r = _mm512_maskz_cvtepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi64_epi16() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_cvtepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_epi16() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi64_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_epi16() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_maskz_cvtepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi64_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi64_epi16() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_cvtepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_epi16() { + let a = _mm_set_epi64x(14, 15); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi64_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_epi16() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_maskz_cvtepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm_maskz_cvtepi64_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi64_epi8() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_epi8() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_cvtepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi64_epi8() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi64_epi8() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_cvtepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_epi8() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_maskz_cvtepi64_epi8() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_maskz_cvtepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi64_epi8() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_cvtepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_epi8() { + let a = _mm_set_epi64x(14, 15); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_epi8() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_maskz_cvtepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_cvtsepi64_epi32(a); + let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, i32::MIN, i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let src = _mm256_set1_epi32(-1); + let r = _mm512_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtsepi64_epi32(src, 0b00001111, a); + let e = _mm256_set_epi32(-1, -1, -1, -1, 4, 5, i32::MIN, i32::MAX); + 
assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_maskz_cvtsepi64_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtsepi64_epi32(0b00001111, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 4, 5, i32::MIN, i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi32(a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi32(-1); + let r = _mm256_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi32(a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi32(src, 0b00000011, a); + 
let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi16(-1); + let r = _mm512_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtsepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtsepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi16(0); + let r = 
_mm256_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi16(src, 0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi16(0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_epi8() { + 
let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtsepi64_epi8(src, 0b00001111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + -1, -1, -1, -1, + 4, 5, i8::MIN, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtsepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_cvtusepi64_epi32(a); + let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let src = _mm256_set1_epi32(-1); + let r = _mm512_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtusepi64_epi32(src, 0b00001111, a); + let e = _mm256_set_epi32(-1, -1, -1, -1, 4, 5, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_maskz_cvtusepi64_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = 
_mm512_maskz_cvtusepi64_epi32(0b00001111, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 4, 5, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi32(a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi32(a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm_maskz_cvtusepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let src = _mm_set1_epi16(-1); + let r = _mm512_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtusepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtusepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = 
_mm256_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi16(src, 0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi16(0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtusepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtusepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtusepi64_epi8(src, 0, a); + 
assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(a); + let e = 
_mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvttpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvttpd_epi32(a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvttpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvttpd_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvttpd_epi32(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvttpd_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvttpd_epi32(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } 
+ + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epi32() { + let a = _mm256_setr_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvttpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvttpd_epi32(src, 0b00001111, a); + let e = _mm_setr_epi32(4, -5, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epi32() { + let a = _mm256_setr_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvttpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvttpd_epi32(0b00001111, a); + let e = _mm_setr_epi32(4, -5, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttpd_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvttpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttpd_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvttpd_epu32(a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvttpd_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvttpd_epu32(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 
0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvttpd_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvttpd_epu32(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_cvttpd_epu32(a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvttpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvttpd_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_maskz_cvttpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvttpd_epu32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_cvttpd_epu32(a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttpd_epu32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epu32() { + let a = 
_mm_set_pd(6., 7.5); + let r = _mm_maskz_cvttpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttpd_epu32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(-1.); + let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(-1.); + let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(-1.); + let r = + _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 
11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(1.); + let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(1.); + let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(1.); + let r = + _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.); + let b = _mm512_set1_pd(0.1); + let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd( + 0.8, + 0.9500000000000001, + 1., + 1.1500000000000001, + 1.2000000000000002, + 1.35, + 1.4000000000000001, + 0., + ); + assert_eq_m512d(r, e); + let r = 
_mm512_mul_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(0.8, 0.95, 1.0, 1.15, 1.2, 1.3499999999999999, 1.4, 0.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.); + let b = _mm512_set1_pd(0.1); + let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 8., + 9.5, + 10., + 11.5, + 1.2000000000000002, + 1.35, + 1.4000000000000001, + 0., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.); + let b = _mm512_set1_pd(0.1); + let r = + _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 0., + 0., + 0., + 0., + 1.2000000000000002, + 1.35, + 1.4000000000000001, + 0., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pd(0.3333333333333333); + assert_eq_m512d(r, e); + let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pd(0.3333333333333333); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_mask_div_round_pd::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 1., + 1., + 1., + 1., + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = + _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 0., + 0., + 0., + 0., + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sqrt_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_pd(1.7320508075688772); + assert_eq_m512d(r, e); + let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_pd(1.7320508075688774); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sqrt_round_pd() { + let a = _mm512_set1_pd(3.); + let r = + _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, + ); + let e = _mm512_setr_pd( + 3., + 3., + 3., + 3., + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sqrt_round_pd() { + let 
a = _mm512_set1_pd(3.); + let r = + _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, + ); + let e = _mm512_setr_pd( + 0., + 0., + 0., + 0., + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-1.); + assert_eq_m512d(r, e); + let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + -1., + -1., + -1., + -1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmadd_round_pd::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-1.); + assert_eq_m512d(r, e); + let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + -1., + -1., + -1., + -1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = 
_mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = + _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd(1., -1., 1., -1., 1., -1., 1., -1.); + assert_eq_m512d(r, e); + let r = _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd( + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmaddsub_round_pd::<{ 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + 1., + -1., + 1., + -1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(1., -1., 1., -1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(1., -1., 1., -1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = + _mm512_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd(-1., 1., -1., 1., -1., 1., -1., 1.); + assert_eq_m512d(r, e); + let r = _mm512_fmsubadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd( + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + ); + 
assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + -1., + 1., + -1., + 1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(-1., 1., -1., 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(-1., 1., -1., 1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = + 
_mm512_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + let r = _mm512_fnmadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + 1., + 1., + 1., + 1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask3_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 
1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = + _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + let r = _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + 1., + 1., + 1., + 1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_min_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getexp_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getexp_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11110000, a); + let e = _mm512_setr_pd(3., 3., 3., 3., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getexp_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_roundscale_round_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_roundscale_round_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a); + let e = _mm512_set1_pd(1.1); + assert_eq_m512d(r, e); + let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_roundscale_round_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_scalef_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pd(8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_scalef_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_set_pd(8., 8., 8., 8., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_scalef_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + 0, a, b, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_set_pd(8., 8., 8., 8., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fixupimm_round_pd() { + let a = _mm512_set1_pd(f64::NAN); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c); + let e = _mm512_set1_pd(0.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fixupimm_round_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_mask_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11110000, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fixupimm_round_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_maskz_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(0b11110000, a, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getmant_round_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a); + let e = _mm512_set1_pd(1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getmant_round_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_mask_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + 
>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0b11110000, a); + let e = _mm512_setr_pd(10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getmant_round_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_maskz_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm512_set1_pd(0.); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_ps(0.); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m256(r, src); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_maskz_cvt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setzero_pd() { + assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.)); + } + + unsafe fn test_mm512_set1_epi64() { + let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, _mm512_set1_epi64(2)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_pd() { + let expected 
= _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.); + assert_eq_m512d(expected, _mm512_set1_pd(2.)); + } + + unsafe fn test_mm512_set4_epi64() { + let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1); + assert_eq_m512i(r, _mm512_set4_epi64(4, 3, 2, 1)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set4_pd() { + let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.); + assert_eq_m512d(r, _mm512_set4_pd(4., 3., 2., 1.)); + } + + unsafe fn test_mm512_setr4_epi64() { + let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1); + assert_eq_m512i(r, _mm512_setr4_epi64(1, 2, 3, 4)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr4_pd() { + let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.); + assert_eq_m512d(r, _mm512_setr4_pd(1., 2., 3., 4.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmplt_pd_mask(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmplt_pd_mask(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnlt_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + assert_eq!(_mm512_cmpnlt_pd_mask(a, b), !_mm512_cmplt_pd_mask(a, b)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnlt_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmpnlt_pd_mask(mask, a, b), 0b01111010); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_cmple_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + assert_eq!(_mm512_cmple_pd_mask(a, b), 0b00100101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmple_pd_mask(mask, a, b), 0b00100000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnle_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmpnle_pd_mask(b, a); + assert_eq!(m, 0b00001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnle_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmpnle_pd_mask(mask, b, a); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let m = _mm512_cmpeq_pd_mask(b, a); + assert_eq!(m, 0b11001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_pd_mask(mask, b, a); + assert_eq!(r, 0b01001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = 
_mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let m = _mm512_cmpneq_pd_mask(b, a); + assert_eq!(m, 0b00110010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_pd_mask(mask, b, a); + assert_eq!(r, 0b00110010) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmp_pd_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_pd_mask() { + let a = _mm256_set_pd(0., 1., -1., 13.); + let b = _mm256_set1_pd(1.); + let m = _mm256_cmp_pd_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_pd_mask() { + let a = _mm256_set_pd(0., 1., -1., 13.); + let b = _mm256_set1_pd(1.); + let mask = 0b11111111; + let r = _mm256_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_pd_mask() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_pd(1.); + let m = _mm_cmp_pd_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_pd_mask() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_pd(1.); + let mask = 0b11111111; + let r = 
_mm_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_round_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_round_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let m = _mm512_cmpord_pd_mask(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let mask = 0b11000011; + let m = _mm512_mask_cmpord_pd_mask(mask, a, b); + assert_eq!(m, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpunord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let m = _mm512_cmpunord_pd_mask(a, b); + + assert_eq!(m, 0b11111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_cmpunord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let mask = 0b00001111; + let m = _mm512_mask_cmpunord_pd_mask(mask, a, b); + assert_eq!(m, 0b000001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epu64_mask(a, b); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmplt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 100); + let b = _mm256_set1_epi64x(2); + let r = _mm256_cmplt_epu64_mask(a, b); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 100); + let b = _mm256_set1_epi64x(2); + let mask = 0b11111111; + let r = _mm256_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmplt_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(2); + let r = _mm_cmplt_epu64_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmplt_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(2); + let mask = 0b11111111; + let r = _mm_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_cmpgt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmpgt_epu64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpgt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set1_epi64x(1); + let r = _mm256_cmpgt_epu64_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmpgt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpgt_epu64_mask() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set1_epi64x(1); + let r = _mm_cmpgt_epu64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epu64_mask() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmpgt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmple_epu64_mask(a, b), + !_mm512_cmpgt_epu64_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, 
i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmple_epu64_mask(mask, a, b), 0b01111010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmple_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 1); + let b = _mm256_set1_epi64x(1); + let r = _mm256_cmple_epu64_mask(a, b); + assert_eq!(r, 0b00001101) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmple_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 1); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmple_epu64_mask(mask, a, b); + assert_eq!(r, 0b00001101) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmple_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let r = _mm_cmple_epu64_mask(a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmple_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmple_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmpge_epu64_mask(a, b), + !_mm512_cmplt_epu64_mask(a, b) + ); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b11111111; + let r = _mm512_mask_cmpge_epu64_mask(mask, a, b); + assert_eq!(r, 0b00110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpge_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64); + let b = _mm256_set1_epi64x(1); + let r = _mm256_cmpge_epu64_mask(a, b); + assert_eq!(r, 0b00000111); + 
} + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmpge_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpge_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let r = _mm_cmpge_epu64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpge_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmpge_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpeq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let m = _mm256_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm256_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 
0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpeq_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(0, 1); + let m = _mm_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(0, 1); + let mask = 0b11111111; + let r = _mm_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpneq_epu64_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epu64_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, -100, 100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00110010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpneq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let r = _mm256_cmpneq_epu64_mask(b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm256_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpneq_epu64_mask() { + let a = _mm_set_epi64x(-1, u64::MAX as i64); + let b = _mm_set_epi64x(13, 42); + let r = _mm_cmpneq_epu64_mask(b, a); + assert_eq!(r, 
0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epu64_mask() { + let a = _mm_set_epi64x(-1, u64::MAX as i64); + let b = _mm_set_epi64x(13, 42); + let mask = 0b11111111; + let r = _mm_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 100); + let b = _mm256_set1_epi64x(1); + let m = _mm256_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00001000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 100); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00001000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let m = _mm_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_cmplt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epi64_mask(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmplt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, -13); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmplt_epi64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, -13); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmplt_epi64_mask() { + let a = _mm_set_epi64x(-1, -13); + let b = _mm_set1_epi64x(-1); + let r = _mm_cmplt_epi64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmplt_epi64_mask() { + let a = _mm_set_epi64x(-1, -13); + let b = _mm_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmpgt_epi64_mask(b, a); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = 
_mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpgt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmpgt_epi64_mask(a, b); + assert_eq!(r, 0b00001101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmpgt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00001101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpgt_epi64_mask() { + let a = _mm_set_epi64x(0, -1); + let b = _mm_set1_epi64x(-1); + let r = _mm_cmpgt_epi64_mask(a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epi64_mask() { + let a = _mm_set_epi64x(0, -1); + let b = _mm_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm_mask_cmpgt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmple_epi64_mask(a, b), + !_mm512_cmpgt_epi64_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmple_epi64_mask(mask, a, b), 0b00110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmple_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmple_epi64_mask(a, b); + assert_eq!(r, 0b00000010) + } + + #[simd_test(enable 
= "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmple_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmple_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmple_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let r = _mm_cmple_epi64_mask(a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmple_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmple_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmpge_epi64_mask(a, b), + !_mm512_cmplt_epi64_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b11111111; + let r = _mm512_mask_cmpge_epi64_mask(mask, a, b); + assert_eq!(r, 0b11111010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpge_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmpge_epi64_mask(a, b); + assert_eq!(r, 0b00001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmpge_epi64_mask(mask, a, b); + assert_eq!(r, 0b00001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpge_epi64_mask() { + let a = _mm_set_epi64x(0, 1); 
+ let b = _mm_set1_epi64x(-1); + let r = _mm_cmpge_epi64_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpge_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm_mask_cmpge_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpeq_epi64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpeq_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let m = _mm256_cmpeq_epi64_mask(b, a); + assert_eq!(m, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm256_mask_cmpeq_epi64_mask(mask, b, a); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpeq_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(0, 1); + let m = _mm_cmpeq_epi64_mask(b, a); + assert_eq!(m, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(0, 1); + let mask = 0b11111111; + let r = _mm_mask_cmpeq_epi64_mask(mask, b, a); 
+ assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_epi64() { + let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0)) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr_epi64() { + let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0)) + } + + unsafe fn test_mm512_cmpneq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpneq_epi64_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epi64_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, -100, 100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_epi64_mask(mask, b, a); + assert_eq!(r, 0b00110010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpneq_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let r = _mm256_cmpneq_epi64_mask(b, a); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm256_mask_cmpneq_epi64_mask(mask, b, a); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpneq_epi64_mask() { + let a = _mm_set_epi64x(-1, 13); + let b = _mm_set_epi64x(13, 42); + let r = _mm_cmpneq_epi64_mask(b, a); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epi64_mask() { + let a = _mm_set_epi64x(-1, 13); + let b = _mm_set_epi64x(13, 42); + let mask = 
0b11111111; + let r = _mm_mask_cmpneq_epi64_mask(mask, b, a); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(1); + let m = _mm256_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let m = _mm_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let r = 
_mm512_i32gather_pd::<8>(index, arr.as_ptr()); + assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + let src = _mm512_set1_pd(2.); + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr()); + assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + // A multiplier of 8 is word-addressing + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr()); + assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_pd() { + let arr: [f64; 128] = core::array::from_fn(|i| i as f64); + let src = _mm512_set1_pd(2.); + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr()); + assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_ps::<4>(index, arr.as_ptr()); + assert_eq_m256(r, _mm256_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_ps() { + let arr: [f32; 128] = core::array::from_fn(|i| i as f32); + let src = 
_mm256_set1_ps(2.); + let mask = 0b10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr()); + assert_eq_m256(r, _mm256_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + // A multiplier of 8 is word-addressing + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i32gather_epi64::<8>(index, arr.as_ptr()); + assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + let src = _mm512_set1_epi64(2); + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr()); + assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + // A multiplier of 8 is word-addressing + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_epi64::<8>(index, arr.as_ptr()); + assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + let src = _mm512_set1_epi64(2); + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i64gather_epi64::<8>(src, mask, index, 
arr.as_ptr()); + assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_epi32() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + // A multiplier of 8 is word-addressing + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const i32); + assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_epi32() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + let src = _mm256_set1_epi32(2); + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const i32); + assert_eq_m256i(r, _mm256_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_pd() { + let mut arr = [0f64; 128]; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_i32scatter_pd::<8>(arr.as_mut_ptr(), index, src); + let mut expected = [0f64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_pd() { + let mut arr = [0f64; 128]; + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0f64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2. 
* (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_pd() { + let mut arr = [0f64; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_i64scatter_pd::<8>(arr.as_mut_ptr(), index, src); + let mut expected = [0f64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_pd() { + let mut arr = [0f64; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0f64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2. * (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_ps() { + let mut arr = [0f32; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 4 is word-addressing + _mm512_i64scatter_ps::<4>(arr.as_mut_ptr(), index, src); + let mut expected = [0f32; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_ps() { + let mut arr = [0f32; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 4 is word-addressing + _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0f32; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2. 
* (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_epi64() { + let mut arr = [0i64; 128]; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr(), index, src); + let mut expected = [0i64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_epi64() { + let mut arr = [0i64; 128]; + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0i64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2 * (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_epi64() { + let mut arr = [0i64; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr(), index, src); + let mut expected = [0i64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_epi64() { + let mut arr = [0i64; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0i64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2 * (i + 1) as 
i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_epi32() { + let mut arr = [0i32; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 4 is word-addressing + _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr(), index, src); + let mut expected = [0i32; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_epi32() { + let mut arr = [0i32; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 4 is word-addressing + _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src); + let mut expected = [0i32; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2 * (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32logather_epi64() { + let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_i32logather_epi64::<8>(vindex, base_addr.as_ptr()); + let expected = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1); + assert_eq_m512i(expected, r); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32logather_epi64() { + let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let src = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 16); + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_i32logather_epi64::<8>(src, 0b01010101, vindex, base_addr.as_ptr()); + let expected = _mm512_setr_epi64(2, 10, 4, 12, 6, 14, 8, 16); + assert_eq_m512i(expected, r); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32logather_pd() { 
+ let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_i32logather_pd::<8>(vindex, base_addr.as_ptr()); + let expected = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.); + assert_eq_m512d(expected, r); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32logather_pd() { + let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let src = _mm512_setr_pd(9., 10., 11., 12., 13., 14., 15., 16.); + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_i32logather_pd::<8>(src, 0b01010101, vindex, base_addr.as_ptr()); + let expected = _mm512_setr_pd(2., 10., 4., 12., 6., 14., 8., 16.); + assert_eq_m512d(expected, r); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32loscatter_epi64() { + let mut base_addr: [i64; 8] = [0; 8]; + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1); + _mm512_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2, 3, 4, 5, 6, 7, 8]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32loscatter_epi64() { + let mut base_addr: [i64; 8] = [0; 8]; + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1); + _mm512_mask_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src); + let expected = [0, 2, 0, 4, 0, 6, 0, 8]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32loscatter_pd() { + let mut base_addr: [f64; 8] = [0.; 8]; + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.); + 
_mm512_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2., 3., 4., 5., 6., 7., 8.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32loscatter_pd() { + let mut base_addr: [f64; 8] = [0.; 8]; + let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.); + _mm512_mask_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src); + let expected = [0., 2., 0., 4., 0., 6., 0., 8.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i32gather_epi32() { + let base_addr: [i32; 4] = [1, 2, 3, 4]; + let src = _mm_setr_epi32(5, 6, 7, 8); + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let r = _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = _mm_setr_epi32(2, 6, 4, 8); + assert_eq_m128i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i32gather_epi64() { + let base_addr: [i64; 2] = [1, 2]; + let src = _mm_setr_epi64x(5, 6); + let vindex = _mm_setr_epi32(1, 0, -1, -1); + let r = _mm_mmask_i32gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr()); + let expected = _mm_setr_epi64x(2, 6); + assert_eq_m128i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i32gather_pd() { + let base_addr: [f64; 2] = [1., 2.]; + let src = _mm_setr_pd(5., 6.); + let vindex = _mm_setr_epi32(1, 0, -1, -1); + let r = _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr()); + let expected = _mm_setr_pd(2., 6.); + assert_eq_m128d(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i32gather_ps() { + let base_addr: [f32; 4] = [1., 2., 3., 4.]; + let src = _mm_setr_ps(5., 6., 7., 8.); + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let r = _mm_mmask_i32gather_ps::<4>(src, 0b0101, vindex, 
base_addr.as_ptr()); + let expected = _mm_setr_ps(2., 6., 4., 8.); + assert_eq_m128(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i64gather_epi32() { + let base_addr: [i32; 2] = [1, 2]; + let src = _mm_setr_epi32(5, 6, 7, 8); + let vindex = _mm_setr_epi64x(1, 0); + let r = _mm_mmask_i64gather_epi32::<4>(src, 0b01, vindex, base_addr.as_ptr()); + let expected = _mm_setr_epi32(2, 6, 0, 0); + assert_eq_m128i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i64gather_epi64() { + let base_addr: [i64; 2] = [1, 2]; + let src = _mm_setr_epi64x(5, 6); + let vindex = _mm_setr_epi64x(1, 0); + let r = _mm_mmask_i64gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr()); + let expected = _mm_setr_epi64x(2, 6); + assert_eq_m128i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i64gather_pd() { + let base_addr: [f64; 2] = [1., 2.]; + let src = _mm_setr_pd(5., 6.); + let vindex = _mm_setr_epi64x(1, 0); + let r = _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr()); + let expected = _mm_setr_pd(2., 6.); + assert_eq_m128d(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mmask_i64gather_ps() { + let base_addr: [f32; 2] = [1., 2.]; + let src = _mm_setr_ps(5., 6., 7., 8.); + let vindex = _mm_setr_epi64x(1, 0); + let r = _mm_mmask_i64gather_ps::<4>(src, 0b01, vindex, base_addr.as_ptr()); + let expected = _mm_setr_ps(2., 6., 0., 0.); + assert_eq_m128(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i32gather_epi32() { + let base_addr: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let src = _mm256_setr_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + let r = _mm256_mmask_i32gather_epi32::<4>(src, 0b01010101, vindex, base_addr.as_ptr()); + let expected = _mm256_setr_epi32(2, 10, 4, 12, 6, 14, 8, 16); + assert_eq_m256i(expected, r); + 
} + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i32gather_epi64() { + let base_addr: [i64; 4] = [1, 2, 3, 4]; + let src = _mm256_setr_epi64x(9, 10, 11, 12); + let vindex = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = _mm256_setr_epi64x(2, 10, 4, 12); + assert_eq_m256i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i32gather_pd() { + let base_addr: [f64; 4] = [1., 2., 3., 4.]; + let src = _mm256_setr_pd(9., 10., 11., 12.); + let vindex = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = _mm256_setr_pd(2., 10., 4., 12.); + assert_eq_m256d(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i32gather_ps() { + let base_addr: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + let r = _mm256_mmask_i32gather_ps::<4>(src, 0b01010101, vindex, base_addr.as_ptr()); + let expected = _mm256_setr_ps(2., 10., 4., 12., 6., 14., 8., 16.); + assert_eq_m256(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i64gather_epi32() { + let base_addr: [i32; 4] = [1, 2, 3, 4]; + let src = _mm_setr_epi32(9, 10, 11, 12); + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let r = _mm256_mmask_i64gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = _mm_setr_epi32(2, 10, 4, 12); + assert_eq_m128i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i64gather_epi64() { + let base_addr: [i64; 4] = [1, 2, 3, 4]; + let src = _mm256_setr_epi64x(9, 10, 11, 12); + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let r = _mm256_mmask_i64gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = 
_mm256_setr_epi64x(2, 10, 4, 12); + assert_eq_m256i(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i64gather_pd() { + let base_addr: [f64; 4] = [1., 2., 3., 4.]; + let src = _mm256_setr_pd(9., 10., 11., 12.); + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let r = _mm256_mmask_i64gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = _mm256_setr_pd(2., 10., 4., 12.); + assert_eq_m256d(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mmask_i64gather_ps() { + let base_addr: [f32; 4] = [1., 2., 3., 4.]; + let src = _mm_setr_ps(9., 10., 11., 12.); + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let r = _mm256_mmask_i64gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr()); + let expected = _mm_setr_ps(2., 10., 4., 12.); + assert_eq_m128(expected, r); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i32scatter_epi32() { + let mut base_addr: [i32; 4] = [0; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm_setr_epi32(2, 3, 4, 1); + _mm_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2, 3, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i32scatter_epi32() { + let mut base_addr: [i32; 4] = [0; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm_setr_epi32(2, 3, 4, 1); + _mm_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0, 2, 0, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i32scatter_epi64() { + let mut base_addr: [i64; 2] = [0; 2]; + let vindex = _mm_setr_epi32(1, 0, -1, -1); + let src = _mm_setr_epi64x(2, 1); + _mm_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_i32scatter_epi64() { + let mut base_addr: [i64; 2] = [0; 2]; + let vindex = _mm_setr_epi32(1, 0, -1, -1); + let src = _mm_setr_epi64x(2, 1); + _mm_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src); + let expected = [0, 2]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i32scatter_pd() { + let mut base_addr: [f64; 2] = [0.; 2]; + let vindex = _mm_setr_epi32(1, 0, -1, -1); + let src = _mm_setr_pd(2., 1.); + _mm_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i32scatter_pd() { + let mut base_addr: [f64; 2] = [0.; 2]; + let vindex = _mm_setr_epi32(1, 0, -1, -1); + let src = _mm_setr_pd(2., 1.); + _mm_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src); + let expected = [0., 2.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i32scatter_ps() { + let mut base_addr: [f32; 4] = [0.; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm_setr_ps(2., 3., 4., 1.); + _mm_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2., 3., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i32scatter_ps() { + let mut base_addr: [f32; 4] = [0.; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm_setr_ps(2., 3., 4., 1.); + _mm_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0., 2., 0., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i64scatter_epi32() { + let mut base_addr: [i32; 2] = [0; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_epi32(2, 1, -1, -1); + _mm_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2]; + 
assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i64scatter_epi32() { + let mut base_addr: [i32; 2] = [0; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_epi32(2, 1, -1, -1); + _mm_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src); + let expected = [0, 2]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i64scatter_epi64() { + let mut base_addr: [i64; 2] = [0; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_epi64x(2, 1); + _mm_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i64scatter_epi64() { + let mut base_addr: [i64; 2] = [0; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_epi64x(2, 1); + _mm_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src); + let expected = [0, 2]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i64scatter_pd() { + let mut base_addr: [f64; 2] = [0.; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_pd(2., 1.); + _mm_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i64scatter_pd() { + let mut base_addr: [f64; 2] = [0.; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_pd(2., 1.); + _mm_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src); + let expected = [0., 2.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_i64scatter_ps() { + let mut base_addr: [f32; 2] = [0.; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_ps(2., 1., -1., -1.); + _mm_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, 
src); + let expected = [1., 2.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_i64scatter_ps() { + let mut base_addr: [f32; 2] = [0.; 2]; + let vindex = _mm_setr_epi64x(1, 0); + let src = _mm_setr_ps(2., 1., -1., -1.); + _mm_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src); + let expected = [0., 2.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i32scatter_epi32() { + let mut base_addr: [i32; 8] = [0; 8]; + let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1); + _mm256_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2, 3, 4, 5, 6, 7, 8]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i32scatter_epi32() { + let mut base_addr: [i32; 8] = [0; 8]; + let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1); + _mm256_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src); + let expected = [0, 2, 0, 4, 0, 6, 0, 8]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i32scatter_epi64() { + let mut base_addr: [i64; 4] = [0; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm256_setr_epi64x(2, 3, 4, 1); + _mm256_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2, 3, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i32scatter_epi64() { + let mut base_addr: [i64; 4] = [0; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm256_setr_epi64x(2, 3, 4, 1); + _mm256_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0, 2, 0, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_i32scatter_pd() { + let mut base_addr: [f64; 4] = [0.; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm256_setr_pd(2., 3., 4., 1.); + _mm256_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2., 3., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i32scatter_pd() { + let mut base_addr: [f64; 4] = [0.; 4]; + let vindex = _mm_setr_epi32(1, 2, 3, 0); + let src = _mm256_setr_pd(2., 3., 4., 1.); + _mm256_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0., 2., 0., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i32scatter_ps() { + let mut base_addr: [f32; 8] = [0.; 8]; + let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.); + _mm256_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2., 3., 4., 5., 6., 7., 8.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i32scatter_ps() { + let mut base_addr: [f32; 8] = [0.; 8]; + let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.); + _mm256_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src); + let expected = [0., 2., 0., 4., 0., 6., 0., 8.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i64scatter_epi32() { + let mut base_addr: [i32; 4] = [0; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm_setr_epi32(2, 3, 4, 1); + _mm256_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2, 3, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i64scatter_epi32() { + let mut 
base_addr: [i32; 4] = [0; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm_setr_epi32(2, 3, 4, 1); + _mm256_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0, 2, 0, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i64scatter_epi64() { + let mut base_addr: [i64; 4] = [0; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm256_setr_epi64x(2, 3, 4, 1); + _mm256_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1, 2, 3, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i64scatter_epi64() { + let mut base_addr: [i64; 4] = [0; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm256_setr_epi64x(2, 3, 4, 1); + _mm256_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0, 2, 0, 4]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i64scatter_pd() { + let mut base_addr: [f64; 4] = [0.; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm256_setr_pd(2., 3., 4., 1.); + _mm256_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2., 3., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i64scatter_pd() { + let mut base_addr: [f64; 4] = [0.; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm256_setr_pd(2., 3., 4., 1.); + _mm256_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0., 2., 0., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_i64scatter_ps() { + let mut base_addr: [f32; 4] = [0.; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm_setr_ps(2., 3., 4., 1.); + 
_mm256_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src); + let expected = [1., 2., 3., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_i64scatter_ps() { + let mut base_addr: [f32; 4] = [0.; 4]; + let vindex = _mm256_setr_epi64x(1, 2, 3, 0); + let src = _mm_setr_ps(2., 3., 4., 1.); + _mm256_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src); + let expected = [0., 2., 0., 4.]; + assert_eq!(expected, base_addr); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_rol_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rol_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_rol_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_rol_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let r = _mm512_maskz_rol_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rol_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rol_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 
32); + let r = _mm256_rol_epi64::<1>(a); + let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rol_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_mask_rol_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_rol_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rol_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_maskz_rol_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rol_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rol_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_rol_epi64::<1>(a); + let e = _mm_set_epi64x(1 << 0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rol_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_mask_rol_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_rol_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rol_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_maskz_rol_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rol_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r 
= _mm512_ror_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 63, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_ror_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_ror_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 63, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let r = _mm512_maskz_ror_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_ror_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ror_epi64() { + let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_ror_epi64::<1>(a); + let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_ror_epi64() { + let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_mask_ror_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_ror_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_ror_epi64() { + let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_maskz_ror_epi64::<1>(0, a); + 
assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_ror_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ror_epi64() { + let a = _mm_set_epi64x(1 << 0, 1 << 32); + let r = _mm_ror_epi64::<1>(a); + let e = _mm_set_epi64x(1 << 63, 1 << 31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_ror_epi64() { + let a = _mm_set_epi64x(1 << 0, 1 << 32); + let r = _mm_mask_ror_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_ror_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 63, 1 << 31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_ror_epi64() { + let a = _mm_set_epi64x(1 << 0, 1 << 32); + let r = _mm_maskz_ror_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_ror_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 63, 1 << 31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_slli_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_slli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_slli_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_slli_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] 
+ unsafe fn test_mm512_maskz_slli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let r = _mm512_maskz_slli_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_slli_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_slli_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_mask_slli_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_slli_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_slli_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_maskz_slli_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_slli_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_slli_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_mask_slli_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_slli_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_slli_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_maskz_slli_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_slli_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 
<< 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_srli_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_srli_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srli_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let r = _mm512_maskz_srli_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srli_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srli_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_mask_srli_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srli_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srli_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_maskz_srli_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srli_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srli_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = 
_mm_mask_srli_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srli_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srli_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_maskz_srli_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srli_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_rolv_epi64(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 34, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_rolv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 34, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 62, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); + let r = _mm512_maskz_rolv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0); + assert_eq_m512i(r, e); 
+ } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rolv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_rolv_epi64(a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rolv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_rolv_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_rolv_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rolv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_rolv_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rolv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 63); + let b = _mm_set_epi64x(0, 1); + let r = _mm_rolv_epi64(a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rolv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 63); + let b = _mm_set_epi64x(0, 1); + let r = _mm_mask_rolv_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_rolv_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rolv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 63); + let b = _mm_set_epi64x(0, 1); + let r = _mm_maskz_rolv_epi64(0, a, b); + 
assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rolv_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_rorv_epi64(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_rorv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); + let r = _mm512_maskz_rorv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rorv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_rorv_epi64(a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29); + assert_eq_m256i(r, e); + } + + #[simd_test(enable 
= "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rorv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_rorv_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_rorv_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rorv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_rorv_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rorv_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rorv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 0); + let b = _mm_set_epi64x(0, 1); + let r = _mm_rorv_epi64(a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rorv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 0); + let b = _mm_set_epi64x(0, 1); + let r = _mm_mask_rorv_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_rorv_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rorv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 0); + let b = _mm_set_epi64x(0, 1); + let r = _mm_maskz_rorv_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rorv_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 
<< 32, + ); + let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7); + let r = _mm512_sllv_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 0, 1 << 34, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 63, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_sllv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 33, 0, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1); + let r = _mm512_maskz_sllv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sllv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32); + let count = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_sllv_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sllv_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 32, 1 << 33, 0, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sllv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32); + let count = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_sllv_epi64(0, a, count); + 
assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 32, 1 << 33, 0, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sllv_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(2, 3); + let r = _mm_mask_sllv_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sllv_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 35); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sllv_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(2, 3); + let r = _mm_maskz_sllv_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sllv_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 35); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_srlv_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 0, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_srlv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 0, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_maskz_srlv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_srlv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srlv_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_mask_srlv_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srlv_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srlv_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_maskz_srlv_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srlv_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_mask_srlv_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srlv_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srlv_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_maskz_srlv_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srlv_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] 
+ unsafe fn test_mm512_sll_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_sll_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + let count = _mm_set_epi64x(1, 0); + let r = _mm512_sll_epi64(a, count); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_sll_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_sll_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sll_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sll_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_mask_sll_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sll_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_maskz_sll_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_maskz_sll_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sll_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sll_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm_mask_sll_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sll_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sll_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm_maskz_sll_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sll_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_srl_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_srl_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 
1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_srl_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srl_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srl_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_mask_srl_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srl_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srl_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_maskz_srl_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srl_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srl_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_mask_srl_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srl_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srl_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_maskz_srl_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srl_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 
0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_sra_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_mask_sra_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_maskz_sra_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sra_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_sra_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_sra_epi64(a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sra_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_mask_sra_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sra_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sra_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_maskz_sra_epi64(0, a, count); + 
assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sra_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_sra_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_sra_epi64(a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sra_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_mask_sra_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sra_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sra_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_maskz_sra_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sra_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_srav_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_mask_srav_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); 
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_maskz_srav_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srav_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_srav_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_srav_epi64(a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srav_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_mask_srav_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srav_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srav_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_maskz_srav_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srav_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_srav_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_srav_epi64(a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srav_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_mask_srav_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srav_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_maskz_srav_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_maskz_srav_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srav_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_srai_epi64::<2>(a); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_mask_srai_epi64::<2>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srai_epi64::<2>(a, 0b11111111, a); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_maskz_srai_epi64::<2>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srai_epi64::<2>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_srai_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_srai_epi64::<1>(a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srai_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_mask_srai_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srai_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srai_epi64() { + let a = 
_mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_maskz_srai_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srai_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_srai_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_srai_epi64::<1>(a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srai_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_mask_srai_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srai_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srai_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_maskz_srai_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srai_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permute_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_permute_pd::<0b11_11_11_11>(a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permute_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0b11111111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permute_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0, a); + assert_eq_m512d(r, 
_mm512_setzero_pd()); + let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0b11111111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permute_pd() { + let a = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_mask_permute_pd::<0b11_11>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_permute_pd::<0b11_11>(a, 0b00001111, a); + let e = _mm256_set_pd(3., 3., 1., 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permute_pd() { + let a = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_maskz_permute_pd::<0b11_11>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permute_pd::<0b11_11>(0b00001111, a); + let e = _mm256_set_pd(3., 3., 1., 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permute_pd() { + let a = _mm_set_pd(1., 0.); + let r = _mm_mask_permute_pd::<0b11>(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_permute_pd::<0b11>(a, 0b00000011, a); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permute_pd() { + let a = _mm_set_pd(1., 0.); + let r = _mm_maskz_permute_pd::<0b11>(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_permute_pd::<0b11>(0b00000011, a); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_permutex_epi64::<0b11_11_11_11>(a); + let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a); + assert_eq_m512i(r, a); + let r = 
_mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0b11111111, a); + let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0b11111111, a); + let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex_epi64() { + let a = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_permutex_epi64::<0b11_11_11_11>(a); + let e = _mm256_set_epi64x(3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex_epi64() { + let a = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0b00001111, a); + let e = _mm256_set_epi64x(3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm256_maskz_permutex_epi64() { + let a = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0b00001111, a); + let e = _mm256_set_epi64x(3, 3, 3, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_permutex_pd::<0b11_11_11_11>(a); + let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = 
_mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0b11111111, a); + let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0b11111111, a); + let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_permutex_pd::<0b11_11_11_11>(a); + let e = _mm256_set_pd(0., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0b00001111, a); + let e = _mm256_set_pd(0., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutex_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0b00001111, a); + let e = _mm256_set_pd(0., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutevar_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_set1_epi64(0b1); + let r = _mm512_permutevar_pd(a, b); + let e = _mm512_set_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutevar_pd() { + let a = 
_mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_set1_epi64(0b1); + let r = _mm512_mask_permutevar_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutevar_pd(a, 0b11111111, a, b); + let e = _mm512_set_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutevar_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_set1_epi64(0b1); + let r = _mm512_maskz_permutevar_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutevar_pd(0b00001111, a, b); + let e = _mm512_set_pd(0., 0., 0., 0., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutevar_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set1_epi64x(0b1); + let r = _mm256_mask_permutevar_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_permutevar_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(1., 1., 3., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutevar_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set1_epi64x(0b1); + let r = _mm256_maskz_permutevar_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutevar_pd(0b00001111, a, b); + let e = _mm256_set_pd(1., 1., 3., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutevar_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_epi64x(0b1); + let r = _mm_mask_permutevar_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_permutevar_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutevar_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_epi64x(0b1); + let r = _mm_maskz_permutevar_pd(0, a, b); + 
assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_permutevar_pd(0b00000011, a, b); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_epi64() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_permutexvar_epi64(idx, a); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutexvar_epi64() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_permutexvar_epi64(a, 0, idx, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutexvar_epi64(a, 0b11111111, idx, a); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutexvar_epi64() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_permutexvar_epi64(0, idx, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutexvar_epi64(0b00001111, idx, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 6, 6, 6, 6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutexvar_epi64() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_permutexvar_epi64(idx, a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_epi64() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_permutexvar_epi64(a, 0, idx, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutexvar_epi64(a, 0b00001111, idx, a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_epi64() { + let idx = _mm256_set1_epi64x(1); 
+ let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_permutexvar_epi64(0, idx, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutexvar_epi64(0b00001111, idx, a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_pd() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_permutexvar_pd(idx, a); + let e = _mm512_set1_pd(6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutexvar_pd() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_permutexvar_pd(a, 0, idx, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutexvar_pd(a, 0b11111111, idx, a); + let e = _mm512_set1_pd(6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutexvar_pd() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_permutexvar_pd(0, idx, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutexvar_pd(0b00001111, idx, a); + let e = _mm512_set_pd(0., 0., 0., 0., 6., 6., 6., 6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutexvar_pd() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_permutexvar_pd(idx, a); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_pd() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_permutexvar_pd(a, 0, idx, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_permutexvar_pd(a, 0b00001111, idx, a); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + 
unsafe fn test_mm256_maskz_permutexvar_pd() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_permutexvar_pd(0, idx, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutexvar_pd(0b00001111, idx, a); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_permutex2var_epi64(a, idx, b); + let e = _mm512_set_epi64(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_mask_permutex2var_epi64(a, 0, idx, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutex2var_epi64(a, 0b11111111, idx, b); + let e = _mm512_set_epi64(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_maskz_permutex2var_epi64(0, a, idx, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutex2var_epi64(0b00001111, a, idx, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_mask2_permutex2var_epi64(a, 
idx, 0, b); + assert_eq_m512i(r, idx); + let r = _mm512_mask2_permutex2var_epi64(a, idx, 0b00001111, b); + let e = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_permutex2var_epi64(a, idx, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_mask_permutex2var_epi64(a, 0, idx, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutex2var_epi64(a, 0b00001111, idx, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_maskz_permutex2var_epi64(0, a, idx, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutex2var_epi64(0b00001111, a, idx, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_mask2_permutex2var_epi64(a, idx, 0, b); + assert_eq_m256i(r, idx); + let r = _mm256_mask2_permutex2var_epi64(a, idx, 0b00001111, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_permutex2var_epi64(a, idx, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_mask_permutex2var_epi64(a, 0, idx, b); + assert_eq_m128i(r, a); + let r = _mm_mask_permutex2var_epi64(a, 0b00000011, idx, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_maskz_permutex2var_epi64(0, a, idx, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_permutex2var_epi64(0b00000011, a, idx, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_mask2_permutex2var_epi64(a, idx, 0, b); + assert_eq_m128i(r, idx); + let r = _mm_mask2_permutex2var_epi64(a, idx, 0b00000011, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_permutex2var_pd(a, idx, b); + let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let 
idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_mask_permutex2var_pd(a, 0, idx, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutex2var_pd(a, 0b11111111, idx, b); + let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_maskz_permutex2var_pd(0, a, idx, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutex2var_pd(0b00001111, a, idx, b); + let e = _mm512_set_pd(0., 0., 0., 0., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_mask2_permutex2var_pd(a, idx, 0, b); + assert_eq_m512d(r, _mm512_castsi512_pd(idx)); + let r = _mm512_mask2_permutex2var_pd(a, idx, 0b11111111, b); + let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_permutex2var_pd(a, idx, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_mask_permutex2var_pd(a, 0, idx, b); + assert_eq_m256d(r, a); + let r = 
_mm256_mask_permutex2var_pd(a, 0b00001111, idx, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_maskz_permutex2var_pd(0, a, idx, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutex2var_pd(0b00001111, a, idx, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_mask2_permutex2var_pd(a, idx, 0, b); + assert_eq_m256d(r, _mm256_castsi256_pd(idx)); + let r = _mm256_mask2_permutex2var_pd(a, idx, 0b00001111, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_permutex2var_pd(a, idx, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_mask_permutex2var_pd(a, 0, idx, b); + assert_eq_m128d(r, a); + let r = _mm_mask_permutex2var_pd(a, 0b00000011, idx, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_maskz_permutex2var_pd(0, a, idx, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let 
r = _mm_maskz_permutex2var_pd(0b00000011, a, idx, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_mask2_permutex2var_pd(a, idx, 0, b); + assert_eq_m128d(r, _mm_castsi128_pd(idx)); + let r = _mm_mask2_permutex2var_pd(a, idx, 0b00000011, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_pd() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00001111, a, b); + let e = _mm256_set_pd(2., 1., 6., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_pd() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b); + let e = _mm256_set_pd(2., 1., 6., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_shuffle_pd() { + let a = _mm_set_pd(1., 4.); + let b = _mm_set_pd(2., 3.); + let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00000011, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_shuffle_pd() { + let a = _mm_set_pd(1., 4.); + let b = _mm_set_pd(2., 3.); + let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = 
_mm_maskz_shuffle_pd::<0b11_11_11_11>(0b00000011, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_i64x2() { + let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_shuffle_i64x2::<0b00_00_00_00>(a, b); + let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_i64x2() { + let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0b11111111, a, b); + let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_i64x2() { + let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0b00001111, a, b); + let e = _mm512_setr_epi64(1, 4, 1, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_shuffle_i64x2() { + let a = _mm256_set_epi64x(1, 4, 5, 8); + let b = _mm256_set_epi64x(2, 3, 6, 7); + let r = _mm256_shuffle_i64x2::<0b00>(a, b); + let e = _mm256_set_epi64x(6, 7, 5, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_i64x2() { + let a = _mm256_set_epi64x(1, 4, 5, 8); + let b = _mm256_set_epi64x(2, 3, 6, 7); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(6, 7, 5, 8); + 
assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_i64x2() { + let a = _mm256_set_epi64x(1, 4, 5, 8); + let b = _mm256_set_epi64x(2, 3, 6, 7); + let r = _mm256_maskz_shuffle_i64x2::<0b00>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shuffle_i64x2::<0b00>(0b00001111, a, b); + let e = _mm256_set_epi64x(6, 7, 5, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_f64x2() { + let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm512_shuffle_f64x2::<0b00_00_00_00>(a, b); + let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_f64x2() { + let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0b11111111, a, b); + let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_f64x2() { + let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0b00001111, a, b); + let e = _mm512_setr_pd(1., 4., 1., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_shuffle_f64x2() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_shuffle_f64x2::<0b00>(a, b); + let e = _mm256_set_pd(6., 7., 5., 8.); + assert_eq_m256d(r, e); + } + 
+ #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_f64x2() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0b00001111, a, b); + let e = _mm256_set_pd(6., 7., 5., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_f64x2() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0b00001111, a, b); + let e = _mm256_set_pd(6., 7., 5., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_movedup_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_movedup_pd(a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_movedup_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_movedup_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_movedup_pd(a, 0b11111111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_movedup_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_movedup_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_movedup_pd(0b00001111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_movedup_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_movedup_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_movedup_pd(a, 0b00001111, 
a); + let e = _mm256_set_pd(2., 2., 4., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_movedup_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_maskz_movedup_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_movedup_pd(0b00001111, a); + let e = _mm256_set_pd(2., 2., 4., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_movedup_pd() { + let a = _mm_set_pd(1., 2.); + let r = _mm_mask_movedup_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_movedup_pd(a, 0b00000011, a); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_movedup_pd() { + let a = _mm_set_pd(1., 2.); + let r = _mm_maskz_movedup_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_movedup_pd(0b00000011, a); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_inserti64x4() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_inserti64x4::<1>(a, b); + let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_inserti64x4() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_mask_inserti64x4::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_inserti64x4::<1>(a, 0b11111111, a, b); + let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_inserti64x4() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_maskz_inserti64x4::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_inserti64x4::<1>(0b00001111, a, b); + let e = _mm512_setr_epi64(1, 2, 3, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_insertf64x4() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_insertf64x4::<1>(a, b); + let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_insertf64x4() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_mask_insertf64x4::<1>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_insertf64x4::<1>(a, 0b11111111, a, b); + let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_insertf64x4() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_maskz_insertf64x4::<1>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_insertf64x4::<1>(0b00001111, a, b); + let e = _mm512_setr_pd(1., 2., 3., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd128_pd512() { + let a = _mm_setr_pd(17., 18.); + let r = _mm512_castpd128_pd512(a); + assert_eq_m128d(_mm512_castpd512_pd128(r), a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd256_pd512() { + let a = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_castpd256_pd512(a); + assert_eq_m256d(_mm512_castpd512_pd256(r), a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextpd128_pd512() { + let a = _mm_setr_pd(17., 18.); + let r = _mm512_zextpd128_pd512(a); + let e = _mm512_setr_pd(17., 18., 0., 0., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_zextpd256_pd512() { + let a = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_zextpd256_pd512(a); + let e = _mm512_setr_pd(17., 18., 19., 20., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd512_pd128() { + let a = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.); + let r = _mm512_castpd512_pd128(a); + let e = _mm_setr_pd(17., 18.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd512_pd256() { + let a = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.); + let r = _mm512_castpd512_pd256(a); + let e = _mm256_setr_pd(17., 18., 19., 20.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd_ps() { + let a = _mm512_set1_pd(1.); + let r = _mm512_castpd_ps(a); + let e = _mm512_set_ps( + 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, + 1.875, 0.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd_si512() { + let a = _mm512_set1_pd(1.); + let r = _mm512_castpd_si512(a); + let e = _mm512_set_epi32( + 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, + 0, 1072693248, 0, 1072693248, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi128_si512() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_castsi128_si512(a); + assert_eq_m128i(_mm512_castsi512_si128(r), a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi256_si512() { + let a = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_castsi256_si512(a); + assert_eq_m256i(_mm512_castsi512_si256(r), a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextsi128_si512() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_zextsi128_si512(a); + let e = _mm512_setr_epi64(17, 18, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextsi256_si512() { + let a = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_zextsi256_si512(a); + let e = _mm512_setr_epi64(17, 18, 19, 20, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_si128() { + let a = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1); + let r = _mm512_castsi512_si128(a); + let e = _mm_setr_epi64x(17, 18); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_si256() { + let a = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1); + let r = _mm512_castsi512_si256(a); + let e = _mm256_setr_epi64x(17, 18, 19, 20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_ps() { + let a = _mm512_set1_epi64(1 << 62); + let r = _mm512_castsi512_ps(a); + let e = _mm512_set_ps( + 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_pd() { + let a = _mm512_set1_epi64(1 << 62); + let r = _mm512_castsi512_pd(a); + let e = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcastq_epi64() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_broadcastq_epi64(a); + let e = _mm512_set1_epi64(17); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcastq_epi64() { + let src = _mm512_set1_epi64(18); + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_mask_broadcastq_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcastq_epi64(src, 0b11111111, a); + let e = _mm512_set1_epi64(17); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcastq_epi64() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_maskz_broadcastq_epi64(0, a); + 
assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcastq_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 17, 17, 17, 17); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcastq_epi64() { + let src = _mm256_set1_epi64x(18); + let a = _mm_set_epi64x(17, 18); + let r = _mm256_mask_broadcastq_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_broadcastq_epi64(src, 0b00001111, a); + let e = _mm256_set1_epi64x(18); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcastq_epi64() { + let a = _mm_set_epi64x(17, 18); + let r = _mm256_maskz_broadcastq_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_broadcastq_epi64(0b00001111, a); + let e = _mm256_set1_epi64x(18); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_broadcastq_epi64() { + let src = _mm_set1_epi64x(18); + let a = _mm_set_epi64x(17, 18); + let r = _mm_mask_broadcastq_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_broadcastq_epi64(src, 0b00000011, a); + let e = _mm_set1_epi64x(18); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_broadcastq_epi64() { + let a = _mm_set_epi64x(17, 18); + let r = _mm_maskz_broadcastq_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_broadcastq_epi64(0b00000011, a); + let e = _mm_set1_epi64x(18); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcastsd_pd() { + let a = _mm_set_pd(17., 18.); + let r = _mm512_broadcastsd_pd(a); + let e = _mm512_set1_pd(18.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcastsd_pd() { + let src = _mm512_set1_pd(18.); + let a = _mm_set_pd(17., 18.); + let r = _mm512_mask_broadcastsd_pd(src, 0, a); + assert_eq_m512d(r, 
src); + let r = _mm512_mask_broadcastsd_pd(src, 0b11111111, a); + let e = _mm512_set1_pd(18.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcastsd_pd() { + let a = _mm_set_pd(17., 18.); + let r = _mm512_maskz_broadcastsd_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_broadcastsd_pd(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 18., 18., 18., 18.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcastsd_pd() { + let src = _mm256_set1_pd(18.); + let a = _mm_set_pd(17., 18.); + let r = _mm256_mask_broadcastsd_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_broadcastsd_pd(src, 0b00001111, a); + let e = _mm256_set1_pd(18.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcastsd_pd() { + let a = _mm_set_pd(17., 18.); + let r = _mm256_maskz_broadcastsd_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_broadcastsd_pd(0b00001111, a); + let e = _mm256_set1_pd(18.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcast_i64x4() { + let a = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm512_broadcast_i64x4(a); + let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcast_i64x4() { + let src = _mm512_set1_epi64(18); + let a = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm512_mask_broadcast_i64x4(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcast_i64x4(src, 0b11111111, a); + let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcast_i64x4() { + let a = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm512_maskz_broadcast_i64x4(0, a); + assert_eq_m512i(r, 
_mm512_setzero_si512()); + let r = _mm512_maskz_broadcast_i64x4(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcast_f64x4() { + let a = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm512_broadcast_f64x4(a); + let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcast_f64x4() { + let src = _mm512_set1_pd(18.); + let a = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm512_mask_broadcast_f64x4(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_broadcast_f64x4(src, 0b11111111, a); + let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcast_f64x4() { + let a = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm512_maskz_broadcast_f64x4(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_broadcast_f64x4(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_blend_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(2); + let r = _mm512_mask_blend_epi64(0b11110000, a, b); + let e = _mm512_set_epi64(2, 2, 2, 2, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_blend_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(2); + let r = _mm256_mask_blend_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_blend_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(2); + let r = _mm_mask_blend_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(2); + assert_eq_m128i(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_blend_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(2.); + let r = _mm512_mask_blend_pd(0b11110000, a, b); + let e = _mm512_set_pd(2., 2., 2., 2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_blend_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(2.); + let r = _mm256_mask_blend_pd(0b00001111, a, b); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_blend_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let r = _mm_mask_blend_pd(0b00000011, a, b); + let e = _mm_set1_pd(2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpackhi_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_unpackhi_epi64(a, b); + let e = _mm512_set_epi64(17, 1, 19, 3, 21, 5, 23, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpackhi_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_unpackhi_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpackhi_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64(17, 1, 19, 3, 21, 5, 23, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpackhi_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_maskz_unpackhi_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpackhi_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 21, 5, 23, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_mask_unpackhi_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_mask_unpackhi_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpackhi_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(17, 1, 19, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_maskz_unpackhi_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpackhi_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(17, 1, 19, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpackhi_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_mask_unpackhi_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpackhi_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(17, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_maskz_unpackhi_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpackhi_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(17, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpackhi_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_unpackhi_pd(a, b); + let e = _mm512_set_pd(17., 1., 19., 3., 21., 5., 23., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpackhi_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = 
_mm512_mask_unpackhi_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_unpackhi_pd(a, 0b11111111, a, b); + let e = _mm512_set_pd(17., 1., 19., 3., 21., 5., 23., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpackhi_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_maskz_unpackhi_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_unpackhi_pd(0b00001111, a, b); + let e = _mm512_set_pd(0., 0., 0., 0., 21., 5., 23., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_mask_unpackhi_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_unpackhi_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(17., 1., 19., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_maskz_unpackhi_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_unpackhi_pd(0b00001111, a, b); + let e = _mm256_set_pd(17., 1., 19., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpackhi_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_mask_unpackhi_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_unpackhi_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(17., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_maskz_unpackhi_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = 
_mm_maskz_unpackhi_pd(0b00000011, a, b); + let e = _mm_set_pd(17., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpacklo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_unpacklo_epi64(a, b); + let e = _mm512_set_epi64(18, 2, 20, 4, 22, 6, 24, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpacklo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_unpacklo_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpacklo_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64(18, 2, 20, 4, 22, 6, 24, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpacklo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_maskz_unpacklo_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpacklo_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 22, 6, 24, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_mask_unpacklo_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpacklo_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(18, 2, 20, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_maskz_unpacklo_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpacklo_epi64(0b00001111, a, b); + let e = 
_mm256_set_epi64x(18, 2, 20, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpacklo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_mask_unpacklo_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpacklo_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(18, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_maskz_unpacklo_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpacklo_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(18, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpacklo_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_unpacklo_pd(a, b); + let e = _mm512_set_pd(18., 2., 20., 4., 22., 6., 24., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpacklo_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_mask_unpacklo_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_unpacklo_pd(a, 0b11111111, a, b); + let e = _mm512_set_pd(18., 2., 20., 4., 22., 6., 24., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpacklo_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_maskz_unpacklo_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_unpacklo_pd(0b00001111, a, b); + let e = _mm512_set_pd(0., 0., 0., 0., 22., 6., 24., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + 
unsafe fn test_mm256_mask_unpacklo_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_mask_unpacklo_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_unpacklo_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(18., 2., 20., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_maskz_unpacklo_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_unpacklo_pd(0b00001111, a, b); + let e = _mm256_set_pd(18., 2., 20., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpacklo_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_mask_unpacklo_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_unpacklo_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(18., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_maskz_unpacklo_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_unpacklo_pd(0b00000011, a, b); + let e = _mm_set_pd(18., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_alignr_epi64() { + let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9); + let r = _mm512_alignr_epi64::<0>(a, b); + assert_eq_m512i(r, b); + let r = _mm512_alignr_epi64::<8>(a, b); + assert_eq_m512i(r, b); + let r = _mm512_alignr_epi64::<1>(a, b); + let e = _mm512_set_epi64(1, 16, 15, 14, 13, 12, 11, 10); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_alignr_epi64() { + let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1); + let b = 
_mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9); + let r = _mm512_mask_alignr_epi64::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_alignr_epi64::<1>(a, 0b11111111, a, b); + let e = _mm512_set_epi64(1, 16, 15, 14, 13, 12, 11, 10); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_alignr_epi64() { + let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9); + let r = _mm512_maskz_alignr_epi64::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_alignr_epi64::<1>(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 13, 12, 11, 10); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_alignr_epi64() { + let a = _mm256_set_epi64x(4, 3, 2, 1); + let b = _mm256_set_epi64x(8, 7, 6, 5); + let r = _mm256_alignr_epi64::<0>(a, b); + let e = _mm256_set_epi64x(8, 7, 6, 5); + assert_eq_m256i(r, e); + let r = _mm256_alignr_epi64::<1>(a, b); + let e = _mm256_set_epi64x(1, 8, 7, 6); + assert_eq_m256i(r, e); + let r = _mm256_alignr_epi64::<6>(a, b); + let e = _mm256_set_epi64x(2, 1, 8, 7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_alignr_epi64() { + let a = _mm256_set_epi64x(4, 3, 2, 1); + let b = _mm256_set_epi64x(8, 7, 6, 5); + let r = _mm256_mask_alignr_epi64::<1>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_alignr_epi64::<0>(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(8, 7, 6, 5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_alignr_epi64() { + let a = _mm256_set_epi64x(4, 3, 2, 1); + let b = _mm256_set_epi64x(8, 7, 6, 5); + let r = _mm256_maskz_alignr_epi64::<1>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_alignr_epi64::<0>(0b00001111, a, b); + let e = _mm256_set_epi64x(8, 7, 6, 5); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_alignr_epi64() { + let a = _mm_set_epi64x(2, 1); + let b = _mm_set_epi64x(4, 3); + let r = _mm_alignr_epi64::<0>(a, b); + let e = _mm_set_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_alignr_epi64() { + let a = _mm_set_epi64x(2, 1); + let b = _mm_set_epi64x(4, 3); + let r = _mm_mask_alignr_epi64::<1>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_alignr_epi64::<0>(a, 0b00000011, a, b); + let e = _mm_set_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_alignr_epi64() { + let a = _mm_set_epi64x(2, 1); + let b = _mm_set_epi64x(4, 3); + let r = _mm_maskz_alignr_epi64::<1>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_alignr_epi64::<0>(0b00000011, a, b); + let e = _mm_set_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_and_epi64(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_and_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_and_epi64(a, 0b01111111, a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + 
let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_maskz_and_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_and_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_and_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 0); + let r = _mm256_mask_and_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_and_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_and_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 0); + let r = _mm256_maskz_and_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_and_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_and_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 0); + let r = _mm_mask_and_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_and_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_and_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 0); + let r = _mm_maskz_and_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_and_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_si512() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 
1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_and_epi64(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_or_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_or_epi64(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, + 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_or_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_or_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_or_epi64(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, + 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_or_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_maskz_or_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_or_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_or_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_or_epi64(a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_or_epi64() { + 
let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_mask_or_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_or_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_or_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_maskz_or_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_or_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_or_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_or_epi64(a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_or_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_mask_or_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_or_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_or_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_maskz_or_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_or_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_or_si512() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_or_epi64(a, b); + 
#[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, + 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_xor_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_xor_epi64(a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_xor_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_xor_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_xor_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_maskz_xor_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_xor_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_xor_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_xor_epi64(a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_xor_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_mask_xor_epi64(a, 0, a, 
b); + assert_eq_m256i(r, a); + let r = _mm256_mask_xor_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_xor_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_maskz_xor_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_xor_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_xor_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_xor_epi64(a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_xor_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_mask_xor_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_xor_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_xor_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_maskz_xor_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_xor_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_xor_si512() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_xor_epi64(a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + 
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_andnot_epi64() { + let a = _mm512_set1_epi64(0); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_andnot_epi64(a, b); + let e = _mm512_set1_epi64(1 << 3 | 1 << 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_andnot_epi64() { + let a = _mm512_set1_epi64(1 << 1 | 1 << 2); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_mask_andnot_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_andnot_epi64(a, 0b11111111, a, b); + let e = _mm512_set1_epi64(1 << 3 | 1 << 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_andnot_epi64() { + let a = _mm512_set1_epi64(1 << 1 | 1 << 2); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_maskz_andnot_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_andnot_epi64(0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 0, 0, 0, + 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_andnot_epi64() { + let a = _mm256_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm256_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm256_mask_andnot_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_andnot_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_andnot_epi64() { + let a = _mm256_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm256_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm256_maskz_andnot_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_andnot_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_andnot_epi64() { + let a = _mm_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm_mask_andnot_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_andnot_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_andnot_epi64() { + let a = _mm_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm_maskz_andnot_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_andnot_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_andnot_si512() { + let a = _mm512_set1_epi64(0); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_andnot_si512(a, b); + let e = _mm512_set1_epi64(1 << 3 | 1 << 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_add_epi64() { + let a = _mm512_set1_epi64(1); + let e: i64 = _mm512_reduce_add_epi64(a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_add_epi64() { + let a = _mm512_set1_epi64(1); + let e: i64 = _mm512_mask_reduce_add_epi64(0b11110000, a); + assert_eq!(4, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_add_pd() { + let a = _mm512_set1_pd(1.); + let e: f64 = _mm512_reduce_add_pd(a); + assert_eq!(8., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_add_pd() { + let a = _mm512_set1_pd(1.); + let e: f64 = _mm512_mask_reduce_add_pd(0b11110000, a); + assert_eq!(4., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_mul_epi64() { + let a = _mm512_set1_epi64(2); + let e: i64 = _mm512_reduce_mul_epi64(a); + assert_eq!(256, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_mul_epi64() { + let a = 
_mm512_set1_epi64(2); + let e: i64 = _mm512_mask_reduce_mul_epi64(0b11110000, a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_mul_pd() { + let a = _mm512_set1_pd(2.); + let e: f64 = _mm512_reduce_mul_pd(a); + assert_eq!(256., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_mul_pd() { + let a = _mm512_set1_pd(2.); + let e: f64 = _mm512_mask_reduce_mul_pd(0b11110000, a); + assert_eq!(16., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_reduce_max_epi64(a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_mask_reduce_max_epi64(0b11110000, a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_reduce_max_epu64(a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_mask_reduce_max_epu64(0b11110000, a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_reduce_max_pd(a); + assert_eq!(7., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_mask_reduce_max_pd(0b11110000, a); + assert_eq!(3., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_reduce_min_epi64(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_reduce_min_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_mask_reduce_min_epi64(0b11110000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_reduce_min_epu64(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_mask_reduce_min_epu64(0b11110000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_reduce_min_pd(a); + assert_eq!(0., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_mask_reduce_min_pd(0b11110000, a); + assert_eq!(0., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_and_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_reduce_and_epi64(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_and_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_mask_reduce_and_epi64(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_or_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_reduce_or_epi64(a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_or_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_mask_reduce_or_epi64(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_extractf64x4_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + 
let r = _mm512_extractf64x4_pd::<1>(a); + let e = _mm256_setr_pd(5., 6., 7., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_extractf64x4_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let src = _mm256_set1_pd(100.); + let r = _mm512_mask_extractf64x4_pd::<1>(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm512_mask_extractf64x4_pd::<1>(src, 0b11111111, a); + let e = _mm256_setr_pd(5., 6., 7., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_extractf64x4_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_extractf64x4_pd::<1>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm512_maskz_extractf64x4_pd::<1>(0b00000001, a); + let e = _mm256_setr_pd(5., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_extracti64x4_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_extracti64x4_epi64::<0x1>(a); + let e = _mm256_setr_epi64x(5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_extracti64x4_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm256_set1_epi64x(100); + let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0b11111111, a); + let e = _mm256_setr_epi64x(5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_extracti64x4_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0b00000001, a); + let e = _mm256_setr_epi64x(5, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compress_epi64() { + 
let src = _mm512_set1_epi64(200); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_compress_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_compress_epi64(src, 0b01010101, a); + let e = _mm512_set_epi64(200, 200, 200, 200, 1, 3, 5, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_compress_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_compress_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_compress_epi64(0b01010101, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1, 3, 5, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compress_epi64() { + let src = _mm256_set1_epi64x(200); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_compress_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_compress_epi64(src, 0b00000101, a); + let e = _mm256_set_epi64x(200, 200, 1, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_compress_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_compress_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_compress_epi64(0b00000101, a); + let e = _mm256_set_epi64x(0, 0, 1, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compress_epi64() { + let src = _mm_set1_epi64x(200); + let a = _mm_set_epi64x(0, 1); + let r = _mm_mask_compress_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_compress_epi64(src, 0b00000001, a); + let e = _mm_set_epi64x(200, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_compress_epi64() { + let a = _mm_set_epi64x(0, 1); + let r = _mm_maskz_compress_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm_maskz_compress_epi64(0b00000001, a); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compress_pd() { + let src = _mm512_set1_pd(200.); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_compress_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_compress_pd(src, 0b01010101, a); + let e = _mm512_set_pd(200., 200., 200., 200., 1., 3., 5., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_compress_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_compress_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_compress_pd(0b01010101, a); + let e = _mm512_set_pd(0., 0., 0., 0., 1., 3., 5., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compress_pd() { + let src = _mm256_set1_pd(200.); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_compress_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_compress_pd(src, 0b00000101, a); + let e = _mm256_set_pd(200., 200., 1., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_compress_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_compress_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_compress_pd(0b00000101, a); + let e = _mm256_set_pd(0., 0., 1., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compress_pd() { + let src = _mm_set1_pd(200.); + let a = _mm_set_pd(0., 1.); + let r = _mm_mask_compress_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_compress_pd(src, 0b00000001, a); + let e = _mm_set_pd(200., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_compress_pd() { + let a = 
_mm_set_pd(0., 1.); + let r = _mm_maskz_compress_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_compress_pd(0b00000001, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expand_epi64() { + let src = _mm512_set1_epi64(200); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_expand_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_expand_epi64(src, 0b01010101, a); + let e = _mm512_set_epi64(200, 4, 200, 5, 200, 6, 200, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expand_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_expand_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_expand_epi64(0b01010101, a); + let e = _mm512_set_epi64(0, 4, 0, 5, 0, 6, 0, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expand_epi64() { + let src = _mm256_set1_epi64x(200); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_expand_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_expand_epi64(src, 0b00000101, a); + let e = _mm256_set_epi64x(200, 2, 200, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expand_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_expand_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_expand_epi64(0b00000101, a); + let e = _mm256_set_epi64x(0, 2, 0, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expand_epi64() { + let src = _mm_set1_epi64x(200); + let a = _mm_set_epi64x(0, 1); + let r = _mm_mask_expand_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_expand_epi64(src, 0b00000001, a); + let e = _mm_set_epi64x(200, 1); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expand_epi64() { + let a = _mm_set_epi64x(0, 1); + let r = _mm_maskz_expand_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_expand_epi64(0b00000001, a); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expand_pd() { + let src = _mm512_set1_pd(200.); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_expand_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_expand_pd(src, 0b01010101, a); + let e = _mm512_set_pd(200., 4., 200., 5., 200., 6., 200., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expand_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_expand_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_expand_pd(0b01010101, a); + let e = _mm512_set_pd(0., 4., 0., 5., 0., 6., 0., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expand_pd() { + let src = _mm256_set1_pd(200.); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_expand_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_expand_pd(src, 0b00000101, a); + let e = _mm256_set_pd(200., 2., 200., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expand_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_expand_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_expand_pd(0b00000101, a); + let e = _mm256_set_pd(0., 2., 0., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expand_pd() { + let src = _mm_set1_pd(200.); + let a = _mm_set_pd(0., 1.); + let r = _mm_mask_expand_pd(src, 0, a); + assert_eq_m128d(r, src); + let r 
= _mm_mask_expand_pd(src, 0b00000001, a); + let e = _mm_set_pd(200., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expand_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_maskz_expand_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_expand_pd(0b00000001, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_loadu_epi64() { + let a = &[4, 3, 2, 5, -8, -9, -64, -50]; + let p = a.as_ptr(); + let r = _mm512_loadu_epi64(black_box(p)); + let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_loadu_epi64() { + let a = &[4, 3, 2, 5]; + let p = a.as_ptr(); + let r = _mm256_loadu_epi64(black_box(p)); + let e = _mm256_setr_epi64x(4, 3, 2, 5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_loadu_epi64() { + let a = &[4, 3]; + let p = a.as_ptr(); + let r = _mm_loadu_epi64(black_box(p)); + let e = _mm_setr_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_storeu_epi16() { + let a = _mm512_set1_epi64(9); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set1_epi16(9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_storeu_epi16() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 
0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set1_epi16(i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set1_epi16(u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16( + 0, + 0, + 0, + 0, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r 
= _mm_set1_epi16(0); + _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_storeu_epi8() { + let a = _mm512_set1_epi64(9); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_storeu_epi8() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi8() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, 
i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi8() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi8() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi8() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() { + let a = _mm512_set1_epi64(9); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a); + let e = 
_mm256_set1_epi32(9); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a); + let e = _mm_set_epi32(9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_storeu_epi32() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a); + let e = _mm_set_epi32(0, 0, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a); + let e = _mm256_set1_epi32(i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a); + let e = _mm_set1_epi32(i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a); + let e = _mm256_set1_epi32(u32::MAX as i32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a); + let e = _mm_set1_epi32(u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a); + let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_storeu_epi64() { + let a = _mm512_set1_epi64(9); + let mut r = _mm512_set1_epi64(0); + _mm512_storeu_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_storeu_epi64() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm256_set1_epi64x(0); + _mm256_storeu_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_storeu_epi64() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi64x(0); + _mm_storeu_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 8], // 64 bytes + } + let a = Align { + data: [4, 3, 2, 5, -8, -9, -64, -50], + }; + let p = (a.data).as_ptr(); + let r = _mm512_load_epi64(black_box(p)); + let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 4], + } + let a = Align { data: [4, 3, 2, 5] }; + let p = (a.data).as_ptr(); + let r = _mm256_load_epi64(black_box(p)); + let e = 
_mm256_set_epi64x(5, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 2], + } + let a = Align { data: [4, 3] }; + let p = (a.data).as_ptr(); + let r = _mm_load_epi64(black_box(p)); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_epi64() { + let a = _mm512_set1_epi64(9); + let mut r = _mm512_set1_epi64(0); + _mm512_store_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_store_epi64() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm256_set1_epi64x(0); + _mm256_store_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_store_epi64() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi64x(0); + _mm_store_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_pd() { + #[repr(align(64))] + struct Align { + data: [f64; 8], // 64 bytes + } + let a = Align { + data: [4., 3., 2., 5., -8., -9., -64., -50.], + }; + let p = (a.data).as_ptr(); + let r = _mm512_load_pd(black_box(p)); + let e = _mm512_setr_pd(4., 3., 2., 5., -8., -9., -64., -50.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_pd() { + let a = _mm512_set1_pd(9.); + let mut r = _mm512_undefined_pd(); + _mm512_store_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_test_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 0 | 1 << 1); + let r = _mm512_test_epi64_mask(a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_test_epi64_mask() 
{ + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 0 | 1 << 1); + let r = _mm512_mask_test_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_test_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_test_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm256_test_epi64_mask(a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_test_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm256_mask_test_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_test_epi64_mask(0b00001111, a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_test_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm_test_epi64_mask(a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_test_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm_mask_test_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_test_epi64_mask(0b00000011, a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_testn_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 0 | 1 << 1); + let r = _mm512_testn_epi64_mask(a, b); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_testn_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 1); + let r = _mm512_mask_testn_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = 
_mm512_mask_testn_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_testn_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 1); + let r = _mm256_testn_epi64_mask(a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_testn_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 1); + let r = _mm256_mask_testn_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_testn_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_testn_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 1); + let r = _mm_testn_epi64_mask(a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_testn_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 1); + let r = _mm_mask_testn_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_testn_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_set1_epi64() { + let src = _mm512_set1_epi64(2); + let a: i64 = 11; + let r = _mm512_mask_set1_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_set1_epi64(src, 0b11111111, a); + let e = _mm512_set1_epi64(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_set1_epi64() { + let a: i64 = 11; + let r = _mm512_maskz_set1_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_set1_epi64(0b11111111, a); + let e = _mm512_set1_epi64(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_mask_set1_epi64() { + let src = _mm256_set1_epi64x(2); + let a: i64 = 11; + let r = _mm256_mask_set1_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_set1_epi64(src, 0b00001111, a); + let e = _mm256_set1_epi64x(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_set1_epi64() { + let a: i64 = 11; + let r = _mm256_maskz_set1_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_set1_epi64(0b00001111, a); + let e = _mm256_set1_epi64x(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_set1_epi64() { + let src = _mm_set1_epi64x(2); + let a: i64 = 11; + let r = _mm_mask_set1_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_set1_epi64(src, 0b00000011, a); + let e = _mm_set1_epi64x(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_set1_epi64() { + let a: i64 = 11; + let r = _mm_maskz_set1_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_set1_epi64(0b00000011, a); + let e = _mm_set1_epi64x(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsi64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = 
_mm_cvt_roundsi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvti64_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvti64_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_si64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_u64() { + let a = _mm_set_pd(1., f64::MAX); + let r = _mm_cvt_roundsd_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_si64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm_cvt_roundss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_i64(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_i64::<_MM_FROUND_NO_EXC>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_si64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_si64::<_MM_FROUND_NO_EXC>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_u64::<_MM_FROUND_NO_EXC>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_i64(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_i64::<_MM_FROUND_NO_EXC>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_si64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_si64::<_MM_FROUND_NO_EXC>(a); + let e: i64 = -1; + 
assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_u64::<_MM_FROUND_NO_EXC>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u64 = 9; + let r = _mm_cvtu64_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u64 = 9; + let r = _mm_cvtu64_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u64 = 9; + let r = _mm_cvt_roundu64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u64 = 9; + let r = _mm_cvt_roundu64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsi64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundsi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } +} diff --git 
a/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs new file mode 100644 index 000000000000..955c6ccc7526 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs @@ -0,0 +1,321 @@ +use crate::core_arch::x86::*; +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the +/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements +/// of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsi2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h { + unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the +/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements +/// of dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundi64_sh(a: __m128h, b: i64) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsi642sh(a, b, ROUNDING) + } +} + +/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the +/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements +/// of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtusi2sh))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h { + unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the +/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements +/// of dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sh) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundu64_sh(a: __m128h, b: u64) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtusi642sh(a, b, ROUNDING) + } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store +/// the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2si))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsh_i64(a: __m128h) -> i64 { + unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store +/// the result in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundsh_i64(a: __m128h) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsh2si64(a, ROUNDING) + } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store +/// the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2usi))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtsh_u64(a: __m128h) -> u64 { + unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store +/// the result in dst. 
+/// +/// Rounding is done according to the rounding parameter, which can be one of: +/// +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvt_roundsh_u64(a: __m128h) -> u64 { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsh2usi64(a, ROUNDING) + } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation, +/// and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttsh2si))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvttsh_i64(a: __m128h) -> i64 { + unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation, +/// and store the result in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtt_roundsh_i64(a: __m128h) -> i64 { + unsafe { + static_assert_sae!(SAE); + vcvttsh2si64(a, SAE) + } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation, +/// and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttsh2usi))] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvttsh_u64(a: __m128h) -> u64 { + unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) } +} + +/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation, +/// and store the result in dst. +/// +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64) +#[inline] +#[target_feature(enable = "avx512fp16")] +#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_cvtt_roundsh_u64(a: __m128h) -> u64 { + unsafe { + static_assert_sae!(SAE); + vcvttsh2usi64(a, SAE) + } +} + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.avx512fp16.vcvtsi642sh"] + fn vcvtsi642sh(a: __m128h, b: i64, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.vcvtusi642sh"] + fn vcvtusi642sh(a: __m128h, b: u64, rounding: i32) -> __m128h; + #[link_name = "llvm.x86.avx512fp16.vcvtsh2si64"] + fn vcvtsh2si64(a: __m128h, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi64"] + fn vcvtsh2usi64(a: __m128h, rounding: i32) -> u64; + #[link_name = "llvm.x86.avx512fp16.vcvttsh2si64"] + fn vcvttsh2si64(a: __m128h, sae: i32) -> i64; + #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi64"] + fn vcvttsh2usi64(a: __m128h, sae: i32) -> u64; +} + +#[cfg(test)] +mod tests { + use crate::core_arch::{x86::*, x86_64::*}; + use stdarch_test::simd_test; + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvti64_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvti64_sh(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvt_roundi64_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundi64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvtu64_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 
8.0); + let r = _mm_cvtu64_sh(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16,avx512vl")] + unsafe fn test_mm_cvt_roundu64_sh() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundu64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); + let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + assert_eq_m128h(r, e); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_i64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsh_i64(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsh_i64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtsh_u64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsh_u64(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvt_roundsh_u64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvt_roundsh_u64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvttsh_i64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttsh_i64(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtt_roundsh_i64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + assert_eq!(r, 1); + } + + #[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvttsh_u64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvttsh_u64(a); + assert_eq!(r, 1); + } + + 
#[simd_test(enable = "avx512fp16")] + unsafe fn test_mm_cvtt_roundsh_u64() { + let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtt_roundsh_u64::<_MM_FROUND_NO_EXC>(a); + assert_eq!(r, 1); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs new file mode 100644 index 000000000000..5d204d51ae6d --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -0,0 +1,183 @@ +//! Bit Manipulation Instruction (BMI) Set 1.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Extracts bits in range [`start`, `start` + `length`) from `a` into +/// the least significant bits of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(bextr))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { + _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64) +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits `[7,0]` of `control` specify the index to the first bit in the range +/// to be extracted, and bits `[15,8]` specify the length of the range. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr2_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(bextr))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _bextr2_u64(a: u64, control: u64) -> u64 { + unsafe { x86_bmi_bextr_64(a, control) } +} + +/// Bitwise logical `AND` of inverted `a` with `b`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_andn_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(andn))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _andn_u64(a: u64, b: u64) -> u64 { + !a & b +} + +/// Extracts lowest set isolated bit. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsi_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(blsi))] +#[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _blsi_u64(x: u64) -> u64 { + x & x.wrapping_neg() +} + +/// Gets mask up to lowest set bit. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsmsk_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(blsmsk))] +#[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _blsmsk_u64(x: u64) -> u64 { + x ^ (x.wrapping_sub(1_u64)) +} + +/// Resets the lowest set bit of `x`. +/// +/// If `x` is sets CF. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_blsr_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(blsr))] +#[cfg(not(target_arch = "x86"))] // generates lots of instructions +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _blsr_u64(x: u64) -> u64 { + x & (x.wrapping_sub(1)) +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is `0`, it returns its size in bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_tzcnt_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _tzcnt_u64(x: u64) -> u64 { + x.trailing_zeros() as u64 +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is `0`, it returns its size in bits. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_tzcnt_64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_tzcnt_64(x: u64) -> i64 { + x.trailing_zeros() as i64 +} + +unsafe extern "C" { + #[link_name = "llvm.x86.bmi.bextr.64"] + fn x86_bmi_bextr_64(x: u64, y: u64) -> u64; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::{x86::*, x86_64::*}; + + #[simd_test(enable = "bmi1")] + unsafe fn test_bextr_u64() { + let r = _bextr_u64(0b0101_0000u64, 4, 4); + assert_eq!(r, 0b0000_0101u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_andn_u64() { + assert_eq!(_andn_u64(0, 0), 0); + assert_eq!(_andn_u64(0, 1), 1); + assert_eq!(_andn_u64(1, 0), 0); + assert_eq!(_andn_u64(1, 1), 0); + + let r = _andn_u64(0b0000_0000u64, 0b0000_0000u64); + assert_eq!(r, 0b0000_0000u64); + + let r = 
_andn_u64(0b0000_0000u64, 0b1111_1111u64); + assert_eq!(r, 0b1111_1111u64); + + let r = _andn_u64(0b1111_1111u64, 0b0000_0000u64); + assert_eq!(r, 0b0000_0000u64); + + let r = _andn_u64(0b1111_1111u64, 0b1111_1111u64); + assert_eq!(r, 0b0000_0000u64); + + let r = _andn_u64(0b0100_0000u64, 0b0101_1101u64); + assert_eq!(r, 0b0001_1101u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsi_u64() { + assert_eq!(_blsi_u64(0b1101_0000u64), 0b0001_0000u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsmsk_u64() { + let r = _blsmsk_u64(0b0011_0000u64); + assert_eq!(r, 0b0001_1111u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsr_u64() { + // TODO: test the behavior when the input is `0`. + let r = _blsr_u64(0b0011_0000u64); + assert_eq!(r, 0b0010_0000u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_tzcnt_u64() { + assert_eq!(_tzcnt_u64(0b0000_0001u64), 0u64); + assert_eq!(_tzcnt_u64(0b0000_0000u64), 64u64); + assert_eq!(_tzcnt_u64(0b1001_0000u64), 4u64); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs new file mode 100644 index 000000000000..ea9daf88574f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs @@ -0,0 +1,139 @@ +//! Bit Manipulation Instruction (BMI) Set 2.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Unsigned multiply without affecting flags. 
+/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u64) +#[inline] +#[cfg_attr(test, assert_instr(mul))] +#[target_feature(enable = "bmi2")] +#[cfg(not(target_arch = "x86"))] // calls an intrinsic +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { + let result: u128 = (a as u128) * (b as u128); + *hi = (result >> 64) as u64; + result as u64 +} + +/// Zeroes higher bits of `a` >= `index`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u64) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(bzhi))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _bzhi_u64(a: u64, index: u32) -> u64 { + unsafe { x86_bmi2_bzhi_64(a, index as u64) } +} + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(pdep))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _pdep_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pdep_64(a, mask) } +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(pext))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _pext_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pext_64(a, mask) } +} + +unsafe extern "C" { + #[link_name = "llvm.x86.bmi.bzhi.64"] + fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; + #[link_name = "llvm.x86.bmi.pdep.64"] + fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; + #[link_name = "llvm.x86.bmi.pext.64"] + fn x86_bmi2_pext_64(x: u64, y: u64) -> u64; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86_64::*; + + #[simd_test(enable = "bmi2")] + unsafe fn test_pext_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0000_0011_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b0001_0111_0100_0011u64; + + assert_eq!(_pext_u64(n, m0), s0); + assert_eq!(_pext_u64(n, m1), s1); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_pdep_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0010_0000_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b1110_1001_0010_0011u64; + + assert_eq!(_pdep_u64(n, m0), s0); + assert_eq!(_pdep_u64(n, m1), s1); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_bzhi_u64() { + let n = 0b1111_0010u64; + let s = 0b0001_0010u64; + assert_eq!(_bzhi_u64(n, 5), s); + } + + #[simd_test(enable = "bmi2")] + #[rustfmt::skip] + unsafe fn test_mulx_u64() { + let a: u64 = 9_223_372_036_854_775_800; + let b: u64 = 100; + let mut hi = 0; + let lo = _mulx_u64(a, b, &mut hi); + /* +result = 922337203685477580000 = +0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000 + ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + assert_eq!( + lo, 
+ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64 + ); + assert_eq!(hi, 0b00110001u64); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs new file mode 100644 index 000000000000..62cd2948ce14 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -0,0 +1,29 @@ +//! Byte swap intrinsics. + +#![allow(clippy::module_name_repetitions)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Returns an integer with the reversed byte order of x +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bswap64) +#[inline] +#[cfg_attr(test, assert_instr(bswap))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _bswap64(x: i64) -> i64 { + x.swap_bytes() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bswap64() { + unsafe { + assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E); + assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bt.rs b/library/stdarch/crates/core_arch/src/x86_64/bt.rs new file mode 100644 index 000000000000..f9aa3e16ccdf --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bt.rs @@ -0,0 +1,147 @@ +use crate::arch::asm; +#[cfg(test)] +use stdarch_test::assert_instr; + +// x32 wants to use a 32-bit address size, but asm! defaults to using the full +// register name (e.g. rax). We have to explicitly override the placeholder to +// use the 32-bit register name in that case. +#[cfg(target_pointer_width = "32")] +macro_rules! bt { + ($inst:expr) => { + concat!($inst, " {b}, ({p:e})") + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! bt { + ($inst:expr) => { + concat!($inst, " {b}, ({p})") + }; +} + +/// Returns the bit in position `b` of the memory addressed by `p`. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittest64) +#[inline] +#[cfg_attr(test, assert_instr(bt))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittest64(p: *const i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(readonly, nostack, pure, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then sets the bit to `1`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittestandset64) +#[inline] +#[cfg_attr(test, assert_instr(bts))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandset64(p: *mut i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btsq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then resets that bit to `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittestandreset64) +#[inline] +#[cfg_attr(test, assert_instr(btr))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandreset64(p: *mut i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btrq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then inverts that bit. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bittestandcomplement64) +#[inline] +#[cfg_attr(test, assert_instr(btc))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandcomplement64(p: *mut i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btcq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86_64::*; + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittest64() { + unsafe { + let a = 0b0101_0000i64; + assert_eq!(_bittest64(&a as _, 4), 1); + assert_eq!(_bittest64(&a as _, 5), 0); + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittestandset64() { + unsafe { + let mut a = 0b0101_0000i64; + assert_eq!(_bittestandset64(&mut a as _, 4), 1); + assert_eq!(_bittestandset64(&mut a as _, 4), 1); + assert_eq!(_bittestandset64(&mut a as _, 5), 0); + assert_eq!(_bittestandset64(&mut a as _, 5), 1); + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittestandreset64() { + unsafe { + let mut a = 0b0101_0000i64; + assert_eq!(_bittestandreset64(&mut a as _, 4), 1); + assert_eq!(_bittestandreset64(&mut a as _, 4), 0); + assert_eq!(_bittestandreset64(&mut a as _, 5), 0); + assert_eq!(_bittestandreset64(&mut a as _, 5), 0); + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Uses inline assembly + fn test_bittestandcomplement64() { + unsafe { + let mut a = 0b0101_0000i64; + assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1); + assert_eq!(_bittestandcomplement64(&mut a as _, 4), 0); + assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1); + assert_eq!(_bittestandcomplement64(&mut a as _, 5), 0); + assert_eq!(_bittestandcomplement64(&mut a as _, 5), 1); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs 
b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs new file mode 100644 index 000000000000..46a008245bf8 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -0,0 +1,55 @@ +use crate::sync::atomic::Ordering; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Compares and exchange 16 bytes (128 bits) of data atomically. +/// +/// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64` +/// processors. It performs an atomic compare-and-swap, updating the `ptr` +/// memory location to `val` if the current value in memory equals `old`. +/// +/// # Return value +/// +/// This function returns the previous value at the memory location. If it is +/// equal to `old` then the memory was updated to `new`. +/// +/// # Memory Orderings +/// +/// This atomic operation has the same semantics of memory orderings as +/// `AtomicUsize::compare_exchange` does, only operating on 16 bytes of memory +/// instead of just a pointer. +/// +/// The failure ordering must be [`Ordering::SeqCst`], [`Ordering::Acquire`] or +/// [`Ordering::Relaxed`]. +/// +/// For more information on memory orderings here see the `compare_exchange` +/// documentation for other `Atomic*` types in the standard library. +/// +/// # Unsafety +/// +/// This method is unsafe because it takes a raw pointer and will attempt to +/// read and possibly write the memory at the pointer. The pointer must also be +/// aligned on a 16-byte boundary. +/// +/// This method also requires the `cmpxchg16b` CPU feature to be available at +/// runtime to work correctly. If the CPU running the binary does not actually +/// support `cmpxchg16b` and the program enters an execution path that +/// eventually would reach this function the behavior is undefined. 
+#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +#[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))] +#[target_feature(enable = "cmpxchg16b")] +#[stable(feature = "cmpxchg16b_intrinsic", since = "1.67.0")] +pub unsafe fn cmpxchg16b( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + let res = crate::sync::atomic::atomic_compare_exchange(dst, old, new, success, failure); + res.unwrap_or_else(|x| x) +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs new file mode 100644 index 000000000000..a24b44fb1f7e --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs @@ -0,0 +1,88 @@ +//! FXSR floating-point context fast save and restore. + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.fxsave64"] + fn fxsave64(p: *mut u8); + #[link_name = "llvm.x86.fxrstor64"] + fn fxrstor64(p: *const u8); +} + +/// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the +/// 512-byte-long 16-byte-aligned memory region `mem_addr`. +/// +/// A misaligned destination operand raises a general-protection (#GP) or an +/// alignment check exception (#AC). +/// +/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. 
+/// +/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html +/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxsave64) +#[inline] +#[target_feature(enable = "fxsr")] +#[cfg_attr(test, assert_instr(fxsave64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _fxsave64(mem_addr: *mut u8) { + fxsave64(mem_addr) +} + +/// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the +/// 512-byte-long 16-byte-aligned memory region `mem_addr`. +/// +/// The contents of this memory region should have been written to by a +/// previous +/// `_fxsave` or `_fxsave64` intrinsic. +/// +/// A misaligned destination operand raises a general-protection (#GP) or an +/// alignment check exception (#AC). +/// +/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. +/// +/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html +/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_fxrstor64) +#[inline] +#[target_feature(enable = "fxsr")] +#[cfg_attr(test, assert_instr(fxrstor64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _fxrstor64(mem_addr: *const u8) { + fxrstor64(mem_addr) +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86_64::*; + use std::{cmp::PartialEq, fmt}; + use stdarch_test::simd_test; + + #[repr(align(16))] + struct FxsaveArea { + data: [u8; 512], // 512 bytes + } + + impl FxsaveArea { + fn new() -> FxsaveArea { + FxsaveArea { data: [0; 512] } + } + fn ptr(&mut self) -> *mut u8 { + self.data.as_mut_ptr() + } + } + + #[simd_test(enable = "fxsr")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_fxsave64() { + let mut a = FxsaveArea::new(); + let mut b = FxsaveArea::new(); + + fxsr::_fxsave64(a.ptr()); + 
fxsr::_fxrstor64(a.ptr()); + fxsr::_fxsave64(b.ptr()); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/macros.rs b/library/stdarch/crates/core_arch/src/x86_64/macros.rs new file mode 100644 index 000000000000..53f1d02bd368 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/macros.rs @@ -0,0 +1,35 @@ +//! Utility macros. + +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not a round number. +#[allow(unused)] +macro_rules! static_assert_rounding { + ($imm:ident) => { + static_assert!( + $imm == 4 || $imm == 8 || $imm == 9 || $imm == 10 || $imm == 11, + "Invalid IMM value" + ) + }; +} + +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not a sae number. +#[allow(unused)] +macro_rules! static_assert_sae { + ($imm:ident) => { + static_assert!($imm == 4 || $imm == 8, "Invalid IMM value") + }; +} + +#[cfg(target_pointer_width = "32")] +macro_rules! vps { + ($inst1:expr, $inst2:expr) => { + concat!($inst1, " [{p:e}]", $inst2) + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! vps { + ($inst1:expr, $inst2:expr) => { + concat!($inst1, " [{p}]", $inst2) + }; +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/mod.rs b/library/stdarch/crates/core_arch/src/x86_64/mod.rs new file mode 100644 index 000000000000..7d681882bef0 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/mod.rs @@ -0,0 +1,83 @@ +//! 
`x86_64` intrinsics + +#[macro_use] +mod macros; + +mod fxsr; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::fxsr::*; + +mod sse; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse::*; + +mod sse2; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse2::*; + +mod sse41; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse41::*; + +mod sse42; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::sse42::*; + +mod xsave; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::xsave::*; + +mod abm; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::abm::*; + +mod avx; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::avx::*; + +mod bmi; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::bmi::*; +mod bmi2; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::bmi2::*; + +mod tbm; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::tbm::*; + +mod avx512f; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512f::*; + +mod avx512bw; +#[stable(feature = "stdarch_x86_avx512", since = "1.89")] +pub use self::avx512bw::*; + +mod bswap; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::bswap::*; + +mod rdrand; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::rdrand::*; + +mod cmpxchg16b; +#[stable(feature = "cmpxchg16b_intrinsic", since = "1.67.0")] +pub use self::cmpxchg16b::*; + +mod adx; +#[stable(feature = "simd_x86_adx", since = "1.33.0")] +pub use self::adx::*; + +mod bt; +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub use self::bt::*; + +mod avx512fp16; +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub use self::avx512fp16::*; + +mod amx; +#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] +pub use self::amx::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs 
b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs new file mode 100644 index 000000000000..42e907b4e478 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -0,0 +1,44 @@ +//! RDRAND and RDSEED instructions for returning random numbers from an Intel +//! on-chip hardware random number generator which has been seeded by an +//! on-chip entropy source. + +#![allow(clippy::module_name_repetitions)] + +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { + #[link_name = "llvm.x86.rdrand.64"] + fn x86_rdrand64_step() -> (u64, i32); + #[link_name = "llvm.x86.rdseed.64"] + fn x86_rdseed64_step() -> (u64, i32); +} + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Read a hardware generated 64-bit random value and store the result in val. +/// Returns 1 if a random value was generated, and 0 otherwise. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdrand64_step) +#[inline] +#[target_feature(enable = "rdrand")] +#[cfg_attr(test, assert_instr(rdrand))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { + let (v, flag) = x86_rdrand64_step(); + *val = v; + flag +} + +/// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store +/// in val. Return 1 if a random value was generated, and 0 otherwise. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_rdseed64_step) +#[inline] +#[target_feature(enable = "rdseed")] +#[cfg_attr(test, assert_instr(rdseed))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 { + let (v, flag) = x86_rdseed64_step(); + *val = v; + flag +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs new file mode 100644 index 000000000000..863c3cd2e701 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs @@ -0,0 +1,145 @@ +//! `x86_64` Streaming SIMD Extensions (SSE) + +use crate::core_arch::x86::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse.cvtss2si64"] + fn cvtss2si64(a: __m128) -> i64; + #[link_name = "llvm.x86.sse.cvttss2si64"] + fn cvttss2si64(a: __m128) -> i64; + #[link_name = "llvm.x86.sse.cvtsi642ss"] + fn cvtsi642ss(a: __m128, b: i64) -> __m128; +} + +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer. +/// +/// The result is rounded according to the current rounding mode. If the result +/// cannot be represented as a 64 bit integer the result will be +/// `0x8000_0000_0000_0000` (`i64::MIN`) or trigger an invalid operation +/// floating point exception if unmasked (see +/// [`_mm_setcsr`](fn._mm_setcsr.html)). +/// +/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si64) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtss_si64(a: __m128) -> i64 { + unsafe { cvtss2si64(a) } +} + +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer +/// with truncation. 
+/// +/// The result is rounded always using truncation (round towards zero). If the +/// result cannot be represented as a 64 bit integer the result will be +/// `0x8000_0000_0000_0000` (`i64::MIN`) or an invalid operation floating +/// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). +/// +/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si64) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttss_si64(a: __m128) -> i64 { + unsafe { cvttss2si64(a) } +} + +/// Converts a 64 bit integer to a 32 bit float. The result vector is the input +/// vector `a` with the lowest 32 bit float replaced by the converted integer. +/// +/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit +/// input). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvtsi2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { + unsafe { cvtsi642ss(a, b) } +} + +#[cfg(test)] +mod tests { + use crate::core_arch::arch::x86_64::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtss_si64() { + let inputs = &[ + (42.0f32, 42i64), + (-31.4, -31), + (-33.5, -34), + (-34.5, -34), + (4.0e10, 40_000_000_000), + (4.0e-10, 0), + (f32::NAN, i64::MIN), + (2147483500.1, 2147483520), + (9.223371e18, 9223370937343148032), + ]; + for (i, &(xi, e)) in inputs.iter().enumerate() { + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvtss_si64(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + + #[simd_test(enable = 
"sse")] + unsafe fn test_mm_cvttss_si64() { + let inputs = &[ + (42.0f32, 42i64), + (-31.4, -31), + (-33.5, -33), + (-34.5, -34), + (10.999, 10), + (-5.99, -5), + (4.0e10, 40_000_000_000), + (4.0e-10, 0), + (f32::NAN, i64::MIN), + (2147483500.1, 2147483520), + (9.223371e18, 9223370937343148032), + (9.223372e18, i64::MIN), + ]; + for (i, &(xi, e)) in inputs.iter().enumerate() { + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvttss_si64(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtsi64_ss() { + let inputs = &[ + (4555i64, 4555.0f32), + (322223333, 322223330.0), + (-432, -432.0), + (-322223333, -322223330.0), + (9223372036854775807, 9.223372e18), + (-9223372036854775808, -9.223372e18), + ]; + + for &(x, f) in inputs { + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsi64_ss(a, x); + let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs new file mode 100644 index 000000000000..475e2d2a83cc --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -0,0 +1,224 @@ +//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2) + +use crate::core_arch::x86::*; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse2.cvtsd2si64"] + fn cvtsd2si64(a: __m128d) -> i64; + #[link_name = "llvm.x86.sse2.cvttsd2si64"] + fn cvttsd2si64(a: __m128d) -> i64; +} + +/// Converts the lower double-precision (64-bit) floating-point element in a to +/// a 64-bit integer. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsd_si64(a: __m128d) -> i64 { + unsafe { cvtsd2si64(a) } +} + +/// Alias for `_mm_cvtsd_si64` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64x) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsd_si64x(a: __m128d) -> i64 { + _mm_cvtsd_si64(a) +} + +/// Converts the lower double-precision (64-bit) floating-point element in `a` +/// to a 64-bit integer with truncation. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttsd_si64(a: __m128d) -> i64 { + unsafe { cvttsd2si64(a) } +} + +/// Alias for `_mm_cvttsd_si64` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64x) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvttsd_si64x(a: __m128d) -> i64 { + _mm_cvttsd_si64(a) +} + +/// Stores a 64-bit integer value in the specified memory location. +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64) +/// +/// # Safety of non-temporal stores +/// +/// After using this intrinsic, but before any other access to the memory that this intrinsic +/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In +/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they +/// return. +/// +/// See [`_mm_sfence`] for details. +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movnti))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { + crate::arch::asm!( + vps!("movnti", ",{a}"), + p = in(reg) mem_addr, + a = in(reg) a, + options(nostack, preserves_flags), + ); +} + +/// Returns a vector whose lowest element is `a` and all higher elements are +/// `0`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi64_si128(a: i64) -> __m128i { + _mm_set_epi64x(0, a) +} + +/// Returns a vector whose lowest element is `a` and all higher elements are +/// `0`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi64x_si128(a: i64) -> __m128i { + _mm_cvtsi64_si128(a) +} + +/// Returns the lowest element of `a`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi128_si64(a: __m128i) -> i64 { + unsafe { simd_extract!(a.as_i64x2(), 0) } +} + +/// Returns the lowest element of `a`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64x) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { + _mm_cvtsi128_si64(a) +} + +/// Returns `a` with its lower element replaced by `b` after converting it to +/// an `f64`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsi2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { simd_insert!(a, 0, b as f64) } +} + +/// Returns `a` with its lower element replaced by `b` after converting it to +/// an `f64`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_sd) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsi2sd))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { + _mm_cvtsi64_sd(a, b) +} + +#[cfg(test)] +mod tests { + use crate::core_arch::arch::x86_64::*; + use std::boxed; + use std::ptr; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsd_si64() { + let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0)); + assert_eq!(r, -2_i64); + + let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq!(r, i64::MIN); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsd_si64x() { + let r = _mm_cvtsd_si64x(_mm_setr_pd(f64::NAN, f64::NAN)); + assert_eq!(r, i64::MIN); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvttsd_si64() { + let a = _mm_setr_pd(-1.1, 2.2); + let r = _mm_cvttsd_si64(a); + assert_eq!(r, -1_i64); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvttsd_si64x() { + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttsd_si64x(a); + assert_eq!(r, i64::MIN); + } + + #[simd_test(enable = "sse2")] + // Miri cannot support this until it is clear how it fits in the Rust memory model + // (non-temporal store) + #[cfg_attr(miri, ignore)] + unsafe fn test_mm_stream_si64() { + let a: i64 = 7; + let mut mem = boxed::Box::::new(-1); + _mm_stream_si64(ptr::addr_of_mut!(*mem), a); + assert_eq!(a, *mem); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsi64_si128() { + let r = _mm_cvtsi64_si128(5); + assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsi128_si64() { + let r = _mm_cvtsi128_si64(_mm_setr_epi64x(5, 0)); + assert_eq!(r, 5); + } + + #[simd_test(enable = "sse2")] + unsafe fn test_mm_cvtsi64_sd() { + let a = _mm_set1_pd(3.5); + let r = _mm_cvtsi64_sd(a, 
5); + assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs new file mode 100644 index 000000000000..4b7d25f2144b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs @@ -0,0 +1,59 @@ +//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1) + +use crate::{core_arch::x86::*, mem::transmute}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Extracts an 64-bit integer from `a` selected with `IMM1` +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pextrq, IMM1 = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_extract_epi64(a: __m128i) -> i64 { + static_assert_uimm_bits!(IMM1, 1); + unsafe { simd_extract!(a.as_i64x2(), IMM1 as u32) } +} + +/// Returns a copy of `a` with the 64-bit integer from `i` inserted at a +/// location specified by `IMM1`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi64) +#[inline] +#[target_feature(enable = "sse4.1")] +#[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_insert_epi64(a: __m128i, i: i64) -> __m128i { + static_assert_uimm_bits!(IMM1, 1); + unsafe { transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i)) } +} + +#[cfg(test)] +mod tests { + use crate::core_arch::arch::x86_64::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_extract_epi64() { + let a = _mm_setr_epi64x(0, 1); + let r = _mm_extract_epi64::<1>(a); + assert_eq!(r, 1); + let r = _mm_extract_epi64::<0>(a); + assert_eq!(r, 0); + } + + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_insert_epi64() { + let a = _mm_set1_epi64x(0); + let e = _mm_setr_epi64x(0, 32); + let r = _mm_insert_epi64::<1>(a, 32); + assert_eq_m128i(r, e); + let e = _mm_setr_epi64x(32, 0); + let r = _mm_insert_epi64::<0>(a, 32); + assert_eq_m128i(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs new file mode 100644 index 000000000000..64a23b2b1921 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs @@ -0,0 +1,37 @@ +//! `x86_64`'s Streaming SIMD Extensions 4.2 (SSE4.2) + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.sse42.crc32.64.64"] + fn crc32_64_64(crc: u64, v: u64) -> u64; +} + +/// Starting with the initial value in `crc`, return the accumulated +/// CRC32-C value for unsigned 64-bit integer `v`. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u64) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { + unsafe { crc32_64_64(crc, v) } +} + +#[cfg(test)] +mod tests { + use crate::core_arch::arch::x86_64::*; + + use stdarch_test::simd_test; + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_crc32_u64() { + let crc = 0x7819dccd3e824; + let v = 0x2a22b845fed; + let i = _mm_crc32_u64(crc, v); + assert_eq!(i, 0xbb6cdc6c); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/tbm.rs b/library/stdarch/crates/core_arch/src/x86_64/tbm.rs new file mode 100644 index 000000000000..002e0059160b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/tbm.rs @@ -0,0 +1,225 @@ +//! Trailing Bit Manipulation (TBM) instruction set. +//! +//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3: +//! General-Purpose and System Instructions][amd64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available +//! instructions. +//! +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +unsafe extern "C" { + #[link_name = "llvm.x86.tbm.bextri.u64"] + fn bextri_u64(a: u64, control: u64) -> u64; +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits `[7,0]` of `control` specify the index to the first bit in the range to +/// be extracted, and bits `[15,8]` specify the length of the range. For any bit +/// position in the specified range that lie beyond the MSB of the source operand, +/// zeroes will be written. If the range is empty, the result is zero. 
+#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86_updates", since = "1.82.0")] +pub unsafe fn _bextri_u64(a: u64) -> u64 { + static_assert_uimm_bits!(CONTROL, 16); + unsafe { bextri_u64(a, CONTROL) } +} + +/// Clears all bits below the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcfill_u64(x: u64) -> u64 { + x & x.wrapping_add(1) +} + +/// Sets all bits of `x` to 1 except for the least significant zero bit. +/// +/// If there is no zero bit in `x`, it sets all bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blci))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blci_u64(x: u64) -> u64 { + x | !x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x` and clears all other bits. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcic))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcic_u64(x: u64) -> u64 { + !x & x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x` and clears all bits above +/// that bit. +/// +/// If there is no zero bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcmsk_u64(x: u64) -> u64 { + x ^ x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns `x`. 
+#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcs))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcs_u64(x: u64) -> u64 { + x | x.wrapping_add(1) +} + +/// Sets all bits of `x` below the least significant one. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blsfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blsfill_u64(x: u64) -> u64 { + x | x.wrapping_sub(1) +} + +/// Clears least significant bit and sets all other bits. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blsic))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blsic_u64(x: u64) -> u64 { + !x | x.wrapping_sub(1) +} + +/// Clears all bits below the least significant zero of `x` and sets all other +/// bits. +/// +/// If the least significant bit of `x` is `0`, it sets all bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(t1mskc))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _t1mskc_u64(x: u64) -> u64 { + !x | x.wrapping_add(1) +} + +/// Sets all bits below the least significant one of `x` and clears all other +/// bits. +/// +/// If the least significant bit of `x` is 1, it returns zero. 
+#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(tzmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _tzmsk_u64(x: u64) -> u64 { + !x & x.wrapping_sub(1) +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86_64::*; + + #[simd_test(enable = "tbm")] + unsafe fn test_bextri_u64() { + assert_eq!(_bextri_u64::<0x0404>(0b0101_0000u64), 0b0000_0101u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcfill_u64() { + assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); + assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blci_u64() { + assert_eq!( + _blci_u64(0b0101_0000u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64 + ); + assert_eq!( + _blci_u64(0b1111_1111u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcic_u64() { + assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64); + assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcmsk_u64() { + assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); + assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcs_u64() { + assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64); + assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blsfill_u64() { + assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); + assert_eq!( + _blsfill_u64(0u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blsic_u64() { + assert_eq!( + _blsic_u64(0b0101_0100u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64 + ); + assert_eq!( + 
_blsic_u64(0u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_t1mskc_u64() { + assert_eq!( + _t1mskc_u64(0b0101_0111u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64 + ); + assert_eq!( + _t1mskc_u64(0u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_tzmsk_u64() { + assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); + assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs new file mode 100644 index 000000000000..ca2367307f8d --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -0,0 +1,187 @@ +//! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics + +#![allow(clippy::module_name_repetitions)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +unsafe extern "C" { + #[link_name = "llvm.x86.xsave64"] + fn xsave64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xrstor64"] + fn xrstor64(p: *const u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsaveopt64"] + fn xsaveopt64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsavec64"] + fn xsavec64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsaves64"] + fn xsaves64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xrstors64"] + fn xrstors64(p: *const u8, hi: u32, lo: u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits `[62:0]` in `save_mask` and XCR0. +/// `mem_addr` must be aligned on a 64-byte boundary. 
+/// +/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of +/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsave64) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xsave64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { + xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial restore of the enabled processor states using +/// the state information stored in memory at `mem_addr`. +/// +/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstor64) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xrstor64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { + xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. +/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize +/// the manner in which data is saved. The performance of this instruction will +/// be equal to or better than using the `XSAVE64` instruction. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaveopt64) +#[inline] +#[target_feature(enable = "xsave,xsaveopt")] +#[cfg_attr(test, assert_instr(xsaveopt64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) { + xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial save of the enabled processor states to memory +/// at `mem_addr`. +/// +/// `xsavec` differs from `xsave` in that it uses compaction and that it may +/// use init optimization. State is saved based on bits `[62:0]` in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsavec64) +#[inline] +#[target_feature(enable = "xsave,xsavec")] +#[cfg_attr(test, assert_instr(xsavec64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) { + xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr` +/// +/// `xsaves` differs from xsave in that it can save state components +/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the +/// modified optimization. State is saved based on bits `[62:0]` in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xsaves64) +#[inline] +#[target_feature(enable = "xsave,xsaves")] +#[cfg_attr(test, assert_instr(xsaves64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) { + xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial restore of the enabled processor states using the +/// state information stored in memory at `mem_addr`. +/// +/// `xrstors` differs from `xrstor` in that it can restore state components +/// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore +/// from an `xsave` area in which the extended region is in the standard form. +/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_xrstors64) +#[inline] +#[target_feature(enable = "xsave,xsaves")] +#[cfg_attr(test, assert_instr(xrstors64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) { + xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86_64::xsave; + use std::fmt; + use stdarch_test::simd_test; + + #[repr(align(64))] + #[derive(Debug)] + struct XsaveArea { + // max size for 256-bit registers is 800 bytes: + // see https://software.intel.com/en-us/node/682996 + // max size for 512-bit registers is 2560 bytes: + // FIXME: add source + data: [u8; 2560], + } + + impl XsaveArea { + fn new() -> XsaveArea { + XsaveArea { data: [0; 2560] } + } + fn ptr(&mut self) -> *mut u8 { + self.data.as_mut_ptr() + } + } + + #[simd_test(enable = "xsave")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + 
unsafe fn test_xsave64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = XsaveArea::new(); + let mut b = XsaveArea::new(); + + xsave::_xsave64(a.ptr(), m); + xsave::_xrstor64(a.ptr(), m); + xsave::_xsave64(b.ptr(), m); + } + + #[simd_test(enable = "xsave,xsaveopt")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_xsaveopt64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = XsaveArea::new(); + let mut b = XsaveArea::new(); + + xsave::_xsaveopt64(a.ptr(), m); + xsave::_xrstor64(a.ptr(), m); + xsave::_xsaveopt64(b.ptr(), m); + } + + #[simd_test(enable = "xsave,xsavec")] + #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri + unsafe fn test_xsavec64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = XsaveArea::new(); + let mut b = XsaveArea::new(); + + xsave::_xsavec64(a.ptr(), m); + xsave::_xrstor64(a.ptr(), m); + xsave::_xsavec64(b.ptr(), m); + } +} diff --git a/library/stdarch/crates/intrinsic-test/Cargo.toml b/library/stdarch/crates/intrinsic-test/Cargo.toml new file mode 100644 index 000000000000..06051abc8d0d --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "intrinsic-test" +version = "0.1.0" +authors = ["Jamie Cunliffe ", + "James McGregor ", + "James Barford-Evans " + ] +license = "MIT OR Apache-2.0" +edition = "2024" + +[dependencies] +lazy_static = "1.4.0" +serde = { version = "1", features = ["derive"] } +serde_json = "1.0" +csv = "1.1" +clap = { version = "4.4", features = ["derive"] } +regex = "1.4.2" +log = "0.4.11" +pretty_env_logger = "0.5.0" +rayon = "1.5.0" +diff = "0.1.12" +itertools = "0.14.0" diff --git a/library/stdarch/crates/intrinsic-test/LICENSE-APACHE b/library/stdarch/crates/intrinsic-test/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/LICENSE-APACHE @@ -0,0 +1,201 @@ + 
Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/library/stdarch/crates/intrinsic-test/LICENSE-MIT b/library/stdarch/crates/intrinsic-test/LICENSE-MIT new file mode 100644 index 000000000000..ef223ae2c7c0 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2021-2023 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/library/stdarch/crates/intrinsic-test/README.md b/library/stdarch/crates/intrinsic-test/README.md new file mode 100644 index 000000000000..260d59fca80f --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/README.md @@ -0,0 +1,23 @@ +Generate and run programs using equivalent C and Rust intrinsics, checking that +each produces the same result from random inputs. 
+ +# Usage +``` +USAGE: + intrinsic-test [FLAGS] [OPTIONS] + +FLAGS: + --a32 Run tests for A32 instrinsics instead of A64 + --generate-only Regenerate test programs, but don't build or run them + -h, --help Prints help information + -V, --version Prints version information + +OPTIONS: + --cppcompiler The C++ compiler to use for compiling the c++ code [default: clang++] + --runner Run the C programs under emulation with this command + --skip Filename for a list of intrinsics to skip (one per line) + --toolchain The rust toolchain to use for building the rust code + +ARGS: + The input file containing the intrinsics +``` diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt new file mode 100644 index 000000000000..bbcfc40c69ab --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt @@ -0,0 +1,64 @@ +# Not implemented in stdarch yet +vbfdot_f32 +vbfdot_lane_f32 +vbfdot_laneq_f32 +vbfdotq_f32 +vbfdotq_lane_f32 +vbfdotq_laneq_f32 +vbfmlalbq_f32 +vbfmlalbq_lane_f32 +vbfmlalbq_laneq_f32 +vbfmlaltq_f32 +vbfmlaltq_lane_f32 +vbfmlaltq_laneq_f32 +vbfmmlaq_f32 + + +# Implemented in stdarch, but missing in Clang. +vrnd32xq_f64 +vrnd32zq_f64 +vrnd64xq_f64 +vrnd64zq_f64 +vamin_f32 +vaminq_f32 +vaminq_f64 +vamax_f32 +vamaxq_f32 +vamaxq_f64 +# LLVM select error, and missing in Clang. 
+vrnd32x_f64 +vrnd32z_f64 +vrnd64x_f64 +vrnd64z_f64 +vluti2_lane_p16 +vluti2_lane_p8 +vluti2_lane_s16 +vluti2_lane_s8 +vluti2_lane_u16 +vluti2_lane_u8 +vluti2q_lane_p16 +vluti2q_lane_p8 +vluti2q_lane_s16 +vluti2q_lane_s8 +vluti2q_lane_u16 +vluti2q_lane_u8 +vluti4q_lane_f16_x2 +vluti4q_lane_p16_x2 +vluti4q_lane_p8 +vluti4q_lane_s16_x2 +vluti4q_lane_s8 +vluti4q_lane_u16_x2 +vluti4q_lane_u8 +vluti4q_laneq_f16_x2 +vluti4q_laneq_p16_x2 +vluti4q_laneq_p8 +vluti4q_laneq_s16_x2 +vluti4q_laneq_s8 +vluti4q_laneq_u16_x2 +vluti4q_laneq_u8 + +# Broken in Clang +vcvth_s16_f16 +# FIXME: Broken output due to missing f16 printing support in Rust, see git blame for this line +vmulh_lane_f16 +vmulh_laneq_f16 diff --git a/library/stdarch/crates/intrinsic-test/missing_arm.txt b/library/stdarch/crates/intrinsic-test/missing_arm.txt new file mode 100644 index 000000000000..04c09a27d90d --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_arm.txt @@ -0,0 +1,320 @@ +# Not implemented in stdarch yet +vbfdot_f32 +vbfdot_lane_f32 +vbfdot_laneq_f32 +vbfdotq_f32 +vbfdotq_lane_f32 +vbfdotq_laneq_f32 +vbfmlalbq_f32 +vbfmlalbq_lane_f32 +vbfmlalbq_laneq_f32 +vbfmlaltq_f32 +vbfmlaltq_lane_f32 +vbfmlaltq_laneq_f32 +vbfmmlaq_f32 + +# Implemented in Clang and stdarch for A64 only even though CSV claims A32 support +vaddq_p64 +vbsl_p64 +vbslq_p64 +vceq_p64 +vceqq_p64 +vceqz_p64 +vceqzq_p64 +vcombine_p64 +vcopy_lane_p64 +vcopy_laneq_p64 +vcopyq_lane_p64 +vcopyq_laneq_p64 +vcreate_p64 +vdup_lane_p64 +vdup_n_p64 +vdupq_lane_p64 +vdupq_n_p64 +vext_p64 +vextq_p64 +vget_high_p64 +vget_lane_p64 +vget_low_p64 +vgetq_lane_p64 +vmovn_high_s16 +vmovn_high_s32 +vmovn_high_s64 +vmovn_high_u16 +vmovn_high_u32 +vmovn_high_u64 +vmull_high_p64 +vmull_p64 +vreinterpret_p16_p64 +vreinterpret_p64_f32 +vreinterpret_p64_p16 +vreinterpret_p64_p8 +vreinterpret_p64_s16 +vreinterpret_p64_s32 +vreinterpret_p64_s8 +vreinterpret_p64_u16 +vreinterpret_p64_u32 +vreinterpret_p64_u64 +vreinterpret_p64_u8 
+vreinterpret_p8_p64 +vreinterpretq_f64_u64 +vreinterpretq_p128_f32 +vreinterpretq_p128_p16 +vreinterpretq_p128_p8 +vreinterpretq_p128_s16 +vreinterpretq_p128_s32 +vreinterpretq_p128_s64 +vreinterpretq_p128_s8 +vreinterpretq_p128_u16 +vreinterpretq_p128_u32 +vreinterpretq_p128_u64 +vreinterpretq_p128_u8 +vreinterpretq_p16_p64 +vreinterpretq_p64_f32 +vreinterpretq_p64_p16 +vreinterpretq_p64_p8 +vreinterpretq_p64_s16 +vreinterpretq_p64_s32 +vreinterpretq_p64_s64 +vreinterpretq_p64_s8 +vreinterpretq_p64_u16 +vreinterpretq_p64_u32 +vreinterpretq_p64_u64 +vreinterpretq_p64_u8 +vreinterpretq_p8_p64 +vreinterpretq_s16_p64 +vreinterpretq_s32_p64 +vreinterpretq_s64_p64 +vreinterpretq_s8_p64 +vreinterpretq_u16_p64 +vreinterpretq_u32_p64 +vreinterpretq_u64_p64 +vreinterpretq_u8_p64 +vreinterpret_s16_p64 +vreinterpret_s32_p64 +vreinterpret_s64_p64 +vreinterpret_s8_p64 +vreinterpret_u16_p64 +vreinterpret_u32_p64 +vreinterpret_u64_p64 +vreinterpret_u8_p64 +vrndn_f64 +vrndnq_f64 +vset_lane_p64 +vsetq_lane_p64 +vsli_n_p64 +vsliq_n_p64 +vsri_n_p64 +vsriq_n_p64 +vtst_p64 +vtstq_p64 +vaddh_f16 +vsubh_f16 +vabsh_f16 +vdivh_f16 +vmulh_f16 +vfmsh_f16 +vfmah_f16 +vminnmh_f16 +vmaxnmh_f16 +vrndh_f16 +vrndnh_f16 +vrndih_f16 +vrndah_f16 +vrndph_f16 +vrndmh_f16 +vrndxh_f16 +vsqrth_f16 +vnegh_f16 +vcvth_f16_s32 +vcvth_s32_f16 +vcvth_n_f16_s32 +vcvth_n_s32_f16 +vcvth_f16_u32 +vcvth_u32_f16 +vcvth_n_f16_u32 +vcvth_n_u32_f16 +vcvtah_s32_f16 +vcvtah_u32_f16 +vcvtmh_s32_f16 +vcvtmh_u32_f16 +vcvtpq_s16_f16 +vcvtpq_u16_f16 +vcvtp_s16_f16 +vcvtp_u16_f16 +vcvtph_s32_f16 +vcvtph_u32_f16 +vcvtnh_u32_f16 +vcvtnh_s32_f16 +vfmlsl_low_f16 +vfmlslq_low_f16 +vfmlsl_high_f16 +vfmlslq_high_f16 +vfmlsl_lane_high_f16 +vfmlsl_laneq_high_f16 +vfmlslq_lane_high_f16 +vfmlslq_laneq_high_f16 +vfmlsl_lane_low_f16 +vfmlsl_laneq_low_f16 +vfmlslq_lane_low_f16 +vfmlslq_laneq_low_f16 +vfmlal_low_f16 +vfmlalq_low_f16 +vfmlal_high_f16 +vfmlalq_high_f16 +vfmlal_lane_low_f16 +vfmlal_laneq_low_f16 +vfmlalq_lane_low_f16 
+vfmlalq_laneq_low_f16 +vfmlal_lane_high_f16 +vfmlal_laneq_high_f16 +vfmlalq_lane_high_f16 +vfmlalq_laneq_high_f16 +vreinterpret_f16_p64 +vreinterpretq_f16_p64 +vreinterpret_p64_f16 +vreinterpretq_p64_f16 +vreinterpret_p128_f16 +vreinterpretq_p128_f16 + +# Present in Clang header but triggers an ICE due to lack of backend support. +vcmla_f32 +vcmla_lane_f32 +vcmla_laneq_f32 +vcmla_rot180_f32 +vcmla_rot180_lane_f32 +vcmla_rot180_laneq_f32 +vcmla_rot270_f32 +vcmla_rot270_lane_f32 +vcmla_rot270_laneq_f32 +vcmla_rot90_f32 +vcmla_rot90_lane_f32 +vcmla_rot90_laneq_f32 +vcmlaq_f32 +vcmlaq_lane_f32 +vcmlaq_laneq_f32 +vcmlaq_rot180_f32 +vcmlaq_rot180_lane_f32 +vcmlaq_rot180_laneq_f32 +vcmlaq_rot270_f32 +vcmlaq_rot270_lane_f32 +vcmlaq_rot270_laneq_f32 +vcmlaq_rot90_f32 +vcmlaq_rot90_lane_f32 +vcmlaq_rot90_laneq_f32 +vcmla_f16 +vcmlaq_f16 +vcmla_laneq_f16 +vcmla_lane_f16 +vcmla_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmla_rot90_f16 +vcmlaq_rot90_f16 +vcmla_rot180_f16 +vcmlaq_rot180_f16 +vcmla_rot270_f16 +vcmlaq_rot270_f16 +vcmla_rot90_lane_f16 +vcmla_rot90_laneq_f16 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 +vcmla_rot180_lane_f16 +vcmla_rot180_laneq_f16 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmla_rot270_lane_f16 +vcmla_rot270_laneq_f16 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 + +# Implemented in stdarch for A64 only, Clang support both A32/A64 +vadd_s64 +vadd_u64 +vcaddq_rot270_f32 +vcaddq_rot90_f32 +vcadd_rot270_f32 +vcadd_rot90_f32 +vcvtaq_s32_f32 +vcvtaq_u32_f32 +vcvta_s32_f32 +vcvta_u32_f32 +vcvtmq_s32_f32 +vcvtmq_u32_f32 +vcvtm_s32_f32 +vcvtm_u32_f32 +vcvtnq_s32_f32 +vcvtnq_u32_f32 +vcvtn_s32_f32 +vcvtn_u32_f32 +vcvtpq_s32_f32 +vcvtpq_u32_f32 +vcvtp_s32_f32 +vcvtp_u32_f32 +vqdmulh_lane_s16 +vqdmulh_lane_s32 +vqdmulhq_lane_s16 +vqdmulhq_lane_s32 +vrnda_f32 +vrnda_f32 +vrndaq_f32 +vrndaq_f32 +vrnd_f32 +vrnd_f32 +vrndi_f32 +vrndi_f32 +vrndiq_f32 +vrndiq_f32 +vrndm_f32 +vrndm_f32 +vrndmq_f32 +vrndmq_f32 +vrndns_f32 +vrndp_f32 +vrndpq_f32 
+vrndq_f32 +vrndq_f32 +vrndx_f32 +vrndxq_f32 +vrnda_f16 +vrnda_f16 +vrndaq_f16 +vrndaq_f16 +vrnd_f16 +vrnd_f16 +vrndi_f16 +vrndi_f16 +vrndiq_f16 +vrndiq_f16 +vrndm_f16 +vrndm_f16 +vrndmq_f16 +vrndmq_f16 +vrndns_f16 +vrndp_f16 +vrndpq_f16 +vrndq_f16 +vrndx_f16 +vrndxq_f16 +vpmin_f16 +vpmax_f16 +vcaddq_rot270_f16 +vcaddq_rot90_f16 +vcadd_rot270_f16 +vcadd_rot90_f16 +vcvtm_s16_f16 +vcvtmq_s16_f16 +vcvtm_u16_f16 +vcvtmq_u16_f16 +vcvtaq_s16_f16 +vcvtaq_u16_f16 +vcvtnq_s16_f16 +vcvtnq_u16_f16 +vcvtn_s16_f16 +vcvtn_u16_f16 +vcvtaq_s16_f16 +vcvtaq_u16_f16 +vcvta_s16_f16 +vcvta_u16_f16 +vceqz_f16 +vceqzq_f16 diff --git a/library/stdarch/crates/intrinsic-test/src/arm/compile.rs b/library/stdarch/crates/intrinsic-test/src/arm/compile.rs new file mode 100644 index 000000000000..8276cd87c1cb --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/arm/compile.rs @@ -0,0 +1,64 @@ +use crate::common::compile_c::CompilationCommandBuilder; +use crate::common::gen_c::compile_c_programs; + +pub fn compile_c_arm( + intrinsics_name_list: &[String], + compiler: &str, + target: &str, + cxx_toolchain_dir: Option<&str>, +) -> bool { + // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations + let mut command = CompilationCommandBuilder::new() + .add_arch_flags(vec!["armv8.6-a", "crypto", "crc", "dotprod", "fp16"]) + .set_compiler(compiler) + .set_target(target) + .set_opt_level("2") + .set_cxx_toolchain_dir(cxx_toolchain_dir) + .set_project_root("c_programs") + .add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]); + + if !target.contains("v7") { + command = command.add_arch_flags(vec!["faminmax", "lut", "sha3"]); + } + + /* + * clang++ cannot link an aarch64_be object file, so we invoke + * aarch64_be-unknown-linux-gnu's C++ linker. This ensures that we + * are testing the intrinsics against LLVM. 
+ * + * Note: setting `--sysroot=<...>` which is the obvious thing to do + * does not work as it gets caught up with `#include_next ` + * not existing... + */ + if target.contains("aarch64_be") { + command = command + .set_linker( + cxx_toolchain_dir.unwrap_or("").to_string() + "/bin/aarch64_be-none-linux-gnu-g++", + ) + .set_include_paths(vec![ + "/include", + "/aarch64_be-none-linux-gnu/include", + "/aarch64_be-none-linux-gnu/include/c++/14.2.1", + "/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu", + "/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward", + "/aarch64_be-none-linux-gnu/libc/usr/include", + ]); + } + + if !compiler.contains("clang") { + command = command.add_extra_flag("-flax-vector-conversions"); + } + + let compiler_commands = intrinsics_name_list + .iter() + .map(|intrinsic_name| { + command + .clone() + .set_input_name(intrinsic_name) + .set_output_name(intrinsic_name) + .make_string() + }) + .collect::>(); + + compile_c_programs(&compiler_commands) +} diff --git a/library/stdarch/crates/intrinsic-test/src/arm/config.rs b/library/stdarch/crates/intrinsic-test/src/arm/config.rs new file mode 100644 index 000000000000..cee80374ae9d --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/arm/config.rs @@ -0,0 +1,122 @@ +pub fn build_notices(line_prefix: &str) -> String { + format!( + "\ +{line_prefix}This is a transient test file, not intended for distribution. 
Some aspects of the +{line_prefix}test are derived from a JSON specification, published under the same license as the +{line_prefix}`intrinsic-test` crate.\n +" + ) +} + +pub const POLY128_OSTREAM_DEF: &str = r#"std::ostream& operator<<(std::ostream& os, poly128_t value) { + std::stringstream temp; + do { + int n = value % 10; + value /= 10; + temp << n; + } while (value != 0); + std::string tempstr(temp.str()); + std::string res(tempstr.rbegin(), tempstr.rend()); + os << res; + return os; +}"#; + +// Format f16 values (and vectors containing them) in a way that is consistent with C. +pub const F16_FORMATTING_DEF: &str = r#" +/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they +/// were before moving to array-based simd. +#[inline] +fn debug_simd_finish( + formatter: &mut core::fmt::Formatter<'_>, + type_name: &str, + array: &[T; N], +) -> core::fmt::Result { + core::fmt::Formatter::debug_tuple_fields_finish( + formatter, + type_name, + &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]), + ) +} + +#[repr(transparent)] +struct Hex(T); + +impl core::fmt::Debug for Hex { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + ::fmt(&self.0, f) + } +} + +fn debug_f16(x: T) -> impl core::fmt::Debug { + Hex(x) +} + +trait DebugHexF16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result; +} + +impl DebugHexF16 for f16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{:#06x?}", self.to_bits()) + } +} + +impl DebugHexF16 for float16x4_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let array = unsafe { core::mem::transmute::<_, [Hex; 4]>(*self) }; + debug_simd_finish(f, "float16x4_t", &array) + } +} + +impl DebugHexF16 for float16x8_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let array = unsafe { core::mem::transmute::<_, [Hex; 8]>(*self) }; + debug_simd_finish(f, "float16x8_t", &array) + } 
+} + +impl DebugHexF16 for float16x4x2_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, "float16x4x2_t", &[Hex(self.0), Hex(self.1)]) + } +} +impl DebugHexF16 for float16x4x3_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, "float16x4x3_t", &[Hex(self.0), Hex(self.1), Hex(self.2)]) + } +} +impl DebugHexF16 for float16x4x4_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, "float16x4x4_t", &[Hex(self.0), Hex(self.1), Hex(self.2), Hex(self.3)]) + } +} + +impl DebugHexF16 for float16x8x2_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, "float16x8x2_t", &[Hex(self.0), Hex(self.1)]) + } +} +impl DebugHexF16 for float16x8x3_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, "float16x8x3_t", &[Hex(self.0), Hex(self.1), Hex(self.2)]) + } +} +impl DebugHexF16 for float16x8x4_t { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, "float16x8x4_t", &[Hex(self.0), Hex(self.1), Hex(self.2), Hex(self.3)]) + } +} + "#; + +pub const AARCH_CONFIGURATIONS: &str = r#" +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] +#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] 
+#![feature(fmt_helpers_for_derive)] +#![feature(stdarch_neon_f16)] +"#; diff --git a/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs new file mode 100644 index 000000000000..773dabf4d75b --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs @@ -0,0 +1,95 @@ +use crate::common::argument::ArgumentList; +use crate::common::indentation::Indentation; +use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; +use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind}; +use std::ops::Deref; + +#[derive(Debug, Clone, PartialEq)] +pub struct ArmIntrinsicType(pub IntrinsicType); + +impl Deref for ArmIntrinsicType { + type Target = IntrinsicType; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl IntrinsicDefinition for Intrinsic { + fn arguments(&self) -> ArgumentList { + self.arguments.clone() + } + + fn results(&self) -> ArmIntrinsicType { + self.results.clone() + } + + fn name(&self) -> String { + self.name.clone() + } + + /// Generates a std::cout for the intrinsics results that will match the + /// rust debug output format for the return type. The generated line assumes + /// there is an int i in scope which is the current pass number. 
+ fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { + let lanes = if self.results().num_vectors() > 1 { + (0..self.results().num_vectors()) + .map(|vector| { + format!( + r#""{ty}(" << {lanes} << ")""#, + ty = self.results().c_single_vector_type(), + lanes = (0..self.results().num_lanes()) + .map(move |idx| -> std::string::String { + format!( + "{cast}{lane_fn}(__return_value.val[{vector}], {lane})", + cast = self.results().c_promotion(), + lane_fn = self.results().get_lane_function(), + lane = idx, + vector = vector, + ) + }) + .collect::>() + .join(r#" << ", " << "#) + ) + }) + .collect::>() + .join(r#" << ", " << "#) + } else if self.results().num_lanes() > 1 { + (0..self.results().num_lanes()) + .map(|idx| -> std::string::String { + format!( + "{cast}{lane_fn}(__return_value, {lane})", + cast = self.results().c_promotion(), + lane_fn = self.results().get_lane_function(), + lane = idx + ) + }) + .collect::>() + .join(r#" << ", " << "#) + } else { + format!( + "{promote}cast<{cast}>(__return_value)", + cast = match self.results.kind() { + TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), + TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(), + TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(), + TypeKind::Int => format!("int{}_t", self.results().inner_size()), + TypeKind::UInt => format!("uint{}_t", self.results().inner_size()), + TypeKind::Poly => format!("poly{}_t", self.results().inner_size()), + ty => todo!("print_result_c - Unknown type: {:#?}", ty), + }, + promote = self.results().c_promotion(), + ) + }; + + format!( + r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, + ty = if self.results().is_simd() { + format!("{}(", self.results().c_type()) + } else { + String::from("") + }, + close = if self.results.is_simd() { ")" } else { "" }, + ) + } +} 
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs new file mode 100644 index 000000000000..0ac47484b019 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs @@ -0,0 +1,137 @@ +use super::intrinsic::ArmIntrinsicType; +use crate::common::argument::{Argument, ArgumentList}; +use crate::common::constraint::Constraint; +use crate::common::intrinsic::Intrinsic; +use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition}; +use serde::Deserialize; +use serde_json::Value; +use std::collections::HashMap; +use std::path::Path; + +#[derive(Deserialize, Debug)] +#[serde(deny_unknown_fields)] +struct ReturnType { + value: String, +} + +#[derive(Deserialize, Debug)] +#[serde(untagged, deny_unknown_fields)] +pub enum ArgPrep { + Register { + #[serde(rename = "register")] + #[allow(dead_code)] + reg: String, + }, + Immediate { + #[serde(rename = "minimum")] + min: i64, + #[serde(rename = "maximum")] + max: i64, + }, + Nothing {}, +} + +impl TryFrom for ArgPrep { + type Error = serde_json::Error; + + fn try_from(value: Value) -> Result { + serde_json::from_value(value) + } +} + +#[derive(Deserialize, Debug)] +struct JsonIntrinsic { + #[serde(rename = "SIMD_ISA")] + simd_isa: String, + name: String, + arguments: Vec, + return_type: ReturnType, + #[serde(rename = "Arguments_Preparation")] + args_prep: Option>, + #[serde(rename = "Architectures")] + architectures: Vec, +} + +pub fn get_neon_intrinsics( + filename: &Path, + target: &str, +) -> Result>, Box> { + let file = std::fs::File::open(filename)?; + let reader = std::io::BufReader::new(file); + let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); + + let parsed = json + .into_iter() + .filter_map(|intr| { + if intr.simd_isa == "Neon" { + Some(json_to_intrinsic(intr, target).expect("Couldn't parse JSON")) + } else { + None + } + }) + .collect(); + Ok(parsed) +} + +fn 
json_to_intrinsic( + mut intr: JsonIntrinsic, + target: &str, +) -> Result, Box> { + let name = intr.name.replace(['[', ']'], ""); + + let results = ArmIntrinsicType::from_c(&intr.return_type.value, target)?; + + let args = intr + .arguments + .into_iter() + .enumerate() + .map(|(i, arg)| { + let arg_name = Argument::::type_and_name_from_c(&arg).1; + let metadata = intr.args_prep.as_mut(); + let metadata = metadata.and_then(|a| a.remove(arg_name)); + let arg_prep: Option = metadata.and_then(|a| a.try_into().ok()); + let constraint: Option = arg_prep.and_then(|a| a.try_into().ok()); + + let mut arg = Argument::::from_c(i, &arg, target, constraint); + + // The JSON doesn't list immediates as const + let IntrinsicType { + ref mut constant, .. + } = arg.ty.0; + if arg.name.starts_with("imm") { + *constant = true + } + arg + }) + .collect(); + + let arguments = ArgumentList:: { args }; + + Ok(Intrinsic { + name, + arguments, + results: *results, + arch_tags: intr.architectures, + }) +} + +/// ARM-specific +impl TryFrom for Constraint { + type Error = (); + + fn try_from(prep: ArgPrep) -> Result { + let parsed_ints = match prep { + ArgPrep::Immediate { min, max } => Ok((min, max)), + _ => Err(()), + }; + if let Ok((min, max)) = parsed_ints { + if min == max { + Ok(Constraint::Equal(min)) + } else { + Ok(Constraint::Range(min..max + 1)) + } + } else { + Err(()) + } + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs new file mode 100644 index 000000000000..6aaa49ff97f9 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs @@ -0,0 +1,124 @@ +mod compile; +mod config; +mod intrinsic; +mod json_parser; +mod types; + +use crate::common::SupportedArchitectureTest; +use crate::common::cli::ProcessedCli; +use crate::common::compare::compare_outputs; +use crate::common::gen_rust::compile_rust_programs; +use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; +use 
crate::common::intrinsic_helpers::TypeKind; +use crate::common::write_file::{write_c_testfiles, write_rust_testfiles}; +use compile::compile_c_arm; +use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices}; +use intrinsic::ArmIntrinsicType; +use json_parser::get_neon_intrinsics; + +pub struct ArmArchitectureTest { + intrinsics: Vec>, + cli_options: ProcessedCli, +} + +impl SupportedArchitectureTest for ArmArchitectureTest { + fn create(cli_options: ProcessedCli) -> Box { + let a32 = cli_options.target.contains("v7"); + let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target) + .expect("Error parsing input file"); + + intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); + + let mut intrinsics = intrinsics + .into_iter() + // Not sure how we would compare intrinsic that returns void. + .filter(|i| i.results.kind() != TypeKind::Void) + .filter(|i| i.results.kind() != TypeKind::BFloat) + .filter(|i| !i.arguments.iter().any(|a| a.ty.kind() == TypeKind::BFloat)) + // Skip pointers for now, we would probably need to look at the return + // type to work out how many elements we need to point to. 
+ .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) + .filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128)) + .filter(|i| !cli_options.skip.contains(&i.name)) + .filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()])) + .collect::>(); + intrinsics.dedup(); + + Box::new(Self { + intrinsics, + cli_options, + }) + } + + fn build_c_file(&self) -> bool { + let compiler = self.cli_options.cpp_compiler.as_deref(); + let target = &self.cli_options.target; + let cxx_toolchain_dir = self.cli_options.cxx_toolchain_dir.as_deref(); + let c_target = "aarch64"; + + let intrinsics_name_list = write_c_testfiles( + &self + .intrinsics + .iter() + .map(|i| i as &dyn IntrinsicDefinition<_>) + .collect::>(), + target, + c_target, + &["arm_neon.h", "arm_acle.h", "arm_fp16.h"], + &build_notices("// "), + &[POLY128_OSTREAM_DEF], + ); + + match compiler { + None => true, + Some(compiler) => compile_c_arm( + intrinsics_name_list.as_slice(), + compiler, + target, + cxx_toolchain_dir, + ), + } + } + + fn build_rust_file(&self) -> bool { + let rust_target = if self.cli_options.target.contains("v7") { + "arm" + } else { + "aarch64" + }; + let target = &self.cli_options.target; + let toolchain = self.cli_options.toolchain.as_deref(); + let linker = self.cli_options.linker.as_deref(); + let intrinsics_name_list = write_rust_testfiles( + self.intrinsics + .iter() + .map(|i| i as &dyn IntrinsicDefinition<_>) + .collect::>(), + rust_target, + &build_notices("// "), + F16_FORMATTING_DEF, + AARCH_CONFIGURATIONS, + ); + + compile_rust_programs(intrinsics_name_list, toolchain, target, linker) + } + + fn compare_outputs(&self) -> bool { + if let Some(ref toolchain) = self.cli_options.toolchain { + let intrinsics_name_list = self + .intrinsics + .iter() + .map(|i| i.name.clone()) + .collect::>(); + + compare_outputs( + &intrinsics_name_list, + toolchain, + &self.cli_options.c_runner, + &self.cli_options.target, + ) + } else { + true + } + } +} diff --git 
a/library/stdarch/crates/intrinsic-test/src/arm/types.rs b/library/stdarch/crates/intrinsic-test/src/arm/types.rs new file mode 100644 index 000000000000..9f3d6302f460 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs @@ -0,0 +1,195 @@ +use super::intrinsic::ArmIntrinsicType; +use crate::common::cli::Language; +use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind}; + +impl IntrinsicTypeDefinition for ArmIntrinsicType { + /// Gets a string containing the typename for this type in C format. + fn c_type(&self) -> String { + let prefix = self.0.kind.c_prefix(); + let const_prefix = if self.0.constant { "const " } else { "" }; + + if let (Some(bit_len), simd_len, vec_len) = + (self.0.bit_len, self.0.simd_len, self.0.vec_len) + { + match (simd_len, vec_len) { + (None, None) => format!("{const_prefix}{prefix}{bit_len}_t"), + (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), + (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), + (None, Some(_)) => todo!("{:#?}", self), // Likely an invalid case + } + } else { + todo!("{:#?}", self) + } + } + + fn c_single_vector_type(&self) -> String { + if let (Some(bit_len), Some(simd_len)) = (self.0.bit_len, self.0.simd_len) { + format!( + "{prefix}{bit_len}x{simd_len}_t", + prefix = self.0.kind.c_prefix() + ) + } else { + unreachable!("Shouldn't be called on this type") + } + } + + fn rust_type(&self) -> String { + let rust_prefix = self.0.kind.rust_prefix(); + let c_prefix = self.0.kind.c_prefix(); + if self.0.ptr_constant { + self.c_type() + } else if let (Some(bit_len), simd_len, vec_len) = + (self.0.bit_len, self.0.simd_len, self.0.vec_len) + { + match (simd_len, vec_len) { + (None, None) => format!("{rust_prefix}{bit_len}"), + (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"), + (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"), + (None, Some(_)) => todo!("{:#?}", self), // Likely an invalid case + } + } 
else { + todo!("{:#?}", self) + } + } + + /// Determines the load function for this type. + fn get_load_function(&self, language: Language) -> String { + if let IntrinsicType { + kind: k, + bit_len: Some(bl), + simd_len, + vec_len, + target, + .. + } = &self.0 + { + let quad = if simd_len.unwrap_or(1) * bl > 64 { + "q" + } else { + "" + }; + + let choose_workaround = language == Language::C && target.contains("v7"); + format!( + "vld{len}{quad}_{type}{size}", + type = match k { + TypeKind::UInt => "u", + TypeKind::Int => "s", + TypeKind::Float => "f", + // The ACLE doesn't support 64-bit polynomial loads on Armv7 + // if armv7 and bl == 64, use "s", else "p" + TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"}, + x => todo!("get_load_function TypeKind: {:#?}", x), + }, + size = bl, + quad = quad, + len = vec_len.unwrap_or(1), + ) + } else { + todo!("get_load_function IntrinsicType: {:#?}", self) + } + } + + /// Determines the get lane function for this type. + fn get_lane_function(&self) -> String { + if let IntrinsicType { + kind: k, + bit_len: Some(bl), + simd_len, + .. 
+ } = &self.0 + { + let quad = if (simd_len.unwrap_or(1) * bl) > 64 { + "q" + } else { + "" + }; + format!( + "vget{quad}_lane_{type}{size}", + type = match k { + TypeKind::UInt => "u", + TypeKind::Int => "s", + TypeKind::Float => "f", + TypeKind::Poly => "p", + x => todo!("get_load_function TypeKind: {:#?}", x), + }, + size = bl, + quad = quad, + ) + } else { + todo!("get_lane_function IntrinsicType: {:#?}", self) + } + } + + fn from_c(s: &str, target: &str) -> Result, String> { + const CONST_STR: &str = "const"; + if let Some(s) = s.strip_suffix('*') { + let (s, constant) = match s.trim().strip_suffix(CONST_STR) { + Some(stripped) => (stripped, true), + None => (s, false), + }; + let s = s.trim_end(); + let temp_return = ArmIntrinsicType::from_c(s, target); + temp_return.map(|mut op| { + let edited = op.as_mut(); + edited.0.ptr = true; + edited.0.ptr_constant = constant; + op + }) + } else { + // [const ]TYPE[{bitlen}[x{simdlen}[x{vec_len}]]][_t] + let (mut s, constant) = match s.strip_prefix(CONST_STR) { + Some(stripped) => (stripped.trim(), true), + None => (s, false), + }; + s = s.strip_suffix("_t").unwrap_or(s); + let mut parts = s.split('x'); // [[{bitlen}], [{simdlen}], [{vec_len}] ] + let start = parts.next().ok_or("Impossible to parse type")?; + if let Some(digit_start) = start.find(|c: char| c.is_ascii_digit()) { + let (arg_kind, bit_len) = start.split_at(digit_start); + let arg_kind = arg_kind.parse::()?; + let bit_len = bit_len.parse::().map_err(|err| err.to_string())?; + let simd_len = match parts.next() { + Some(part) => Some( + part.parse::() + .map_err(|_| "Couldn't parse simd_len: {part}")?, + ), + None => None, + }; + let vec_len = match parts.next() { + Some(part) => Some( + part.parse::() + .map_err(|_| "Couldn't parse vec_len: {part}")?, + ), + None => None, + }; + Ok(Box::new(ArmIntrinsicType(IntrinsicType { + ptr: false, + ptr_constant: false, + constant, + kind: arg_kind, + bit_len: Some(bit_len), + simd_len, + vec_len, + target: 
target.to_string(), + }))) + } else { + let kind = start.parse::()?; + let bit_len = match kind { + TypeKind::Int => Some(32), + _ => None, + }; + Ok(Box::new(ArmIntrinsicType(IntrinsicType { + ptr: false, + ptr_constant: false, + constant, + kind: start.parse::()?, + bit_len, + simd_len: None, + vec_len: None, + target: target.to_string(), + }))) + } + } + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs new file mode 100644 index 000000000000..443ccb919f46 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -0,0 +1,209 @@ +use super::cli::Language; +use super::constraint::Constraint; +use super::indentation::Indentation; +use super::intrinsic_helpers::IntrinsicTypeDefinition; + +/// An argument for the intrinsic. +#[derive(Debug, PartialEq, Clone)] +pub struct Argument { + /// The argument's index in the intrinsic function call. + pub pos: usize, + /// The argument name. + pub name: String, + /// The type of the argument. + pub ty: T, + /// Any constraints that are on this argument + pub constraint: Option, +} + +impl Argument +where + T: IntrinsicTypeDefinition, +{ + pub fn to_c_type(&self) -> String { + self.ty.c_type() + } + + pub fn is_simd(&self) -> bool { + self.ty.is_simd() + } + + pub fn is_ptr(&self) -> bool { + self.ty.is_ptr() + } + + pub fn has_constraint(&self) -> bool { + self.constraint.is_some() + } + + pub fn type_and_name_from_c(arg: &str) -> (&str, &str) { + let split_index = arg + .rfind([' ', '*']) + .expect("Couldn't split type and argname"); + + (arg[..split_index + 1].trim_end(), &arg[split_index + 1..]) + } + + /// The binding keyword (e.g. "const" or "let") for the array of possible test inputs. + fn rust_vals_array_binding(&self) -> impl std::fmt::Display { + if self.ty.is_rust_vals_array_const() { + "const" + } else { + "let" + } + } + + /// The name (e.g. 
"A_VALS" or "a_vals") for the array of possible test inputs. + fn rust_vals_array_name(&self) -> impl std::fmt::Display { + if self.ty.is_rust_vals_array_const() { + format!("{}_VALS", self.name.to_uppercase()) + } else { + format!("{}_vals", self.name.to_lowercase()) + } + } + + pub fn from_c( + pos: usize, + arg: &str, + target: &str, + constraint: Option, + ) -> Argument { + let (ty, var_name) = Self::type_and_name_from_c(arg); + + let ty = + T::from_c(ty, target).unwrap_or_else(|_| panic!("Failed to parse argument '{arg}'")); + + Argument { + pos, + name: String::from(var_name), + ty: *ty, + constraint, + } + } + + fn as_call_param_c(&self) -> String { + self.ty.as_call_param_c(&self.name) + } +} + +#[derive(Debug, PartialEq, Clone)] +pub struct ArgumentList { + pub args: Vec>, +} + +impl ArgumentList +where + T: IntrinsicTypeDefinition, +{ + /// Converts the argument list into the call parameters for a C function call. + /// e.g. this would generate something like `a, &b, c` + pub fn as_call_param_c(&self) -> String { + self.iter() + .map(|arg| arg.as_call_param_c()) + .collect::>() + .join(", ") + } + + /// Converts the argument list into the call parameters for a Rust function. + /// e.g. this would generate something like `a, b, c` + pub fn as_call_param_rust(&self) -> String { + self.iter() + .filter(|a| !a.has_constraint()) + .map(|arg| arg.name.clone()) + .collect::>() + .join(", ") + } + + pub fn as_constraint_parameters_rust(&self) -> String { + self.iter() + .filter(|a| a.has_constraint()) + .map(|arg| arg.name.clone()) + .collect::>() + .join(", ") + } + + /// Creates a line for each argument that initializes an array for C from which `loads` argument + /// values can be loaded as a sliding window. + /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. 
+ pub fn gen_arglists_c(&self, indentation: Indentation, loads: u32) -> String { + self.iter() + .filter(|&arg| !arg.has_constraint()) + .map(|arg| { + format!( + "{indentation}const {ty} {name}_vals[] = {values};", + ty = arg.ty.c_scalar_type(), + name = arg.name, + values = arg.ty.populate_random(indentation, loads, &Language::C) + ) + }) + .collect::>() + .join("\n") + } + + /// Creates a line for each argument that initializes an array for Rust from which `loads` argument + /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];` + pub fn gen_arglists_rust(&self, indentation: Indentation, loads: u32) -> String { + self.iter() + .filter(|&arg| !arg.has_constraint()) + .map(|arg| { + format!( + "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};", + bind = arg.rust_vals_array_binding(), + name = arg.rust_vals_array_name(), + ty = arg.ty.rust_scalar_type(), + load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, + values = arg.ty.populate_random(indentation, loads, &Language::Rust) + ) + }) + .collect::>() + .join("\n") + } + + /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at + /// an offset `i` using a load intrinsic, in C. + /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);` + /// + /// ARM-specific + pub fn load_values_c(&self, indentation: Indentation) -> String { + self.iter() + .filter(|&arg| !arg.has_constraint()) + .map(|arg| { + format!( + "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i]));\n", + ty = arg.to_c_type(), + name = arg.name, + load = if arg.is_simd() { + arg.ty.get_load_function(Language::C) + } else { + "*".to_string() + } + ) + }) + .collect() + } + + /// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at + /// an offset `i` using a load intrinsic, in Rust. 
+ /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));` + pub fn load_values_rust(&self, indentation: Indentation) -> String { + self.iter() + .filter(|&arg| !arg.has_constraint()) + .map(|arg| { + format!( + "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i));\n", + name = arg.name, + vals_name = arg.rust_vals_array_name(), + load = if arg.is_simd() { + arg.ty.get_load_function(Language::Rust) + } else { + "*".to_string() + }, + ) + }) + .collect() + } + + pub fn iter(&self) -> std::slice::Iter<'_, Argument> { + self.args.iter() + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/cli.rs b/library/stdarch/crates/intrinsic-test/src/common/cli.rs new file mode 100644 index 000000000000..1d572723008d --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/cli.rs @@ -0,0 +1,113 @@ +use itertools::Itertools; +use std::path::PathBuf; + +#[derive(Debug, PartialEq)] +pub enum Language { + Rust, + C, +} + +pub enum FailureReason { + RunC(String), + RunRust(String), + Difference(String, String, String), +} + +/// Intrinsic test tool +#[derive(clap::Parser)] +#[command( + name = "Intrinsic test tool", + about = "Generates Rust and C programs for intrinsics and compares the output" +)] +pub struct Cli { + /// The input file containing the intrinsics + pub input: PathBuf, + + /// The rust toolchain to use for building the rust code + #[arg(long)] + pub toolchain: Option, + + /// The C++ compiler to use for compiling the c++ code + #[arg(long, default_value_t = String::from("clang++"))] + pub cppcompiler: String, + + /// Run the C programs under emulation with this command + #[arg(long)] + pub runner: Option, + + /// Filename for a list of intrinsics to skip (one per line) + #[arg(long)] + pub skip: Option, + + /// Regenerate test programs, but don't build or run them + #[arg(long)] + pub generate_only: bool, + + /// Pass a target the test suite + #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] + pub 
target: String, + + /// Set the linker + #[arg(long)] + pub linker: Option, + + /// Set the sysroot for the C++ compiler + #[arg(long)] + pub cxx_toolchain_dir: Option, +} + +pub struct ProcessedCli { + pub filename: PathBuf, + pub toolchain: Option, + pub cpp_compiler: Option, + pub c_runner: String, + pub target: String, + pub linker: Option, + pub cxx_toolchain_dir: Option, + pub skip: Vec, +} + +impl ProcessedCli { + pub fn new(cli_options: Cli) -> Self { + let filename = cli_options.input; + let c_runner = cli_options.runner.unwrap_or_default(); + let target = cli_options.target; + let linker = cli_options.linker; + let cxx_toolchain_dir = cli_options.cxx_toolchain_dir; + + let skip = if let Some(filename) = cli_options.skip { + let data = std::fs::read_to_string(&filename).expect("Failed to open file"); + data.lines() + .map(str::trim) + .filter(|s| !s.contains('#')) + .map(String::from) + .collect_vec() + } else { + Default::default() + }; + + let (toolchain, cpp_compiler) = if cli_options.generate_only { + (None, None) + } else { + ( + Some( + cli_options + .toolchain + .map_or_else(String::new, |t| format!("+{t}")), + ), + Some(cli_options.cppcompiler), + ) + }; + + Self { + toolchain, + cpp_compiler, + c_runner, + target, + linker, + cxx_toolchain_dir, + skip, + filename, + } + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/compare.rs b/library/stdarch/crates/intrinsic-test/src/common/compare.rs new file mode 100644 index 000000000000..815ccf89fc69 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/compare.rs @@ -0,0 +1,90 @@ +use super::cli::FailureReason; +use rayon::prelude::*; +use std::process::Command; + +pub fn compare_outputs( + intrinsic_name_list: &Vec, + toolchain: &str, + runner: &str, + target: &str, +) -> bool { + let intrinsics = intrinsic_name_list + .par_iter() + .filter_map(|intrinsic_name| { + let c = Command::new("sh") + .arg("-c") + .arg(format!("{runner} ./c_programs/{intrinsic_name}")) + 
.output(); + + let rust = Command::new("sh") + .current_dir("rust_programs") + .arg("-c") + .arg(format!( + "cargo {toolchain} run --target {target} --bin {intrinsic_name} --release", + )) + .env("RUSTFLAGS", "-Cdebuginfo=0") + .output(); + + let (c, rust) = match (c, rust) { + (Ok(c), Ok(rust)) => (c, rust), + a => panic!("{a:#?}"), + }; + + if !c.status.success() { + error!( + "Failed to run C program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}", + stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), + stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), + ); + return Some(FailureReason::RunC(intrinsic_name.clone())); + } + + if !rust.status.success() { + error!( + "Failed to run Rust program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}", + stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), + stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), + ); + return Some(FailureReason::RunRust(intrinsic_name.clone())); + } + + info!("Comparing intrinsic: {}", intrinsic_name); + + let c = std::str::from_utf8(&c.stdout) + .unwrap() + .to_lowercase() + .replace("-nan", "nan"); + let rust = std::str::from_utf8(&rust.stdout) + .unwrap() + .to_lowercase() + .replace("-nan", "nan"); + + if c == rust { + None + } else { + Some(FailureReason::Difference(intrinsic_name.clone(), c, rust)) + } + }) + .collect::>(); + + intrinsics.iter().for_each(|reason| match reason { + FailureReason::Difference(intrinsic, c, rust) => { + println!("Difference for intrinsic: {intrinsic}"); + let diff = diff::lines(c, rust); + diff.iter().for_each(|diff| match diff { + diff::Result::Left(c) => println!("C: {c}"), + diff::Result::Right(rust) => println!("Rust: {rust}"), + diff::Result::Both(_, _) => (), + }); + println!("****************************************************************"); + } + FailureReason::RunC(intrinsic) => { + println!("Failed to run C program for intrinsic {intrinsic}") + } + FailureReason::RunRust(intrinsic) => { + 
println!("Failed to run rust program for intrinsic {intrinsic}") + } + }); + println!("{} differences found", intrinsics.len()); + intrinsics.is_empty() +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs b/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs new file mode 100644 index 000000000000..aebb7b111e28 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs @@ -0,0 +1,154 @@ +#[derive(Clone)] +pub struct CompilationCommandBuilder { + compiler: String, + target: Option, + cxx_toolchain_dir: Option, + arch_flags: Vec, + optimization: String, + include_paths: Vec, + project_root: Option, + output: String, + input: String, + linker: Option, + extra_flags: Vec, +} + +impl CompilationCommandBuilder { + pub fn new() -> Self { + Self { + compiler: String::new(), + target: None, + cxx_toolchain_dir: None, + arch_flags: Vec::new(), + optimization: "2".to_string(), + include_paths: Vec::new(), + project_root: None, + output: String::new(), + input: String::new(), + linker: None, + extra_flags: Vec::new(), + } + } + + pub fn set_compiler(mut self, compiler: &str) -> Self { + self.compiler = compiler.to_string(); + self + } + + pub fn set_target(mut self, target: &str) -> Self { + self.target = Some(target.to_string()); + self + } + + pub fn set_cxx_toolchain_dir(mut self, path: Option<&str>) -> Self { + self.cxx_toolchain_dir = path.map(|p| p.to_string()); + self + } + + pub fn add_arch_flags(mut self, flags: Vec<&str>) -> Self { + let mut new_arch_flags = flags.into_iter().map(|v| v.to_string()).collect(); + self.arch_flags.append(&mut new_arch_flags); + + self + } + + pub fn set_opt_level(mut self, optimization: &str) -> Self { + self.optimization = optimization.to_string(); + self + } + + /// Sets a list of include paths for compilation. + /// The paths that are passed must be relative to the + /// "cxx_toolchain_dir" directory path. 
+ pub fn set_include_paths(mut self, paths: Vec<&str>) -> Self { + self.include_paths = paths.into_iter().map(|path| path.to_string()).collect(); + self + } + + /// Sets the root path of all the generated test files. + pub fn set_project_root(mut self, path: &str) -> Self { + self.project_root = Some(path.to_string()); + self + } + + /// The name of the output executable, without any suffixes + pub fn set_output_name(mut self, path: &str) -> Self { + self.output = path.to_string(); + self + } + + /// The name of the input C file, without any suffixes + pub fn set_input_name(mut self, path: &str) -> Self { + self.input = path.to_string(); + self + } + + pub fn set_linker(mut self, linker: String) -> Self { + self.linker = Some(linker); + self + } + + pub fn add_extra_flags(mut self, flags: Vec<&str>) -> Self { + let mut flags: Vec = flags.into_iter().map(|f| f.to_string()).collect(); + self.extra_flags.append(&mut flags); + self + } + + pub fn add_extra_flag(self, flag: &str) -> Self { + self.add_extra_flags(vec![flag]) + } +} + +impl CompilationCommandBuilder { + pub fn make_string(self) -> String { + let arch_flags = self.arch_flags.join("+"); + let flags = std::env::var("CPPFLAGS").unwrap_or("".into()); + let project_root = self.project_root.unwrap_or_default(); + let project_root_str = project_root.as_str(); + let mut output = self.output.clone(); + if self.linker.is_some() { + output += ".o" + }; + let mut command = format!( + "{} {flags} -march={arch_flags} \ + -O{} \ + -o {project_root}/{} \ + {project_root}/{}.cpp", + self.compiler, self.optimization, output, self.input, + ); + + command = command + " " + self.extra_flags.join(" ").as_str(); + + if let Some(target) = &self.target { + command = command + " --target=" + target; + } + + if let (Some(linker), Some(cxx_toolchain_dir)) = (&self.linker, &self.cxx_toolchain_dir) { + let include_args = self + .include_paths + .iter() + .map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path) + 
.collect::>() + .join(" "); + + command = command + + " -c " + + include_args.as_str() + + " && " + + linker + + " " + + project_root_str + + "/" + + &output + + " -o " + + project_root_str + + "/" + + &self.output + + " && rm " + + project_root_str + + "/" + + &output; + } + command + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/constraint.rs b/library/stdarch/crates/intrinsic-test/src/common/constraint.rs new file mode 100644 index 000000000000..269fb7f90cb7 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/constraint.rs @@ -0,0 +1,17 @@ +use serde::Deserialize; +use std::ops::Range; + +#[derive(Debug, PartialEq, Clone, Deserialize)] +pub enum Constraint { + Equal(i64), + Range(Range), +} + +impl Constraint { + pub fn to_range(&self) -> Range { + match self { + Constraint::Equal(eq) => *eq..*eq + 1, + Constraint::Range(range) => range.clone(), + } + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs new file mode 100644 index 000000000000..84c28cc4bf43 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs @@ -0,0 +1,198 @@ +use itertools::Itertools; +use rayon::prelude::*; +use std::collections::BTreeMap; +use std::process::Command; + +use super::argument::Argument; +use super::indentation::Indentation; +use super::intrinsic::IntrinsicDefinition; +use super::intrinsic_helpers::IntrinsicTypeDefinition; + +// The number of times each intrinsic will be called. 
+const PASSES: u32 = 20; + +// Formats the main C program template with placeholders +pub fn format_c_main_template( + notices: &str, + header_files: &[&str], + arch_identifier: &str, + arch_specific_definitions: &[&str], + arglists: &str, + passes: &str, +) -> String { + format!( + r#"{notices}{header_files} +#include +#include +#include +#include + +template T1 cast(T2 x) {{ + static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); + T1 ret{{}}; + memcpy(&ret, &x, sizeof(T1)); + return ret; +}} + +std::ostream& operator<<(std::ostream& os, float16_t value) {{ + uint16_t temp = 0; + memcpy(&temp, &value, sizeof(float16_t)); + std::stringstream ss; + ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp; + os << ss.str(); + return os; +}} + +#ifdef __{arch_identifier}__ +{arch_specific_definitions} +#endif + +{arglists} + +int main(int argc, char **argv) {{ +{passes} + return 0; +}}"#, + header_files = header_files + .iter() + .map(|header| format!("#include <{header}>")) + .collect::>() + .join("\n"), + arch_specific_definitions = arch_specific_definitions.join("\n"), + ) +} + +pub fn compile_c_programs(compiler_commands: &[String]) -> bool { + compiler_commands + .par_iter() + .map(|compiler_command| { + let output = Command::new("sh").arg("-c").arg(compiler_command).output(); + if let Ok(output) = output { + if output.status.success() { + true + } else { + error!( + "Failed to compile code for intrinsics: \n\nstdout:\n{}\n\nstderr:\n{}", + std::str::from_utf8(&output.stdout).unwrap_or(""), + std::str::from_utf8(&output.stderr).unwrap_or("") + ); + false + } + } else { + error!("Command failed: {:#?}", output); + false + } + }) + .find_any(|x| !x) + .is_none() +} + +// Creates directory structure and file path mappings +pub fn setup_c_file_paths(identifiers: &Vec) -> BTreeMap<&String, String> { + let _ = std::fs::create_dir("c_programs"); + identifiers + .par_iter() + .map(|identifier| { + let c_filename = 
format!(r#"c_programs/{identifier}.cpp"#); + + (identifier, c_filename) + }) + .collect::>() +} + +pub fn generate_c_test_loop( + intrinsic: &dyn IntrinsicDefinition, + indentation: Indentation, + additional: &str, + passes: u32, + _target: &str, +) -> String { + let body_indentation = indentation.nested(); + format!( + "{indentation}for (int i=0; i<{passes}; i++) {{\n\ + {loaded_args}\ + {body_indentation}auto __return_value = {intrinsic_call}({args});\n\ + {print_result}\n\ + {indentation}}}", + loaded_args = intrinsic.arguments().load_values_c(body_indentation), + intrinsic_call = intrinsic.name(), + args = intrinsic.arguments().as_call_param_c(), + print_result = intrinsic.print_result_c(body_indentation, additional) + ) +} + +pub fn generate_c_constraint_blocks( + intrinsic: &dyn IntrinsicDefinition, + indentation: Indentation, + constraints: &[&Argument], + name: String, + target: &str, +) -> String { + if let Some((current, constraints)) = constraints.split_last() { + let range = current + .constraint + .iter() + .map(|c| c.to_range()) + .flat_map(|r| r.into_iter()); + + let body_indentation = indentation.nested(); + range + .map(|i| { + format!( + "{indentation}{{\n\ + {body_indentation}{ty} {name} = {val};\n\ + {pass}\n\ + {indentation}}}", + name = current.name, + ty = current.ty.c_type(), + val = i, + pass = generate_c_constraint_blocks( + intrinsic, + body_indentation, + constraints, + format!("{name}-{i}"), + target, + ) + ) + }) + .join("\n") + } else { + generate_c_test_loop(intrinsic, indentation, &name, PASSES, target) + } +} + +// Compiles C test programs using specified compiler +pub fn create_c_test_program( + intrinsic: &dyn IntrinsicDefinition, + header_files: &[&str], + target: &str, + c_target: &str, + notices: &str, + arch_specific_definitions: &[&str], +) -> String { + let arguments = intrinsic.arguments(); + let constraints = arguments + .iter() + .filter(|&i| i.has_constraint()) + .collect_vec(); + + let indentation = 
Indentation::default(); + format_c_main_template( + notices, + header_files, + c_target, + arch_specific_definitions, + intrinsic + .arguments() + .gen_arglists_c(indentation, PASSES) + .as_str(), + generate_c_constraint_blocks( + intrinsic, + indentation.nested(), + constraints.as_slice(), + Default::default(), + target, + ) + .as_str(), + ) +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs new file mode 100644 index 000000000000..a2878502ac94 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -0,0 +1,243 @@ +use itertools::Itertools; +use rayon::prelude::*; +use std::collections::BTreeMap; +use std::fs::File; +use std::io::Write; +use std::process::Command; + +use super::argument::Argument; +use super::indentation::Indentation; +use super::intrinsic::{IntrinsicDefinition, format_f16_return_value}; +use super::intrinsic_helpers::IntrinsicTypeDefinition; + +// The number of times each intrinsic will be called. 
+const PASSES: u32 = 20; + +pub fn format_rust_main_template( + notices: &str, + definitions: &str, + configurations: &str, + arch_definition: &str, + arglists: &str, + passes: &str, +) -> String { + format!( + r#"{notices}#![feature(simd_ffi)] +#![feature(link_llvm_intrinsics)] +#![feature(f16)] +{configurations} +{definitions} + +use core_arch::arch::{arch_definition}::*; + +fn main() {{ +{arglists} +{passes} +}} +"#, + ) +} + +pub fn compile_rust_programs( + binaries: Vec, + toolchain: Option<&str>, + target: &str, + linker: Option<&str>, +) -> bool { + let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); + cargo + .write_all( + format!( + r#"[package] +name = "intrinsic-test-programs" +version = "{version}" +authors = [{authors}] +license = "{license}" +edition = "2018" +[workspace] +[dependencies] +core_arch = {{ path = "../crates/core_arch" }} +{binaries}"#, + version = env!("CARGO_PKG_VERSION"), + authors = env!("CARGO_PKG_AUTHORS") + .split(":") + .format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))), + license = env!("CARGO_PKG_LICENSE"), + binaries = binaries + .iter() + .map(|binary| { + format!( + r#"[[bin]] +name = "{binary}" +path = "{binary}/main.rs""#, + ) + }) + .collect::>() + .join("\n") + ) + .into_bytes() + .as_slice(), + ) + .unwrap(); + + let toolchain = match toolchain { + None => return true, + Some(t) => t, + }; + + /* If there has been a linker explicitly set from the command line then + * we want to set it via setting it in the RUSTFLAGS*/ + + let cargo_command = format!("cargo {toolchain} build --target {target} --release"); + + let mut command = Command::new("sh"); + command + .current_dir("rust_programs") + .arg("-c") + .arg(cargo_command); + + let mut rust_flags = "-Cdebuginfo=0".to_string(); + if let Some(linker) = linker { + rust_flags.push_str(" -C linker="); + rust_flags.push_str(linker); + rust_flags.push_str(" -C link-args=-static"); + + command.env("CPPFLAGS", "-fuse-ld=lld"); + } + + 
command.env("RUSTFLAGS", rust_flags); + let output = command.output(); + + if let Ok(output) = output { + if output.status.success() { + true + } else { + error!( + "Failed to compile code for rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}", + std::str::from_utf8(&output.stdout).unwrap_or(""), + std::str::from_utf8(&output.stderr).unwrap_or("") + ); + false + } + } else { + error!("Command failed: {:#?}", output); + false + } +} + +// Creates directory structure and file path mappings +pub fn setup_rust_file_paths(identifiers: &Vec) -> BTreeMap<&String, String> { + identifiers + .par_iter() + .map(|identifier| { + let rust_dir = format!("rust_programs/{identifier}"); + let _ = std::fs::create_dir_all(&rust_dir); + let rust_filename = format!("{rust_dir}/main.rs"); + + (identifier, rust_filename) + }) + .collect::>() +} + +pub fn generate_rust_test_loop( + intrinsic: &dyn IntrinsicDefinition, + indentation: Indentation, + additional: &str, + passes: u32, +) -> String { + let constraints = intrinsic.arguments().as_constraint_parameters_rust(); + let constraints = if !constraints.is_empty() { + format!("::<{constraints}>") + } else { + constraints + }; + + let return_value = format_f16_return_value(intrinsic); + let indentation2 = indentation.nested(); + let indentation3 = indentation2.nested(); + format!( + "{indentation}for i in 0..{passes} {{\n\ + {indentation2}unsafe {{\n\ + {loaded_args}\ + {indentation3}let __return_value = {intrinsic_call}{const}({args});\n\ + {indentation3}println!(\"Result {additional}-{{}}: {{:?}}\", i + 1, {return_value});\n\ + {indentation2}}}\n\ + {indentation}}}", + loaded_args = intrinsic.arguments().load_values_rust(indentation3), + intrinsic_call = intrinsic.name(), + const = constraints, + args = intrinsic.arguments().as_call_param_rust(), + ) +} + +pub fn generate_rust_constraint_blocks( + intrinsic: &dyn IntrinsicDefinition, + indentation: Indentation, + constraints: &[&Argument], + name: String, +) -> String { + if let 
Some((current, constraints)) = constraints.split_last() { + let range = current + .constraint + .iter() + .map(|c| c.to_range()) + .flat_map(|r| r.into_iter()); + + let body_indentation = indentation.nested(); + range + .map(|i| { + format!( + "{indentation}{{\n\ + {body_indentation}const {name}: {ty} = {val};\n\ + {pass}\n\ + {indentation}}}", + name = current.name, + ty = current.ty.rust_type(), + val = i, + pass = generate_rust_constraint_blocks( + intrinsic, + body_indentation, + constraints, + format!("{name}-{i}") + ) + ) + }) + .join("\n") + } else { + generate_rust_test_loop(intrinsic, indentation, &name, PASSES) + } +} + +// Top-level function to create complete test program +pub fn create_rust_test_program( + intrinsic: &dyn IntrinsicDefinition, + target: &str, + notice: &str, + definitions: &str, + cfg: &str, +) -> String { + let arguments = intrinsic.arguments(); + let constraints = arguments + .iter() + .filter(|i| i.has_constraint()) + .collect_vec(); + + let indentation = Indentation::default(); + format_rust_main_template( + notice, + definitions, + cfg, + target, + intrinsic + .arguments() + .gen_arglists_rust(indentation.nested(), PASSES) + .as_str(), + generate_rust_constraint_blocks( + intrinsic, + indentation.nested(), + &constraints, + Default::default(), + ) + .as_str(), + ) +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/indentation.rs b/library/stdarch/crates/intrinsic-test/src/common/indentation.rs new file mode 100644 index 000000000000..9ee331d7f7a3 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/indentation.rs @@ -0,0 +1,22 @@ +//! Basic code formatting tools. +//! +//! We don't need perfect formatting for the generated tests, but simple indentation can make +//! debugging a lot easier. 
+ +#[derive(Copy, Clone, Debug, Default)] +pub struct Indentation(u32); + +impl Indentation { + pub fn nested(self) -> Self { + Self(self.0 + 1) + } +} + +impl std::fmt::Display for Indentation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for _ in 0..self.0 { + write!(f, " ")?; + } + Ok(()) + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs new file mode 100644 index 000000000000..bc46ccfbac40 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs @@ -0,0 +1,51 @@ +use super::argument::ArgumentList; +use super::indentation::Indentation; +use super::intrinsic_helpers::{IntrinsicTypeDefinition, TypeKind}; + +/// An intrinsic +#[derive(Debug, PartialEq, Clone)] +pub struct Intrinsic { + /// The function name of this intrinsic. + pub name: String, + + /// Any arguments for this intrinsic. + pub arguments: ArgumentList, + + /// The return type of this intrinsic. + pub results: T, + + /// Any architecture-specific tags. + pub arch_tags: Vec, +} + +pub trait IntrinsicDefinition +where + T: IntrinsicTypeDefinition, +{ + fn arguments(&self) -> ArgumentList; + + fn results(&self) -> T; + + fn name(&self) -> String; + + /// Generates a std::cout for the intrinsics results that will match the + /// rust debug output format for the return type. The generated line assumes + /// there is an int i in scope which is the current pass number. + fn print_result_c(&self, _indentation: Indentation, _additional: &str) -> String; +} + +pub fn format_f16_return_value( + intrinsic: &dyn IntrinsicDefinition, +) -> String { + // the `intrinsic-test` crate compares the output of C and Rust intrinsics. Currently, It uses + // a string representation of the output value to compare. In C, f16 values are currently printed + // as hexadecimal integers. 
Since https://github.com/rust-lang/rust/pull/127013, rust does print + // them as decimal floating point values. To keep the intrinsics tests working, for now, format + // vectors containing f16 values like C prints them. + let return_value = match intrinsic.results().kind() { + TypeKind::Float if intrinsic.results().inner_size() == 16 => "debug_f16(__return_value)", + _ => "format_args!(\"{__return_value:.150?}\")", + }; + + String::from(return_value) +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs new file mode 100644 index 000000000000..3d200b19461e --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -0,0 +1,296 @@ +use std::fmt; +use std::ops::Deref; +use std::str::FromStr; + +use itertools::Itertools as _; + +use super::cli::Language; +use super::indentation::Indentation; +use super::values::value_for_array; + +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum TypeKind { + BFloat, + Float, + Int, + UInt, + Poly, + Void, +} + +impl FromStr for TypeKind { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "bfloat" => Ok(Self::BFloat), + "float" => Ok(Self::Float), + "int" => Ok(Self::Int), + "poly" => Ok(Self::Poly), + "uint" | "unsigned" => Ok(Self::UInt), + "void" => Ok(Self::Void), + _ => Err(format!("Impossible to parse argument kind {s}")), + } + } +} + +impl fmt::Display for TypeKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match self { + Self::BFloat => "bfloat", + Self::Float => "float", + Self::Int => "int", + Self::UInt => "uint", + Self::Poly => "poly", + Self::Void => "void", + } + ) + } +} + +impl TypeKind { + /// Gets the type part of a c typedef for a type that's in the form of {type}{size}_t. 
+ pub fn c_prefix(&self) -> &str { + match self { + Self::Float => "float", + Self::Int => "int", + Self::UInt => "uint", + Self::Poly => "poly", + _ => unreachable!("Not used: {:#?}", self), + } + } + + /// Gets the rust prefix for the type kind i.e. i, u, f. + pub fn rust_prefix(&self) -> &str { + match self { + Self::Float => "f", + Self::Int => "i", + Self::UInt => "u", + Self::Poly => "u", + _ => unreachable!("Unused type kind: {:#?}", self), + } + } +} + +#[derive(Debug, PartialEq, Clone)] +pub struct IntrinsicType { + pub constant: bool, + + /// whether this object is a const pointer + pub ptr_constant: bool, + + pub ptr: bool, + + pub kind: TypeKind, + /// The bit length of this type (e.g. 32 for u32). + pub bit_len: Option, + + /// Length of the SIMD vector (i.e. 4 for uint32x4_t), A value of `None` + /// means this is not a simd type. A `None` can be assumed to be 1, + /// although in some places a distinction is needed between `u64` and + /// `uint64x1_t` this signals that. + pub simd_len: Option, + + /// The number of rows for SIMD matrices (i.e. 2 for uint8x8x2_t). + /// A value of `None` represents a type that does not contain any + /// rows encoded in the type (e.g. uint8x8_t). + /// A value of `None` can be assumed to be 1 though. 
+ pub vec_len: Option, + + pub target: String, +} + +impl IntrinsicType { + pub fn kind(&self) -> TypeKind { + self.kind + } + + pub fn inner_size(&self) -> u32 { + if let Some(bl) = self.bit_len { + bl + } else { + unreachable!("") + } + } + + pub fn num_lanes(&self) -> u32 { + self.simd_len.unwrap_or(1) + } + + pub fn num_vectors(&self) -> u32 { + self.vec_len.unwrap_or(1) + } + + pub fn is_simd(&self) -> bool { + self.simd_len.is_some() || self.vec_len.is_some() + } + + pub fn is_ptr(&self) -> bool { + self.ptr + } + + pub fn c_scalar_type(&self) -> String { + format!( + "{prefix}{bits}_t", + prefix = self.kind().c_prefix(), + bits = self.inner_size() + ) + } + + pub fn rust_scalar_type(&self) -> String { + format!( + "{prefix}{bits}", + prefix = self.kind().rust_prefix(), + bits = self.inner_size() + ) + } + + pub fn c_promotion(&self) -> &str { + match *self { + IntrinsicType { + kind, + bit_len: Some(8), + .. + } => match kind { + TypeKind::Int => "(int)", + TypeKind::UInt => "(unsigned int)", + TypeKind::Poly => "(unsigned int)(uint8_t)", + _ => "", + }, + IntrinsicType { + kind: TypeKind::Poly, + bit_len: Some(bit_len), + .. + } => match bit_len { + 8 => unreachable!("handled above"), + 16 => "(uint16_t)", + 32 => "(uint32_t)", + 64 => "(uint64_t)", + 128 => "", + _ => panic!("invalid bit_len"), + }, + _ => "", + } + } + + pub fn populate_random( + &self, + indentation: Indentation, + loads: u32, + language: &Language, + ) -> String { + match self { + IntrinsicType { + bit_len: Some(bit_len @ (8 | 16 | 32 | 64)), + kind: kind @ (TypeKind::Int | TypeKind::UInt | TypeKind::Poly), + simd_len, + vec_len, + .. 
+ } => { + let (prefix, suffix) = match language { + Language::Rust => ("[", "]"), + Language::C => ("{", "}"), + }; + let body_indentation = indentation.nested(); + format!( + "{prefix}\n{body}\n{indentation}{suffix}", + body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) + .format_with(",\n", |i, fmt| { + let src = value_for_array(*bit_len, i); + assert!(src == 0 || src.ilog2() < *bit_len); + if *kind == TypeKind::Int && (src >> (*bit_len - 1)) != 0 { + // `src` is a two's complement representation of a negative value. + let mask = !0u64 >> (64 - *bit_len); + let ones_compl = src ^ mask; + let twos_compl = ones_compl + 1; + if (twos_compl == src) && (language == &Language::C) { + // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid + // undefined literal overflow behaviour. + fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) + } else { + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) + } + } else { + fmt(&format_args!("{body_indentation}{src:#x}")) + } + }) + ) + } + IntrinsicType { + kind: TypeKind::Float, + bit_len: Some(bit_len @ (16 | 32 | 64)), + simd_len, + vec_len, + .. 
+ } => { + let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) { + (&Language::Rust, 16) => ("[", "f16::from_bits(", ")", "]"), + (&Language::Rust, 32) => ("[", "f32::from_bits(", ")", "]"), + (&Language::Rust, 64) => ("[", "f64::from_bits(", ")", "]"), + (&Language::C, 16) => ("{", "cast(", ")", "}"), + (&Language::C, 32) => ("{", "cast(", ")", "}"), + (&Language::C, 64) => ("{", "cast(", ")", "}"), + _ => unreachable!(), + }; + format!( + "{prefix}\n{body}\n{indentation}{suffix}", + body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) + .format_with(",\n", |i, fmt| fmt(&format_args!( + "{indentation}{cast_prefix}{src:#x}{cast_suffix}", + indentation = indentation.nested(), + src = value_for_array(*bit_len, i) + ))) + ) + } + _ => unimplemented!("populate random: {:#?}", self), + } + } + + pub fn is_rust_vals_array_const(&self) -> bool { + match self { + // Floats have to be loaded at runtime for stable NaN conversion. + IntrinsicType { + kind: TypeKind::Float, + .. + } => false, + IntrinsicType { + kind: TypeKind::Int | TypeKind::UInt | TypeKind::Poly, + .. + } => true, + _ => unimplemented!(), + } + } + + pub fn as_call_param_c(&self, name: &String) -> String { + if self.ptr { + format!("&{name}") + } else { + name.clone() + } + } +} + +pub trait IntrinsicTypeDefinition: Deref { + /// Determines the load function for this type. + /// can be implemented in an `impl` block + fn get_load_function(&self, _language: Language) -> String; + + /// can be implemented in an `impl` block + fn get_lane_function(&self) -> String; + + /// can be implemented in an `impl` block + fn from_c(_s: &str, _target: &str) -> Result, String>; + + /// Gets a string containing the typename for this type in C format. 
+ /// can be directly defined in `impl` blocks + fn c_type(&self) -> String; + + /// can be directly defined in `impl` blocks + fn c_single_vector_type(&self) -> String; + + /// can be defined in `impl` blocks + fn rust_type(&self) -> String; +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/mod.rs b/library/stdarch/crates/intrinsic-test/src/common/mod.rs new file mode 100644 index 000000000000..5d51d3460ecf --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -0,0 +1,25 @@ +use cli::ProcessedCli; + +pub mod argument; +pub mod cli; +pub mod compare; +pub mod compile_c; +pub mod constraint; +pub mod gen_c; +pub mod gen_rust; +pub mod indentation; +pub mod intrinsic; +pub mod intrinsic_helpers; +pub mod values; +pub mod write_file; + +/// Architectures must support this trait +/// to be successfully tested. +pub trait SupportedArchitectureTest { + fn create(cli_options: ProcessedCli) -> Box + where + Self: Sized; + fn build_c_file(&self) -> bool; + fn build_rust_file(&self) -> bool; + fn compare_outputs(&self) -> bool; +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/values.rs b/library/stdarch/crates/intrinsic-test/src/common/values.rs new file mode 100644 index 000000000000..1b614a742ef8 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/values.rs @@ -0,0 +1,120 @@ +/// Get a single value for an argument values array in a determistic way. 
+/// * `bits`: The number of bits for the type, only 8, 16, 32, 64 are valid values +/// * `index`: The position in the array we are generating for +pub fn value_for_array(bits: u32, index: u32) -> u64 { + let index = index as usize; + match bits { + 8 => VALUES_8[index % VALUES_8.len()].into(), + 16 => VALUES_16[index % VALUES_16.len()].into(), + 32 => VALUES_32[index % VALUES_32.len()].into(), + 64 => VALUES_64[index % VALUES_64.len()], + _ => unimplemented!("value_for_array(bits: {bits}, ..)"), + } +} + +pub const VALUES_8: &[u8] = &[ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xf0, 0x80, 0x3b, 0xff, +]; + +pub const VALUES_16: &[u16] = &[ + 0x0000, // 0.0 + 0x0400, // The smallest normal value. + 0x37ff, // The value just below 0.5. + 0x3800, // 0.5 + 0x3801, // The value just above 0.5. + 0x3bff, // The value just below 1.0. + 0x3c00, // 1.0 + 0x3c01, // The value just above 1.0. + 0x3e00, // 1.5 + 0x4900, // 10 + 0x7bff, // The largest finite value. + 0x7c00, // Infinity. + // NaNs. + // - Quiet NaNs + 0x7f23, 0x7e00, // - Signalling NaNs + 0x7d23, 0x7c01, // Subnormals. + // - A recognisable bit pattern. + 0x0012, // - The largest subnormal value. + 0x03ff, // - The smallest subnormal value. + 0x0001, // The same values again, but negated. + 0x8000, 0x8400, 0xb7ff, 0xb800, 0xb801, 0xbbff, 0xbc00, 0xbc01, 0xbe00, 0xc900, 0xfbff, 0xfc00, + 0xff23, 0xfe00, 0xfd23, 0xfc01, 0x8012, 0x83ff, 0x8001, +]; + +pub const VALUES_32: &[u32] = &[ + // Simple values. + 0x00000000, // 0.0 + 0x00800000, // The smallest normal value. + 0x3effffff, // The value just below 0.5. + 0x3f000000, // 0.5 + 0x3f000001, // The value just above 0.5. + 0x3f7fffff, // The value just below 1.0. + 0x3f800000, // 1.0 + 0x3f800001, // The value just above 1.0. + 0x3fc00000, // 1.5 + 0x41200000, // 10 + 0x7f8fffff, // The largest finite value. + 0x7f800000, // Infinity. + // NaNs. 
+ // - Quiet NaNs + 0x7fd23456, 0x7fc00000, // - Signalling NaNs + 0x7f923456, 0x7f800001, // Subnormals. + // - A recognisable bit pattern. + 0x00123456, // - The largest subnormal value. + 0x007fffff, // - The smallest subnormal value. + 0x00000001, // The same values again, but negated. + 0x80000000, 0x80800000, 0xbeffffff, 0xbf000000, 0xbf000001, 0xbf7fffff, 0xbf800000, 0xbf800001, + 0xbfc00000, 0xc1200000, 0xff8fffff, 0xff800000, 0xffd23456, 0xffc00000, 0xff923456, 0xff800001, + 0x80123456, 0x807fffff, 0x80000001, +]; + +pub const VALUES_64: &[u64] = &[ + // Simple values. + 0x0000000000000000, // 0.0 + 0x0010000000000000, // The smallest normal value. + 0x3fdfffffffffffff, // The value just below 0.5. + 0x3fe0000000000000, // 0.5 + 0x3fe0000000000001, // The value just above 0.5. + 0x3fefffffffffffff, // The value just below 1.0. + 0x3ff0000000000000, // 1.0 + 0x3ff0000000000001, // The value just above 1.0. + 0x3ff8000000000000, // 1.5 + 0x4024000000000000, // 10 + 0x7fefffffffffffff, // The largest finite value. + 0x7ff0000000000000, // Infinity. + // NaNs. + // - Quiet NaNs + 0x7ff923456789abcd, + 0x7ff8000000000000, + // - Signalling NaNs + 0x7ff123456789abcd, + 0x7ff0000000000000, + // Subnormals. + // - A recognisable bit pattern. + 0x000123456789abcd, + // - The largest subnormal value. + 0x000fffffffffffff, + // - The smallest subnormal value. + 0x0000000000000001, + // The same values again, but negated. 
+ 0x8000000000000000, + 0x8010000000000000, + 0xbfdfffffffffffff, + 0xbfe0000000000000, + 0xbfe0000000000001, + 0xbfefffffffffffff, + 0xbff0000000000000, + 0xbff0000000000001, + 0xbff8000000000000, + 0xc024000000000000, + 0xffefffffffffffff, + 0xfff0000000000000, + 0xfff923456789abcd, + 0xfff8000000000000, + 0xfff123456789abcd, + 0xfff0000000000000, + 0x800123456789abcd, + 0x800fffffffffffff, + 0x8000000000000001, +]; diff --git a/library/stdarch/crates/intrinsic-test/src/common/write_file.rs b/library/stdarch/crates/intrinsic-test/src/common/write_file.rs new file mode 100644 index 000000000000..0ba3e829a6b8 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/common/write_file.rs @@ -0,0 +1,66 @@ +use super::gen_c::create_c_test_program; +use super::gen_c::setup_c_file_paths; +use super::gen_rust::{create_rust_test_program, setup_rust_file_paths}; +use super::intrinsic::IntrinsicDefinition; +use super::intrinsic_helpers::IntrinsicTypeDefinition; +use std::fs::File; +use std::io::Write; + +pub fn write_file(filename: &String, code: String) { + let mut file = File::create(filename).unwrap(); + file.write_all(code.into_bytes().as_slice()).unwrap(); +} + +pub fn write_c_testfiles( + intrinsics: &Vec<&dyn IntrinsicDefinition>, + target: &str, + c_target: &str, + headers: &[&str], + notice: &str, + arch_specific_definitions: &[&str], +) -> Vec { + let intrinsics_name_list = intrinsics + .iter() + .map(|i| i.name().clone()) + .collect::>(); + let filename_mapping = setup_c_file_paths(&intrinsics_name_list); + + intrinsics.iter().for_each(|&i| { + let c_code = create_c_test_program( + i, + headers, + target, + c_target, + notice, + arch_specific_definitions, + ); + if let Some(filename) = filename_mapping.get(&i.name()) { + write_file(filename, c_code) + }; + }); + + intrinsics_name_list +} + +pub fn write_rust_testfiles( + intrinsics: Vec<&dyn IntrinsicDefinition>, + rust_target: &str, + notice: &str, + definitions: &str, + cfg: &str, +) -> Vec { + let 
intrinsics_name_list = intrinsics + .iter() + .map(|i| i.name().clone()) + .collect::>(); + let filename_mapping = setup_rust_file_paths(&intrinsics_name_list); + + intrinsics.iter().for_each(|&i| { + let rust_code = create_rust_test_program(i, rust_target, notice, definitions, cfg); + if let Some(filename) = filename_mapping.get(&i.name()) { + write_file(filename, rust_code) + } + }); + + intrinsics_name_list +} diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs new file mode 100644 index 000000000000..054138a0dba1 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/src/main.rs @@ -0,0 +1,42 @@ +#[macro_use] +extern crate log; + +mod arm; +mod common; + +use arm::ArmArchitectureTest; +use common::SupportedArchitectureTest; +use common::cli::{Cli, ProcessedCli}; + +fn main() { + pretty_env_logger::init(); + let args: Cli = clap::Parser::parse(); + let processed_cli_options = ProcessedCli::new(args); + + let test_environment_result: Option> = + match processed_cli_options.target.as_str() { + "aarch64-unknown-linux-gnu" + | "armv7-unknown-linux-gnueabihf" + | "aarch64_be-unknown-linux-gnu" => { + Some(ArmArchitectureTest::create(processed_cli_options)) + } + + _ => None, + }; + + if test_environment_result.is_none() { + std::process::exit(0); + } + + let test_environment = test_environment_result.unwrap(); + + if !test_environment.build_c_file() { + std::process::exit(2); + } + if !test_environment.build_rust_file() { + std::process::exit(3); + } + if !test_environment.compare_outputs() { + std::process::exit(1); + } +} diff --git a/library/stdarch/crates/simd-test-macro/Cargo.toml b/library/stdarch/crates/simd-test-macro/Cargo.toml new file mode 100644 index 000000000000..8f9f9b13273c --- /dev/null +++ b/library/stdarch/crates/simd-test-macro/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "simd-test-macro" +version = "0.1.0" +authors = ["Alex Crichton "] +edition = "2024" + +[lib] +proc-macro = 
true +test = false + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +syn = { version = "2.0", features = ["full"] } diff --git a/library/stdarch/crates/simd-test-macro/src/lib.rs b/library/stdarch/crates/simd-test-macro/src/lib.rs new file mode 100644 index 000000000000..18e4747d94d9 --- /dev/null +++ b/library/stdarch/crates/simd-test-macro/src/lib.rs @@ -0,0 +1,126 @@ +//! Implementation of the `#[simd_test]` macro +//! +//! This macro expands to a `#[test]` function which tests the local machine +//! for the appropriate cfg before calling the inner test function. +#![deny(rust_2018_idioms)] + +#[macro_use] +extern crate quote; + +use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree}; +use quote::ToTokens; +use std::env; + +fn string(s: &str) -> TokenTree { + Literal::string(s).into() +} + +#[proc_macro_attribute] +pub fn simd_test( + attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + let tokens = TokenStream::from(attr).into_iter().collect::>(); + if tokens.len() != 3 { + panic!("expected #[simd_test(enable = \"feature\")]"); + } + match &tokens[0] { + TokenTree::Ident(tt) if *tt == "enable" => {} + _ => panic!("expected #[simd_test(enable = \"feature\")]"), + } + match &tokens[1] { + TokenTree::Punct(tt) if tt.as_char() == '=' => {} + _ => panic!("expected #[simd_test(enable = \"feature\")]"), + } + let enable_feature = match &tokens[2] { + TokenTree::Literal(tt) => tt.to_string(), + _ => panic!("expected #[simd_test(enable = \"feature\")]"), + }; + let enable_feature = enable_feature.trim_start_matches('"').trim_end_matches('"'); + let target_features: Vec = enable_feature + .replace('+', "") + .split(',') + .map(String::from) + .collect(); + + let enable_feature = string(enable_feature); + let mut item = syn::parse_macro_input!(item as syn::ItemFn); + let item_attrs = std::mem::take(&mut item.attrs); + let name = &item.sig.ident; + + let target = env::var("TARGET").expect( + "TARGET environment 
variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)" + ); + let macro_test = match target + .split('-') + .next() + .unwrap_or_else(|| panic!("target triple contained no \"-\": {target}")) + { + "i686" | "x86_64" | "i586" => "is_x86_feature_detected", + "arm" | "armv7" => "is_arm_feature_detected", + "aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected", + maybe_riscv if maybe_riscv.starts_with("riscv") => "is_riscv_feature_detected", + "powerpc" | "powerpcle" => "is_powerpc_feature_detected", + "powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected", + "loongarch64" => "is_loongarch_feature_detected", + "s390x" => "is_s390x_feature_detected", + t => panic!("unknown target: {t}"), + }; + let macro_test = Ident::new(macro_test, Span::call_site()); + + let skipped_functions = env::var("STDARCH_TEST_SKIP_FUNCTION").unwrap_or_default(); + let skipped_features = env::var("STDARCH_TEST_SKIP_FEATURE").unwrap_or_default(); + + let mut name_str = &*name.to_string(); + if name_str.starts_with("test_") { + name_str = &name_str[5..]; + } + + let skip_this = skipped_functions + .split(',') + .map(str::trim) + .any(|s| s == name_str) + || skipped_features + .split(',') + .map(str::trim) + .any(|s| target_features.iter().any(|feature| s == feature)); + + let mut detect_missing_features = TokenStream::new(); + for feature in target_features { + let q = quote_spanned! { + proc_macro2::Span::call_site() => + if !#macro_test!(#feature) { + missing_features.push(#feature); + } + }; + q.to_tokens(&mut detect_missing_features); + } + + let maybe_ignore = if skip_this { + quote! { #[ignore] } + } else { + TokenStream::new() + }; + + let ret: TokenStream = quote_spanned! 
{ + proc_macro2::Span::call_site() => + #[allow(non_snake_case)] + #[test] + #maybe_ignore + #(#item_attrs)* + fn #name() { + let mut missing_features = ::std::vec::Vec::new(); + #detect_missing_features + if missing_features.is_empty() { + let v = unsafe { #name() }; + return v; + } else { + ::stdarch_test::assert_skip_test_ok(stringify!(#name), &missing_features); + } + + #[target_feature(enable = #enable_feature)] + #item + } + }; + ret.into() +} diff --git a/library/stdarch/crates/std_detect/Cargo.toml b/library/stdarch/crates/std_detect/Cargo.toml new file mode 100644 index 000000000000..f990e7241252 --- /dev/null +++ b/library/stdarch/crates/std_detect/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "std_detect" +version = "0.1.5" +authors = [ + "Alex Crichton ", + "Andrew Gallant ", + "Gonzalo Brito Gadeschi ", +] +description = "`std::detect` - Rust's standard library run-time CPU feature detection." +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" +readme = "README.md" +keywords = ["std", "run-time", "feature", "detection"] +categories = ["hardware-support"] +license = "MIT OR Apache-2.0" +edition = "2024" + +[badges] +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } +maintenance = { status = "experimental" } + +[dependencies] +cfg-if = "1.0.0" + +# When built as part of libstd +core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" } +alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-alloc" } + +[target.'cfg(not(windows))'.dependencies] +libc = { version = "0.2.0", optional = true, default-features = false } + +[features] +default = [ "std_detect_dlsym_getauxval", "std_detect_file_io" ] +std_detect_file_io = [ "libc" ] +std_detect_dlsym_getauxval = [ "libc" ] +std_detect_env_override = [ "libc" ] +rustc-dep-of-std = [ + "core", + "alloc", +] diff --git 
a/library/stdarch/crates/std_detect/LICENSE-APACHE b/library/stdarch/crates/std_detect/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/library/stdarch/crates/std_detect/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/library/stdarch/crates/std_detect/LICENSE-MIT b/library/stdarch/crates/std_detect/LICENSE-MIT new file mode 100644 index 000000000000..52d82415d8b6 --- /dev/null +++ b/library/stdarch/crates/std_detect/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2017 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md new file mode 100644 index 000000000000..091f5542e0e8 --- /dev/null +++ b/library/stdarch/crates/std_detect/README.md @@ -0,0 +1,93 @@ +`std::detect` - Rust's standard library run-time CPU feature detection +======= + +The private `std::detect` module implements run-time feature detection in Rust's +standard library. This allows detecting whether the CPU the binary runs on +supports certain features, like SIMD instructions. + +# Usage + +`std::detect` APIs are available as part of `libstd`. 
Prefer using it via the +standard library than through this crate. Unstable features of `std::detect` are +available on nightly Rust behind various feature-gates. + +If you need run-time feature detection in `#[no_std]` environments, Rust `core` +library cannot help you. By design, Rust `core` is platform independent, but +performing run-time feature detection requires a certain level of cooperation +from the platform. + +You can then manually include `std_detect` as a dependency to get similar +run-time feature detection support than the one offered by Rust's standard +library. We intend to make `std_detect` more flexible and configurable in this +regard to better serve the needs of `#[no_std]` targets. + +# Features + +* `std_detect_dlsym_getauxval` (enabled by default, requires `libc`): Enable to +use `libc::dlsym` to query whether [`getauxval`] is linked into the binary. When +this is not the case, this feature allows other fallback methods to perform +run-time feature detection. When this feature is disabled, `std_detect` assumes +that [`getauxval`] is linked to the binary. If that is not the case the behavior +is undefined. + + Note: This feature is ignored on `*-linux-{gnu,musl,ohos}*` and `*-android*` targets + because we can safely assume `getauxval` is linked to the binary. + * `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) + have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html). 
+ * `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15)) + use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197) + * `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md) + * `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html)) + have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49). + +* `std_detect_file_io` (enabled by default, requires `std`): Enable to perform run-time feature +detection using file APIs (e.g. `/proc/self/auxv`, etc.) if other more performant +methods fail. This feature requires `libstd` as a dependency, preventing the +crate from working on applications in which `std` is not available. + +[`getauxval`]: https://man7.org/linux/man-pages/man3/getauxval.3.html + +# Platform support + +* All `x86`/`x86_64` targets are supported on all platforms by querying the + `cpuid` instruction directly for the features supported by the hardware and + the operating system. `std_detect` assumes that the binary is an user-space + application. + +* Linux/Android: + * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch64`, `s390x`: + `std_detect` supports these on Linux by querying ELF auxiliary vectors (using `getauxval` + when available), and if that fails, by querying `/proc/self/auxv`. + * `arm64`: partial support for doing run-time feature detection by directly + querying `mrs` is implemented for Linux >= 4.11, but not enabled by default. 
+ * `riscv{32,64}`: + `std_detect` supports these on Linux by querying `riscv_hwprobe`, and + by querying ELF auxiliary vectors (using `getauxval` when available). + +* FreeBSD: + * `arm32`, `powerpc64`: `std_detect` supports these on FreeBSD by querying ELF + auxiliary vectors using `sysctl`. + * `arm64`: run-time feature detection is implemented by directly querying `mrs`. + +* OpenBSD: + * `arm64`: run-time feature detection is implemented by querying `sysctl`. + +* Windows: + * `arm64`: run-time feature detection is implemented by querying `IsProcessorFeaturePresent`. + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +# Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in `std_detect` by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs new file mode 100644 index 000000000000..13570a25c1cf --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -0,0 +1,259 @@ +//! Aarch64 run-time features. + +features! { + @TARGET: aarch64; + @CFG: any(target_arch = "aarch64", target_arch = "arm64ec"); + @MACRO_NAME: is_aarch64_feature_detected; + @MACRO_ATTRS: + /// This macro tests, at runtime, whether an `aarch64` feature is enabled on aarch64 platforms. + /// Currently most features are only supported on linux-based platforms. + /// + /// This macro takes one argument which is a string literal of the feature being tested for. + /// The feature names are mostly taken from their FEAT_* definitions in the [ARM Architecture + /// Reference Manual][docs]. 
+ /// + /// ## Supported arguments + /// + /// * `"aes"` - FEAT_AES & FEAT_PMULL + /// * `"asimd"` or "neon" - FEAT_AdvSIMD + /// * `"bf16"` - FEAT_BF16 + /// * `"bti"` - FEAT_BTI + /// * `"crc"` - FEAT_CRC + /// * `"cssc"` - FEAT_CSSC + /// * `"dit"` - FEAT_DIT + /// * `"dotprod"` - FEAT_DotProd + /// * `"dpb"` - FEAT_DPB + /// * `"dpb2"` - FEAT_DPB2 + /// * `"ecv"` - FEAT_ECV + /// * `"f32mm"` - FEAT_F32MM + /// * `"f64mm"` - FEAT_F64MM + /// * `"faminmax"` - FEAT_FAMINMAX + /// * `"fcma"` - FEAT_FCMA + /// * `"fhm"` - FEAT_FHM + /// * `"flagm"` - FEAT_FLAGM + /// * `"flagm2"` - FEAT_FLAGM2 + /// * `"fp"` - FEAT_FP + /// * `"fp16"` - FEAT_FP16 + /// * `"fp8"` - FEAT_FP8 + /// * `"fp8dot2"` - FEAT_FP8DOT2 + /// * `"fp8dot4"` - FEAT_FP8DOT4 + /// * `"fp8fma"` - FEAT_FP8FMA + /// * `"fpmr"` - FEAT_FPMR + /// * `"frintts"` - FEAT_FRINTTS + /// * `"hbc"` - FEAT_HBC + /// * `"i8mm"` - FEAT_I8MM + /// * `"jsconv"` - FEAT_JSCVT + /// * `"lse"` - FEAT_LSE + /// * `"lse128"` - FEAT_LSE128 + /// * `"lse2"` - FEAT_LSE2 + /// * `"lut"` - FEAT_LUT + /// * `"mops"` - FEAT_MOPS + /// * `"mte"` - FEAT_MTE & FEAT_MTE2 + /// * `"paca"` - FEAT_PAuth (address authentication) + /// * `"pacg"` - FEAT_Pauth (generic authentication) + /// * `"pauth-lr"` - FEAT_PAuth_LR + /// * `"pmull"` - FEAT_PMULL + /// * `"rand"` - FEAT_RNG + /// * `"rcpc"` - FEAT_LRCPC + /// * `"rcpc2"` - FEAT_LRCPC2 + /// * `"rcpc3"` - FEAT_LRCPC3 + /// * `"rdm"` - FEAT_RDM + /// * `"sb"` - FEAT_SB + /// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256 + /// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3 + /// * `"sm4"` - FEAT_SM3 & FEAT_SM4 + /// * `"sme"` - FEAT_SME + /// * `"sme-b16b16"` - FEAT_SME_B16B16 + /// * `"sme-f16f16"` - FEAT_SME_F16F16 + /// * `"sme-f64f64"` - FEAT_SME_F64F64 + /// * `"sme-f8f16"` - FEAT_SME_F8F16 + /// * `"sme-f8f32"` - FEAT_SME_F8F32 + /// * `"sme-fa64"` - FEAT_SME_FA64 + /// * `"sme-i16i64"` - FEAT_SME_I16I64 + /// * `"sme-lutv2"` - FEAT_SME_LUTv2 + /// * `"sme2"` - FEAT_SME2 + /// * `"sme2p1"` - 
FEAT_SME2p1 + /// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2 + /// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2 + /// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4 + /// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA + /// * `"sve"` - FEAT_SVE + /// * `"sve-b16b16"` - FEAT_SVE_B16B16 (SVE or SME Z-targeting instructions) + /// * `"sve2"` - FEAT_SVE2 + /// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + /// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm + /// * `"sve2-sha3"` - FEAT_SVE2_SHA3 + /// * `"sve2-sm4"` - FEAT_SVE2_SM4 + /// * `"sve2p1"` - FEAT_SVE2p1 + /// * `"tme"` - FEAT_TME + /// * `"wfxt"` - FEAT_WFxT + /// + /// [docs]: https://developer.arm.com/documentation/ddi0487/latest + #[stable(feature = "simd_aarch64", since = "1.60.0")] + @BIND_FEATURE_NAME: "asimd"; "neon"; + @NO_RUNTIME_DETECTION: "ras"; + @NO_RUNTIME_DETECTION: "v8.1a"; + @NO_RUNTIME_DETECTION: "v8.2a"; + @NO_RUNTIME_DETECTION: "v8.3a"; + @NO_RUNTIME_DETECTION: "v8.4a"; + @NO_RUNTIME_DETECTION: "v8.5a"; + @NO_RUNTIME_DETECTION: "v8.6a"; + @NO_RUNTIME_DETECTION: "v8.7a"; + @NO_RUNTIME_DETECTION: "v8.8a"; + @NO_RUNTIME_DETECTION: "v8.9a"; + @NO_RUNTIME_DETECTION: "v9.1a"; + @NO_RUNTIME_DETECTION: "v9.2a"; + @NO_RUNTIME_DETECTION: "v9.3a"; + @NO_RUNTIME_DETECTION: "v9.4a"; + @NO_RUNTIME_DETECTION: "v9.5a"; + @NO_RUNTIME_DETECTION: "v9a"; + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon"; + /// FEAT_AdvSIMD (Advanced SIMD/NEON) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pmull: "pmull"; + implied by target_features: ["aes"]; + /// FEAT_PMULL (Polynomial Multiply) - Implied by `aes` target_feature + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp: "fp"; + implied by target_features: ["neon"]; + /// FEAT_FP (Floating point support) - Implied by `neon` target_feature + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] aes: "aes"; + /// FEAT_AES (AES SIMD instructions) & FEAT_PMULL (PMULL{2}, 64-bit operand variants) + @FEATURE: 
#[stable(feature = "simd_aarch64", since = "1.60.0")] bf16: "bf16"; + /// FEAT_BF16 (BFloat16 type, plus MM instructions, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bti: "bti"; + /// FEAT_BTI (Branch Target Identification) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] crc: "crc"; + /// FEAT_CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc"; + /// FEAT_CSSC (Common Short Sequence Compression instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dit: "dit"; + /// FEAT_DIT (Data Independent Timing instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb: "dpb"; + /// FEAT_DPB (aka dcpop - data cache clean to point of persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb2: "dpb2"; + /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv"; + /// FEAT_ECV (Enhanced Counter Virtualization) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f32mm: "f32mm"; + /// FEAT_F32MM (single-precision matrix multiplication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f64mm: "f64mm"; + /// FEAT_F64MM (double-precision matrix multiplication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax"; + /// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fcma: "fcma"; + /// FEAT_FCMA (float complex number operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fhm: "fhm"; + /// FEAT_FHM (fp16 multiplication instructions) + 
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm"; + /// FEAT_FLAGM (flag manipulation instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2"; + /// FEAT_FLAGM2 (flag manipulation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp16: "fp16"; + /// FEAT_FP16 (Half-float support) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8"; + /// FEAT_FP8 (F8CVT Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2"; + /// FEAT_FP8DOT2 (F8DP2 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4"; + /// FEAT_FP8DOT4 (F8DP4 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma"; + /// FEAT_FP8FMA (F8FMA Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr"; + without cfg check: true; + /// FEAT_FPMR (Special-purpose AArch64-FPMR register) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts"; + /// FEAT_FRINTTS (float to integer rounding instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc"; + /// FEAT_HBC (Hinted conditional branches) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] jsconv: "jsconv"; + /// FEAT_JSCVT (JavaScript float conversion instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse: "lse"; + /// FEAT_LSE (Large System Extension - atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128"; + /// 
FEAT_LSE128 (128-bit atomics) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2"; + /// FEAT_LSE2 (unaligned and register-pair atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut"; + /// FEAT_LUT (Lookup Table Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops"; + /// FEAT_MOPS (Standardization of memory operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] mte: "mte"; + /// FEAT_MTE & FEAT_MTE2 (Memory Tagging Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] paca: "paca"; + /// FEAT_PAuth (address authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pacg: "pacg"; + /// FEAT_PAuth (generic authentication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] pauth_lr: "pauth-lr"; + /// FEAT_PAuth_LR + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rand: "rand"; + /// FEAT_RNG (Random Number Generator) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; + /// FEAT_LRCPC (Release consistent Processor consistent) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; + /// FEAT_LRCPC2 (RCPC with immediate offsets) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3"; + /// FEAT_LRCPC3 (RCPC Instructions v3) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; + /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; + /// FEAT_SB (speculation barrier) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha3: "sha3"; + /// FEAT_SHA512 & 
FEAT_SHA3 (SHA2-512 & SHA3 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4"; + /// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme"; + /// FEAT_SME (Scalable Matrix Extension) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2"; + /// FEAT_SME2 (SME Version 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1"; + /// FEAT_SME2p1 (SME Version 2.1) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_b16b16: "sme-b16b16"; + /// FEAT_SME_B16B16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16"; + /// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64"; + /// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16"; + /// FEAT_SME_F8F16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32"; + /// FEAT_SME_F8F32 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64"; + /// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64"; + /// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2"; + /// FEAT_SME_LUTv2 (LUTI4 Instruction) + @FEATURE: #[stable(feature = "simd_aarch64", since = 
"1.60.0")] ssbs: "ssbs"; + /// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2"; + /// FEAT_SSVE_FP8DOT2 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4"; + /// FEAT_SSVE_FP8DOT4 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma"; + /// FEAT_SSVE_FP8FMA + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve: "sve"; + /// FEAT_SVE (Scalable Vector Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; + /// FEAT_SVE2 (Scalable Vector Extension 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1"; + /// FEAT_SVE2p1 (Scalable Vector Extension 2.1) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; + /// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16"; + /// FEAT_SVE_B16B16 (SVE or SME Z-targeting instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; + /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; + /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; + /// FEAT_SVE_SM4 (SVE2 SM4 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; + /// FEAT_TME (Transactional Memory Extensions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; + /// FEAT_WFxT (WFET and WFIT Instructions) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs 
b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs new file mode 100644 index 000000000000..c3c8883ce315 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -0,0 +1,29 @@ +//! Run-time feature detection on ARM Aarch32. + +features! { + @TARGET: arm; + @CFG: target_arch = "arm"; + @MACRO_NAME: is_arm_feature_detected; + @MACRO_ATTRS: + /// Checks if `arm` feature is enabled. + #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] + @NO_RUNTIME_DETECTION: "v7"; + @NO_RUNTIME_DETECTION: "vfp2"; + @NO_RUNTIME_DETECTION: "vfp3"; + @NO_RUNTIME_DETECTION: "vfp4"; + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] neon: "neon"; + /// ARM Advanced SIMD (NEON) - Aarch32 + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] pmull: "pmull"; + without cfg check: true; + /// Polynomial Multiply + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] crc: "crc"; + /// CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] aes: "aes"; + /// FEAT_AES (AES instructions) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs b/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs new file mode 100644 index 000000000000..e9d68f6a9bf7 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs @@ -0,0 +1,51 @@ +//! 
Run-time feature detection on LoongArch. + +features! { + @TARGET: loongarch; + @CFG: target_arch = "loongarch64"; + @MACRO_NAME: is_loongarch_feature_detected; + @MACRO_ATTRS: + /// Checks if `loongarch` feature is enabled. + /// Supported arguments are: + /// + /// * `"f"` + /// * `"d"` + /// * `"frecipe"` + /// * `"div32"` + /// * `"lsx"` + /// * `"lasx"` + /// * `"lam-bh"` + /// * `"lamcas"` + /// * `"ld-seq-sa"` + /// * `"scq"` + /// * `"lbt"` + /// * `"lvz"` + /// * `"ual"` + #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] f: "f"; + /// F + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] d: "d"; + /// D + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] frecipe: "frecipe"; + /// Frecipe + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] div32: "div32"; + /// Div32 + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lsx: "lsx"; + /// LSX + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lasx: "lasx"; + /// LASX + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] lam_bh: "lam-bh"; + /// LAM-BH + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] lamcas: "lamcas"; + /// LAM-CAS + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] ld_seq_sa: "ld-seq-sa"; + /// LD-SEQ-SA + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] scq: "scq"; + /// SCQ + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lbt: "lbt"; + /// LBT + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lvz: "lvz"; + /// LVZ + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] ual: "ual"; + /// UAL +} diff --git 
a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs new file mode 100644 index 000000000000..e185fdfcaac6 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS. + +features! { + @TARGET: mips; + @CFG: target_arch = "mips"; + @MACRO_NAME: is_mips_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips` feature is enabled. + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs new file mode 100644 index 000000000000..69fe4869d30e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS64. + +features! { + @TARGET: mips64; + @CFG: target_arch = "mips64"; + @MACRO_NAME: is_mips64_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips64` feature is enabled. + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mod.rs b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs new file mode 100644 index 000000000000..d5a13acc0282 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs @@ -0,0 +1,75 @@ +#![allow(dead_code)] + +use cfg_if::cfg_if; + +// Export the macros for all supported architectures. 
+#[macro_use] +mod x86; +#[macro_use] +mod arm; +#[macro_use] +mod aarch64; +#[macro_use] +mod riscv; +#[macro_use] +mod powerpc; +#[macro_use] +mod powerpc64; +#[macro_use] +mod mips; +#[macro_use] +mod mips64; +#[macro_use] +mod loongarch; +#[macro_use] +mod s390x; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[stable(feature = "simd_x86", since = "1.27.0")] + pub use x86::*; + } else if #[cfg(target_arch = "arm")] { + #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] + pub use arm::*; + } else if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { + #[stable(feature = "simd_aarch64", since = "1.60.0")] + pub use aarch64::*; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] + pub use riscv::*; + } else if #[cfg(target_arch = "powerpc")] { + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + pub use powerpc::*; + } else if #[cfg(target_arch = "powerpc64")] { + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + pub use powerpc64::*; + } else if #[cfg(target_arch = "mips")] { + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + pub use mips::*; + } else if #[cfg(target_arch = "mips64")] { + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + pub use mips64::*; + } else if #[cfg(target_arch = "loongarch64")] { + #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] + pub use loongarch::*; + } else if #[cfg(target_arch = "s390x")] { + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + pub use s390x::*; + } else { + // Unimplemented architecture: + #[doc(hidden)] + pub(crate) enum Feature { + Null + } + #[doc(hidden)] + #[unstable(feature = "stdarch_internal", issue = "none")] + pub mod __is_feature_detected {} + + impl Feature { + #[doc(hidden)] + pub(crate) 
fn from_str(_s: &str) -> Result { Err(()) } + #[doc(hidden)] + pub(crate) fn to_str(self) -> &'static str { "" } + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs new file mode 100644 index 000000000000..c390993a48a6 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -0,0 +1,30 @@ +//! Run-time feature detection on PowerPC. + +features! { + @TARGET: powerpc; + @CFG: target_arch = "powerpc"; + @MACRO_NAME: is_powerpc_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8: "power8"; + without cfg check: true; + /// Power8 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_altivec: "power8-altivec"; + /// Power8 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_vector: "power8-vector"; + /// Power8 vector + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_crypto: "power8-crypto"; + /// Power8 crypto + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9: "power9"; + without cfg check: true; + /// Power9 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_altivec: "power9-altivec"; + /// Power9 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_vector: "power9-vector"; + /// Power9 vector +} diff --git 
a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs new file mode 100644 index 000000000000..cf05baa6f799 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -0,0 +1,30 @@ +//! Run-time feature detection on PowerPC64. + +features! { + @TARGET: powerpc64; + @CFG: target_arch = "powerpc64"; + @MACRO_NAME: is_powerpc64_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8: "power8"; + without cfg check: true; + /// Power8 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_altivec: "power8-altivec"; + /// Power8 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_vector: "power8-vector"; + /// Power8 vector + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_crypto: "power8-crypto"; + /// Power8 crypto + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9: "power9"; + without cfg check: true; + /// Power9 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_altivec: "power9-altivec"; + /// Power9 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_vector: "power9-vector"; + /// Power9 vector +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs b/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs new file mode 100644 index 
000000000000..b86190d7bbf0 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs @@ -0,0 +1,344 @@ +//! Run-time feature detection on RISC-V. + +features! { + @TARGET: riscv; + @CFG: any(target_arch = "riscv32", target_arch = "riscv64"); + @MACRO_NAME: is_riscv_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether instruction sets are available on + /// RISC-V platforms. + /// + /// RISC-V standard defined the base sets and the extension sets. + /// The base sets are RV32I, RV64I, RV32E or RV128I. Any RISC-V platform + /// must support one base set and/or multiple extension sets. + /// + /// Any RISC-V standard instruction sets can be in state of either ratified, + /// frozen or draft. The version and status of current standard instruction + /// sets can be checked out from preface section of the [ISA manual]. + /// + /// Platform may define and support their own custom instruction sets with + /// ISA prefix X. These sets are highly platform specific and should be + /// detected with their own platform support crates. + /// + /// [ISA manual]: https://riscv.org/specifications/ratified/ + /// + /// # Platform-specific/agnostic Behavior and Availability + /// + /// Runtime detection depends on the platform-specific feature detection + /// facility and its availability per feature is + /// highly platform/version-specific. + /// + /// Still, a best-effort attempt is performed to enable subset/dependent + /// features if a superset feature is enabled regardless of the platform. + /// For instance, if the A extension (`"a"`) is enabled, its subsets (the + /// Zalrsc and Zaamo extensions; `"zalrsc"` and `"zaamo"`) are also enabled. + /// Likewise, if the F extension (`"f"`) is enabled, one of its dependencies + /// (the Zicsr extension `"zicsr"`) is also enabled. 
+ /// + /// # Unprivileged Specification + /// + /// The supported ratified RISC-V instruction sets are as follows: + /// + /// * RV32E: `"rv32e"` + /// * RV32I: `"rv32i"` + /// * RV64I: `"rv64i"` + /// * A: `"a"` + /// * Zaamo: `"zaamo"` + /// * Zalrsc: `"zalrsc"` + /// * B: `"b"` + /// * Zba: `"zba"` + /// * Zbb: `"zbb"` + /// * Zbs: `"zbs"` + /// * C: `"c"` + /// * Zca: `"zca"` + /// * Zcd: `"zcd"` (if D is enabled) + /// * Zcf: `"zcf"` (if F is enabled on RV32) + /// * D: `"d"` + /// * F: `"f"` + /// * M: `"m"` + /// * Q: `"q"` + /// * V: `"v"` + /// * Zve32x: `"zve32x"` + /// * Zve32f: `"zve32f"` + /// * Zve64x: `"zve64x"` + /// * Zve64f: `"zve64f"` + /// * Zve64d: `"zve64d"` + /// * Zicbom: `"zicbom"` + /// * Zicboz: `"zicboz"` + /// * Zicntr: `"zicntr"` + /// * Zicond: `"zicond"` + /// * Zicsr: `"zicsr"` + /// * Zifencei: `"zifencei"` + /// * Zihintntl: `"zihintntl"` + /// * Zihintpause: `"zihintpause"` + /// * Zihpm: `"zihpm"` + /// * Zimop: `"zimop"` + /// * Zacas: `"zacas"` + /// * Zawrs: `"zawrs"` + /// * Zfa: `"zfa"` + /// * Zfbfmin: `"zfbfmin"` + /// * Zfh: `"zfh"` + /// * Zfhmin: `"zfhmin"` + /// * Zfinx: `"zfinx"` + /// * Zdinx: `"zdinx"` + /// * Zhinx: `"zhinx"` + /// * Zhinxmin: `"zhinxmin"` + /// * Zcb: `"zcb"` + /// * Zcmop: `"zcmop"` + /// * Zbc: `"zbc"` + /// * Zbkb: `"zbkb"` + /// * Zbkc: `"zbkc"` + /// * Zbkx: `"zbkx"` + /// * Zk: `"zk"` + /// * Zkn: `"zkn"` + /// * Zknd: `"zknd"` + /// * Zkne: `"zkne"` + /// * Zknh: `"zknh"` + /// * Zkr: `"zkr"` + /// * Zks: `"zks"` + /// * Zksed: `"zksed"` + /// * Zksh: `"zksh"` + /// * Zkt: `"zkt"` + /// * Zvbb: `"zvbb"` + /// * Zvbc: `"zvbc"` + /// * Zvfbfmin: `"zvfbfmin"` + /// * Zvfbfwma: `"zvfbfwma"` + /// * Zvfh: `"zvfh"` + /// * Zvfhmin: `"zvfhmin"` + /// * Zvkb: `"zvkb"` + /// * Zvkg: `"zvkg"` + /// * Zvkn: `"zvkn"` + /// * Zvkned: `"zvkned"` + /// * Zvknha: `"zvknha"` + /// * Zvknhb: `"zvknhb"` + /// * Zvknc: `"zvknc"` + /// * Zvkng: `"zvkng"` + /// * Zvks: `"zvks"` + /// * Zvksed: `"zvksed"` + /// 
* Zvksh: `"zvksh"` + /// * Zvksc: `"zvksc"` + /// * Zvksg: `"zvksg"` + /// * Zvkt: `"zvkt"` + /// * Ztso: `"ztso"` + /// + /// There's also bases and extensions marked as standard instruction set, + /// but they are in frozen or draft state. These instruction sets are also + /// reserved by this macro and can be detected in the future platforms. + /// + /// Draft RISC-V instruction sets: + /// + /// * RV128I: `"rv128i"` + /// * J: `"j"` + /// * P: `"p"` + /// * Zam: `"zam"` + /// + /// # Performance Hints + /// + /// The two features below define performance hints for unaligned + /// scalar/vector memory accesses, respectively. If enabled, it denotes that + /// corresponding unaligned memory access is reasonably fast. + /// + /// * `"unaligned-scalar-mem"` + /// * Runtime detection requires Linux kernel version 6.4 or later. + /// * `"unaligned-vector-mem"` + /// * Runtime detection requires Linux kernel version 6.13 or later. + #[stable(feature = "riscv_ratified", since = "1.78.0")] + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv32i: "rv32i"; + without cfg check: true; + /// RV32I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv32e: "rv32e"; + without cfg check: true; + /// RV32E Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv64i: "rv64i"; + without cfg check: true; + /// RV64I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv128i: "rv128i"; + without cfg check: true; + /// RV128I Base Integer Instruction Set + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] unaligned_scalar_mem: "unaligned-scalar-mem"; + /// Has reasonably performant unaligned scalar + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] unaligned_vector_mem: 
"unaligned-vector-mem"; + /// Has reasonably performant unaligned vector + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicsr: "zicsr"; + /// "Zicsr" Extension for Control and Status Register (CSR) Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicntr: "zicntr"; + /// "Zicntr" Extension for Base Counters and Timers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihpm: "zihpm"; + /// "Zihpm" Extension for Hardware Performance Counters + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zifencei: "zifencei"; + /// "Zifencei" Extension for Instruction-Fetch Fence + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihintntl: "zihintntl"; + /// "Zihintntl" Extension for Non-Temporal Locality Hints + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihintpause: "zihintpause"; + /// "Zihintpause" Extension for Pause Hint + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zimop: "zimop"; + /// "Zimop" Extension for May-Be-Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicbom: "zicbom"; + /// "Zicbom" Extension for Cache-Block Management Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicboz: "zicboz"; + /// "Zicboz" Extension for Cache-Block Zero Instruction + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicond: "zicond"; + /// "Zicond" Extension for Integer Conditional Operations + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] m: "m"; + /// "M" Extension for Integer Multiplication and Division + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] a: "a"; + /// "A" Extension for Atomic Instructions + @FEATURE: 
#[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zalrsc: "zalrsc"; + /// "Zalrsc" Extension for Load-Reserved/Store-Conditional Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zaamo: "zaamo"; + /// "Zaamo" Extension for Atomic Memory Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zawrs: "zawrs"; + /// "Zawrs" Extension for Wait-on-Reservation-Set Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zacas: "zacas"; + /// "Zacas" Extension for Atomic Compare-and-Swap (CAS) Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zam: "zam"; + without cfg check: true; + /// "Zam" Extension for Misaligned Atomics + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] ztso: "ztso"; + /// "Ztso" Extension for Total Store Ordering + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] f: "f"; + /// "F" Extension for Single-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] d: "d"; + /// "D" Extension for Double-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] q: "q"; + without cfg check: true; + /// "Q" Extension for Quad-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfh: "zfh"; + /// "Zfh" Extension for Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfhmin: "zfhmin"; + /// "Zfhmin" Extension for Minimal Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfa: "zfa"; + /// "Zfa" Extension for Additional Floating-Point Instructions + @FEATURE: #[unstable(feature = 
"stdarch_riscv_feature_detection", issue = "111192")] zfbfmin: "zfbfmin"; + /// "Zfbfmin" Extension for Scalar BF16 Converts + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfinx: "zfinx"; + /// "Zfinx" Extension for Single-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zdinx: "zdinx"; + /// "Zdinx" Extension for Double-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zhinx: "zhinx"; + /// "Zhinx" Extension for Half-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zhinxmin: "zhinxmin"; + /// "Zhinxmin" Extension for Minimal Half-Precision Floating-Point in Integer Registers + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] c: "c"; + /// "C" Extension for Compressed Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zca: "zca"; + /// "Zca" Compressed Instructions excluding Floating-Point Loads/Stores + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcf: "zcf"; + without cfg check: true; + /// "Zcf" Compressed Instructions for Single-Precision Floating-Point Loads/Stores on RV32 + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcd: "zcd"; + without cfg check: true; + /// "Zcd" Compressed Instructions for Double-Precision Floating-Point Loads/Stores + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcb: "zcb"; + /// "Zcb" Simple Code-size Saving Compressed Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcmop: "zcmop"; + /// "Zcmop" Extension for Compressed May-Be-Operations + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue 
= "111192")] b: "b"; + /// "B" Extension for Bit Manipulation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zba: "zba"; + /// "Zba" Extension for Address Generation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbb: "zbb"; + /// "Zbb" Extension for Basic Bit-Manipulation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbc: "zbc"; + /// "Zbc" Extension for Carry-less Multiplication + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbs: "zbs"; + /// "Zbs" Extension for Single-Bit Instructions + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkb: "zbkb"; + /// "Zbkb" Extension for Bit-Manipulation for Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkc: "zbkc"; + /// "Zbkc" Extension for Carry-less Multiplication for Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkx: "zbkx"; + /// "Zbkx" Extension for Crossbar Permutations + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zknd: "zknd"; + /// "Zknd" Cryptography Extension for NIST Suite: AES Decryption + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkne: "zkne"; + /// "Zkne" Cryptography Extension for NIST Suite: AES Encryption + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zknh: "zknh"; + /// "Zknh" Cryptography Extension for NIST Suite: Hash Function Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zksed: "zksed"; + /// "Zksed" Cryptography Extension for ShangMi Suite: SM4 Block Cipher Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zksh: "zksh"; + /// "Zksh" Cryptography Extension for ShangMi Suite: SM3 Hash Function Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkr: "zkr"; + /// "Zkr" Entropy Source Extension + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkn: "zkn"; 
+ /// "Zkn" Cryptography Extension for NIST Algorithm Suite + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zks: "zks"; + /// "Zks" Cryptography Extension for ShangMi Algorithm Suite + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zk: "zk"; + /// "Zk" Cryptography Extension for Standard Scalar Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkt: "zkt"; + /// "Zkt" Cryptography Extension for Data Independent Execution Latency + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] v: "v"; + /// "V" Extension for Vector Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve32x: "zve32x"; + /// "Zve32x" Vector Extension for Embedded Processors (32-bit+; Integer) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve32f: "zve32f"; + /// "Zve32f" Vector Extension for Embedded Processors (32-bit+; with Single-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64x: "zve64x"; + /// "Zve64x" Vector Extension for Embedded Processors (64-bit+; Integer) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64f: "zve64f"; + /// "Zve64f" Vector Extension for Embedded Processors (64-bit+; with Single-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64d: "zve64d"; + /// "Zve64d" Vector Extension for Embedded Processors (64-bit+; with Double-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfh: "zvfh"; + /// "Zvfh" Vector Extension for Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfhmin: "zvfhmin"; + /// "Zvfhmin" Vector Extension for Minimal Half-Precision Floating-Point + @FEATURE: 
#[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfbfmin: "zvfbfmin"; + /// "Zvfbfmin" Vector Extension for BF16 Converts + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfbfwma: "zvfbfwma"; + /// "Zvfbfwma" Vector Extension for BF16 Widening Multiply-Add + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvbb: "zvbb"; + /// "Zvbb" Extension for Vector Basic Bit-Manipulation + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvbc: "zvbc"; + /// "Zvbc" Extension for Vector Carryless Multiplication + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkb: "zvkb"; + /// "Zvkb" Extension for Vector Cryptography Bit-Manipulation + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkg: "zvkg"; + /// "Zvkg" Cryptography Extension for Vector GCM/GMAC + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkned: "zvkned"; + /// "Zvkned" Cryptography Extension for NIST Suite: Vector AES Block Cipher + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknha: "zvknha"; + /// "Zvknha" Cryptography Extension for Vector SHA-2 Secure Hash (SHA-256) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknhb: "zvknhb"; + /// "Zvknhb" Cryptography Extension for Vector SHA-2 Secure Hash (SHA-256/512) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksed: "zvksed"; + /// "Zvksed" Cryptography Extension for ShangMi Suite: Vector SM4 Block Cipher + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksh: "zvksh"; + /// "Zvksh" Cryptography Extension for ShangMi Suite: Vector SM3 Secure Hash + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkn: "zvkn"; + /// "Zvkn" 
Cryptography Extension for NIST Algorithm Suite + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknc: "zvknc"; + /// "Zvknc" Cryptography Extension for NIST Algorithm Suite with Carryless Multiply + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkng: "zvkng"; + /// "Zvkng" Cryptography Extension for NIST Algorithm Suite with GCM + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvks: "zvks"; + /// "Zvks" Cryptography Extension for ShangMi Algorithm Suite + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksc: "zvksc"; + /// "Zvksc" Cryptography Extension for ShangMi Algorithm Suite with Carryless Multiply + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksg: "zvksg"; + /// "Zvksg" Cryptography Extension for ShangMi Algorithm Suite with GCM + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkt: "zvkt"; + /// "Zvkt" Extension for Vector Data-Independent Execution Latency + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] j: "j"; + without cfg check: true; + /// "J" Extension for Dynamically Translated Languages + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] p: "p"; + without cfg check: true; + /// "P" Extension for Packed-SIMD Instructions +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/s390x.rs b/library/stdarch/crates/std_detect/src/detect/arch/s390x.rs new file mode 100644 index 000000000000..4c20d011680b --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/s390x.rs @@ -0,0 +1,81 @@ +//! Run-time feature detection on s390x. + +features! { + @TARGET: s390x; + @CFG: target_arch = "s390x"; + @MACRO_NAME: is_s390x_feature_detected; + @MACRO_ATTRS: + /// Checks if `s390x` feature is enabled. 
+ #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] concurrent_functions: "concurrent-functions"; + /// s390x concurrent-functions facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] deflate_conversion: "deflate-conversion"; + /// s390x deflate-conversion facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] enhanced_sort: "enhanced-sort"; + /// s390x enhanced-sort facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] guarded_storage: "guarded-storage"; + /// s390x guarded-storage facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] high_word: "high-word"; + /// s390x high-word facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension3: "message-security-assist-extension3"; + /// s390x message-security-assist-extension3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension4: "message-security-assist-extension4"; + /// s390x message-security-assist-extension4 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension5: 
"message-security-assist-extension5"; + /// s390x message-security-assist-extension5 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension8: "message-security-assist-extension8"; + /// s390x message-security-assist-extension8 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension9: "message-security-assist-extension9"; + /// s390x message-security-assist-extension9 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension12: "message-security-assist-extension12"; + /// s390x message-security-assist-extension12 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; + /// s390x miscellaneous-extensions-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; + /// s390x miscellaneous-extensions-3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; + /// s390x miscellaneous-extensions-4 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] nnp_assist: "nnp-assist"; + /// s390x 
nnp-assist facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] transactional_execution: "transactional-execution"; + /// s390x transactional-execution facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector: "vector"; + /// s390x vector facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_1: "vector-enhancements-1"; + /// s390x vector-enhancements-1 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_2: "vector-enhancements-2"; + /// s390x vector-enhancements-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_3: "vector-enhancements-3"; + /// s390x vector-enhancements-3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal: "vector-packed-decimal"; + /// s390x vector-packed-decimal facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; + /// s390x vector-packed-decimal-enhancement facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] 
vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; + /// s390x vector-packed-decimal-enhancement-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; + /// s390x vector-packed-decimal-enhancement-3 facility +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs new file mode 100644 index 000000000000..f23cfc334170 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -0,0 +1,278 @@ +//! This module implements minimal run-time feature detection for x86. +//! +//! The features are detected using the `detect_features` function below. +//! This function uses the CPUID instruction to read the feature flags from the +//! CPU and encodes them in a `usize` where each bit position represents +//! whether a feature is available (bit is set) or unavailable (bit is cleared). +//! +//! The enum `Feature` is used to map bit positions to feature names, and the +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g., +//! "avx") to these bit positions (e.g., `Feature::avx`). +//! +//! The run-time feature detection is performed by the +//! `__crate::detect::check_for(Feature) -> bool` function. On its first call, +//! this functions queries the CPU for the available features and stores them +//! in a global `AtomicUsize` variable. The query is performed by just checking +//! whether the feature bit in this global variable is set or cleared. + +features! { + @TARGET: x86; + @CFG: any(target_arch = "x86", target_arch = "x86_64"); + @MACRO_NAME: is_x86_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether a CPU feature is available on + /// x86/x86-64 platforms. 
+ /// + /// This macro is provided in the standard library and will detect at runtime + /// whether the specified CPU feature is detected. This does **not** resolve at + /// compile time unless the specified feature is already enabled for the entire + /// crate. Runtime detection currently relies mostly on the `cpuid` instruction. + /// + /// This macro only takes one argument which is a string literal of the feature + /// being tested for. The feature names supported are the lowercase versions of + /// the ones defined by Intel in [their documentation][docs]. + /// + /// ## Supported arguments + /// + /// This macro supports the same names that `#[target_feature]` supports. Unlike + /// `#[target_feature]`, however, this macro does not support names separated + /// with a comma. Instead testing for multiple features must be done through + /// separate macro invocations for now. + /// + /// Supported arguments are: + /// + /// * `"aes"` + /// * `"pclmulqdq"` + /// * `"rdrand"` + /// * `"rdseed"` + /// * `"tsc"` + /// * `"mmx"` + /// * `"sse"` + /// * `"sse2"` + /// * `"sse3"` + /// * `"ssse3"` + /// * `"sse4.1"` + /// * `"sse4.2"` + /// * `"sse4a"` + /// * `"sha"` + /// * `"avx"` + /// * `"avx2"` + /// * `"sha512"` + /// * `"sm3"` + /// * `"sm4"` + /// * `"avx512f"` + /// * `"avx512cd"` + /// * `"avx512er"` + /// * `"avx512pf"` + /// * `"avx512bw"` + /// * `"avx512dq"` + /// * `"avx512vl"` + /// * `"avx512ifma"` + /// * `"avx512vbmi"` + /// * `"avx512vpopcntdq"` + /// * `"avx512vbmi2"` + /// * `"gfni"` + /// * `"vaes"` + /// * `"vpclmulqdq"` + /// * `"avx512vnni"` + /// * `"avx512bitalg"` + /// * `"avx512bf16"` + /// * `"avx512vp2intersect"` + /// * `"avx512fp16"` + /// * `"avxvnni"` + /// * `"avxifma"` + /// * `"avxneconvert"` + /// * `"avxvnniint8"` + /// * `"avxvnniint16"` + /// * `"amx-tile"` + /// * `"amx-int8"` + /// * `"amx-bf16"` + /// * `"amx-fp16"` + /// * `"amx-complex"` + /// * `"amx-avx512"` + /// * `"amx-fp8"` + /// * `"amx-movrs"` + /// * 
`"amx-tf32"` + /// * `"amx-transpose"` + /// * `"f16c"` + /// * `"fma"` + /// * `"bmi1"` + /// * `"bmi2"` + /// * `"abm"` + /// * `"lzcnt"` + /// * `"tbm"` + /// * `"popcnt"` + /// * `"fxsr"` + /// * `"xsave"` + /// * `"xsaveopt"` + /// * `"xsaves"` + /// * `"xsavec"` + /// * `"cmpxchg16b"` + /// * `"kl"` + /// * `"widekl"` + /// * `"adx"` + /// * `"rtm"` + /// * `"movbe"` + /// * `"ermsb"` + /// * `"movrs"` + /// * `"xop"` + /// + /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide + #[stable(feature = "simd_x86", since = "1.27.0")] + @BIND_FEATURE_NAME: "abm"; "lzcnt"; // abm is a synonym for lzcnt + @BIND_FEATURE_NAME: "avx512gfni"; "gfni"; #[deprecated(since = "1.67.0", note = "the `avx512gfni` feature has been renamed to `gfni`")]; + @BIND_FEATURE_NAME: "avx512vaes"; "vaes"; #[deprecated(since = "1.67.0", note = "the `avx512vaes` feature has been renamed to `vaes`")]; + @BIND_FEATURE_NAME: "avx512vpclmulqdq"; "vpclmulqdq"; #[deprecated(since = "1.67.0", note = "the `avx512vpclmulqdq` feature has been renamed to `vpclmulqdq`")]; + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] aes: "aes"; + /// AES (Advanced Encryption Standard New Instructions AES-NI) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] pclmulqdq: "pclmulqdq"; + /// CLMUL (Carry-less Multiplication) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdrand: "rdrand"; + /// RDRAND + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdseed: "rdseed"; + /// RDSEED + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tsc: "tsc"; + without cfg check: true; + /// TSC (Time Stamp Counter) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] mmx: "mmx"; + without cfg check: true; + /// MMX (MultiMedia eXtensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse: "sse"; + /// SSE (Streaming SIMD Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse2: "sse2"; + /// SSE2 (Streaming 
SIMD Extensions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse3: "sse3"; + /// SSE3 (Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ssse3: "ssse3"; + /// SSSE3 (Supplemental Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_1: "sse4.1"; + /// SSE4.1 (Streaming SIMD Extensions 4.1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_2: "sse4.2"; + /// SSE4.2 (Streaming SIMD Extensions 4.2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4a: "sse4a"; + /// SSE4a (Streaming SIMD Extensions 4a) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sha: "sha"; + /// SHA + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx: "avx"; + /// AVX (Advanced Vector Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx2: "avx2"; + /// AVX2 (Advanced Vector Extensions 2) + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sha512: "sha512"; + /// SHA512 + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm3: "sm3"; + /// SM3 + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm4: "sm4"; + /// SM4 + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512f: "avx512f" ; + /// AVX-512 F (Foundation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512cd: "avx512cd" ; + /// AVX-512 CD (Conflict Detection Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512er: "avx512er"; + without cfg check: true; + /// AVX-512 ER (Expo nential and Reciprocal Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512pf: "avx512pf"; + without cfg check: true; + /// AVX-512 PF (Prefetch Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bw: "avx512bw"; + /// AVX-512 BW (Byte and Word Instructions) + @FEATURE: 
#[stable(feature = "simd_x86", since = "1.27.0")] avx512dq: "avx512dq"; + /// AVX-512 DQ (Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vl: "avx512vl"; + /// AVX-512 VL (Vector Length Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512ifma: "avx512ifma"; + /// AVX-512 IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi"; + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq"; + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2"; + /// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni"; + /// AVX-512 GFNI (Galois Field New Instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] vaes: "vaes"; + /// AVX-512 VAES (Vector AES instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] vpclmulqdq: "vpclmulqdq"; + /// AVX-512 VPCLMULQDQ (Vector PCLMULQDQ instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vnni: "avx512vnni"; + /// AVX-512 VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bitalg: "avx512bitalg"; + /// AVX-512 BITALG (Support for VPOPCNT\[B,W\] and VPSHUFBITQMB) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bf16: "avx512bf16"; + /// AVX-512 BF16 (BFLOAT16 instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vp2intersect: "avx512vp2intersect"; + /// AVX-512 P2INTERSECT + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512fp16: "avx512fp16"; + /// AVX-512 FP16 (FLOAT16 instructions) + @FEATURE: #[stable(feature = 
"avx512_target_feature", since = "1.89.0")] avxifma: "avxifma"; + /// AVX-IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxneconvert: "avxneconvert"; + /// AVX-NE-CONVERT (Exceptionless Convert) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnni: "avxvnni"; + /// AVX-VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnniint16: "avxvnniint16"; + /// AVX-VNNI_INT8 (VNNI with 16-bit Integers) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnniint8: "avxvnniint8"; + /// AVX-VNNI_INT16 (VNNI with 8-bit integers) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tile: "amx-tile"; + /// AMX (Advanced Matrix Extensions) - Tile load/store + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_int8: "amx-int8"; + /// AMX-INT8 (Operations on 8-bit integers) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_bf16: "amx-bf16"; + /// AMX-BF16 (BFloat16 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp16: "amx-fp16"; + /// AMX-FP16 (Float16 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex"; + /// AMX-COMPLEX (Complex number Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512"; + /// AMX-AVX512 (AVX512 operations extended to matrices) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8"; + /// AMX-FP8 (Float8 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs"; + /// AMX-MOVRS (Matrix MOVERS operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32"; + /// AMX-TF32 (TensorFloat32 Operations) + @FEATURE: 
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose"; + /// AMX-TRANSPOSE (Matrix Transpose Operations) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; + /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; + /// FMA (Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi1: "bmi1" ; + /// BMI1 (Bit Manipulation Instructions 1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi2: "bmi2" ; + /// BMI2 (Bit Manipulation Instructions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] lzcnt: "lzcnt"; + /// ABM (Advanced Bit Manipulation) / LZCNT (Leading Zero Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tbm: "tbm"; + /// TBM (Trailing Bit Manipulation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] popcnt: "popcnt"; + /// POPCNT (Population Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fxsr: "fxsr"; + /// FXSR (Floating-point context fast save and restore) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsave: "xsave"; + /// XSAVE (Save Processor Extended States) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaveopt: "xsaveopt"; + /// XSAVEOPT (Save Processor Extended States Optimized) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaves: "xsaves"; + /// XSAVES (Save Processor Extended States Supervisor) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsavec: "xsavec"; + /// XSAVEC (Save Processor Extended States Compacted) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] cmpxchg16b: "cmpxchg16b"; + /// CMPXCH16B (16-byte compare-and-swap instruction) + @FEATURE: #[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] kl: "kl"; + /// Intel Key Locker + @FEATURE: #[stable(feature = "keylocker_x86", since = 
"CURRENT_RUSTC_VERSION")] widekl: "widekl"; + /// Intel Key Locker Wide + @FEATURE: #[stable(feature = "simd_x86_adx", since = "1.33.0")] adx: "adx"; + /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rtm: "rtm"; + /// RTM, Intel (Restricted Transactional Memory) + @FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe"; + /// MOVBE (Move Data After Swapping Bytes) + @FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs"; + /// MOVRS (Move data with the read-shared hint) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb"; + /// ERMSB, Enhanced REP MOVSB and STOSB + @FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop"; + /// XOP: eXtended Operations (AMD) +} diff --git a/library/stdarch/crates/std_detect/src/detect/bit.rs b/library/stdarch/crates/std_detect/src/detect/bit.rs new file mode 100644 index 000000000000..6f06c5523e4f --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/bit.rs @@ -0,0 +1,9 @@ +//! Bit manipulation utilities. + +/// Tests the `bit` of `x`. +#[allow(dead_code)] +#[inline] +pub(crate) fn test(x: usize, bit: u32) -> bool { + debug_assert!(bit < usize::BITS, "bit index out-of-bounds"); + x & (1 << bit) != 0 +} diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs new file mode 100644 index 000000000000..83bcedea612e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -0,0 +1,223 @@ +//! Caches run-time feature detection so that it only needs to be computed +//! once. + +#![allow(dead_code)] // not used on all platforms + +use core::sync::atomic::Ordering; + +use core::sync::atomic::AtomicUsize; + +/// Sets the `bit` of `x`. +#[inline] +const fn set_bit(x: u128, bit: u32) -> u128 { + x | 1 << bit +} + +/// Tests the `bit` of `x`. 
+#[inline] +const fn test_bit(x: u128, bit: u32) -> bool { + x & (1 << bit) != 0 +} + +/// Unset the `bit of `x`. +#[inline] +const fn unset_bit(x: u128, bit: u32) -> u128 { + x & !(1 << bit) +} + +/// Maximum number of features that can be cached. +const CACHE_CAPACITY: u32 = 93; + +/// This type is used to initialize the cache +// The derived `Default` implementation will initialize the field to zero, +// which is what we want. +#[derive(Copy, Clone, Default, PartialEq, Eq)] +pub(crate) struct Initializer(u128); + +// NOTE: the `debug_assert!` would catch that we do not add more Features than +// the one fitting our cache. +impl Initializer { + /// Tests the `bit` of the cache. + #[inline] + pub(crate) fn test(self, bit: u32) -> bool { + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + test_bit(self.0, bit) + } + + /// Sets the `bit` of the cache. + #[inline] + pub(crate) fn set(&mut self, bit: u32) { + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + let v = self.0; + self.0 = set_bit(v, bit); + } + + /// Unsets the `bit` of the cache. + #[inline] + pub(crate) fn unset(&mut self, bit: u32) { + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + let v = self.0; + self.0 = unset_bit(v, bit); + } +} + +/// This global variable is a cache of the features supported by the CPU. +// Note: the third slot is only used in x86 +// Another Slot can be added if needed without any change to `Initializer` +static CACHE: [Cache; 3] = [ + Cache::uninitialized(), + Cache::uninitialized(), + Cache::uninitialized(), +]; + +/// Feature cache with capacity for `size_of::() * 8 - 1` features. +/// +/// Note: 0 is used to represent an uninitialized cache, and (at least) the most +/// significant bit is set on any cache which has been initialized. 
+/// +/// Note: we use `Relaxed` atomic operations, because we are only interested in +/// the effects of operations on a single memory location. That is, we only need +/// "modification order", and not the full-blown "happens before". +struct Cache(AtomicUsize); + +impl Cache { + const CAPACITY: u32 = (core::mem::size_of::() * 8 - 1) as u32; + const MASK: usize = (1 << Cache::CAPACITY) - 1; + const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY; + + /// Creates an uninitialized cache. + #[allow(clippy::declare_interior_mutable_const)] + const fn uninitialized() -> Self { + Cache(AtomicUsize::new(0)) + } + + /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized. + #[inline] + pub(crate) fn test(&self, bit: u32) -> Option { + let cached = self.0.load(Ordering::Relaxed); + if cached == 0 { + None + } else { + Some(test_bit(cached as u128, bit)) + } + } + + /// Initializes the cache. + #[inline] + fn initialize(&self, value: usize) -> usize { + debug_assert_eq!((value & !Cache::MASK), 0); + self.0 + .store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); + value + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "std_detect_env_override")] { + #[inline] + fn disable_features(disable: &[u8], value: &mut Initializer) { + if let Ok(disable) = core::str::from_utf8(disable) { + for v in disable.split(" ") { + let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32)); + } + } + } + + #[inline] + fn initialize(mut value: Initializer) -> Initializer { + use core::ffi::CStr; + const RUST_STD_DETECT_UNSTABLE: &CStr = c"RUST_STD_DETECT_UNSTABLE"; + cfg_if::cfg_if! { + if #[cfg(windows)] { + use alloc::vec; + #[link(name = "kernel32")] + unsafe extern "system" { + fn GetEnvironmentVariableA(name: *const u8, buffer: *mut u8, size: u32) -> u32; + } + let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::(), core::ptr::null_mut(), 0) }; + if len > 0 { + // +1 to include the null terminator. 
+ let mut env = vec![0; len as usize + 1]; + let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::(), env.as_mut_ptr(), len + 1) }; + if len > 0 { + disable_features(&env[..len as usize], &mut value); + } + } + } else { + let env = unsafe { + libc::getenv(RUST_STD_DETECT_UNSTABLE.as_ptr()) + }; + if !env.is_null() { + let len = unsafe { libc::strlen(env) }; + let env = unsafe { core::slice::from_raw_parts(env as *const u8, len) }; + disable_features(env, &mut value); + } + } + } + do_initialize(value); + value + } + } else { + #[inline] + fn initialize(value: Initializer) -> Initializer { + do_initialize(value); + value + } + } +} + +#[inline] +fn do_initialize(value: Initializer) { + CACHE[0].initialize((value.0) as usize & Cache::MASK); + CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); + CACHE[2].initialize((value.0 >> (2 * Cache::CAPACITY)) as usize & Cache::MASK); +} + +// We only have to detect features once, and it's fairly costly, so hint to LLVM +// that it should assume that cache hits are more common than misses (which is +// the point of caching). It's possibly unfortunate that this function needs to +// reach across modules like this to call `os::detect_features`, but it produces +// the best code out of several attempted variants. +// +// The `Initializer` that the cache was initialized with is returned, so that +// the caller can call `test()` on it without having to load the value from the +// cache again. +#[cold] +fn detect_and_initialize() -> Initializer { + initialize(super::os::detect_features()) +} + +/// Tests the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `os::detect_features()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `CACHE` global variable as an `AtomicU64`. +/// +/// It uses the `Feature` variant to index into this variable as a bitset. 
If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +/// +/// If the feature `std_detect_env_override` is enabled looks for the env +/// variable `RUST_STD_DETECT_UNSTABLE` and uses its content to disable +/// Features that would had been otherwise detected. +#[inline] +pub(crate) fn test(bit: u32) -> bool { + let (relative_bit, idx) = if bit < Cache::CAPACITY { + (bit, 0) + } else if bit < 2 * Cache::CAPACITY { + (bit - Cache::CAPACITY, 1) + } else { + (bit - 2 * Cache::CAPACITY, 2) + }; + CACHE[idx] + .test(relative_bit) + .unwrap_or_else(|| detect_and_initialize().test(bit)) +} diff --git a/library/stdarch/crates/std_detect/src/detect/macros.rs b/library/stdarch/crates/std_detect/src/detect/macros.rs new file mode 100644 index 000000000000..a2994fb7daa7 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/macros.rs @@ -0,0 +1,204 @@ +#[macro_export] +#[allow_internal_unstable(stdarch_internal)] +#[unstable(feature = "stdarch_internal", issue = "none")] +macro_rules! detect_feature { + ($feature:tt, $feature_lit:tt) => { + $crate::detect_feature!($feature, $feature_lit : $feature_lit) + }; + ($feature:tt, $feature_lit:tt : $($target_feature_lit:tt),*) => { + $(cfg!(target_feature = $target_feature_lit) ||)* + $crate::detect::__is_feature_detected::$feature() + }; + ($feature:tt, $feature_lit:tt, without cfg check: true) => { + $crate::detect::__is_feature_detected::$feature() + }; +} + +#[allow(unused_macros, reason = "it's used in the features! macro below")] +macro_rules! 
check_cfg_feature { + ($feature:tt, $feature_lit:tt) => { + check_cfg_feature!($feature, $feature_lit : $feature_lit) + }; + ($feature:tt, $feature_lit:tt : $($target_feature_lit:tt),*) => { + $(cfg!(target_feature = $target_feature_lit);)* + }; + ($feature:tt, $feature_lit:tt, without cfg check: $feature_cfg_check:literal) => { + #[allow(unexpected_cfgs, reason = $feature_lit)] + { cfg!(target_feature = $feature_lit) } + }; +} + +#[allow(unused)] +macro_rules! features { + ( + @TARGET: $target:ident; + @CFG: $cfg:meta; + @MACRO_NAME: $macro_name:ident; + @MACRO_ATTRS: $(#[$macro_attrs:meta])* + $(@BIND_FEATURE_NAME: $bind_feature:tt; $feature_impl:tt; $(#[$deprecate_attr:meta];)?)* + $(@NO_RUNTIME_DETECTION: $nort_feature:tt; )* + $(@FEATURE: #[$stability_attr:meta] $feature:ident: $feature_lit:tt; + $(without cfg check: $feature_cfg_check:tt;)? + $(implied by target_features: [$($target_feature_lit:tt),*];)? + $(#[$feature_comment:meta])*)* + ) => { + #[macro_export] + $(#[$macro_attrs])* + #[allow_internal_unstable(stdarch_internal)] + #[cfg($cfg)] + #[doc(cfg($cfg))] + macro_rules! $macro_name { + $( + ($feature_lit) => { + $crate::detect_feature!($feature, $feature_lit $(, without cfg check: $feature_cfg_check)? $(: $($target_feature_lit),*)?) + }; + )* + $( + ($bind_feature) => { + { + $( + #[$deprecate_attr] macro_rules! deprecated_feature { {} => {}; } + deprecated_feature! {}; + )? + $crate::$macro_name!($feature_impl) + } + }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $crate::$macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + $(#[$macro_attrs])* + #[macro_export] + #[cfg(not($cfg))] + #[doc(cfg($cfg))] + macro_rules! 
$macro_name { + $( + ($feature_lit) => { + compile_error!( + concat!( + r#"This macro cannot be used on the current target. + You can prevent it from being used in other architectures by + guarding it behind a cfg("#, + stringify!($cfg), + ")." + ) + ) + }; + )* + $( + ($bind_feature) => { $crate::$macro_name!($feature_impl) }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $crate::$macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + #[test] + #[deny(unexpected_cfgs)] + #[deny(unfulfilled_lint_expectations)] + fn unexpected_cfgs() { + $( + check_cfg_feature!($feature, $feature_lit $(, without cfg check: $feature_cfg_check)? $(: $($target_feature_lit),*)?); + )* + } + + /// Each variant denotes a position in a bitset for a particular feature. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. + #[doc(hidden)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + #[repr(u8)] + #[unstable(feature = "stdarch_internal", issue = "none")] + #[cfg($cfg)] + pub(crate) enum Feature { + $( + $(#[$feature_comment])* + $feature, + )* + + // Do not add variants after last: + _last + } + + #[cfg($cfg)] + impl Feature { + pub(crate) fn to_str(self) -> &'static str { + match self { + $(Feature::$feature => $feature_lit,)* + Feature::_last => unreachable!(), + } + } + + #[cfg(feature = "std_detect_env_override")] + pub(crate) fn from_str(s: &str) -> Result { + match s { + $($feature_lit => Ok(Feature::$feature),)* + _ => Err(()) + } + } + } + + /// Each function performs run-time feature detection for a single + /// feature. This allow us to use stability attributes on a per feature + /// basis. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. 
+ #[doc(hidden)] + #[cfg($cfg)] + #[unstable(feature = "stdarch_internal", issue = "none")] + pub mod __is_feature_detected { + $( + + /// PLEASE: do not use this, it is an implementation detail + /// subject to change. + #[inline] + #[doc(hidden)] + #[$stability_attr] + pub fn $feature() -> bool { + $crate::detect::check_for($crate::detect::Feature::$feature) + } + )* + } + }; +} diff --git a/library/stdarch/crates/std_detect/src/detect/mod.rs b/library/stdarch/crates/std_detect/src/detect/mod.rs new file mode 100644 index 000000000000..8fd3d9579328 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/mod.rs @@ -0,0 +1,120 @@ +//! This module implements run-time feature detection. +//! +//! The `is_{arch}_feature_detected!("feature-name")` macros take the name of a +//! feature as a string-literal, and return a boolean indicating whether the +//! feature is enabled at run-time or not. +//! +//! These macros do two things: +//! * map the string-literal into an integer stored as a `Feature` enum, +//! * call a `os::check_for(x: Feature)` function that returns `true` if the +//! feature is enabled. +//! +//! The `Feature` enums are also implemented in the `arch/{target_arch}.rs` +//! modules. +//! +//! The `check_for` functions are, in general, Operating System dependent. Most +//! architectures do not allow user-space programs to query the feature bits +//! due to security concerns (x86 is the big exception). These functions are +//! implemented in the `os/{target_os}.rs` modules. + +use cfg_if::cfg_if; + +#[macro_use] +mod macros; + +mod arch; + +// This module needs to be public because the `is_{arch}_feature_detected!` +// macros expand calls to items within it in user crates. +#[doc(hidden)] +#[unstable(feature = "stdarch_internal", issue = "none")] +pub use self::arch::__is_feature_detected; + +pub(crate) use self::arch::Feature; + +mod bit; +mod cache; + +cfg_if! 
{ + if #[cfg(miri)] { + // When running under miri all target-features that are not enabled at + // compile-time are reported as disabled at run-time. + // + // For features for which `cfg(target_feature)` returns true, + // this run-time detection logic is never called. + #[path = "os/other.rs"] + mod os; + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // On x86/x86_64 no OS specific functionality is required. + #[path = "os/x86.rs"] + mod os; + } else if #[cfg(all(any(target_os = "linux", target_os = "android"), feature = "libc"))] { + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] + #[path = "os/riscv.rs"] + mod riscv; + #[path = "os/linux/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "freebsd", feature = "libc"))] { + #[cfg(target_arch = "aarch64")] + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/freebsd/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "openbsd", target_arch = "aarch64", feature = "libc"))] { + #[allow(dead_code)] // we don't use code that calls the mrs instruction. + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/openbsd/aarch64.rs"] + mod os; + } else if #[cfg(all(target_os = "windows", any(target_arch = "aarch64", target_arch = "arm64ec")))] { + #[path = "os/windows/aarch64.rs"] + mod os; + } else if #[cfg(all(target_vendor = "apple", target_arch = "aarch64", feature = "libc"))] { + #[path = "os/darwin/aarch64.rs"] + mod os; + } else { + #[path = "os/other.rs"] + mod os; + } +} + +/// Performs run-time feature detection. +#[inline] +#[allow(dead_code)] +fn check_for(x: Feature) -> bool { + cache::test(x as u32) +} + +/// Returns an `Iterator` where +/// `Item.0` is the feature name, and `Item.1` is a `bool` which +/// is `true` if the feature is supported by the host and `false` otherwise. +#[unstable(feature = "stdarch_internal", issue = "none")] +pub fn features() -> impl Iterator { + cfg_if! 
{ + if #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch64", + target_arch = "s390x", + ))] { + (0_u8..Feature::_last as u8).map(|discriminant: u8| { + #[allow(bindings_with_variant_name)] // RISC-V has Feature::f + let f: Feature = unsafe { core::mem::transmute(discriminant) }; + let name: &'static str = f.to_str(); + let enabled: bool = check_for(f); + (name, enabled) + }) + } else { + None.into_iter() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 000000000000..1ff2a17e6e1e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,130 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! 
- [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) +//! - [ARM documentation](https://developer.arm.com/documentation/ddi0601/2022-12/AArch64-Registers?lang=en) + +use crate::detect::{Feature, cache}; +use core::arch::asm; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + let aa64mmfr2: u64; + unsafe { + asm!( + "mrs {}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64PFR0_EL1", + out(reg) aa64pfr0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, Some(aa64pfr0)) +} + +pub(crate) fn parse_system_registers( + aa64isar0: u64, + aa64isar1: u64, + aa64mmfr2: u64, + aa64pfr0: Option, +) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); + enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); + 
enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 2); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + if let Some(aa64pfr0) = aa64pfr0 { + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::aes, asimd && bits_shift(aa64isar0, 7, 4) >= 2); + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::sha2, asimd && sha1 && sha2); + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature( + Feature::dotprod, + asimd && bits_shift(aa64isar0, 47, 44) >= 1, + ); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + // Check for either APA or API field + enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + // Check for either GPA or GPI field + enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + enable_feature(Feature::lse2, bits_shift(aa64mmfr2, 35, 32) >= 1); + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/darwin/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/darwin/aarch64.rs new file mode 100644 index 
000000000000..44d921689e5a --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/darwin/aarch64.rs @@ -0,0 +1,155 @@ +//! Run-time feature detection for aarch64 on Darwin (macOS/iOS/tvOS/watchOS/visionOS). +//! +//! + +use crate::detect::{Feature, cache}; +use core::ffi::CStr; + +#[inline] +fn _sysctlbyname(name: &CStr) -> bool { + use libc; + + let mut enabled: i32 = 0; + let mut enabled_len: usize = 4; + let enabled_ptr = &mut enabled as *mut i32 as *mut libc::c_void; + + let ret = unsafe { + libc::sysctlbyname( + name.as_ptr(), + enabled_ptr, + &mut enabled_len, + core::ptr::null_mut(), + 0, + ) + }; + + match ret { + 0 => enabled != 0, + _ => false, + } +} + +/// Try to read the features using sysctlbyname. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Armv8.0 features not using the standard identifiers + let fp = _sysctlbyname(c"hw.optional.floatingpoint"); + let asimd = _sysctlbyname(c"hw.optional.AdvSIMD"); + let crc = _sysctlbyname(c"hw.optional.armv8_crc32"); + + // Armv8 and Armv9 features using the standard identifiers + let aes = _sysctlbyname(c"hw.optional.arm.FEAT_AES"); + let bf16 = _sysctlbyname(c"hw.optional.arm.FEAT_BF16"); + let bti = _sysctlbyname(c"hw.optional.arm.FEAT_BTI"); + let cssc = _sysctlbyname(c"hw.optional.arm.FEAT_CSSC"); + let dit = _sysctlbyname(c"hw.optional.arm.FEAT_DIT"); + let dpb = _sysctlbyname(c"hw.optional.arm.FEAT_DPB"); + let dpb2 = _sysctlbyname(c"hw.optional.arm.FEAT_DPB2"); + let dotprod = _sysctlbyname(c"hw.optional.arm.FEAT_DotProd"); + let ecv = _sysctlbyname(c"hw.optional.arm.FEAT_ECV"); + let fcma = _sysctlbyname(c"hw.optional.arm.FEAT_FCMA"); + let fhm = _sysctlbyname(c"hw.optional.arm.FEAT_FHM"); + let fp16 = _sysctlbyname(c"hw.optional.arm.FEAT_FP16"); + let frintts = _sysctlbyname(c"hw.optional.arm.FEAT_FRINTTS"); + let flagm = 
_sysctlbyname(c"hw.optional.arm.FEAT_FlagM"); + let flagm2 = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM2"); + let hbc = _sysctlbyname(c"hw.optional.arm.FEAT_HBC"); + let i8mm = _sysctlbyname(c"hw.optional.arm.FEAT_I8MM"); + let jsconv = _sysctlbyname(c"hw.optional.arm.FEAT_JSCVT"); + let rcpc = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC"); + let rcpc2 = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC2"); + let lse = _sysctlbyname(c"hw.optional.arm.FEAT_LSE"); + let lse2 = _sysctlbyname(c"hw.optional.arm.FEAT_LSE2"); + let pauth = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth"); + let pmull = _sysctlbyname(c"hw.optional.arm.FEAT_PMULL"); + let rdm = _sysctlbyname(c"hw.optional.arm.FEAT_RDM"); + let sb = _sysctlbyname(c"hw.optional.arm.FEAT_SB"); + let sha1 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA1"); + let sha256 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA256"); + let sha3 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA3"); + let sha512 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA512"); + let sme = _sysctlbyname(c"hw.optional.arm.FEAT_SME"); + let sme2 = _sysctlbyname(c"hw.optional.arm.FEAT_SME2"); + let sme_f64f64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_F64F64"); + let sme_i16i64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_I16I64"); + let ssbs = _sysctlbyname(c"hw.optional.arm.FEAT_SSBS"); + let wfxt = _sysctlbyname(c"hw.optional.arm.FEAT_WFxT"); + + // The following features are not exposed by `is_aarch64_feature_detected`, + // but *are* reported by `sysctl`. They are here as documentation that they + // exist, and may potentially be exposed later. 
+ /* + let afp = _sysctlbyname(c"hw.optional.arm.FEAT_AFP"); + let csv2 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV2"); + let csv3 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV3"); + let ebf16 = _sysctlbyname(c"hw.optional.arm.FEAT_EBF16"); + let fpac = _sysctlbyname(c"hw.optional.arm.FEAT_FPAC"); + let fpaccombine = _sysctlbyname(c"hw.optional.arm.FEAT_FPACCOMBINE"); + let pacimp = _sysctlbyname(c"hw.optional.arm.FEAT_PACIMP"); + let pauth2 = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth2"); + let rpres = _sysctlbyname(c"hw.optional.arm.FEAT_RPRES"); + let specres = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES"); + let specres2 = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES2"); + */ + + // The following "features" are reported by `sysctl` but are mandatory parts + // of SME or SME2, and so are not exposed separately by + // `is_aarch64_feature_detected`. They are here to document their + // existence, in case they're needed in the future. + /* + let sme_b16f32 = _sysctlbyname(c"hw.optional.arm.SME_B16F32"); + let sme_bi32i32 = _sysctlbyname(c"hw.optional.arm.SME_BI32I32"); + let sme_f16f32 = _sysctlbyname(c"hw.optional.arm.SME_F16F32"); + let sme_f32f32 = _sysctlbyname(c"hw.optional.arm.SME_F32F32"); + let sme_i16i32 = _sysctlbyname(c"hw.optional.arm.SME_I16I32"); + let sme_i8i32 = _sysctlbyname(c"hw.optional.arm.SME_I8I32"); + */ + + enable_feature(Feature::aes, aes && pmull); + enable_feature(Feature::asimd, asimd); + enable_feature(Feature::bf16, bf16); + enable_feature(Feature::bti, bti); + enable_feature(Feature::crc, crc); + enable_feature(Feature::cssc, cssc); + enable_feature(Feature::dit, dit); + enable_feature(Feature::dotprod, dotprod); + enable_feature(Feature::dpb, dpb); + enable_feature(Feature::dpb2, dpb2); + enable_feature(Feature::ecv, ecv); + enable_feature(Feature::fcma, fcma); + enable_feature(Feature::fhm, fhm); + enable_feature(Feature::flagm, flagm); + enable_feature(Feature::flagm2, flagm2); + enable_feature(Feature::fp, fp); + 
enable_feature(Feature::fp16, fp16); + enable_feature(Feature::frintts, frintts); + enable_feature(Feature::hbc, hbc); + enable_feature(Feature::i8mm, i8mm); + enable_feature(Feature::jsconv, jsconv); + enable_feature(Feature::lse, lse); + enable_feature(Feature::lse2, lse2); + enable_feature(Feature::paca, pauth); + enable_feature(Feature::pacg, pauth); + enable_feature(Feature::pmull, aes && pmull); + enable_feature(Feature::rcpc, rcpc); + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rdm, rdm); + enable_feature(Feature::sb, sb); + enable_feature(Feature::sha2, sha1 && sha256 && asimd); + enable_feature(Feature::sha3, sha512 && sha3 && asimd); + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme_f64f64, sme_f64f64); + enable_feature(Feature::sme_i16i64, sme_i16i64); + enable_feature(Feature::ssbs, ssbs); + enable_feature(Feature::wfxt, wfxt); + + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 000000000000..ccc48f536054 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,3 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. + +pub(crate) use super::super::aarch64::detect_features; diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 000000000000..0a15156e1bd8 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for ARM on FreeBSD + +use super::auxvec; +use crate::detect::{Feature, cache}; + +// Defined in machine/elf.h. 
+// https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm/include/elf.h +const HWCAP_NEON: usize = 0x00001000; +const HWCAP2_AES: usize = 0x00000001; +const HWCAP2_PMULL: usize = 0x00000002; +const HWCAP2_SHA1: usize = 0x00000004; +const HWCAP2_SHA2: usize = 0x00000008; +const HWCAP2_CRC32: usize = 0x00000010; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & HWCAP_NEON != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & HWCAP2_PMULL != 0); + enable_feature(&mut value, Feature::crc, auxv.hwcap2 & HWCAP2_CRC32 != 0); + enable_feature(&mut value, Feature::aes, auxv.hwcap2 & HWCAP2_AES != 0); + // SHA2 requires SHA1 & SHA2 features + let sha1 = auxv.hwcap2 & HWCAP2_SHA1 != 0; + let sha2 = auxv.hwcap2 & HWCAP2_SHA2 != 0; + enable_feature(&mut value, Feature::sha2, sha1 && sha2); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 000000000000..4e72bf22d76c --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,66 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr( + any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "riscv64" + ), + allow(dead_code) +)] + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. 
+#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common.h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common.h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result<AuxVec, ()> { + let hwcap = archauxv(libc::AT_HWCAP); + let hwcap2 = archauxv(libc::AT_HWCAP2); + // Zero could indicate that no features were detected, but it's also used to + // indicate an error. In particular, on many platforms AT_HWCAP2 will be + // legitimately zero, since it contains the most recent feature flags. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: libc::c_int) -> usize { + const OUT_LEN: libc::c_int = core::mem::size_of::<libc::c_ulong>() as libc::c_int; + let mut out: libc::c_ulong = 0; + unsafe { + // elf_aux_info is available on FreeBSD 12.0+ and 11.4+: + // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 + // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h + // FreeBSD 11 support in std has been removed in Rust 1.75 (https://github.com/rust-lang/rust/pull/114521), + // so we can safely use this function. + let res = libc::elf_aux_info( + key, + &mut out as *mut libc::c_ulong as *mut libc::c_void, + OUT_LEN, + ); + // If elf_aux_info fails, `out` will be left at zero (which is the proper default value). 
+ debug_assert!(res == 0 || out == 0); + } + out as usize +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 000000000000..ade7fb6269d1 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,22 @@ +//! Run-time feature detection on FreeBSD + +mod auxvec; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 000000000000..d03af68cd081 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,21 @@ +//! Run-time feature detection for PowerPC on FreeBSD. 
+ +use super::auxvec; +use crate::detect::{Feature, cache}; + +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 000000000000..22a9cefff7b8 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,484 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. 
+ len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(is_exynos9810); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + // AT_HWCAP + fp: bool, + asimd: bool, + // evtstrm: No LLVM support. + aes: bool, + pmull: bool, + sha1: bool, + sha2: bool, + crc32: bool, + atomics: bool, + fphp: bool, + asimdhp: bool, + // cpuid: No LLVM support. + asimdrdm: bool, + jscvt: bool, + fcma: bool, + lrcpc: bool, + dcpop: bool, + sha3: bool, + sm3: bool, + sm4: bool, + asimddp: bool, + sha512: bool, + sve: bool, + fhm: bool, + dit: bool, + uscat: bool, + ilrcpc: bool, + flagm: bool, + ssbs: bool, + sb: bool, + paca: bool, + pacg: bool, + + // AT_HWCAP2 + dcpodp: bool, + sve2: bool, + sveaes: bool, + svepmull: bool, + svebitperm: bool, + svesha3: bool, + svesm4: bool, + flagm2: bool, + frint: bool, + // svei8mm: See i8mm feature. + svef32mm: bool, + svef64mm: bool, + // svebf16: See bf16 feature. + i8mm: bool, + bf16: bool, + // dgh: No LLVM support. 
+ rng: bool, + bti: bool, + mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, + smeb16b16: bool, + smef16f16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, + // pauthlr: bool, +} + +impl From for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + sb: 
bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 + dcpodp: bit::test(auxv.hwcap2, 0), + sve2: bit::test(auxv.hwcap2, 1), + sveaes: bit::test(auxv.hwcap2, 2), + svepmull: bit::test(auxv.hwcap2, 3), + svebitperm: bit::test(auxv.hwcap2, 4), + svesha3: bit::test(auxv.hwcap2, 5), + svesm4: bit::test(auxv.hwcap2, 6), + flagm2: bit::test(auxv.hwcap2, 7), + frint: bit::test(auxv.hwcap2, 8), + // svei8mm: bit::test(auxv.hwcap2, 9), + svef32mm: bit::test(auxv.hwcap2, 10), + svef64mm: bit::test(auxv.hwcap2, 11), + // svebf16: bit::test(auxv.hwcap2, 12), + i8mm: bit::test(auxv.hwcap2, 13), + bf16: bit::test(auxv.hwcap2, 14), + // dgh: bit::test(auxv.hwcap2, 15), + rng: bit::test(auxv.hwcap2, 16), + bti: bit::test(auxv.hwcap2, 17), + mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), + wfxt: bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), + smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: 
bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), + // pauthlr: bit::test(auxv.hwcap2, ??), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The feature dependencies here come directly from LLVM's feature definitions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self, is_exynos9810: bool) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. 
+ if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + // AES also covers FEAT_PMULL + enable_feature(Feature::aes, self.aes && self.pmull && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128 && self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + // enable_feature(Feature::pauth_lr, self.pauthlr); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, 
self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. + enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature( + Feature::sve2_aes, + self.sveaes && self.svepmull && sve2 && self.aes, + ); + enable_feature( + Feature::sve2_sm4, + self.svesm4 && sve2 && self.sm3 && self.sm4, + ); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16); + enable_feature(Feature::hbc, self.hbc); + 
enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); + enable_feature(Feature::wfxt, self.wfxt); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature( + Feature::sme_b16b16, + sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16, + ); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); + enable_feature(Feature::sme_lutv2, self.smelutv2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); + } + value + } +} + +#[cfg(target_endian = "little")] +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "std_detect_file_io")] + mod auxv_from_file { + use super::auxvec::auxv_from_file; + use super::*; + // The baseline hwcaps used in the (artificial) auxv test files. 
+ fn baseline_hwcaps() -> AtHwcap { + AtHwcap { + fp: true, + asimd: true, + aes: true, + pmull: true, + sha1: true, + sha2: true, + crc32: true, + atomics: true, + fphp: true, + asimdhp: true, + asimdrdm: true, + lrcpc: true, + dcpop: true, + asimddp: true, + ssbs: true, + ..AtHwcap::default() + } + } + + #[test] + fn linux_empty_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!( + AtHwcap::from(v), + AtHwcap { + // Some other HWCAP bits. + paca: true, + pacg: true, + // HWCAP2-only bits. + dcpodp: true, + frint: true, + rng: true, + bti: true, + mte: true, + ..baseline_hwcaps() + } + ); + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 000000000000..bbb173227d07 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,34 @@ +//! Run-time feature detection for ARM on Linux. 
+ +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::i8mm, bit::test(auxv.hwcap, 27)); + enable_feature(&mut value, Feature::dotprod, bit::test(auxv.hwcap, 24)); + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + enable_feature(&mut value, Feature::crc, bit::test(auxv.hwcap2, 4)); + enable_feature(&mut value, Feature::aes, bit::test(auxv.hwcap2, 0)); + // SHA2 requires SHA1 & SHA2 features + enable_feature( + &mut value, + Feature::sha2, + bit::test(auxv.hwcap2, 2) && bit::test(auxv.hwcap2, 3), + ); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 000000000000..c30379ff0655 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,339 @@ +//! Parses ELF auxiliary vectors. +#![allow(dead_code)] + +pub(crate) const AT_NULL: usize = 0; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. 
+/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// There is no perfect way of reading the auxiliary vector. +/// +/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use +/// `getauxval` if its linked to the binary, and otherwise proceed to a fallback implementation. +/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is +/// linked to the binary - if that is not the case the behavior is undefined. +/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will +/// try to read `/proc/self/auxv`. +/// - If that fails, this function returns an error. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// Note: The `std_detect_dlsym_getauxval` cargo feature is ignored on +/// `*-linux-{gnu,musl,ohos}*` and `*-android*` targets because we can safely assume `getauxval` +/// is linked to the binary. 
+/// - `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html))
+///   have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html).
+/// - `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15))
+///   use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197)
+/// - `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md)
+/// - `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html))
+///   have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49).
+///
+/// For more information about when `getauxval` is available check the great
+/// [`auxv` crate documentation][auxv_docs].
+///
+/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
+/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
+pub(crate) fn auxv() -> Result<AuxVec, ()> {
+    // Try to call a getauxval function.
+    if let Ok(hwcap) = getauxval(AT_HWCAP) {
+        // Targets with only AT_HWCAP:
+        #[cfg(any(
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            target_arch = "mips",
+            target_arch = "mips64",
+            target_arch = "loongarch64",
+        ))]
+        {
+            // Zero could indicate that no features were detected, but it's also used to indicate
+            // an error. In either case, try the fallback.
+            if hwcap != 0 {
+                return Ok(AuxVec { hwcap });
+            }
+        }
+
+        // Targets with AT_HWCAP and AT_HWCAP2:
+        #[cfg(any(
+            target_arch = "aarch64",
+            target_arch = "arm",
+            target_arch = "powerpc",
+            target_arch = "powerpc64",
+            target_arch = "s390x",
+        ))]
+        {
+            if let Ok(hwcap2) = getauxval(AT_HWCAP2) {
+                // Zero could indicate that no features were detected, but it's also used to indicate
+                // an error. In particular, on many platforms AT_HWCAP2 will be legitimately zero,
+                // since it contains the most recent feature flags. Use the fallback only if no
+                // features were detected at all.
+                if hwcap != 0 || hwcap2 != 0 {
+                    return Ok(AuxVec { hwcap, hwcap2 });
+                }
+            }
+        }
+
+        // Intentionally not used
+        let _ = hwcap;
+    }
+
+    #[cfg(feature = "std_detect_file_io")]
+    {
+        // If calling getauxval fails, try to read the auxiliary vector from
+        // its file:
+        auxv_from_file("/proc/self/auxv")
+    }
+    #[cfg(not(feature = "std_detect_file_io"))]
+    {
+        Err(())
+    }
+}
+
+/// Tries to read the `key` from the auxiliary vector by calling the
+/// `getauxval` function. If the function is not linked, this function returns `Err`.
+fn getauxval(key: usize) -> Result<usize, ()> {
+    type F = unsafe extern "C" fn(libc::c_ulong) -> libc::c_ulong;
+    cfg_if::cfg_if! {
+        if #[cfg(all(
+            feature = "std_detect_dlsym_getauxval",
+            not(all(
+                target_os = "linux",
+                any(target_env = "gnu", target_env = "musl", target_env = "ohos"),
+            )),
+            not(target_os = "android"),
+        ))] {
+            let ffi_getauxval: F = unsafe {
+                let ptr = libc::dlsym(libc::RTLD_DEFAULT, c"getauxval".as_ptr());
+                if ptr.is_null() {
+                    return Err(());
+                }
+                core::mem::transmute(ptr)
+            };
+        } else {
+            let ffi_getauxval: F = libc::getauxval;
+        }
+    }
+    Ok(unsafe { ffi_getauxval(key as libc::c_ulong) as usize })
+}
+
+/// Tries to read the auxiliary vector from the `file`. If this fails, this
+/// function returns `Err`.
+#[cfg(feature = "std_detect_file_io")]
+pub(super) fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
+    let file = super::read_file(file)?;
+
+    // See .
+    //
+    // The auxiliary vector contains at most 34 (key,value) fields: from
+    // `AT_MINSIGSTKSZ` to `AT_NULL`, but its number may increase.
+    let len = file.len();
+    let mut buf = alloc::vec![0_usize; 1 + len / core::mem::size_of::<usize>()];
+    unsafe {
+        core::ptr::copy_nonoverlapping(file.as_ptr(), buf.as_mut_ptr() as *mut u8, len);
+    }
+
+    auxv_from_buf(&buf)
+}
+
+/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this
+/// function returns `Err`.
+#[cfg(feature = "std_detect_file_io")]
+fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> {
+    // Targets with only AT_HWCAP:
+    #[cfg(any(
+        target_arch = "riscv32",
+        target_arch = "riscv64",
+        target_arch = "mips",
+        target_arch = "mips64",
+        target_arch = "loongarch64",
+    ))]
+    {
+        for el in buf.chunks(2) {
+            match el[0] {
+                AT_NULL => break,
+                AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }),
+                _ => (),
+            }
+        }
+    }
+    // Targets with AT_HWCAP and AT_HWCAP2:
+    #[cfg(any(
+        target_arch = "aarch64",
+        target_arch = "arm",
+        target_arch = "powerpc",
+        target_arch = "powerpc64",
+        target_arch = "s390x",
+    ))]
+    {
+        let mut hwcap = None;
+        // For some platforms, AT_HWCAP2 was added recently, so let it default to zero.
+        let mut hwcap2 = 0;
+        for el in buf.chunks(2) {
+            match el[0] {
+                AT_NULL => break,
+                AT_HWCAP => hwcap = Some(el[1]),
+                AT_HWCAP2 => hwcap2 = el[1],
+                _ => (),
+            }
+        }
+
+        if let Some(hwcap) = hwcap {
+            return Ok(AuxVec { hwcap, hwcap2 });
+        }
+    }
+    // Suppress unused variable
+    let _ = buf;
+    Err(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv
+    // does not always contain the AT_HWCAP key under qemu.
+ #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + #[test] + fn auxv_crate() { + let v = auxv(); + if let Ok(hwcap) = getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } + } + + #[test] + fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } + } + + #[cfg(feature = "std_detect_file_io")] + cfg_if::cfg_if! { + if #[cfg(target_arch = "arm")] { + #[test] + fn linux_rpi3() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + fn linux_macos_vb() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"); + println!("file: {file}"); + // The file contains HWCAP but not HWCAP2. In that case, we treat HWCAP2 as zero. 
+ let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 126614527); + assert_eq!(v.hwcap2, 0); + } + } else if #[cfg(target_arch = "aarch64")] { + #[cfg(target_endian = "little")] + #[test] + fn linux_artificial_aarch64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 0x0123456789abcdef); + assert_eq!(v.hwcap2, 0x02468ace13579bdf); + } + #[cfg(target_endian = "little")] + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + // An absent HWCAP2 is treated as zero, and does not prevent acceptance of HWCAP. + assert_ne!(v.hwcap, 0); + assert_eq!(v.hwcap2, 0); + } + } + } + + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } + } + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_crate_procfs() { + if let Ok(procfs_auxv) = auxv_from_file("/proc/self/auxv") { + assert_eq!(auxv().unwrap(), procfs_auxv); + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/loongarch.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/loongarch.rs new file mode 100644 index 000000000000..14cc7a731835 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/loongarch.rs @@ -0,0 +1,68 @@ +//! Run-time feature detection for LoongArch on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; +use core::arch::asm; + +/// Try to read the features from the auxiliary vector. 
+pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // The values are part of the platform-specific [cpucfg] + // + // [cpucfg]: LoongArch Reference Manual Volume 1: Basic Architecture v1.1 + let cpucfg2: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg2, in(reg) 2, + options(pure, nomem, preserves_flags, nostack) + ); + } + let cpucfg3: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg3, in(reg) 3, + options(pure, nomem, preserves_flags, nostack) + ); + } + enable_feature(&mut value, Feature::frecipe, bit::test(cpucfg2, 25)); + enable_feature(&mut value, Feature::div32, bit::test(cpucfg2, 26)); + enable_feature(&mut value, Feature::lam_bh, bit::test(cpucfg2, 27)); + enable_feature(&mut value, Feature::lamcas, bit::test(cpucfg2, 28)); + enable_feature(&mut value, Feature::scq, bit::test(cpucfg2, 30)); + enable_feature(&mut value, Feature::ld_seq_sa, bit::test(cpucfg3, 23)); + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature( + &mut value, + Feature::f, + bit::test(cpucfg2, 1) && bit::test(auxv.hwcap, 3), + ); + enable_feature( + &mut value, + Feature::d, + bit::test(cpucfg2, 2) && bit::test(auxv.hwcap, 3), + ); + enable_feature(&mut value, Feature::lsx, bit::test(auxv.hwcap, 4)); + enable_feature(&mut value, Feature::lasx, bit::test(auxv.hwcap, 5)); + enable_feature( + &mut value, + Feature::lbt, + bit::test(auxv.hwcap, 10) && bit::test(auxv.hwcap, 11) && bit::test(auxv.hwcap, 12), + ); + enable_feature(&mut value, Feature::lvz, bit::test(auxv.hwcap, 9)); + enable_feature(&mut value, Feature::ual, bit::test(auxv.hwcap, 2)); + return value; + } + value +} diff --git 
a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 000000000000..0cfa8869887e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,23 @@ +//! Run-time feature detection for MIPS on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/mips/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 000000000000..8c689d0b1f0e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,67 @@ +//! Run-time feature detection on Linux +//! 
+#[cfg(feature = "std_detect_file_io")]
+use alloc::vec::Vec;
+
+mod auxvec;
+
+#[cfg(feature = "std_detect_file_io")]
+fn read_file(path: &str) -> Result<Vec<u8>, ()> {
+    let mut path = Vec::from(path.as_bytes());
+    path.push(0);
+
+    unsafe {
+        let file = libc::open(path.as_ptr() as *const libc::c_char, libc::O_RDONLY);
+        if file == -1 {
+            return Err(());
+        }
+
+        let mut data = Vec::new();
+        loop {
+            data.reserve(4096);
+            let spare = data.spare_capacity_mut();
+            match libc::read(file, spare.as_mut_ptr() as *mut _, spare.len()) {
+                -1 => {
+                    libc::close(file);
+                    return Err(());
+                }
+                0 => break,
+                n => data.set_len(data.len() + n as usize),
+            }
+        }
+
+        libc::close(file);
+        Ok(data)
+    }
+}
+
+cfg_if::cfg_if! {
+    if #[cfg(target_arch = "aarch64")] {
+        mod aarch64;
+        pub(crate) use self::aarch64::detect_features;
+    } else if #[cfg(target_arch = "arm")] {
+        mod arm;
+        pub(crate) use self::arm::detect_features;
+    } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
+        mod riscv;
+        pub(crate) use self::riscv::detect_features;
+    } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] {
+        mod mips;
+        pub(crate) use self::mips::detect_features;
+    } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] {
+        mod powerpc;
+        pub(crate) use self::powerpc::detect_features;
+    } else if #[cfg(target_arch = "loongarch64")] {
+        mod loongarch;
+        pub(crate) use self::loongarch::detect_features;
+    } else if #[cfg(target_arch = "s390x")] {
+        mod s390x;
+        pub(crate) use self::s390x::detect_features;
+    } else {
+        use crate::detect::cache;
+        /// Performs run-time feature detection.
+ pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 000000000000..6a4f7e715d93 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,35 @@ +//! Run-time feature detection for PowerPC on Linux. + +use super::auxvec; +use crate::detect::{Feature, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + let power8_features = auxv.hwcap2 & 0x80000000 != 0; + enable_feature(&mut value, Feature::power8, power8_features); + enable_feature(&mut value, Feature::power8_altivec, power8_features); + enable_feature(&mut value, Feature::power8_crypto, power8_features); + enable_feature(&mut value, Feature::power8_vector, power8_features); + let power9_features = auxv.hwcap2 & 0x00800000 != 0; + enable_feature(&mut value, Feature::power9, power9_features); + enable_feature(&mut value, Feature::power9_altivec, power9_features); + enable_feature(&mut value, Feature::power9_vector, power9_features); + return value; + } + value +} diff --git 
a/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs new file mode 100644 index 000000000000..5506ff31fc79 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs @@ -0,0 +1,330 @@ +//! Run-time feature detection for RISC-V on Linux. +//! +//! On RISC-V, detection using auxv only supports single-letter extensions. +//! So, we use riscv_hwprobe that supports multi-letter extensions if available. +//! + +use core::ptr; + +use super::super::riscv::imply_features; +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +// See +// for runtime status query constants. +const PR_RISCV_V_GET_CONTROL: libc::c_int = 70; +const PR_RISCV_V_VSTATE_CTRL_ON: libc::c_int = 2; +const PR_RISCV_V_VSTATE_CTRL_CUR_MASK: libc::c_int = 3; + +// See +// for riscv_hwprobe struct and hardware probing constants. + +#[repr(C)] +struct riscv_hwprobe { + key: i64, + value: u64, +} + +#[allow(non_upper_case_globals)] +const __NR_riscv_hwprobe: libc::c_long = 258; + +const RISCV_HWPROBE_KEY_BASE_BEHAVIOR: i64 = 3; +const RISCV_HWPROBE_BASE_BEHAVIOR_IMA: u64 = 1 << 0; + +const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4; +const RISCV_HWPROBE_IMA_FD: u64 = 1 << 0; +const RISCV_HWPROBE_IMA_C: u64 = 1 << 1; +const RISCV_HWPROBE_IMA_V: u64 = 1 << 2; +const RISCV_HWPROBE_EXT_ZBA: u64 = 1 << 3; +const RISCV_HWPROBE_EXT_ZBB: u64 = 1 << 4; +const RISCV_HWPROBE_EXT_ZBS: u64 = 1 << 5; +const RISCV_HWPROBE_EXT_ZICBOZ: u64 = 1 << 6; +const RISCV_HWPROBE_EXT_ZBC: u64 = 1 << 7; +const RISCV_HWPROBE_EXT_ZBKB: u64 = 1 << 8; +const RISCV_HWPROBE_EXT_ZBKC: u64 = 1 << 9; +const RISCV_HWPROBE_EXT_ZBKX: u64 = 1 << 10; +const RISCV_HWPROBE_EXT_ZKND: u64 = 1 << 11; +const RISCV_HWPROBE_EXT_ZKNE: u64 = 1 << 12; +const RISCV_HWPROBE_EXT_ZKNH: u64 = 1 << 13; +const RISCV_HWPROBE_EXT_ZKSED: u64 = 1 << 14; +const RISCV_HWPROBE_EXT_ZKSH: u64 = 1 << 15; +const RISCV_HWPROBE_EXT_ZKT: u64 = 1 << 16; +const 
RISCV_HWPROBE_EXT_ZVBB: u64 = 1 << 17; +const RISCV_HWPROBE_EXT_ZVBC: u64 = 1 << 18; +const RISCV_HWPROBE_EXT_ZVKB: u64 = 1 << 19; +const RISCV_HWPROBE_EXT_ZVKG: u64 = 1 << 20; +const RISCV_HWPROBE_EXT_ZVKNED: u64 = 1 << 21; +const RISCV_HWPROBE_EXT_ZVKNHA: u64 = 1 << 22; +const RISCV_HWPROBE_EXT_ZVKNHB: u64 = 1 << 23; +const RISCV_HWPROBE_EXT_ZVKSED: u64 = 1 << 24; +const RISCV_HWPROBE_EXT_ZVKSH: u64 = 1 << 25; +const RISCV_HWPROBE_EXT_ZVKT: u64 = 1 << 26; +const RISCV_HWPROBE_EXT_ZFH: u64 = 1 << 27; +const RISCV_HWPROBE_EXT_ZFHMIN: u64 = 1 << 28; +const RISCV_HWPROBE_EXT_ZIHINTNTL: u64 = 1 << 29; +const RISCV_HWPROBE_EXT_ZVFH: u64 = 1 << 30; +const RISCV_HWPROBE_EXT_ZVFHMIN: u64 = 1 << 31; +const RISCV_HWPROBE_EXT_ZFA: u64 = 1 << 32; +const RISCV_HWPROBE_EXT_ZTSO: u64 = 1 << 33; +const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34; +const RISCV_HWPROBE_EXT_ZICOND: u64 = 1 << 35; +const RISCV_HWPROBE_EXT_ZIHINTPAUSE: u64 = 1 << 36; +const RISCV_HWPROBE_EXT_ZVE32X: u64 = 1 << 37; +const RISCV_HWPROBE_EXT_ZVE32F: u64 = 1 << 38; +const RISCV_HWPROBE_EXT_ZVE64X: u64 = 1 << 39; +const RISCV_HWPROBE_EXT_ZVE64F: u64 = 1 << 40; +const RISCV_HWPROBE_EXT_ZVE64D: u64 = 1 << 41; +const RISCV_HWPROBE_EXT_ZIMOP: u64 = 1 << 42; +const RISCV_HWPROBE_EXT_ZCA: u64 = 1 << 43; +const RISCV_HWPROBE_EXT_ZCB: u64 = 1 << 44; +const RISCV_HWPROBE_EXT_ZCD: u64 = 1 << 45; +const RISCV_HWPROBE_EXT_ZCF: u64 = 1 << 46; +const RISCV_HWPROBE_EXT_ZCMOP: u64 = 1 << 47; +const RISCV_HWPROBE_EXT_ZAWRS: u64 = 1 << 48; +// Excluded because it only reports the existence of `prctl`-based pointer masking control. 
+// const RISCV_HWPROBE_EXT_SUPM: u64 = 1 << 49; +const RISCV_HWPROBE_EXT_ZICNTR: u64 = 1 << 50; +const RISCV_HWPROBE_EXT_ZIHPM: u64 = 1 << 51; +const RISCV_HWPROBE_EXT_ZFBFMIN: u64 = 1 << 52; +const RISCV_HWPROBE_EXT_ZVFBFMIN: u64 = 1 << 53; +const RISCV_HWPROBE_EXT_ZVFBFWMA: u64 = 1 << 54; +const RISCV_HWPROBE_EXT_ZICBOM: u64 = 1 << 55; +const RISCV_HWPROBE_EXT_ZAAMO: u64 = 1 << 56; +const RISCV_HWPROBE_EXT_ZALRSC: u64 = 1 << 57; + +const RISCV_HWPROBE_KEY_CPUPERF_0: i64 = 5; +const RISCV_HWPROBE_MISALIGNED_FAST: u64 = 3; +const RISCV_HWPROBE_MISALIGNED_MASK: u64 = 7; + +const RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: i64 = 9; +const RISCV_HWPROBE_MISALIGNED_SCALAR_FAST: u64 = 3; + +const RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: i64 = 10; +const RISCV_HWPROBE_MISALIGNED_VECTOR_FAST: u64 = 3; + +// syscall returns an unsupported error if riscv_hwprobe is not supported, +// so we can safely use this function on older versions of Linux. +fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool { + unsafe fn __riscv_hwprobe( + pairs: *mut riscv_hwprobe, + pair_count: libc::size_t, + cpu_set_size: libc::size_t, + cpus: *mut libc::c_ulong, + flags: libc::c_uint, + ) -> libc::c_long { + unsafe { + libc::syscall( + __NR_riscv_hwprobe, + pairs, + pair_count, + cpu_set_size, + cpus, + flags, + ) + } + } + + let len = out.len(); + unsafe { __riscv_hwprobe(out.as_mut_ptr(), len, 0, ptr::null_mut(), 0) == 0 } +} + +/// Read list of supported features from (1) the auxiliary vector +/// and (2) the results of `riscv_hwprobe` and `prctl` system calls. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let mut enable_feature = |feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // Use auxiliary vector to enable single-letter ISA extensions. 
+ // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwcap.h?h=v6.15 + let auxv = auxvec::auxv().expect("read auxvec"); // should not fail on RISC-V platform + let mut has_i = bit::test(auxv.hwcap, (b'i' - b'a').into()); + #[allow(clippy::eq_op)] + enable_feature(Feature::a, bit::test(auxv.hwcap, (b'a' - b'a').into())); + enable_feature(Feature::c, bit::test(auxv.hwcap, (b'c' - b'a').into())); + enable_feature(Feature::d, bit::test(auxv.hwcap, (b'd' - b'a').into())); + enable_feature(Feature::f, bit::test(auxv.hwcap, (b'f' - b'a').into())); + enable_feature(Feature::m, bit::test(auxv.hwcap, (b'm' - b'a').into())); + let has_v = bit::test(auxv.hwcap, (b'v' - b'a').into()); + let mut is_v_set = false; + + // Use riscv_hwprobe syscall to query more extensions and + // performance-related capabilities. + 'hwprobe: { + let mut out = [ + riscv_hwprobe { + key: RISCV_HWPROBE_KEY_BASE_BEHAVIOR, + value: 0, + }, + riscv_hwprobe { + key: RISCV_HWPROBE_KEY_IMA_EXT_0, + value: 0, + }, + riscv_hwprobe { + key: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF, + value: 0, + }, + riscv_hwprobe { + key: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF, + value: 0, + }, + riscv_hwprobe { + key: RISCV_HWPROBE_KEY_CPUPERF_0, + value: 0, + }, + ]; + if !_riscv_hwprobe(&mut out) { + break 'hwprobe; + } + + // Query scalar/vector misaligned behavior. 
+ if out[2].key != -1 { + enable_feature( + Feature::unaligned_scalar_mem, + out[2].value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST, + ); + } else if out[4].key != -1 { + // Deprecated method for fallback + enable_feature( + Feature::unaligned_scalar_mem, + out[4].value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST, + ); + } + if out[3].key != -1 { + enable_feature( + Feature::unaligned_vector_mem, + out[3].value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST, + ); + } + + // Query whether "I" base and extensions "M" and "A" (as in the ISA + // manual version 2.2) are enabled. "I" base at that time corresponds + // to "I", "Zicsr", "Zicntr" and "Zifencei" (as in the ISA manual version + // 20240411). + // This is a current requirement of + // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests. + let has_ima = (out[0].key != -1) && (out[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA != 0); + if !has_ima { + break 'hwprobe; + } + has_i |= has_ima; + enable_feature(Feature::zicsr, has_ima); + enable_feature(Feature::zicntr, has_ima); + enable_feature(Feature::zifencei, has_ima); + enable_feature(Feature::m, has_ima); + enable_feature(Feature::a, has_ima); + + // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`. + if out[1].key == -1 { + break 'hwprobe; + } + let ima_ext_0 = out[1].value; + let test = |mask| (ima_ext_0 & mask) != 0; + + enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied. 
+ enable_feature(Feature::c, test(RISCV_HWPROBE_IMA_C)); + + enable_feature(Feature::zicntr, test(RISCV_HWPROBE_EXT_ZICNTR)); + enable_feature(Feature::zihpm, test(RISCV_HWPROBE_EXT_ZIHPM)); + + enable_feature(Feature::zihintntl, test(RISCV_HWPROBE_EXT_ZIHINTNTL)); + enable_feature(Feature::zihintpause, test(RISCV_HWPROBE_EXT_ZIHINTPAUSE)); + enable_feature(Feature::zimop, test(RISCV_HWPROBE_EXT_ZIMOP)); + enable_feature(Feature::zicbom, test(RISCV_HWPROBE_EXT_ZICBOM)); + enable_feature(Feature::zicboz, test(RISCV_HWPROBE_EXT_ZICBOZ)); + enable_feature(Feature::zicond, test(RISCV_HWPROBE_EXT_ZICOND)); + + enable_feature(Feature::zalrsc, test(RISCV_HWPROBE_EXT_ZALRSC)); + enable_feature(Feature::zaamo, test(RISCV_HWPROBE_EXT_ZAAMO)); + enable_feature(Feature::zawrs, test(RISCV_HWPROBE_EXT_ZAWRS)); + enable_feature(Feature::zacas, test(RISCV_HWPROBE_EXT_ZACAS)); + enable_feature(Feature::ztso, test(RISCV_HWPROBE_EXT_ZTSO)); + + enable_feature(Feature::zba, test(RISCV_HWPROBE_EXT_ZBA)); + enable_feature(Feature::zbb, test(RISCV_HWPROBE_EXT_ZBB)); + enable_feature(Feature::zbs, test(RISCV_HWPROBE_EXT_ZBS)); + enable_feature(Feature::zbc, test(RISCV_HWPROBE_EXT_ZBC)); + + enable_feature(Feature::zbkb, test(RISCV_HWPROBE_EXT_ZBKB)); + enable_feature(Feature::zbkc, test(RISCV_HWPROBE_EXT_ZBKC)); + enable_feature(Feature::zbkx, test(RISCV_HWPROBE_EXT_ZBKX)); + enable_feature(Feature::zknd, test(RISCV_HWPROBE_EXT_ZKND)); + enable_feature(Feature::zkne, test(RISCV_HWPROBE_EXT_ZKNE)); + enable_feature(Feature::zknh, test(RISCV_HWPROBE_EXT_ZKNH)); + enable_feature(Feature::zksed, test(RISCV_HWPROBE_EXT_ZKSED)); + enable_feature(Feature::zksh, test(RISCV_HWPROBE_EXT_ZKSH)); + enable_feature(Feature::zkt, test(RISCV_HWPROBE_EXT_ZKT)); + + enable_feature(Feature::zcmop, test(RISCV_HWPROBE_EXT_ZCMOP)); + enable_feature(Feature::zca, test(RISCV_HWPROBE_EXT_ZCA)); + enable_feature(Feature::zcf, test(RISCV_HWPROBE_EXT_ZCF)); + enable_feature(Feature::zcd, 
test(RISCV_HWPROBE_EXT_ZCD)); + enable_feature(Feature::zcb, test(RISCV_HWPROBE_EXT_ZCB)); + + enable_feature(Feature::zfh, test(RISCV_HWPROBE_EXT_ZFH)); + enable_feature(Feature::zfhmin, test(RISCV_HWPROBE_EXT_ZFHMIN)); + enable_feature(Feature::zfa, test(RISCV_HWPROBE_EXT_ZFA)); + enable_feature(Feature::zfbfmin, test(RISCV_HWPROBE_EXT_ZFBFMIN)); + + // Use prctl (if any) to determine whether the vector extension + // is enabled on the current thread (assuming the entire process + // share the same status). If prctl fails (e.g. QEMU userland emulator + // as of version 9.2.3), use auxiliary vector to retrieve the default + // vector status on the process startup. + let has_vectors = { + let v_status = unsafe { libc::prctl(PR_RISCV_V_GET_CONTROL) }; + if v_status >= 0 { + (v_status & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) == PR_RISCV_V_VSTATE_CTRL_ON + } else { + has_v + } + }; + if has_vectors { + enable_feature(Feature::v, test(RISCV_HWPROBE_IMA_V)); + enable_feature(Feature::zve32x, test(RISCV_HWPROBE_EXT_ZVE32X)); + enable_feature(Feature::zve32f, test(RISCV_HWPROBE_EXT_ZVE32F)); + enable_feature(Feature::zve64x, test(RISCV_HWPROBE_EXT_ZVE64X)); + enable_feature(Feature::zve64f, test(RISCV_HWPROBE_EXT_ZVE64F)); + enable_feature(Feature::zve64d, test(RISCV_HWPROBE_EXT_ZVE64D)); + + enable_feature(Feature::zvbb, test(RISCV_HWPROBE_EXT_ZVBB)); + enable_feature(Feature::zvbc, test(RISCV_HWPROBE_EXT_ZVBC)); + enable_feature(Feature::zvkb, test(RISCV_HWPROBE_EXT_ZVKB)); + enable_feature(Feature::zvkg, test(RISCV_HWPROBE_EXT_ZVKG)); + enable_feature(Feature::zvkned, test(RISCV_HWPROBE_EXT_ZVKNED)); + enable_feature(Feature::zvknha, test(RISCV_HWPROBE_EXT_ZVKNHA)); + enable_feature(Feature::zvknhb, test(RISCV_HWPROBE_EXT_ZVKNHB)); + enable_feature(Feature::zvksed, test(RISCV_HWPROBE_EXT_ZVKSED)); + enable_feature(Feature::zvksh, test(RISCV_HWPROBE_EXT_ZVKSH)); + enable_feature(Feature::zvkt, test(RISCV_HWPROBE_EXT_ZVKT)); + + enable_feature(Feature::zvfh, 
test(RISCV_HWPROBE_EXT_ZVFH)); + enable_feature(Feature::zvfhmin, test(RISCV_HWPROBE_EXT_ZVFHMIN)); + enable_feature(Feature::zvfbfmin, test(RISCV_HWPROBE_EXT_ZVFBFMIN)); + enable_feature(Feature::zvfbfwma, test(RISCV_HWPROBE_EXT_ZVFBFWMA)); + } + is_v_set = true; + }; + + // Set V purely depending on the auxiliary vector + // only if no fine-grained vector extension detection is available. + if !is_v_set { + enable_feature(Feature::v, has_v); + } + + // Handle base ISA. + // If future RV128I is supported, implement with `enable_feature` here. + // Note that we should use `target_arch` instead of `target_pointer_width` + // to avoid misdetection caused by experimental ABIs such as RV64ILP32. + #[cfg(target_arch = "riscv64")] + enable_feature(Feature::rv64i, has_i); + #[cfg(target_arch = "riscv32")] + enable_feature(Feature::rv32i, has_i); + + imply_features(value) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/s390x.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/s390x.rs new file mode 100644 index 000000000000..9b53f526d619 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/s390x.rs @@ -0,0 +1,152 @@ +//! Run-time feature detection for s390x on Linux. 
+
+use super::auxvec;
+use crate::detect::{Feature, bit, cache};
+
+/// Try to read the features from the auxiliary vector
+pub(crate) fn detect_features() -> cache::Initializer {
+    let opt_hwcap: Option<AtHwcap> = auxvec::auxv().ok().map(Into::into);
+    let facilities = ExtendedFacilityList::new();
+    cache(opt_hwcap, facilities)
+}
+
+#[derive(Debug, Default, PartialEq)]
+struct AtHwcap {
+    esan3: bool,
+    zarch: bool,
+    stfle: bool,
+    msa: bool,
+    ldisp: bool,
+    eimm: bool,
+    dfp: bool,
+    hpage: bool,
+    etf3eh: bool,
+    high_gprs: bool,
+    te: bool,
+    vxrs: bool,
+    vxrs_bcd: bool,
+    vxrs_ext: bool,
+    gs: bool,
+    vxrs_ext2: bool,
+    vxrs_pde: bool,
+    sort: bool,
+    dflt: bool,
+    vxrs_pde2: bool,
+    nnpa: bool,
+    pci_mio: bool,
+    sie: bool,
+}
+
+impl From<auxvec::AuxVec> for AtHwcap {
+    /// Reads AtHwcap from the auxiliary vector.
+    fn from(auxv: auxvec::AuxVec) -> Self {
+        AtHwcap {
+            esan3: bit::test(auxv.hwcap, 0),
+            zarch: bit::test(auxv.hwcap, 1),
+            stfle: bit::test(auxv.hwcap, 2),
+            msa: bit::test(auxv.hwcap, 3),
+            ldisp: bit::test(auxv.hwcap, 4),
+            eimm: bit::test(auxv.hwcap, 5),
+            dfp: bit::test(auxv.hwcap, 6),
+            hpage: bit::test(auxv.hwcap, 7),
+            etf3eh: bit::test(auxv.hwcap, 8),
+            high_gprs: bit::test(auxv.hwcap, 9),
+            te: bit::test(auxv.hwcap, 10),
+            vxrs: bit::test(auxv.hwcap, 11),
+            vxrs_bcd: bit::test(auxv.hwcap, 12),
+            vxrs_ext: bit::test(auxv.hwcap, 13),
+            gs: bit::test(auxv.hwcap, 14),
+            vxrs_ext2: bit::test(auxv.hwcap, 15),
+            vxrs_pde: bit::test(auxv.hwcap, 16),
+            sort: bit::test(auxv.hwcap, 17),
+            dflt: bit::test(auxv.hwcap, 18),
+            vxrs_pde2: bit::test(auxv.hwcap, 19),
+            nnpa: bit::test(auxv.hwcap, 20),
+            pci_mio: bit::test(auxv.hwcap, 21),
+            sie: bit::test(auxv.hwcap, 22),
+        }
+    }
+}
+
+struct ExtendedFacilityList([u64; 4]);
+
+impl ExtendedFacilityList {
+    fn new() -> Self {
+        let mut result: [u64; 4] = [0; 4];
+        // SAFETY: rust/llvm only support s390x version with the `stfle` instruction.
+ unsafe { + core::arch::asm!( + // equivalently ".insn s, 0xb2b00000, 0({1})", + "stfle 0({})", + in(reg_addr) result.as_mut_ptr() , + inout("r0") result.len() as u64 - 1 => _, + options(nostack) + ); + } + Self(result) + } + + const fn get_bit(&self, n: usize) -> bool { + // NOTE: bits are numbered from the left. + self.0[n / 64] & (1 << (63 - (n % 64))) != 0 + } +} + +/// Initializes the cache from the feature bits. +/// +/// These values are part of the platform-specific [asm/elf.h][kernel], and are a selection of the +/// fields found in the [Facility Indications]. +/// +/// [Facility Indications]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63 +/// [kernel]: https://github.com/torvalds/linux/blob/b62cef9a5c673f1b8083159f5dc03c1c5daced2f/arch/s390/include/asm/elf.h#L129 +fn cache(hwcap: Option, facilities: ExtendedFacilityList) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + { + let mut enable_if_set = |bit_index, f| { + if facilities.get_bit(bit_index) { + value.set(f as u32); + } + }; + + // We use HWCAP for `vector` because it requires both hardware and kernel support. + if let Some(AtHwcap { vxrs: true, .. 
}) = hwcap { + // vector and related + + enable_if_set(129, Feature::vector); + + enable_if_set(135, Feature::vector_enhancements_1); + enable_if_set(148, Feature::vector_enhancements_2); + enable_if_set(198, Feature::vector_enhancements_3); + + enable_if_set(134, Feature::vector_packed_decimal); + enable_if_set(152, Feature::vector_packed_decimal_enhancement); + enable_if_set(192, Feature::vector_packed_decimal_enhancement_2); + enable_if_set(199, Feature::vector_packed_decimal_enhancement_3); + + enable_if_set(165, Feature::nnp_assist); + } + + // others + + enable_if_set(76, Feature::message_security_assist_extension3); + enable_if_set(77, Feature::message_security_assist_extension4); + enable_if_set(57, Feature::message_security_assist_extension5); + enable_if_set(146, Feature::message_security_assist_extension8); + enable_if_set(155, Feature::message_security_assist_extension9); + enable_if_set(86, Feature::message_security_assist_extension12); + + enable_if_set(58, Feature::miscellaneous_extensions_2); + enable_if_set(61, Feature::miscellaneous_extensions_3); + enable_if_set(84, Feature::miscellaneous_extensions_4); + + enable_if_set(45, Feature::high_word); + enable_if_set(73, Feature::transactional_execution); + enable_if_set(133, Feature::guarded_storage); + enable_if_set(150, Feature::enhanced_sort); + enable_if_set(151, Feature::deflate_conversion); + enable_if_set(201, Feature::concurrent_functions); + } + + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs new file mode 100644 index 000000000000..cfe4ad10ad64 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs @@ -0,0 +1,55 @@ +//! Run-time feature detection for Aarch64 on OpenBSD. +//! +//! OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl. +//! 
https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 +//! https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925 + +use crate::detect::cache; +use core::{mem::MaybeUninit, ptr}; + +// Defined in machine/cpu.h. +// https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40 +const CPU_ID_AA64ISAR0: libc::c_int = 2; +const CPU_ID_AA64ISAR1: libc::c_int = 3; +const CPU_ID_AA64MMFR2: libc::c_int = 7; +const CPU_ID_AA64PFR0: libc::c_int = 8; + +/// Try to read the features from the system registers. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+. + // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + // Others are supported on OpenBSD 7.3+. + // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c + // sysctl returns an unsupported error if operation is not supported, + // so we can safely use this function on older versions of OpenBSD. + let aa64isar0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR0]).unwrap_or(0); + let aa64isar1 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR1]).unwrap_or(0); + let aa64mmfr2 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64MMFR2]).unwrap_or(0); + // Do not use unwrap_or(0) because in fp and asimd fields, 0 indicates that + // the feature is available. 
+ let aa64pfr0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64PFR0]); + + super::aarch64::parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, aa64pfr0) +} + +#[inline] +fn sysctl64(mib: &[libc::c_int]) -> Option { + const OUT_LEN: libc::size_t = core::mem::size_of::(); + let mut out = MaybeUninit::::uninit(); + let mut out_len = OUT_LEN; + let res = unsafe { + libc::sysctl( + mib.as_ptr(), + mib.len() as libc::c_uint, + out.as_mut_ptr() as *mut libc::c_void, + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res == -1 || out_len != OUT_LEN { + return None; + } + // SAFETY: we've checked that sysctl was successful and `out` was filled. + Some(unsafe { out.assume_init() }) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/other.rs b/library/stdarch/crates/std_detect/src/detect/os/other.rs new file mode 100644 index 000000000000..091fafc4ebf4 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/other.rs @@ -0,0 +1,8 @@ +//! Other operating systems + +use crate::detect::cache; + +#[allow(dead_code)] +pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/riscv.rs b/library/stdarch/crates/std_detect/src/detect/os/riscv.rs new file mode 100644 index 000000000000..4c59ede80293 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/riscv.rs @@ -0,0 +1,203 @@ +//! Run-time feature detection utility for RISC-V. +//! +//! On RISC-V, full feature detection needs a help of one or more +//! feature detection mechanisms (usually provided by the operating system). +//! +//! RISC-V architecture defines many extensions and some have dependency to others. +//! More importantly, some of them cannot be enabled without resolving such +//! dependencies due to limited set of features that such mechanisms provide. +//! +//! This module provides an OS-independent utility to process such relations +//! between RISC-V extensions. 
+ +use crate::detect::{Feature, cache}; + +/// Imply features by the given set of enabled features. +/// +/// Note that it does not perform any consistency checks including existence of +/// conflicting extensions and/or complicated requirements. Eliminating such +/// inconsistencies is the responsibility of the feature detection logic and +/// its provider(s). +pub(crate) fn imply_features(mut value: cache::Initializer) -> cache::Initializer { + loop { + // Check convergence of the feature flags later. + let prev = value; + + // Expect that the optimizer turns repeated operations into + // a fewer number of bit-manipulation operations. + macro_rules! imply { + // Regular implication: + // A1 => (B1[, B2...]), A2 => (B1[, B2...]) and so on. + ($($from: ident)|+ => $($to: ident)&+) => { + if [$(Feature::$from as u32),+].iter().any(|&x| value.test(x)) { + $( + value.set(Feature::$to as u32); + )+ + } + }; + // Implication with multiple requirements: + // A1 && A2 ... => (B1[, B2...]). + ($($from: ident)&+ => $($to: ident)&+) => { + if [$(Feature::$from as u32),+].iter().all(|&x| value.test(x)) { + $( + value.set(Feature::$to as u32); + )+ + } + }; + } + macro_rules! group { + ($group: ident == $($member: ident)&+) => { + // Forward implication as defined in the specifications. + imply!($group => $($member)&+); + // Reverse implication to "group extension" from its members. + // This is not a part of specifications but convenient for + // feature detection and implemented in e.g. LLVM. + imply!($($member)&+ => $group); + }; + } + + /* + If a dependency/implication is not explicitly stated in the + specification, it is denoted as a comment as follows: + "defined as subset": + The latter extension is described as a subset of the former + (but the evidence is weak). + "functional": + The former extension is functionally a superset of the latter + (no direct references though). 
+ */ + + imply!(zvbb => zvkb); + + // Certain set of vector cryptography extensions form a group. + group!(zvkn == zvkned & zvknhb & zvkb & zvkt); + group!(zvknc == zvkn & zvbc); + group!(zvkng == zvkn & zvkg); + group!(zvks == zvksed & zvksh & zvkb & zvkt); + group!(zvksc == zvks & zvbc); + group!(zvksg == zvks & zvkg); + + imply!(zvknhb => zvknha); // functional + + // For vector cryptography, Zvknhb and Zvbc require integer arithmetic + // with EEW=64 (Zve64x) while others not depending on them + // require EEW=32 (Zve32x). + imply!(zvknhb | zvbc => zve64x); + imply!(zvbb | zvkb | zvkg | zvkned | zvknha | zvksed | zvksh => zve32x); + + imply!(zbc => zbkc); // defined as subset + group!(zkn == zbkb & zbkc & zbkx & zkne & zknd & zknh); + group!(zks == zbkb & zbkc & zbkx & zksed & zksh); + group!(zk == zkn & zkr & zkt); + + imply!(zacas => zaamo); + group!(a == zalrsc & zaamo); + + group!(b == zba & zbb & zbs); + + imply!(zcf => zca & f); + imply!(zcd => zca & d); + imply!(zcmop | zcb => zca); + + imply!(zhinx => zhinxmin); + imply!(zdinx | zhinxmin => zfinx); + + imply!(zvfh => zvfhmin); // functional + imply!(zvfh => zve32f & zfhmin); + imply!(zvfhmin => zve32f); + imply!(zvfbfwma => zvfbfmin & zfbfmin); + imply!(zvfbfmin => zve32f); + + imply!(v => zve64d); + imply!(zve64d => zve64f & d); + imply!(zve64f => zve64x & zve32f); + imply!(zve64x => zve32x); + imply!(zve32f => zve32x & f); + + imply!(zfh => zfhmin); + imply!(q => d); + imply!(d | zfhmin | zfa => f); + imply!(zfbfmin => f); // and some of (not all) "Zfh" instructions. + + // Relatively complex implication rules from the "C" extension. + imply!(c => zca); + imply!(c & d => zcd); + #[cfg(target_arch = "riscv32")] + imply!(c & f => zcf); + + imply!(zicntr | zihpm | f | zfinx | zve32x => zicsr); + + // Loop until the feature flags converge. 
+ if prev == value { + return value; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple_direct() { + let mut value = cache::Initializer::default(); + value.set(Feature::f as u32); + // F (and other extensions with CSRs) -> Zicsr + assert!(imply_features(value).test(Feature::zicsr as u32)); + } + + #[test] + fn simple_indirect() { + let mut value = cache::Initializer::default(); + value.set(Feature::q as u32); + // Q -> D, D -> F, F -> Zicsr + assert!(imply_features(value).test(Feature::zicsr as u32)); + } + + #[test] + fn complex_zcd() { + let mut value = cache::Initializer::default(); + // C & D -> Zcd + value.set(Feature::c as u32); + assert!(!imply_features(value).test(Feature::zcd as u32)); + value.set(Feature::d as u32); + assert!(imply_features(value).test(Feature::zcd as u32)); + } + + #[test] + fn group_simple_forward() { + let mut value = cache::Initializer::default(); + // A -> Zalrsc & Zaamo (forward implication) + value.set(Feature::a as u32); + let value = imply_features(value); + assert!(value.test(Feature::zalrsc as u32)); + assert!(value.test(Feature::zaamo as u32)); + } + + #[test] + fn group_simple_backward() { + let mut value = cache::Initializer::default(); + // Zalrsc & Zaamo -> A (reverse implication) + value.set(Feature::zalrsc as u32); + value.set(Feature::zaamo as u32); + assert!(imply_features(value).test(Feature::a as u32)); + } + + #[test] + fn group_complex_convergence() { + let mut value = cache::Initializer::default(); + // Needs 3 iterations to converge + // (and 4th iteration for convergence checking): + // 1. [Zvksc] -> Zvks & Zvbc + // 2. Zvks -> Zvksed & Zvksh & Zvkb & Zvkt + // 3a. [Zvkned] & [Zvknhb] & [Zvkb] & Zvkt -> {Zvkn} + // 3b. 
Zvkn & Zvbc -> {Zvknc} + value.set(Feature::zvksc as u32); + value.set(Feature::zvkned as u32); + value.set(Feature::zvknhb as u32); + value.set(Feature::zvkb as u32); + let value = imply_features(value); + assert!(value.test(Feature::zvkn as u32)); + assert!(value.test(Feature::zvknc as u32)); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs new file mode 100644 index 000000000000..937f9f26eedc --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs @@ -0,0 +1,125 @@ +//! Run-time feature detection for Aarch64 on Windows. + +use crate::detect::{Feature, cache}; + +/// Try to read the features using IsProcessorFeaturePresent. +pub(crate) fn detect_features() -> cache::Initializer { + type DWORD = u32; + type BOOL = i32; + + const FALSE: BOOL = 0; + // The following Microsoft documents isn't updated for aarch64. + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // These are defined in winnt.h of Windows SDK + const PF_ARM_VFP_32_REGISTERS_AVAILABLE: u32 = 18; + const PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: u32 = 19; + const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: u32 = 30; + const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: u32 = 31; + const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: u32 = 34; + const PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: u32 = 43; + const PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: u32 = 44; + const PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: u32 = 45; + const PF_ARM_SVE_INSTRUCTIONS_AVAILABLE: u32 = 46; + const PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE: u32 = 47; + const PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE: u32 = 48; + const PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE: u32 = 49; + const PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE: u32 = 50; + const PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE: u32 = 51; + // const PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE: u32 = 52; + // 
const PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE: u32 = 53; + const PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE: u32 = 54; + const PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 55; + const PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE: u32 = 56; + // const PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 57; + // const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; + // const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + + unsafe extern "system" { + fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; + } + + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Some features may be supported on current CPU, + // but no way to detect it by OS API. + // Also, we require unsafe block for the extern "system" calls. + unsafe { + enable_feature( + Feature::fp, + IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::asimd, + IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::crc, + IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::lse, + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::dotprod, + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::jsconv, + IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::rcpc, + IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve, + IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2, + IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2p1, + IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + 
enable_feature( + Feature::sve2_aes, + IsProcessorFeaturePresent(PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE) != FALSE + && IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE) + != FALSE, + ); + enable_feature( + Feature::sve2_bitperm, + IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve_b16b16, + IsProcessorFeaturePresent(PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_sha3, + IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_sm4, + IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and + // pmull support + let crypto = + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE; + enable_feature(Feature::aes, crypto); + enable_feature(Feature::pmull, crypto); + enable_feature(Feature::sha2, crypto); + } + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs new file mode 100644 index 000000000000..8565c2f85e24 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -0,0 +1,335 @@ +//! x86 run-time feature detection is OS independent. + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use core::mem; + +use crate::detect::{Feature, bit, cache}; + +/// Run-time feature detection on x86 works by using the CPUID instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains +/// all the information about which flags to set to query which values, and in +/// which registers these are reported. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. 
+/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. +/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[allow(clippy::similar_names)] +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + if cfg!(target_env = "sgx") { + // doesn't support this because it is untrusted data + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_basic_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. 
+ let ( + extended_features_ebx, + extended_features_ecx, + extended_features_edx, + extended_features_eax_leaf_1, + extended_features_edx_leaf_1, + ) = if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + let CpuidResult { + eax: eax_1, + edx: edx_1, + .. + } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; + (ebx, ecx, edx, eax_1, edx_1) + } else { + (0, 0, 0, 0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_basic_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + let present = bit::test(r as usize, rb); + if present { + value.set(f as u32); + } + present + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 22, Feature::movbe); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + let f16c = enable(proc_info_ecx, 29, Feature::f16c); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + 
enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ecx, 8, Feature::gfni); + enable(extended_features_ecx, 9, Feature::vaes); + enable(extended_features_ecx, 10, Feature::vpclmulqdq); + + enable(extended_features_ebx, 3, Feature::bmi1); + enable(extended_features_ebx, 8, Feature::bmi2); + + enable(extended_features_ebx, 9, Feature::ermsb); + + enable(extended_features_eax_leaf_1, 31, Feature::movrs); + + // Detect if CPUID.19h available + if bit::test(extended_features_ecx as usize, 23) { + let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) }; + enable(ebx, 0, Feature::kl); + enable(ebx, 2, Feature::widekl); + } + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. + // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // * AMX -> `XCR0.AMX[18:17]` + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. 
+ let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: + let os_avx512_support = xcr0 & 0xe0 == 0xe0; + // Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000` + let os_amx_support = xcr0 & 0x60000 == 0x60000; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. 
+ } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + let fma = enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // "Short" versions of AVX512 instructions + enable(extended_features_eax_leaf_1, 4, Feature::avxvnni); + enable(extended_features_eax_leaf_1, 23, Feature::avxifma); + enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8); + enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert); + enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16); + + enable(extended_features_eax_leaf_1, 0, Feature::sha512); + enable(extended_features_eax_leaf_1, 1, Feature::sm3); + enable(extended_features_eax_leaf_1, 2, Feature::sm4); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + // Also, Rust makes `avx512f` imply `fma` and `f16c`, because + // otherwise the assembler is broken. But Intel doesn't guarantee + // that `fma` and `f16c` are available with `avx512f`, so we + // need to check for them separately. 
+ if os_avx512_support && f16c && fma { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512vbmi); + enable(extended_features_ecx, 6, Feature::avx512vbmi2); + enable(extended_features_ecx, 11, Feature::avx512vnni); + enable(extended_features_ecx, 12, Feature::avx512bitalg); + enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + enable(extended_features_edx, 8, Feature::avx512vp2intersect); + enable(extended_features_edx, 23, Feature::avx512fp16); + enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); + } + } + + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { + eax: amx_feature_flags_eax, + .. + } = unsafe { __cpuid_count(0x1e_u32, 1) }; + + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 5, Feature::amx_transpose); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. 
+ // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. + enable(extended_proc_info_ecx, 5, Feature::lzcnt); + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + enable(extended_proc_info_ecx, 11, Feature::xop); + } + } + + // Unfortunately, some Skylake chips erroneously report support for BMI1 and + // BMI2 without actual support. These chips don't support AVX, and it seems + // that all Intel chips with non-erroneous support BMI do (I didn't check + // other vendors), so we can disable these flags for chips that don't also + // report support for AVX. + // + // It's possible this will pessimize future chips that do support BMI and + // not AVX, but this seems minor compared to a hard crash you get when + // executing an unsupported instruction (to put it another way, it's safe + // for us to under-report CPU features, but not to over-report them). Still, + // to limit any impact this may have in the future, we only do this for + // Intel chips, as it's a bug only present in their chips. 
+ // + // This bug is documented as `SKL052` in the errata section of this document: + // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf + if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) { + value.unset(Feature::bmi1 as u32); + value.unset(Feature::bmi2 as u32); + } + + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv new file mode 100644 index 000000000000..ec826afcf381 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv new file mode 100644 index 000000000000..95537b73f206 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv new file mode 100644 index 000000000000..1d87264b2219 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv new file mode 100644 index 000000000000..35f01cc767c5 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv new file mode 100644 index 
000000000000..0538e661f63a Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv new file mode 100644 index 000000000000..75abc02d1781 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv differ diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs new file mode 100644 index 000000000000..ab1b77bad5be --- /dev/null +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for the Rust standard library. +//! +//! To detect whether a feature is enabled in the system running the binary +//! use one of the appropriate macro for the target: +//! +//! * `x86` and `x86_64`: [`is_x86_feature_detected`] +//! * `arm`: [`is_arm_feature_detected`] +//! * `aarch64`: [`is_aarch64_feature_detected`] +//! * `riscv`: [`is_riscv_feature_detected`] +//! * `mips`: [`is_mips_feature_detected`] +//! * `mips64`: [`is_mips64_feature_detected`] +//! * `powerpc`: [`is_powerpc_feature_detected`] +//! * `powerpc64`: [`is_powerpc64_feature_detected`] +//! * `loongarch`: [`is_loongarch_feature_detected`] +//! 
* `s390x`: [`is_s390x_feature_detected`] + +#![unstable(feature = "stdarch_internal", issue = "none")] +#![feature(staged_api, doc_cfg, allow_internal_unstable)] +#![deny(rust_2018_idioms)] +#![allow(clippy::shadow_reuse)] +#![cfg_attr(test, allow(unused_imports))] +#![no_std] +#![allow(internal_features)] + +#[cfg(test)] +#[macro_use] +extern crate std; + +// rust-lang/rust#83888: removing `extern crate` gives an error that `vec_spare> +#[cfg_attr(feature = "std_detect_file_io", allow(unused_extern_crates))] +#[cfg(feature = "std_detect_file_io")] +extern crate alloc; + +#[doc(hidden)] +#[unstable(feature = "stdarch_internal", issue = "none")] +pub mod detect; diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs new file mode 100644 index 000000000000..7976aedc7585 --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -0,0 +1,354 @@ +#![allow(internal_features)] +#![feature(stdarch_internal)] +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] +#![cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + feature(stdarch_aarch64_feature_detection) +)] +#![cfg_attr( + any(target_arch = "riscv32", target_arch = "riscv64"), + feature(stdarch_riscv_feature_detection) +)] +#![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] +#![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] +#![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] +#![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)] + +#[cfg_attr( + any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ), + macro_use +)] +extern crate std_detect; + +#[test] +fn all() { + for (f, e) in std_detect::detect::features() { + 
println!("{f}: {e}"); + } +} + +#[test] +#[cfg(all(target_arch = "arm", target_os = "freebsd"))] +fn arm_freebsd() { + println!("neon: {}", is_arm_feature_detected!("neon")); + println!("pmull: {}", is_arm_feature_detected!("pmull")); + println!("crc: {}", is_arm_feature_detected!("crc")); + println!("aes: {}", is_arm_feature_detected!("aes")); + println!("sha2: {}", is_arm_feature_detected!("sha2")); +} + +#[test] +#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] +fn arm_linux() { + println!("neon: {}", is_arm_feature_detected!("neon")); + println!("pmull: {}", is_arm_feature_detected!("pmull")); + println!("crc: {}", is_arm_feature_detected!("crc")); + println!("aes: {}", is_arm_feature_detected!("aes")); + println!("sha2: {}", is_arm_feature_detected!("sha2")); + println!("dotprod: {}", is_arm_feature_detected!("dotprod")); + println!("i8mm: {}", is_arm_feature_detected!("i8mm")); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "linux", target_os = "android") +))] +fn aarch64_linux() { + println!("asimd: {}", is_aarch64_feature_detected!("asimd")); + println!("neon: {}", is_aarch64_feature_detected!("neon")); + println!("pmull: {}", is_aarch64_feature_detected!("pmull")); + println!("fp: {}", is_aarch64_feature_detected!("fp")); + println!("fp16: {}", is_aarch64_feature_detected!("fp16")); + println!("sve: {}", is_aarch64_feature_detected!("sve")); + println!("crc: {}", is_aarch64_feature_detected!("crc")); + println!("lse: {}", is_aarch64_feature_detected!("lse")); + println!("lse2: {}", is_aarch64_feature_detected!("lse2")); + println!("lse128: {}", is_aarch64_feature_detected!("lse128")); + println!("rdm: {}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {}", is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2")); + println!("rcpc3: {}", is_aarch64_feature_detected!("rcpc3")); + println!("dotprod: {}", is_aarch64_feature_detected!("dotprod")); + 
println!("tme: {}", is_aarch64_feature_detected!("tme")); + println!("fhm: {}", is_aarch64_feature_detected!("fhm")); + println!("dit: {}", is_aarch64_feature_detected!("dit")); + println!("flagm: {}", is_aarch64_feature_detected!("flagm")); + println!("flagm2: {}", is_aarch64_feature_detected!("flagm2")); + println!("ssbs: {}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {}", is_aarch64_feature_detected!("sb")); + println!("paca: {}", is_aarch64_feature_detected!("paca")); + println!("pacg: {}", is_aarch64_feature_detected!("pacg")); + // println!("pauth-lr: {}", is_aarch64_feature_detected!("pauth-lr")); + println!("dpb: {}", is_aarch64_feature_detected!("dpb")); + println!("dpb2: {}", is_aarch64_feature_detected!("dpb2")); + println!("sve-b16b16: {}", is_aarch64_feature_detected!("sve-b16b16")); + println!("sve2: {}", is_aarch64_feature_detected!("sve2")); + println!("sve2p1: {}", is_aarch64_feature_detected!("sve2p1")); + println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes")); + println!("sve2-sm4: {}", is_aarch64_feature_detected!("sve2-sm4")); + println!("sve2-sha3: {}", is_aarch64_feature_detected!("sve2-sha3")); + println!( + "sve2-bitperm: {}", + is_aarch64_feature_detected!("sve2-bitperm") + ); + println!("frintts: {}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {}", is_aarch64_feature_detected!("i8mm")); + println!("f32mm: {}", is_aarch64_feature_detected!("f32mm")); + println!("f64mm: {}", is_aarch64_feature_detected!("f64mm")); + println!("bf16: {}", is_aarch64_feature_detected!("bf16")); + println!("rand: {}", is_aarch64_feature_detected!("rand")); + println!("bti: {}", is_aarch64_feature_detected!("bti")); + println!("mte: {}", is_aarch64_feature_detected!("mte")); + println!("jsconv: {}", is_aarch64_feature_detected!("jsconv")); + println!("fcma: {}", is_aarch64_feature_detected!("fcma")); + println!("aes: {}", is_aarch64_feature_detected!("aes")); + println!("sha2: {}", is_aarch64_feature_detected!("sha2")); 
+ println!("sha3: {}", is_aarch64_feature_detected!("sha3")); + println!("sm4: {}", is_aarch64_feature_detected!("sm4")); + println!("hbc: {}", is_aarch64_feature_detected!("hbc")); + println!("mops: {}", is_aarch64_feature_detected!("mops")); + println!("ecv: {}", is_aarch64_feature_detected!("ecv")); + println!("cssc: {}", is_aarch64_feature_detected!("cssc")); + println!("fpmr: {}", is_aarch64_feature_detected!("fpmr")); + println!("lut: {}", is_aarch64_feature_detected!("lut")); + println!("faminmax: {}", is_aarch64_feature_detected!("faminmax")); + println!("fp8: {}", is_aarch64_feature_detected!("fp8")); + println!("fp8fma: {}", is_aarch64_feature_detected!("fp8fma")); + println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4")); + println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2")); + println!("wfxt: {}", is_aarch64_feature_detected!("wfxt")); + println!("sme: {}", is_aarch64_feature_detected!("sme")); + println!("sme-b16b16: {}", is_aarch64_feature_detected!("sme-b16b16")); + println!("sme-i16i64: {}", is_aarch64_feature_detected!("sme-i16i64")); + println!("sme-f64f64: {}", is_aarch64_feature_detected!("sme-f64f64")); + println!("sme-fa64: {}", is_aarch64_feature_detected!("sme-fa64")); + println!("sme2: {}", is_aarch64_feature_detected!("sme2")); + println!("sme2p1: {}", is_aarch64_feature_detected!("sme2p1")); + println!("sme-f16f16: {}", is_aarch64_feature_detected!("sme-f16f16")); + println!("sme-lutv2: {}", is_aarch64_feature_detected!("sme-lutv2")); + println!("sme-f8f16: {}", is_aarch64_feature_detected!("sme-f8f16")); + println!("sme-f8f32: {}", is_aarch64_feature_detected!("sme-f8f32")); + println!( + "ssve-fp8fma: {}", + is_aarch64_feature_detected!("ssve-fp8fma") + ); + println!( + "ssve-fp8dot4: {}", + is_aarch64_feature_detected!("ssve-fp8dot4") + ); + println!( + "ssve-fp8dot2: {}", + is_aarch64_feature_detected!("ssve-fp8dot2") + ); +} + +#[test] +#[cfg(all( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
target_os = "windows" +))] +fn aarch64_windows() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "freebsd", target_os = "openbsd") +))] +fn aarch64_bsd() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("sve: {:?}", is_aarch64_feature_detected!("sve")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("tme: {:?}", is_aarch64_feature_detected!("tme")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("pacg: {:?}", is_aarch64_feature_detected!("pacg")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); +} + +#[test] +#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))] +fn aarch64_darwin() { + println!("asimd: {:?}", 
is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {:?}", is_aarch64_feature_detected!("rcpc2")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("fhm: {:?}", is_aarch64_feature_detected!("fhm")); + println!("flagm: {:?}", is_aarch64_feature_detected!("flagm")); + println!("ssbs: {:?}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {:?}", is_aarch64_feature_detected!("sb")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("dpb: {:?}", is_aarch64_feature_detected!("dpb")); + println!("dpb2: {:?}", is_aarch64_feature_detected!("dpb2")); + println!("frintts: {:?}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {:?}", is_aarch64_feature_detected!("i8mm")); + println!("bf16: {:?}", is_aarch64_feature_detected!("bf16")); + println!("bti: {:?}", is_aarch64_feature_detected!("bti")); + println!("fcma: {:?}", is_aarch64_feature_detected!("fcma")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {:?}", is_aarch64_feature_detected!("sha3")); +} + +#[test] +#[cfg(all( + any(target_arch = "riscv32", target_arch = "riscv64"), + any(target_os = "linux", target_os = "android") +))] +fn riscv_linux() { + println!("rv32i: {}", is_riscv_feature_detected!("rv32i")); + println!("rv32e: {}", is_riscv_feature_detected!("rv32e")); + 
println!("rv64i: {}", is_riscv_feature_detected!("rv64i")); + println!("rv128i: {}", is_riscv_feature_detected!("rv128i")); + println!( + "unaligned-scalar-mem: {}", + is_riscv_feature_detected!("unaligned-scalar-mem") + ); + println!( + "unaligned-vector-mem: {}", + is_riscv_feature_detected!("unaligned-vector-mem") + ); + println!("zicsr: {}", is_riscv_feature_detected!("zicsr")); + println!("zicntr: {}", is_riscv_feature_detected!("zicntr")); + println!("zihpm: {}", is_riscv_feature_detected!("zihpm")); + println!("zifencei: {}", is_riscv_feature_detected!("zifencei")); + println!("zihintntl: {}", is_riscv_feature_detected!("zihintntl")); + println!("zihintpause: {}", is_riscv_feature_detected!("zihintpause")); + println!("zimop: {}", is_riscv_feature_detected!("zimop")); + println!("zicbom: {}", is_riscv_feature_detected!("zicbom")); + println!("zicboz: {}", is_riscv_feature_detected!("zicboz")); + println!("zicond: {}", is_riscv_feature_detected!("zicond")); + println!("m: {}", is_riscv_feature_detected!("m")); + println!("a: {}", is_riscv_feature_detected!("a")); + println!("zalrsc: {}", is_riscv_feature_detected!("zalrsc")); + println!("zaamo: {}", is_riscv_feature_detected!("zaamo")); + println!("zawrs: {}", is_riscv_feature_detected!("zawrs")); + println!("zacas: {}", is_riscv_feature_detected!("zacas")); + println!("zam: {}", is_riscv_feature_detected!("zam")); + println!("ztso: {}", is_riscv_feature_detected!("ztso")); + println!("f: {}", is_riscv_feature_detected!("f")); + println!("d: {}", is_riscv_feature_detected!("d")); + println!("q: {}", is_riscv_feature_detected!("q")); + println!("zfh: {}", is_riscv_feature_detected!("zfh")); + println!("zfhmin: {}", is_riscv_feature_detected!("zfhmin")); + println!("zfa: {}", is_riscv_feature_detected!("zfa")); + println!("zfbfmin: {}", is_riscv_feature_detected!("zfbfmin")); + println!("zfinx: {}", is_riscv_feature_detected!("zfinx")); + println!("zdinx: {}", is_riscv_feature_detected!("zdinx")); + 
println!("zhinx: {}", is_riscv_feature_detected!("zhinx")); + println!("zhinxmin: {}", is_riscv_feature_detected!("zhinxmin")); + println!("c: {}", is_riscv_feature_detected!("c")); + println!("zca: {}", is_riscv_feature_detected!("zca")); + println!("zcf: {}", is_riscv_feature_detected!("zcf")); + println!("zcd: {}", is_riscv_feature_detected!("zcd")); + println!("zcb: {}", is_riscv_feature_detected!("zcb")); + println!("zcmop: {}", is_riscv_feature_detected!("zcmop")); + println!("b: {}", is_riscv_feature_detected!("b")); + println!("zba: {}", is_riscv_feature_detected!("zba")); + println!("zbb: {}", is_riscv_feature_detected!("zbb")); + println!("zbc: {}", is_riscv_feature_detected!("zbc")); + println!("zbs: {}", is_riscv_feature_detected!("zbs")); + println!("zbkb: {}", is_riscv_feature_detected!("zbkb")); + println!("zbkc: {}", is_riscv_feature_detected!("zbkc")); + println!("zbkx: {}", is_riscv_feature_detected!("zbkx")); + println!("zknd: {}", is_riscv_feature_detected!("zknd")); + println!("zkne: {}", is_riscv_feature_detected!("zkne")); + println!("zknh: {}", is_riscv_feature_detected!("zknh")); + println!("zksed: {}", is_riscv_feature_detected!("zksed")); + println!("zksh: {}", is_riscv_feature_detected!("zksh")); + println!("zkr: {}", is_riscv_feature_detected!("zkr")); + println!("zkn: {}", is_riscv_feature_detected!("zkn")); + println!("zks: {}", is_riscv_feature_detected!("zks")); + println!("zk: {}", is_riscv_feature_detected!("zk")); + println!("zkt: {}", is_riscv_feature_detected!("zkt")); + println!("v: {}", is_riscv_feature_detected!("v")); + println!("zve32x: {}", is_riscv_feature_detected!("zve32x")); + println!("zve32f: {}", is_riscv_feature_detected!("zve32f")); + println!("zve64x: {}", is_riscv_feature_detected!("zve64x")); + println!("zve64f: {}", is_riscv_feature_detected!("zve64f")); + println!("zve64d: {}", is_riscv_feature_detected!("zve64d")); + println!("zvfh: {}", is_riscv_feature_detected!("zvfh")); + println!("zvfhmin: {}", 
is_riscv_feature_detected!("zvfhmin")); + println!("zvfbfmin: {}", is_riscv_feature_detected!("zvfbfmin")); + println!("zvfbfwma: {}", is_riscv_feature_detected!("zvfbfwma")); + println!("zvbb: {}", is_riscv_feature_detected!("zvbb")); + println!("zvbc: {}", is_riscv_feature_detected!("zvbc")); + println!("zvkb: {}", is_riscv_feature_detected!("zvkb")); + println!("zvkg: {}", is_riscv_feature_detected!("zvkg")); + println!("zvkned: {}", is_riscv_feature_detected!("zvkned")); + println!("zvknha: {}", is_riscv_feature_detected!("zvknha")); + println!("zvknhb: {}", is_riscv_feature_detected!("zvknhb")); + println!("zvksed: {}", is_riscv_feature_detected!("zvksed")); + println!("zvksh: {}", is_riscv_feature_detected!("zvksh")); + println!("zvkn: {}", is_riscv_feature_detected!("zvkn")); + println!("zvknc: {}", is_riscv_feature_detected!("zvknc")); + println!("zvkng: {}", is_riscv_feature_detected!("zvkng")); + println!("zvks: {}", is_riscv_feature_detected!("zvks")); + println!("zvksc: {}", is_riscv_feature_detected!("zvksc")); + println!("zvksg: {}", is_riscv_feature_detected!("zvksg")); + println!("zvkt: {}", is_riscv_feature_detected!("zvkt")); + println!("j: {}", is_riscv_feature_detected!("j")); + println!("p: {}", is_riscv_feature_detected!("p")); +} + +#[test] +#[cfg(all(target_arch = "powerpc", target_os = "linux"))] +fn powerpc_linux() { + println!("altivec: {}", is_powerpc_feature_detected!("altivec")); + println!("vsx: {}", is_powerpc_feature_detected!("vsx")); + println!("power8: {}", is_powerpc_feature_detected!("power8")); +} + +#[test] +#[cfg(all( + target_arch = "powerpc64", + any(target_os = "linux", target_os = "freebsd"), +))] +fn powerpc64_linux_or_freebsd() { + println!("altivec: {}", is_powerpc64_feature_detected!("altivec")); + println!("vsx: {}", is_powerpc64_feature_detected!("vsx")); + println!("power8: {}", is_powerpc64_feature_detected!("power8")); + println!("power9: {}", is_powerpc64_feature_detected!("power9")); +} + +#[test] 
+#[cfg(all(target_arch = "s390x", target_os = "linux",))] +fn s390x_linux() { + println!("vector: {}", is_s390x_feature_detected!("vector")); +} diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs new file mode 100644 index 000000000000..fa3a23c79681 --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -0,0 +1,107 @@ +#![allow(internal_features)] +#![cfg_attr( + any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64" + ), + feature(stdarch_internal) +)] +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] +#![cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + feature(stdarch_aarch64_feature_detection) +)] +#![cfg_attr( + any(target_arch = "powerpc", target_arch = "powerpc64"), + feature(stdarch_powerpc_feature_detection) +)] +#![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] +#![cfg_attr( + any(target_arch = "riscv32", target_arch = "riscv64"), + feature(stdarch_riscv_feature_detection) +)] +#![cfg_attr( + target_arch = "loongarch64", + feature(stdarch_loongarch_feature_detection) +)] + +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64" +))] +#[macro_use] +extern crate std_detect; + +#[test] +#[cfg(target_arch = "arm")] +fn arm() { + let _ = is_arm_feature_detected!("neon"); + let _ = is_arm_feature_detected!("neon",); +} + +#[test] +#[cfg(any(target_arch = "aarch64", target_arch = 
"arm64ec"))] +fn aarch64() { + let _ = is_aarch64_feature_detected!("fp"); + let _ = is_aarch64_feature_detected!("fp",); +} + +#[test] +#[cfg(target_arch = "loongarch64")] +fn loongarch64() { + let _ = is_loongarch_feature_detected!("lsx"); + let _ = is_loongarch_feature_detected!("lsx",); +} + +#[test] +#[cfg(target_arch = "powerpc")] +fn powerpc() { + let _ = is_powerpc_feature_detected!("altivec"); + let _ = is_powerpc_feature_detected!("altivec",); +} + +#[test] +#[cfg(target_arch = "powerpc64")] +fn powerpc64() { + let _ = is_powerpc64_feature_detected!("altivec"); + let _ = is_powerpc64_feature_detected!("altivec",); +} + +#[test] +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +fn riscv() { + let _ = is_riscv_feature_detected!("zk"); + let _ = is_riscv_feature_detected!("zk",); +} + +#[test] +#[cfg(target_arch = "s390x")] +fn s390x() { + let _ = is_s390x_feature_detected!("vector"); + let _ = is_s390x_feature_detected!("vector",); +} + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn x86() { + let _ = is_x86_feature_detected!("sse"); + let _ = is_x86_feature_detected!("sse",); +} diff --git a/library/stdarch/crates/std_detect/tests/x86-specific.rs b/library/stdarch/crates/std_detect/tests/x86-specific.rs new file mode 100644 index 000000000000..d9ec79821baf --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/x86-specific.rs @@ -0,0 +1,117 @@ +#![cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#![allow(internal_features)] +#![feature( + stdarch_internal, + x86_amx_intrinsics, + xop_target_feature, + movrs_target_feature +)] + +#[macro_use] +extern crate std_detect; + +#[test] +fn dump() { + println!("aes: {:?}", is_x86_feature_detected!("aes")); + println!("pclmulqdq: {:?}", is_x86_feature_detected!("pclmulqdq")); + println!("rdrand: {:?}", is_x86_feature_detected!("rdrand")); + println!("rdseed: {:?}", is_x86_feature_detected!("rdseed")); + println!("tsc: {:?}", is_x86_feature_detected!("tsc")); + 
println!("sse: {:?}", is_x86_feature_detected!("sse")); + println!("sse2: {:?}", is_x86_feature_detected!("sse2")); + println!("sse3: {:?}", is_x86_feature_detected!("sse3")); + println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); + println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); + println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); + println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); + println!("avx: {:?}", is_x86_feature_detected!("avx")); + println!("avx2: {:?}", is_x86_feature_detected!("avx2")); + println!("sha512: {:?}", is_x86_feature_detected!("sha512")); + println!("sm3: {:?}", is_x86_feature_detected!("sm3")); + println!("sm4: {:?}", is_x86_feature_detected!("sm4")); + println!("avx512f: {:?}", is_x86_feature_detected!("avx512f")); + println!("avx512cd: {:?}", is_x86_feature_detected!("avx512cd")); + println!("avx512er: {:?}", is_x86_feature_detected!("avx512er")); + println!("avx512pf: {:?}", is_x86_feature_detected!("avx512pf")); + println!("avx512bw: {:?}", is_x86_feature_detected!("avx512bw")); + println!("avx512dq: {:?}", is_x86_feature_detected!("avx512dq")); + println!("avx512vl: {:?}", is_x86_feature_detected!("avx512vl")); + println!("avx512_ifma: {:?}", is_x86_feature_detected!("avx512ifma")); + println!("avx512vbmi {:?}", is_x86_feature_detected!("avx512vbmi")); + println!( + "avx512_vpopcntdq: {:?}", + is_x86_feature_detected!("avx512vpopcntdq") + ); + println!("avx512vbmi2: {:?}", is_x86_feature_detected!("avx512vbmi2")); + println!("gfni: {:?}", is_x86_feature_detected!("gfni")); + println!("vaes: {:?}", is_x86_feature_detected!("vaes")); + println!("vpclmulqdq: {:?}", is_x86_feature_detected!("vpclmulqdq")); + println!("avx512vnni: {:?}", is_x86_feature_detected!("avx512vnni")); + println!( + "avx512bitalg: {:?}", + is_x86_feature_detected!("avx512bitalg") + ); + println!("avx512bf16: 
{:?}", is_x86_feature_detected!("avx512bf16")); + println!( + "avx512vp2intersect: {:?}", + is_x86_feature_detected!("avx512vp2intersect") + ); + println!("avx512fp16: {:?}", is_x86_feature_detected!("avx512fp16")); + println!("fma: {:?}", is_x86_feature_detected!("fma")); + println!("abm: {:?}", is_x86_feature_detected!("abm")); + println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); + println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); + println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); + println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); + println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); + println!("xsave: {:?}", is_x86_feature_detected!("xsave")); + println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); + println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); + println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); + println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); + println!("adx: {:?}", is_x86_feature_detected!("adx")); + println!("rtm: {:?}", is_x86_feature_detected!("rtm")); + println!("movbe: {:?}", is_x86_feature_detected!("movbe")); + println!("avxvnni: {:?}", is_x86_feature_detected!("avxvnni")); + println!("avxvnniint8: {:?}", is_x86_feature_detected!("avxvnniint8")); + println!( + "avxneconvert: {:?}", + is_x86_feature_detected!("avxneconvert") + ); + println!("avxifma: {:?}", is_x86_feature_detected!("avxifma")); + println!( + "avxvnniint16: {:?}", + is_x86_feature_detected!("avxvnniint16") + ); + println!("amx-bf16: {:?}", is_x86_feature_detected!("amx-bf16")); + println!("amx-tile: {:?}", is_x86_feature_detected!("amx-tile")); + println!("amx-int8: {:?}", is_x86_feature_detected!("amx-int8")); + println!("amx-fp16: {:?}", is_x86_feature_detected!("amx-fp16")); + println!("amx-complex: {:?}", is_x86_feature_detected!("amx-complex")); + println!("xop: {:?}", is_x86_feature_detected!("xop")); + println!("kl: 
{:?}", is_x86_feature_detected!("kl")); + println!("widekl: {:?}", is_x86_feature_detected!("widekl")); + println!("movrs: {:?}", is_x86_feature_detected!("movrs")); + println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8")); + println!( + "amx-transpose: {:?}", + is_x86_feature_detected!("amx-transpose") + ); + println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32")); + println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512")); + println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs")); +} + +#[test] +#[allow(deprecated)] +fn x86_deprecated() { + println!("avx512gfni {:?}", is_x86_feature_detected!("avx512gfni")); + println!("avx512vaes {:?}", is_x86_feature_detected!("avx512vaes")); + println!( + "avx512vpclmulqdq {:?}", + is_x86_feature_detected!("avx512vpclmulqdq") + ); +} diff --git a/library/stdarch/crates/stdarch-gen-arm/Cargo.toml b/library/stdarch/crates/stdarch-gen-arm/Cargo.toml new file mode 100644 index 000000000000..899296d25ea7 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "stdarch-gen-arm" +version = "0.1.0" +authors = ["Luca Vizzarro ", + "Jamie Cunliffe ", + "Adam Gemmell ", + "James Barford-Evans "] +license = "MIT OR Apache-2.0" +edition = "2024" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +itertools = "0.14.0" +lazy_static = "1.4.0" +proc-macro2 = "1.0" +quote = "1.0" +regex = "1.5" +serde = { version = "1.0", features = ["derive"] } +serde_with = "1.14" +serde_yaml = "0.8" +walkdir = "2.3.2" diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml new file mode 100644 index 000000000000..f658267b9a19 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -0,0 +1,14198 @@ +arch_cfgs: + - arch_name: aarch64 + target_feature: [neon] + llvm_prefix: 
llvm.aarch64.neon +# Generate big endian shuffles +auto_big_endian: true + +# We do not want to automatically generate signed/unsigned casts +auto_llvm_sign_conversion: false + +# Repeatedly used anchors +# #[stable(feature = "neon_intrinsics", since = "1.59.0")] +neon-stable: &neon-stable + FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + +# #[cfg(not(target_arch = "arm"))] +target-not-arm: &target-not-arm + FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]] + +# #[cfg_attr(all(test, not(target_env = "msvc"))] +msvc-disabled: &msvc-disabled + FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]] + +# all(test, target_arch = "arm") +test-is-arm: &test-is-arm + FnCall: [all, [test, 'target_arch = "arm"']] + +# #[target_feature(enable = "neon,aes")] +neon-aes: &neon-aes + FnCall: [target_feature, ['enable = "neon,aes"']] + +# #[target_feature(enable = "neon,i8mm")] +neon-i8mm: &neon-i8mm + FnCall: [target_feature, ['enable = "neon,i8mm"']] + +# #[target_feature(enable = "neon,fp16")] +neon-fp16: &neon-fp16 + FnCall: [target_feature, ['enable = "neon,fp16"']] + +# #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +enable-fhm: &enable-fhm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fhm"']] }]] + +enable-fcma: &enable-fcma + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fcma"']] }]] + +# #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +neon-unstable-fcma: &neon-unstable-fcma + FnCall: [unstable, ['feature = "stdarch_neon_fcma"', 'issue = "117222"']] + +aarch64-crc-stable: &aarch64-crc-stable + FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']] + +# #[unstable(feature = "stdarch_neon_f16", issue = "136306")] +neon-unstable-f16: &neon-unstable-f16 + FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] + +# #[unstable(feature = 
"stdarch_neon_feat_lut", issue = "138050")] +neon-unstable-feat-lut: &neon-unstable-feat-lut + FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']] + +# #[cfg(target_endian = "little")] +little-endian: &little-endian + FnCall: [cfg, ['target_endian = "little"']] + +# #[cfg(target_endian = "big")] +big-endian: &big-endian + FnCall: [cfg, ['target_endian = "big"']] + +intrinsics: + - name: "vaddd_{type}" + doc: Add + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [nop] + safety: safe + types: + - i64 + - u64 + compose: + - MethodCall: + - a + - wrapping_add + - - b + + - name: "veor3{neon_type.no}" + doc: Three-way exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [eor3] + safety: safe + types: + - int8x16_t + - int16x8_t + - int32x4_t + - int64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.eor3s.{neon_type}" + links: + - link: "llvm.aarch64.crypto.eor3s.{neon_type}" + arch: aarch64,arm64ec + + - name: "veor3{neon_type.no}" + doc: Three-way exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [eor3] + safety: safe + types: + - uint8x16_t + - uint16x8_t + - uint32x4_t + - uint64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.eor3u.{neon_type}" + links: + - link: "llvm.aarch64.crypto.eor3u.{neon_type}" + arch: aarch64,arm64ec + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments of Floating + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fabd] + safety: 
safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fabd.{neon_type}" + links: + - link: "llvm.aarch64.neon.fabd.{neon_type}" + arch: aarch64,arm64ec + + - name: "vabd{type[0]}" + doc: "Floating-point absolute difference" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fabd] + safety: safe + types: + - ['s_f32', 'f32'] + - ['d_f64', 'f64'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vabd_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - 0 + + - name: "vabd{type[0]}" + doc: "Floating-point absolute difference" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fabd] + safety: safe + types: + - ['h_f16', 'f16'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vabd_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - 0 + + - name: "vabdl_high{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [sabdl] + safety: safe + types: + - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - a + - a + - [8, 9, 10, 11, 12, 13, 14, 15] + - Let: + - d + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! 
+ - - b + - b + - [8, 9, 10, 11, 12, 13, 14, 15] + - Let: + - e + - "{neon_type[3]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d + - FnCall: + - simd_cast + - - e + + - name: "vabdl_high{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [sabdl] + safety: safe + types: + - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - a + - a + - [4, 5, 6, 7] + - Let: + - d + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - b + - b + - [4, 5, 6, 7] + - Let: + - e + - "{neon_type[3]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d + - FnCall: + - simd_cast + - - e + + - name: "vabdl_high{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [sabdl] + safety: safe + types: + - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - a + - a + - [2, 3] + - Let: + - d + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! 
+ - - b + - b + - [2, 3] + - Let: + - e + - "{neon_type[3]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d + - FnCall: + - simd_cast + - - e + + - name: "vceq{neon_type[0].no}" + doc: "Compare bitwise Equal (vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint64x1_t, uint64x1_t] + - [uint64x2_t, uint64x2_t] + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + - [poly64x1_t, uint64x1_t] + - [poly64x2_t, uint64x2_t] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vceq{neon_type[0].no}" + doc: "Floating-point compare equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vceq{type[0]}" + doc: "Floating-point compare equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vceq_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + + - name: "vceq{type[0]}" + doc: "Floating-point compare equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceq_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vceqd_{type[0]}" + doc: "Compare bitwise equal" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64", "s64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vceq_{type[2]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vtst{neon_type[0].no}" + doc: "Signed compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)'] + - [poly64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)'] + - [poly64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[2]}", "{type[3]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vtstd_{type[0]}" + doc: "Compare bitwise test bits nonzero" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, 
{FnCall: [assert_instr, [tst]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64", "s64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vtst_{type[2]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vuqadd{type[0]}" + doc: "Signed saturating accumulate of unsigned value" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_s32", "i32", "u32"] + - ["d_s64", "i64", "u64"] + compose: + - LLVMLink: + name: "vuqadd{type[0]}" + links: + - link: "llvm.aarch64.neon.suqadd.{type[1]}" + arch: aarch64,arm64ec + + - name: "vuqadd{type[0]}" + doc: "Signed saturating accumulate of unsigned value" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["b_s8", "i8", "u8", "s8"] + - ["h_s16", "i16", "u16", "s16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vuqadd_{type[3]}" + - - FnCall: ["vdup_n_{type[3]}", [a]] + - FnCall: ["vdup_n_{type[2]}", [b]] + - '0' + + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fabs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vcgt{neon_type[0].no}" + doc: "Compare signed greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type.no}" + doc: "Compare unsigned greater than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{type[0]}" + doc: "Floating-point compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: 
[stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - 'simd_extract!' + - - FnCall: + - "vcgt_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + + - name: "vcgt{type[0]}" + doc: "Floating-point compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - 'simd_extract!' + - - FnCall: + - "vcgt_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vclt{neon_type[0].no}" + doc: "Compare signed less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Compare signed less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - 
[float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcle{type[0]}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcle_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + + - name: "vcle{type[0]}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcle_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vcge{neon_type[0].no}" + doc: "Compare signed greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcgez{neon_type[0].no}" + doc: "Compare signed greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 
'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_ge, [a, {FnCall: [transmute, [b]]}]] + + - name: "vcgezd_s64" + doc: "Compare signed greater than or equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vcgez_s64 + - - FnCall: [transmute, [a]] + + - name: "vclez{neon_type[0].no}" + doc: "Compare signed less than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmle]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + + - name: "vclez{neon_type[0].no}" + doc: "Floating-point compare less than or equal to 
zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmle]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + + - name: "vclez{type[0]}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vclez{type[0]}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vclez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcltz{neon_type[0].no}" + doc: "Compare signed less than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmlt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + + - name: "vcltz{neon_type[0].no}" + doc: "Floating-point compare less than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmlt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + + - name: "vcltz{type[0]}" + doc: "Floating-point compare less than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + 
types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcltz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcltz{type[0]}" + doc: "Floating-point compare less than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcltz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcltzd_s64" + doc: "Compare less than zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [asr]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vcltz_s64 + - - FnCall: [transmute, [a]] + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcagt{type[0]}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32", i32] + - ["d_f64", "f64", "u64", i64] + compose: + - LLVMLink: + 
name: "vcagt{type[0]}" + links: + - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + + - name: "vcagt{type[0]}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16", i32] + compose: + - LLVMLink: + name: "vcagt{type[0]}" + return_type: "{type[3]}" + links: + - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + - 'unsafe {{ _vcagth_f16(a, b) as u16 }}' + + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcage{type[0]}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32", i32] + - ["d_f64", "f64", "u64", i64] + compose: + - LLVMLink: + name: "vcage{type[0]}" + links: + - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vcage{type[0]}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, 
[facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16", i32] + compose: + - LLVMLink: + name: "vcage{type[0]}" + return_type: "{type[3]}" + links: + - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + - "unsafe {{ _vcageh_f16(a, b) as u16 }}" + + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + + - name: "vcalt{type[0]}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: ["vcagt{type[0]}", [b, a]] + + - name: "vcalt{type[0]}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: ["vcagt{type[0]}", [b, a]] + + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + 
compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + - name: "vcale{type[0]}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: ["vcage{type[0]}", [b, a]] + + - name: "vcale{type[0]}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: ["vcage{type[0]}", [b, a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int64x1_t, float64x1_t] + - [int64x2_t, float64x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{type[0]}_{type[3]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "i32", "f32", s32] + - ["d_f64", "i64", "f64", s64] + compose: + - Identifier: ["a as {type[2]}", Symbol] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - FnCall: 
[stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint64x1_t, float64x1_t] + - [uint64x2_t, float64x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{type[2]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u32", "f32", "s_f32"] + - ["u64", "f64", "d_f64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x1_t, float64x1_t] + - [int64x2_t, float64x2_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N], [], true] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [i32, f16, 'h'] + - [i64, f16, 'h'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: 
+ - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N], [], true] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[4]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [f16, s16, 'h', i32, i16] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[3]}_{type[0]}::(a) as i16" + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [u32, f16, 'h'] + - [u64, f16, 'h'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N], [], true] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [i16, f16, 'h', 'i32', 'as i32'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - 
"vcvt{type[2]}_n_{type[1]}_{type[3]}::(a {type[4]}) as {type[1]}" + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [u16, f16, 'h', u32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a as {type[3]}) as {type[1]}" + + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i32", "f32", 's_n_f32_s32', 'N >= 1 && N <= 32'] + - ["i64", "f64", 'd_n_f64_s64', 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [uint64x1_t, float64x1_t] + - [uint64x2_t, float64x2_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - LLVMLink: + name: 
"vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N], [], true] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["u32", "f32", 's_n_f32_u32', 'N >= 1 && N <= 32'] + - ["u64", "f64", 'd_n_f64_u64', 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", ["a", N], [], true] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "i32", "s_s32_f32", "32"] + - ["f64", "i64", "d_s64_f64", "64"] + compose: + - Identifier: ["a as i{type[3]}", Symbol] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", "s_u32_f32"] + - ["f64", "u64", "d_u64_f64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + 
arguments: ["a: {type[3]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["s16", "f16", "h_f16_s16", i16] + - ["s32", "f16", "h_f16_s32", i32] + - ["s64", "f16", "h_f16_s64", i64] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to signed fixed-point" + arguments: ["a: {type[0]}"] + return_type: "{type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "s16", "h", i16, 'a as i16'] + - ["f16", "s32", "h", i32, 'a as i32'] + - ["f16", "s64", "h", i64, 'a as i64'] + compose: + - Identifier: ["{type[4]}", Symbol] + + - name: "vcvt{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned fixed-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", "h", 'a as u16'] + - ["f16", "u32", "h", 'a as u32'] + - ["f16", "u64", "h", 'a as u64'] + compose: + - Identifier: ["{type[3]}", Symbol] + + + - name: "vcvt{type[2]}" + doc: "Unsigned fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["u16", "f16", "h_f16_u16"] + - ["u32", "f16", "h_f16_u32"] + - ["u64", "f16", "h_f16_u64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + + - name: "vcvt_f64_f32" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = 
"1.59.0"']] + safety: safe + types: + - [float32x2_t, float64x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_high_f64_f32" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x4_t, float64x2_t] + compose: + - Let: + - b + - float32x2_t + - FnCall: + - simd_shuffle! + - - a + - a + - '[2, 3]' + - FnCall: [simd_cast, [b]] + + - name: "vcvt_high_f16_f32" + doc: "Floating-point convert to lower precision" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x8_t, float16x4_t, float32x4_t] + compose: + - FnCall: + - vcombine_f16 + - - a + - FnCall: [vcvt_f16_f32, [b]] + + - name: "vcvt_high_f32_f16" + doc: "Floating-point convert to higher precision" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float32x4_t, float16x8_t] + compose: + - FnCall: + - vcvt_f32_f16 + - - FnCall: [vget_high_f16, [a]] + + + - name: "vcvt_f32_f64" + doc: "Floating-point convert" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x2_t, float32x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_high_f32_f64" + doc: "Floating-point convert to lower precision narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, 
[test, {FnCall: [assert_instr, [fcvtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, float64x2_t, float32x4_t] + compose: + - FnCall: + - simd_shuffle! + - - a + - FnCall: [simd_cast, [b]] + - '[0, 1, 2, 3]' + + - name: "vcvtx_f32_f64" + doc: "Floating-point convert to lower precision narrow, rounding to odd" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x2_t, float32x2_t] + compose: + - LLVMLink: + name: "vcvtx_f32_f64" + links: + - link: "llvm.aarch64.neon.fcvtxn.v2f32.v2f64" + arch: aarch64,arm64ec + + - name: "vcvtxd_f32_f64" + doc: "Floating-point convert to lower precision narrow, rounding to odd" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f64", "f32"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - vcvtx_f32_f64 + - - FnCall: [vdupq_n_f64, [a]] + - '0' + + - name: "vcvtx_high_f32_f64" + doc: "Floating-point convert to lower precision narrow, rounding to odd" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, float64x2_t, float32x4_t] + compose: + - FnCall: + - simd_shuffle! 
+ - - a + - FnCall: [vcvtx_f32_f64, [b]] + - '[0, 1, 2, 3]' + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [float64x1_t, int64x1_t, _n_s64_f64, '64'] + - [float64x2_t, int64x2_t, q_n_s64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["f32", "i32", s_n_s32_f32, '32'] + - ["f64", "i64", d_n_s64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + 
static_defs: ['const N: i32'] + safety: safe + types: + - ["f16", "i32", 'h', '16'] + - ["f16", "i64", 'h', '16'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N], [], true] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [f16, u16, 'h', u32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[3]}_{type[0]}::(a) as {type[1]}" + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - ["f16", "u32", 'h', '16'] + - ["f16", "u64", 'h', '16'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, 
[test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [float64x1_t, uint64x1_t, _n_u64_f64, '64'] + - [float64x2_t, uint64x2_t, q_n_u64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["f32", "u32", s_n_u32_f32, '32'] + - ["f64", "u64", d_n_u64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, int32x2_t, _s32_f32] + - [float32x4_t, int32x4_t, q_s32_f32] + - [float64x1_t, int64x1_t, _s64_f64] + - [float64x2_t, int64x2_t, q_s64_f64] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: 
"llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvta_{neon_type[1]}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u32", 'h_u32_f16'] + - ["f16", "u64", 'h_u64_f16'] + + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + 
safety: safe + types: + - ["f16", "i32", 'h_s32_f16'] + - ["f16", "i64", 'h_s64_f16'] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i16", 'h_s16_f16', 's32'] + compose: + - 'vcvtah_{type[3]}_f16(a) as i16' + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", 'h_u16_f16', 'u32'] + compose: + - 'vcvtah_{type[3]}_f16(a) as u16' + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", 's_u32_f32'] + - ["f64", "u64", 'd_u64_f64'] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, int32x2_t] + - 
[float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtn{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: 
"vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtnh_{type[3]}_f16(a) as i16' + + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtnh_{type[3]}_f16(a) as u16' + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: 
"Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{type[2]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - 
FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}" + doc: "Floating-point convert to signed integer, rounding toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, 
uint64x2_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", 's_u32_f32'] + - ["f64", "u64", 'd_u64_f64'] + compose: + - LLVMLink: + name: "vcvtn{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", s_u32_f32] + - ["f64", "u64", d_u64_f64] + compose: + - LLVMLink: + name: "vcvtm{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: 
"vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", s_u32_f32, 'i32'] + - ["f64", "u64", d_u64_f64, 'u64'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}" + arguments: + - "a: {type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{type[3]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: 
{neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtph_{type[3]}_f16(a) as i16' + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert 
to unsigned integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtph_{type[3]}_f16(a) as u16' + + - name: "vdup{neon_type.laneq_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - poly64x2_t + - float64x2_t + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: [simd_shuffle!, [a, a, '[N as u32, N as u32]']] + + - name: "vdup{neon_type[1].lane_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [poly64x1_t, poly64x2_t] + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert!, ['N == 0']] + - FnCall: [simd_shuffle!, [a, a, '[N as u32, N as u32]']] + + - name: "vdup{neon_type.lane_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - poly64x1_t + - float64x1_t + compose: + - FnCall: [static_assert!, ['N == 0']] + - Identifier: [a, Symbol] + + - name: 
"vdupd{neon_type[0].lane_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x1_t, "i64"] + - [uint64x1_t, "u64"] + - [float64x1_t, "f64"] + compose: + - FnCall: [static_assert!, ['N == 0']] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup_laneq_{neon_type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [poly64x2_t, poly64x1_t, 'u64'] + - [float64x2_t, float64x1_t, 'f64'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: + - "transmute::<{type[2]}, _>" + - - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, "i32", s_lane_s32] + - [int64x2_t, "i64", d_laneq_s64] + - [uint32x2_t, "u32", s_lane_u32] + - [uint64x2_t, "u64", d_laneq_u64] + - [float32x2_t, "f32", s_lane_f32] + - [float64x2_t, "f64", d_laneq_f64] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same 
value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, "i8", b_lane_s8] + - [int16x8_t, "i16", h_laneq_s16] + - [uint8x8_t, "u8", b_lane_u8] + - [uint16x8_t, "u16", h_laneq_u16] + - [poly8x8_t, "p8", b_lane_p8] + - [poly16x8_t, "p16", h_laneq_p16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 3]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [float16x4_t, "f16", h_lane_f16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 2]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Extract an element from a vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [float16x8_t, "f16", h_laneq_f16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 4]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Extract an element from a vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: 
['const N: i32'] + safety: safe + types: + - [int8x16_t, "i8", b_laneq_s8] + - [uint8x16_t, "u8", b_laneq_u8] + - [poly8x16_t, "p8", b_laneq_p8] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 4]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x4_t, "i16", h_lane_s16] + - [int32x4_t, "i32", s_laneq_s32] + - [uint16x4_t, "u16", h_lane_u16] + - [uint32x4_t, "u32", s_laneq_u32] + - [poly16x4_t, "p16", h_lane_p16] + - [float32x4_t, "f32", s_laneq_f32] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 2]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [poly64x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + - [float64x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + - Identifier: ["{type[2]}", Symbol] + + - name: "vmla{neon_type.no}" + doc: "Floating-point multiply-add to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: 
{neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_add, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vmlal_high_{neon_type[1]}" + doc: "Signed multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]'] + - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]', '[2, 3]'] + compose: + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]}] + - FnCall: ["vmlal_{neon_type[2]}", [a, b, c]] + + - name: "vmlal_high_{neon_type[1]}" + doc: "Unsigned multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + compose: + - Let: + - b + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - Let: + - c + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vmlal_{neon_type[1]}", [a, b, c]] + + - name: "vmlsl_high_{neon_type[1]}" + doc: "Signed multiply-subtract long" + arguments: ["a: 
{neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]'] + - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]'] + compose: + - Let: + - b + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - Let: + - c + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]] + + - name: "vmlsl_high_{neon_type[1]}" + doc: "Unsigned multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + compose: + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]] + + - name: "vmovn_high{neon_type[1].noq}" + doc: Extract narrow + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: [*neon-stable] + assert_instr: [xtn2] + safety: safe + types: + - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, 
uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - Let: + - c + - "{neon_type[0]}" + - FnCall: + - simd_cast + - - b + - FnCall: + - simd_shuffle! + - - a + - c + - "{type[3]}" + + - name: "vneg{neon_type.no}" + doc: Negate + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [neg] + safety: safe + types: + - int64x1_t + - int64x2_t + compose: + - FnCall: + - simd_neg + - - a + + - name: "vnegd_s64" + doc: Negate + arguments: ["a: {type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [neg] + safety: safe + types: + - i64 + compose: + - MethodCall: [a, wrapping_neg, []] + + + - name: "vnegh_{type}" + doc: Negate + arguments: ["a: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fneg] + safety: safe + types: + - f16 + compose: + - '-a' + + - name: "vneg{neon_type.no}" + doc: Negate + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [fneg] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: + - simd_neg + - - a + + - name: "vqneg{type[1]}" + doc: Signed saturating negate + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqneg] + safety: safe + types: + - [i8, 'b_s8', 's8'] + - [i16, 'h_s16', 's16'] + - [i32, 's_s32', 's32'] + - [i64, 'd_s64', 's64'] + compose: + - FnCall: + - 'simd_extract!' 
+ - - FnCall: + - 'vqneg_{type[2]}' + - - FnCall: ['vdup_n_{type[2]}', [a]] + - 0 + + - name: "vqneg{neon_type[0].no}" + doc: Signed saturating negate + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqneg] + safety: safe + types: + - [int64x1_t, 'i64'] + - [int64x2_t, 'i64'] + compose: + - LLVMLink: + name: "sqneg.{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.sqneg.v{neon_type[0].lane}{type[1]}" + arch: aarch64,arm64ec + + - name: "vqsub{type[1]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqsub] + safety: safe + types: + - [i32, 's_s32', 'i32'] + - [i64, 'd_s64', 'i64'] + compose: + - LLVMLink: + name: "sqsub.{type[0]}" + links: + - link: "llvm.aarch64.neon.sqsub.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqsub{type[1]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqsub] + safety: safe + types: + - [u32, 's_u32', 'i32'] + - [u64, 'd_u64', 'i64'] + compose: + - LLVMLink: + name: "uqsub.{type[0]}" + links: + - link: "llvm.aarch64.neon.uqsub.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqsub{type[3]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqsub] + safety: safe + types: + - [i8, int8x8_t, s8, 'b_s8'] + - [i16, int16x4_t, s16, 'h_s16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - b + - FnCall: + - 'simd_extract!' 
+ - - FnCall: + - "vqsub_{type[2]}" + - - a + - b + - "0" + + - name: "vqsub{type[3]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqsub] + safety: safe + types: + - [u8, uint8x8_t, u8, 'b_u8'] + - [u16, uint16x4_t, u16, 'h_u16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - b + - FnCall: + - 'simd_extract!' + - - FnCall: + - "vqsub_{type[2]}" + - - a + - b + - "0" + + - name: "vrbit{neon_type.no}" + doc: Reverse bit order + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [rbit] + safety: safe + types: + - int8x8_t + - int8x16_t + compose: + - FnCall: + - simd_bitreverse + - - a + + - name: "vrbit{neon_type[0].no}" + doc: Reverse bit order + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: [*neon-stable] + assert_instr: [rbit] + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [poly8x8_t, int8x8_t] + - [poly8x16_t, int8x16_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vrbit{neon_type[1].no}" + - - FnCall: [transmute, [a]] + + - name: "vrndx{neon_type.no}" + doc: "Floating-point round to integral exact, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintx] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.rint.{neon_type}" + links: + - link: "llvm.rint.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndx{neon_type.no}" + doc: "Floating-point round to integral exact, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintx] + safety: safe + types: + - float16x4_t + - 
float16x8_t + compose: + - LLVMLink: + name: "llvm.rint.{neon_type}" + links: + - link: "llvm.rint.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndx{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintx] + safety: safe + types: + - [f16, 'h_'] + compose: + - FnCall: [round_ties_even_f16, [a]] + + + - name: "vrnda{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frinta] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_round, [a]] + + + - name: "vrnda{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinta] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_round, [a]] + + + - name: "vrnda{type[1]}{type[0]}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinta] + safety: safe + types: + - [f16, 'h_'] + compose: + - FnCall: [roundf16, [a], [], true] + + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintn] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "frintn.{neon_type}" + links: + - link: "llvm.roundeven.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndns_{type}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: 
{type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [frintn] + safety: safe + types: + - f32 + compose: + - LLVMLink: + name: "roundeven.{type}" + links: + - link: "llvm.roundeven.{type}" + arch: aarch64,arm64ec + + - name: "vrndn{type[1]}{type[0]}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintn] + safety: safe + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.roundeven.{type[0]}" + links: + - link: "llvm.roundeven.{type[0]}" + arch: aarch64,arm64ec + + - name: "vrndm{neon_type.no}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintm] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_floor, [a]] + + + - name: "vrndm{neon_type.no}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintm] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_floor, [a]] + + + - name: "vrndm{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintm] + safety: safe + types: + - [f16, 'h_'] + compose: + - FnCall: [floorf16, [a], [], true] + + + + - name: "vrndp{neon_type.no}" + doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintp] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_ceil, [a]] + + + - name: "vrndp{neon_type.no}" + 
doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintp] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_ceil, [a]] + + - name: "vrndp{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintp] + safety: safe + types: + - [f16, 'h_'] + compose: + - FnCall: [ceilf16, [a], [], true] + + - name: "vrnd{neon_type.no}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintz] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_trunc, [a]] + + - name: "vrnd{neon_type.no}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintz] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_trunc, [a]] + + + - name: "vrnd{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintz] + safety: safe + types: + - [f16, 'h_'] + compose: + - FnCall: [truncf16, [a], [], true] + + + - name: "vrndi{neon_type.no}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frinti] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.nearbyint.{neon_type}" + links: + - link: "llvm.nearbyint.{neon_type}" + 
arch: aarch64,arm64ec + + + - name: "vrndi{neon_type.no}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinti] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.nearbyint.{neon_type}" + links: + - link: "llvm.nearbyint.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndi{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + # TODO: double check me + assert_instr: [frinti] + safety: safe + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.nearbyint.{type[0]}" + links: + - link: "llvm.nearbyint.{type[0]}" + arch: aarch64,arm64ec + + - name: "vqadd{type[1]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqadd] + safety: safe + types: + - [u32, 's_u32', i32] + - [u64, 'd_u64', i64] + compose: + - LLVMLink: + name: "uqadd.{type[2]}" + links: + - link: "llvm.aarch64.neon.uqadd.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqadd{type[1]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqadd] + safety: safe + types: + - [i32, 's_s32', i32] + - [i64, 'd_s64', i64] + compose: + - LLVMLink: + name: "uqadd.{type[2]}" + links: + - link: "llvm.aarch64.neon.sqadd.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqadd{type[2]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqadd] + safety: safe + types: + - [i8, int8x8_t, 'b_s8'] + - [i16, int16x4_t, 'h_s16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - a + - Let: + - b + - 
"{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - b + - FnCall: + - simd_extract! + - - FnCall: + - "vqadd_{type[0]}" + - - a + - b + - "0" + + - name: "vqadd{type[2]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqadd] + safety: safe + types: + - [u8, uint8x8_t, 'b_u8'] + - [u16, uint16x4_t, 'h_u16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - b + - FnCall: + - simd_extract! + - - FnCall: + - "vqadd_{type[0]}" + - - a + - b + - "0" + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld1] + safety: + unsafe: [neon] + types: + - ["*const f64", float64x1x2_t] + - ["*const f64", float64x2x2_t] + - ["*const f64", float64x1x3_t] + - ["*const f64", float64x2x3_t] + - ["*const f64", float64x1x4_t] + - ["*const f64", float64x2x4_t] + compose: + - LLVMLink: + name: "vld1{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0" + arch: aarch64,arm64ec + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i8", int8x16x2_t, i8, int8x16_t, "4"] + - ["*const i64", int64x2x2_t, i64, int64x2_t, "1"] + - ["*const f64", float64x2x2_t, f64, float64x2_t, "1"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const i8" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vld2{neon_type[1].lane_nox}", ["b.0", "b.1", "LANE as i64", "a as _"]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64, int64x1_t] + - ["*const f64", float64x1x2_t, f64, float64x1_t] + compose: + - FnCall: ["static_assert!", ['LANE == 0']] + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const i8" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vld2{neon_type[1].lane_nox}", ["b.0", "b.1", "LANE as i64", "a as _"]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "static_assert!" 
+ - - 'LANE == 0' + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "static_assert!" + - - 'LANE == 0' + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x16x2_t, int8x16x2_t, "4"] + - ["*const p8", poly8x16x2_t, int8x16x2_t, "4"] + - ["*const u64", uint64x2x2_t, int64x2x2_t, "1"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[3]}" + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x2_t, int64x2x2_t, "1"] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const f64", float64x2x2_t, f64, float64x2_t] + - ["*const i64", int64x2x2_t, i64, int64x2_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const f64", float64x1x2_t, f64, float64x1_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - 
name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-stable + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x2x2_t, i64] + - ["*const f64", float64x1x2_t, f64] + - ["*const f64", float64x2x2_t, f64] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + 
arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x16x3_t, int8x16_t, i8, '4'] + - ['*const i64', int64x2x3_t, int64x2_t, i64, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld3lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const f64', float64x2x3_t, float64x2_t, f64, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld3lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 
'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const f64', float64x1x3_t, float64x1_t, f64] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'vld3.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to two registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x3_t, int64x1_t, i64] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'vld3.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers 
+ arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p8', poly8x16x3_t, int8x16x3_t, '4'] + - ['*const u8', uint8x16x3_t, int8x16x3_t, '4'] + - ['*const u64', uint64x2x3_t, int64x2x3_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x3_t, int64x1x3_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', 1]] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: 
"vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + safety: + unsafe: [neon] + assert_instr: [ld3] + types: + - ['*const i64', int64x2x3_t, '*const int64x2_t', i64] + - ['*const f64', float64x2x3_t, '*const float64x2_t', f64] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + safety: + unsafe: [neon] + assert_instr: [nop] + types: + - ['*const f64', float64x1x3_t, '*const float64x1_t', f64] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + safety: + unsafe: 
[neon] + assert_instr: [ld3] + types: + - ['*const u64', uint64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-stable + safety: + unsafe: [neon] + assert_instr: [ld3] + types: + - ['*const p64', poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x2x3_t, i64] + - ["*const f64", float64x1x3_t, f64] + - ["*const f64", float64x2x3_t, f64] + compose: + - LLVMLink: + name: 'ld3r{neon_type[1].dup_nox}' + arguments: + - 'ptr: {type[0]}' + links: + - link: 'llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as _']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld3{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-stable + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const p64", 
poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld3{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ['*const f64', float64x2x4_t, f64, '*const float64x2_t'] + - ['*const i64', int64x2x4_t, i64, '*const int64x2_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const f64', float64x1x4_t, f64, '*const float64x1_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - *neon-aes + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x4_t, 
int64x2x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x16x4_t, int8x16_t, i8, '3'] + - ['*const i64', int64x2x4_t, int64x2_t, i64, '1'] + - ['*const f64', float64x2x4_t, float64x2_t, f64, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x4_t, int64x1_t, i64] + - ['*const f64', float64x1x4_t, float64x1_t, f64] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: 
['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p8', poly8x16x4_t, int8x16x4_t, '4'] + - ['*const u8', uint8x16x4_t, int8x16x4_t, '4'] + - ['*const u64', uint64x2x4_t, int64x2x4_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x2x4_t, int64x2x4_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x4_t, int64x1x4_t] + 
compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + types: + - ['*mut f64', float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + types: + - ['*mut f64', float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Assign: + - "*a" + 
- FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *neon-stable + assert_instr: [st1] + types: + - ['f64', float64x1x2_t, float64x1_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *neon-stable + assert_instr: [st2] + types: + - [i64, int64x2x2_t, int64x2_t] + - [f64, float64x2x2_t, float64x2_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [i64, int64x1x2_t, int64x1_t] + - [f64, float64x1x2_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: 
['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [i8, int8x16x2_t, int8x16_t, '4'] + - [i64, int64x2x2_t, int64x2_t, '1'] + - [f64, float64x2x2_t, float64x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [u8, uint8x16x2_t, int8x16x2_t, '4'] + - [u64, uint64x2x2_t, int64x2x2_t, '1'] + - [p8, poly8x16x2_t, int8x16x2_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: 
i32"] + safety: + unsafe: [neon] + types: + - [u64, uint64x1x2_t, int64x1x2_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [u64, uint64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [p64, poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '1']] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + 
attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2]]}]] + - *neon-stable + safety: + unsafe: [neon] + types: + - [p64, poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f64, float64x1x3_t, float64x1_t] + compose: + - LLVMLink: + name: 'st3.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - 
"vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [i64, int64x2x3_t, int64x2_t] + - [f64, float64x2x3_t, float64x2_t] + compose: + - LLVMLink: + name: 'st3.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [u64, uint64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u64, uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - 
FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u8, uint8x16x3_t, int8x16x3_t, '4'] + - [u64, uint64x2x3_t, int64x2x3_t, '1'] + - [p8, poly8x16x3_t, int8x16x3_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f64, float64x2x3_t, float64x2_t, '1'] + - [i8, int8x16x3_t, int8x16_t, '4'] + - [i64, int64x2x3_t, int64x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - LLVMLink: + name: 'st3lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i64, int64x1x3_t, int64x1_t, '1'] + - [f64, float64x1x3_t, float64x1_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'st3lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f64, float64x1x4_t, float64x1_t] + compose: + - LLVMLink: + name: 'st4.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + 
attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [i64, int64x2x4_t, int64x2_t] + - [f64, float64x2x4_t, float64x2_t] + compose: + - LLVMLink: + name: 'st4.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [u64, uint64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + 
arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u64, uint64x1x4_t, int64x1x4_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u8, uint8x16x4_t, int8x16x4_t, '4'] + - [u64, uint64x2x4_t, int64x2x4_t, '1'] + - [p8, poly8x16x4_t, int8x16x4_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f64, float64x2x4_t, float64x2_t, '1'] + - [i8, int8x16x4_t, int8x16_t, '4'] + - [i64, int64x2x4_t, int64x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - LLVMLink: + name: 'st4lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0' + arch: 
aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i64, int64x1x4_t, int64x1_t, '1'] + - [f64, float64x1x4_t, float64x1_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'st4lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vusdot{neon_type[0].laneq_nox}" + doc: "Dot product index form with unsigned and signed integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] + static_defs: ["const LANE: i32"] + safety: safe + types: + - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]'] + - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '2']] + - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] + - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]] + + - name: "vsudot{neon_type[0].laneq_nox}" + 
doc: "Dot product index form with signed and unsigned integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] + static_defs: ["const LANE: i32"] + safety: safe + types: + - [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t] + - [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - uint32x4_t + - FnCall: [transmute, [c]] + - Let: + - c + - "{type[4]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + + - name: "vmul{neon_type.no}" + doc: Multiply + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmul] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_mul, [a, b]] + + - name: "vmull_high{neon_type[0].noq}" + doc: Signed multiply long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[3]}" + attr: [*neon-stable] + assert_instr: [smull2] + safety: safe + types: + - [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t] + - [int16x8_t, int16x4_t, '[4, 5, 6, 7]', int32x4_t] + - [int32x4_t, int32x2_t, '[2, 3]', int64x2_t] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - b + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: ["vmull_{neon_type[0]}", [a, b]] + + - name: "vmull_high{neon_type[0].noq}" + doc: "Unsigned multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[3]}" + attr: 
[*neon-stable] + assert_instr: [umull2] + safety: safe + types: + - [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t] + - [uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', uint32x4_t] + - [uint32x4_t, uint32x2_t, '[2, 3]', uint64x2_t] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - b + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: ["vmull_{neon_type[0]}", [a, b]] + + - name: "vmull_p64" + doc: "Polynomial multiply long" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-stable + safety: safe + assert_instr: [pmull] + types: + - ["p64", "p128"] + compose: + - LLVMLink: + name: "pmull.{type[0]}" + return_type: "int8x16_t" + links: + - link: "llvm.aarch64.neon.pmull64" + arch: aarch64,arm64ec + - FnCall: [transmute, [{FnCall: ["_vmull_p64", [a, b]]}]] + + - name: "vmull_high{neon_type[0].noq}" + doc: "Polynomial multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[3]}" + attr: + - *neon-stable + safety: safe + assert_instr: [pmull] + types: + - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - b + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: ["vmull_{neon_type[0]}", [a, b]] + + - name: "vmull_high{neon_type[0].noq}" + doc: "Polynomial multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-stable + safety: safe + assert_instr: [pmull] + types: + - [poly64x2_t, "p128"] + compose: + - FnCall: + - "vmull_{neon_type[0]}" + - - FnCall: [simd_extract!, [a, '1']] + - FnCall: [simd_extract!, [b, '1']] + + - name: "vmulx{neon_type.no}" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: 
"{neon_type}" + attr: [*neon-stable] + assert_instr: [fmulx] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmulx.{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmulx.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmulx{neon_type.no}" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmulx] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmulx.{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmulx.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fmulx] + safety: safe + types: + - ["s_f32", "f32"] + - ["d_f64", "f64"] + compose: + - LLVMLink: + name: "fmulx.{type[1]}" + links: + - link: "llvm.aarch64.neon.fmulx.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmulx] + safety: safe + types: + - ["h_f16", "f16"] + compose: + - LLVMLink: + name: "fmulx.{type[1]}" + links: + - link: "llvm.aarch64.neon.fmulx.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vmulx_lane_f64" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: safe + types: + - float64x1_t + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - vmulx_f64 + - - a + - FnCall: + - 'transmute::' + - - FnCall: + - 
"simd_extract!" + - - b + - 'LANE as u32' + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: safe + types: + - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vmulx{type[3]}" + - - a + - FnCall: + - "simd_shuffle!" + - - b + - b + - "{type[4]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: safe + types: + - ["d_lane_f64", "f64", float64x1_t, "d_f64", 'LANE as u32'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vmulx{type[3]}" + - - a + - FnCall: + - "simd_extract!" + - - b + - "{type[4]}" + + - name: "vmulx_laneq_f64" + doc: Floating-point multiply extended + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '1']] + - FnCall: + - vmulx_f64 + - - a + - FnCall: + - 'transmute::' + - - FnCall: + - "simd_extract!" 
+ - - b + - 'LANE as u32' + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: safe + types: + - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32, LANE as u32]'] + - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32, LANE as u32]'] + - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_shuffle!" 
+ - - b + - b + - "{type[5]}" + + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_shuffle!" + - - b + - b + - "{type[5]}" + + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: safe + types: + - ['s_lane_f32', f32, float32x2_t, '1', 's_f32', 'LANE as u32'] + - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32', 'LANE as u32'] + - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64', 'LANE as u32'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_extract!" 
+ - - b + - "{type[5]}" + + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - ['h_lane_f16', f16, float16x4_t, '2', 'h_f16', "LANE as u32"] + - ['h_laneq_f16', f16, float16x8_t, '3', 'h_f16', "LANE as u32"] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_extract!" + - - b + - "{type[5]}" + + + - name: "vmulx{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, "f16"] + - [float16x8_t, "f16"] + compose: + - FnCall: + - vmulx{neon_type[0].no} + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vfma{neon_type.no}" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: [fmadd] + safety: safe + types: + - float64x1_t + compose: + - FnCall: [simd_fma, [b, c, a]] + + - name: "vfma{neon_type.no}" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: [fmla] + safety: safe + types: + - float64x2_t + compose: + - FnCall: [simd_fma, [b, c, a]] + + - name: "vfma_n_f64" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + 
attr: + - *neon-stable + assert_instr: [fmadd] + safety: safe + types: + - [float64x1_t, f64] + compose: + - FnCall: + - "vfma_f64" + - - a + - b + - FnCall: + - "vdup_n_f64" + - - c + + - name: "vfmaq_n_f64" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [fmla] + safety: safe + types: + - [float64x2_t, f64] + compose: + - FnCall: + - "vfmaq_f64" + - - a + - b + - FnCall: + - "vdupq_n_f64" + - - c + + - name: "vfma{neon_type[0].N}" + doc: Floating-point fused Multiply-Subtract from accumulator. + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmla] + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}" + - - c + + - name: "vdiv{neon_type.no}" + doc: "Divide" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fdiv] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_div, [a, b]] + + - name: "vdiv{neon_type.no}" + doc: "Divide" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fdiv] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_div, [a, b]] + + - name: "vdiv{type[1]}_{type[0]}" + doc: Divide + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: safe + types: + - [f16, 'h'] + compose: + - 'a / b' + + - name: "vsub{neon_type.no}" + doc: "Subtract" + arguments: 
["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fsub] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_sub, [a, b]] + + - name: "vsub{type[0]}" + doc: "Subtract" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: safe + types: + - ['d_s64', 'i64'] + - ['d_u64', 'u64'] + compose: + - MethodCall: [a, wrapping_sub, [b]] + + - name: "vsub{type[0]}" + doc: "Subtract" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: safe + types: + - ['h_f16', 'f16'] + compose: + - 'a - b' + + - name: "vaddv{neon_type[0].no}" + doc: Floating-point add across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [faddp] + safety: safe + types: + - [float32x2_t, f32] + - [float32x4_t, f32] + - [float64x2_t, f64] + compose: + - LLVMLink: + name: "faddv.{type[1]}.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.faddv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddlv{neon_type[0].no}" + doc: Signed Add Long across Vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [saddlv] + safety: safe + types: + - [int16x4_t, i32] + - [int16x8_t, i32] + - [int32x4_t, i64] + compose: + - LLVMLink: + name: "llvm.aarch64.neon.saddlv.{type[1]}.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.saddlv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddlv{neon_type.no}" + doc: Signed Add Long across Vector + arguments: ["a: {neon_type}"] + return_type: "i64" + attr: [*neon-stable] + assert_instr: [saddlp] + safety: safe + types: + - int32x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.saddlv.i64.v2i32" + links: + - 
link: "llvm.aarch64.neon.saddlv.i64.v2i32" + arch: aarch64,arm64ec + + - name: "vaddlv{neon_type[0].no}" + doc: Unsigned Add Long across Vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [uaddlv] + safety: safe + types: + - [uint16x4_t, u32, i32] + - [uint16x8_t, u32, i32] + - [uint32x4_t, u64, i64] + compose: + - LLVMLink: + name: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ['_vaddlv{neon_type[0].no}', ['a'], [], true] + + - name: "vaddlv{neon_type[0].no}" + doc: Unsigned Add Long across Vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [uaddlp] + safety: safe + types: + - [uint32x2_t, u64, i64] + compose: + - LLVMLink: + name: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ['_vaddlv{neon_type[0].no}', ['a'], [], true] + + - name: "vsubw_high{neon_type[1].noq}" + doc: Signed Subtract Wide + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: [*neon-stable] + assert_instr: [ssubw] + safety: safe + types: + - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]'] + - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]'] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - FnCall: + - simd_sub + - - a + - FnCall: [simd_cast, [c]] + + - name: "vsubw_high{neon_type[1].noq}" + doc: Unsigned Subtract Wide + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: [*neon-stable] + assert_instr: [usubw] + safety: safe + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] + 
- [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - FnCall: + - simd_sub + - - a + - FnCall: [simd_cast, [c]] + + - name: "vsubl_high{neon_type[0].noq}" + doc: "Signed Subtract Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ssubl] + safety: safe + types: + - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t] + - [int16x8_t, int32x4_t, '[4, 5, 6, 7]', int16x4_t] + - [int32x4_t, int64x2_t, '[2, 3]', int32x2_t] + compose: + - Let: + - c + - "{neon_type[3]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - d + - "{neon_type[1]}" + - FnCall: [simd_cast, [c]] + - Let: + - e + - "{neon_type[3]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - Let: + - f + - "{neon_type[1]}" + - FnCall: [simd_cast, [e]] + - FnCall: [simd_sub, [d, f]] + + - name: "vsubl_high{neon_type[0].noq}" + doc: "Unsigned Subtract Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [usubl] + safety: safe + types: + - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t] + - [uint16x8_t, uint32x4_t, '[4, 5, 6, 7]', uint16x4_t] + - [uint32x4_t, uint64x2_t, '[2, 3]', uint32x2_t] + compose: + - Let: + - c + - "{neon_type[3]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - d + - "{neon_type[1]}" + - FnCall: [simd_cast, [c]] + - Let: + - e + - "{neon_type[3]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - Let: + - f + - "{neon_type[1]}" + - FnCall: [simd_cast, [e]] + - FnCall: [simd_sub, [d, f]] + + - name: "vbcax{neon_type.no}" + doc: Bit clear and exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 
'since = "1.79.0"']] + assert_instr: [bcax] + safety: safe + types: + - int8x16_t + - int16x8_t + - int32x4_t + - int64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.bcaxs.{neon_type}" + links: + - link: "llvm.aarch64.crypto.bcaxs.{neon_type}" + arch: aarch64,arm64ec + + - name: "vbcax{neon_type.no}" + doc: Bit clear and exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [bcax] + safety: safe + types: + - uint8x16_t + - uint16x8_t + - uint32x4_t + - uint64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.bcaxu.{neon_type}" + links: + - link: "llvm.aarch64.crypto.bcaxu.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot270}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcadd] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot90}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcadd] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot270}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: 
"{neon_type}" + attr: + - *neon-fp16 + - *enable-fcma + - *neon-unstable-f16 + assert_instr: [fcadd] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vcadd.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot90}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *enable-fcma + - *neon-unstable-f16 + assert_instr: [fcadd] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vcadd.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.no}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.no}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot90}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: 
[target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot90}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot270}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vcmla{neon_type.rot270}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex 
multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: 
[target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type.rot180}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + 
arch: aarch64,arm64ec + + + - name: "vcmla{neon_type.rot180}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vcmla{neon_type[0].rot180_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot180_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as 
u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, + '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]' + ] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{type[3]}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{type[3]}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16'] + - [float16x8_t, float16x4_t, + '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * 
LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16' + ] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: 
[static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].lane_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + + - name: "vcmla{neon_type[0].lane_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", 
[a, b, c]] + + - name: "vcmla{neon_type[0].rot270_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + + - name: "vcmla{neon_type[0].rot270_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vdot{neon_type[0].laneq_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: 
{neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] + safety: safe + types: + - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]'] + - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '2']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vdot{neon_type[0].laneq_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] + safety: safe + types: + - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '2']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: 
"{neon_type}" + attr: [*neon-stable] + assert_instr: [fmax] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmax.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmax.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmaxh_{type}" + doc: Maximum (vector) + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmax] + safety: safe + types: + - f16 + compose: + - LLVMLink: + name: "vmaxh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmax.{type}" + arch: aarch64,arm64ec + + + + - name: "vmaxnm{neon_type.no}" + doc: Floating-point Maximum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmaxnm] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmaxnm.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmaxnmh_{type}" + doc: Floating-point Maximum Number + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnm] + safety: safe + types: + - f16 + compose: + - LLVMLink: + name: "vmaxh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnm.{type}" + arch: aarch64,arm64ec + + + - name: "vminnmh_{type}" + doc: Floating-point Minimum Number + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnm] + safety: safe + types: + - f16 + compose: + - LLVMLink: + name: "vminh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnm.{type}" + arch: aarch64,arm64ec + + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fmaxnmp] + safety: safe + types: + - [float32x2_t, f32] + - [float64x2_t, 
f64] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fmaxnmv] + safety: safe + types: + - [float32x4_t, f32] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnmv] + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vminnmv{neon_type[0].no}" + doc: Floating-point minimum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnmv] + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vmaxv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxv] + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fmaxv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminv{neon_type[0].no}" + doc: Floating-point minimum number across vector + 
arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminv] + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fminv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vpmax{type[0]}" + doc: "Floating-point maximum pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: [*neon-stable] + assert_instr: [fmaxp] + safety: safe + types: + - ["s_f32", float32x2_t, f32] + - ["qd_f64", float64x2_t, f64] + compose: + - LLVMLink: + name: "fmaxv.{type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmin] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmin.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vminh_{type}" + doc: Minimum (vector) + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmin] + safety: safe + types: + - f16 + compose: + - LLVMLink: + name: "vminh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmin.{type}" + arch: aarch64,arm64ec + + + - name: "vminnm{neon_type.no}" + doc: "Floating-point Minimum Number (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fminnm] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vminnmv{neon_type[0].no}" + doc: "Floating-point minimum number across vector" + arguments: ["a: 
{neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, "f32"] + - [float64x2_t, "f64"] + compose: + - LLVMLink: + name: "vminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminnmv{neon_type[0].no}" + doc: "Floating-point minimum number across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmv]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x4_t, "f32"] + compose: + - LLVMLink: + name: "vminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmovl_high{neon_type[0].noq}" + doc: Vector move + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [sxtl2] + safety: safe + types: + - [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]'] + - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]'] + compose: + - Let: + - a + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + - FnCall: ["vmovl{neon_type[0].noq}", [a]] + + - name: "vmovl_high{neon_type[0].noq}" + doc: Vector move + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [uxtl2] + safety: safe + types: + - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]'] + compose: + - Let: + - a + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + - FnCall: ["vmovl{neon_type[0].noq}", [a]] + + - name: "vpadd{neon_type.no}" 
+ doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [faddp] + safety: safe + types: + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [faddp] + safety: safe + types: + - float16x8_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmax{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxp] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmaxnm{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnmp] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxnmp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnmp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmin{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminp] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpminnm{neon_type.no}" + 
doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnmp] + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminnmp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnmp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpadd{type[0]}" + doc: "Floating-point add pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: safe + types: + - ["s_f32", float32x2_t, f32] + - ["d_f64", float64x2_t, f64] + compose: + - Let: + - a1 + - "{type[2]}" + - FnCall: [simd_extract!, [a, '0']] + - Let: + - a2 + - "{type[2]}" + - FnCall: [simd_extract!, [a, '1']] + - Identifier: ['a1 + a2', Symbol] + + - name: "vpmin{type[0]}" + doc: Floating-point minimum pairwise + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: [*neon-stable] + assert_instr: [fminp] + safety: safe + types: + - ["s_f32", float32x2_t, f32] + - ["qd_f64", float64x2_t, f64] + compose: + - LLVMLink: + name: "fminv.{type[2]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fminv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqdmullh_s16" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i16", "i32"] + compose: + - Let: [a, int16x4_t, {FnCall: [vdup_n_s16, [a]]}] + - Let: [b, int16x4_t, {FnCall: [vdup_n_s16, [b]]}] + - FnCall: [simd_extract!, [{FnCall: [vqdmull_s16, [a, b]]}, '0']] + + - name: "vqdmulls_s32" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, 
[sqdmull]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i32", "i64"] + compose: + - LLVMLink: + name: "vqdmulls_s32" + links: + - link: "llvm.aarch64.neon.sqdmulls.scalar" + arch: aarch64,arm64ec + + - name: "vqdmull_high{neon_type[0].noq}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]'] + - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]'] + compose: + - Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}] + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_high_n_{type[1]}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]'] + - [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]'] + compose: + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull{type[3]}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: 
['const N: i32'] + safety: safe + types: + - ["i16", int16x4_t, "i32", 'h_lane_s16', 'h_s16'] + - ["i32", int32x4_t, "i64", 's_laneq_s32', 's_s32'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 2]] + - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ["vqdmull{type[4]}", [a, b]] + + - name: "vqdmullh_laneq_s16" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, N = 4]]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i16", int16x8_t, "i32"] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 3]] + - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ["vqdmullh_s16", [a, b]] + + - name: "vqdmulls_lane_s32" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i32", int32x2_t, "i64"] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ["vqdmulls_s32", [a, b]] + + - name: "vqdmull{type[6]}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - 
[int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16'] + - [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_high_lane_s32" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '1']] + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_high_laneq_s16" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", 
{FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_laneq_s16" + doc: "Vector saturating doubling long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [vqdmull_s16, [a, b]] + + - name: "vqdmull_laneq_s32" + doc: "Vector saturating doubling long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [vqdmull_s32, [a, b]] + + - name: "vqdmlal{type[4]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, _high_s32] + - [int32x4_t, int16x8_t, "i16", 
int32x4_t, _high_n_s16] + - [int64x2_t, int32x4_t, "i32", int64x2_t, _high_n_s32] + compose: + - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}", [b, c]]}]] + + - name: "vqdmlal{type[4]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int16x8_t, int16x4_t, int32x4_t, _high_lane_s16, '2'] + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_laneq_s16, '3'] + - [int64x2_t, int32x4_t, int32x2_t, int64x2_t, _high_lane_s32, '1'] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, _high_laneq_s32, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[5]}"]] + - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}::", [b, c]]}]] + + - name: "vqdmlalh_{type[2]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i32", "i16", "s16"] + compose: + - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}] + - FnCall: [vqadds_s32, [a, {FnCall: [simd_extract!, [x, 0]]}]] + + - name: "vqdmlals_s32" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "i32", 
"i32", "i64"] + compose: + - Let: [x, i64, {FnCall: [vqaddd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}] + - Identifier: ['x as i64', Symbol] + + - name: "vqdmlal{type[4]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i32", "i16", int16x4_t, "i32", h_lane_s16, '2', h_s16] + - ["i32", "i16", int16x8_t, "i32", h_laneq_s16, '3', h_s16] + - ["i64", "i32", int32x2_t, "i64", s_lane_s32, '1', s_s32] + - ["i64", "i32", int32x4_t, "i64", s_laneq_s32, '2', s_s32] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqdmlal_laneq_s16" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int16x4_t, int16x8_t, int32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - FnCall: [vqaddq_s32, [a, {FnCall: ["vqdmull_laneq_s16::", [b, c]]}]] + + - name: "vqdmlal_laneq_s32" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x2_t, int32x2_t, int32x4_t, int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - FnCall: [vqaddq_s64, [a, {FnCall: ["vqdmull_laneq_s32::", [b, c]]}]] + + - name: "vqdmlsl{type[4]}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, _high_s32] + - [int32x4_t, int16x8_t, "i16", int32x4_t, _high_n_s16] + - [int64x2_t, int32x4_t, "i32", int64x2_t, _high_n_s32] + compose: + - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}", [b, c]]}]] + + - name: "vqdmlsl{type[4]}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16', '2'] + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, '_high_laneq_s16', '3'] + - [int64x2_t, int32x4_t, int32x2_t, int64x2_t, '_high_lane_s32', '1'] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[5]}"]] + - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}::", [b, c]]}]] + + - name: "vqdmlslh_s16" + doc: "Signed 
saturating doubling multiply-subtract long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i32", "i16"] + compose: + - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}] + - FnCall: [vqsubs_s32, [a, {FnCall: [simd_extract!, [x, '0']]}]] + + - name: "vqdmlsls_s32" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "i32", "i32", "i64"] + compose: + - Let: [x, i64, {FnCall: [vqsubd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}] + - Identifier: ['x as i64', Symbol] + + - name: "vqdmlsl{type[4]}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i32", "i16", int16x4_t, "i32", 'h_lane_s16', '2', 'h_s16'] + - ["i32", "i16", int16x8_t, "i32", 'h_laneq_s16', '3', 'h_s16'] + - ["i64", "i32", int32x2_t, "i64", 's_lane_s32', '1', 's_s32'] + - ["i64", "i32", int32x4_t, "i64", 's_laneq_s32', '2', 's_s32'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqdmlsl_laneq_s16" + doc: "Vector widening saturating doubling multiply subtract with 
scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int16x4_t, int16x8_t, int32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - FnCall: ["vqsubq_s32", [a, {FnCall: ["vqdmull_laneq_s16::", [b, c]]}]] + + - name: "vqdmlsl_laneq_s32" + doc: "Vector widening saturating doubling multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x2_t, int32x2_t, int32x4_t, int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - FnCall: [vqsubq_s64, [a, {FnCall: ["vqdmull_laneq_s32::", [b, c]]}]] + + - name: "vqdmulh{type[4]}" + doc: "Signed saturating doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i16", "i16", "i16", int16x4_t, 'h_s16'] + - ["i32", "i32", "i32", int32x2_t, 's_s32'] + compose: + - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [a]]}] + - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [b]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}, '0']] + + - name: "vqdmulhh{type[3]}" + doc: "Signed saturating doubling multiply returning high 
half" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i16", int16x4_t, "i16", '_lane_s16', '2'] + - ["i16", int16x8_t, "i16", '_laneq_s16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]] + - Let: [b, 'i16', {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ['vqdmulhh_s16', [a, b]] + + - name: "vqdmulhs{type[3]}" + doc: "Signed saturating doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i32", int32x2_t, "i32", "_lane_s32", '1'] + - ["i32", int32x4_t, "i32", "_laneq_s32", '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]] + - Let: [b, 'i32', {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ['vqdmulhs_s32', [a, b]] + + - name: "vqmovn_high{neon_type[1].noq}" + doc: "Signed saturating extract narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + compose: + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]] + + - name: 
"vqmovn_high{neon_type[1].noq}" + doc: "Unsigned saturating extract narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]] + + - name: "vqmovn{type[2]}" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i16", "i8", 'h_s16', s16] + - ["i32", "i16", 's_s32', s32] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + + - name: "vqmovn{type[2]}" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u16", "u8", 'h_u16', 'u16'] + - ["u32", "u16", 's_u32', 'u32'] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + + - name: "vqmovnd_s64" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "i32"] + compose: + - LLVMLink: + name: "vqmovnd_s64" + links: + - link: 
"llvm.aarch64.neon.scalar.sqxtn.i32.i64" + arch: aarch64,arm64ec + + - name: "vqmovnd_u64" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u64", "u32"] + compose: + - LLVMLink: + name: "vqmovnd_u64" + links: + - link: "llvm.aarch64.neon.scalar.uqxtn.i32.i64" + arch: aarch64,arm64ec + + - name: "vqmovun{type[2]}" + doc: "Signed saturating extract unsigned narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i16", "u8", 'h_s16', s16] + - ["i32", "u16", 's_s32', s32] + - ["i64", "u32", 'd_s64', s64] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + + - name: "vqmovun_high_{neon_type[1]}" + doc: "Signed saturating extract unsigned narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, int32x4_t, uint16x8_t, s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, int64x2_t, uint32x4_t, s64, '[0, 1, 2, 3]'] + compose: + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovun_{type[3]}", [b]]}, "{type[4]}"]] + + - name: "vqrdmulh{type[1]}" + doc: "Signed saturating rounding doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh]]}]] + - FnCall: [stable, 
['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i16", 'h_s16', 's16'] + - ["i32", 's_s32', 's32'] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}, '0']] + + - name: "vqrdmulh{type[2]}" + doc: "Signed saturating rounding doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh, LANE = 1]]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i16", int16x4_t, 'h_lane_s16', 'h_s16', '2'] + - ["i16", int16x8_t, 'h_laneq_s16', 'h_s16', '3'] + - ["i32", int32x2_t, 's_lane_s32', 's_s32', '1'] + - ["i32", int32x4_t, 's_laneq_s32', 's_s32', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + - name: "vqrdmlah{neon_type.no}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: safe + types: + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vqrdmlah{neon_type.no}" + links: + - link: "llvm.aarch64.neon.sqrdmlah.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqrdmlah{type[3]}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = 
"rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: safe + types: + - ["i16", int16x4_t, s16, 'h_s16'] + - ["i32", int32x2_t, s32, 's_s32'] + compose: + - Let: [a, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [a]]}] + - Let: [b, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [b]]}] + - Let: [c, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [c]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}, '0']] + + - name: "vqrdmlah{type[0]}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {type[1]}", "b: {type[2]}", "c: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as 
u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] + - FnCall: ["vqrdmlah{neon_type[2].no}", [a, b, c]] + + - name: "vqrdmlah{type[4]}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i16", int16x4_t, '2', "h_s16", h_lane_s16, h_s16] + - ["i16", int16x8_t, '3', "h_s16", h_laneq_s16, h_s16] + - ["i32", int32x2_t, '1', "s_s32", s_lane_s32, s_s32] + - ["i32", int32x4_t, '2', "s_s32", s_laneq_s32, s_s32] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqrdmlsh{neon_type.no}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: safe + types: + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vqrdmlsh{neon_type.no}" + links: + - link: "llvm.aarch64.neon.sqrdmlsh.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqrdmlsh{type[1]}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: 
[target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: safe + types: + - ["i16", "h_s16", int16x4_t, s16] + - ["i32", "s_s32", int32x2_t, s32] + compose: + - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}] + - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}] + - Let: [c, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [c]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}, '0']] + + - name: "vqrdmlsh{type[0]}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh, LANE = 1]]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', 
'[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] + - FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]] + + - name: "vqrdmlsh{type[3]}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh, LANE = 1]]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i16", int16x4_t, '2', h_lane_s16, h_s16] + - ["i16", int16x8_t, '3', h_laneq_s16, h_s16] + - ["i32", int32x2_t, '1', s_lane_s32, s_s32] + - ["i32", int32x4_t, '2', s_laneq_s32, s_s32] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqrshl{type[0]}" + doc: "Signed saturating rounding shift left" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['s_s32', "i32"] + - ['d_s64', "i64"] + compose: + - LLVMLink: + name: "vqrshl{type[0]}" + links: + - link: "llvm.aarch64.neon.sqrshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vqrshl{type[1]}" + doc: "Signed saturating rounding shift left" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i8", 'b_s8', int8x8_t, s8] + - ["i16", 'h_s16', 
int16x4_t, s16] + compose: + - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}] + - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[3]}", [a, b]]}, '0']] + + - name: "vqrshl{type[2]}" + doc: "Unsigned saturating rounding shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u32", "i32", 's_u32'] + - ["u64", "i64", 'd_u64'] + compose: + - LLVMLink: + name: "vqrshl{type[2]}" + links: + - link: "llvm.aarch64.neon.uqrshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vqrshl{type[2]}" + doc: "Unsigned saturating rounding shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u8", "i8", "b_u8", uint8x8_t, int8x8_t, s8] + - ["u16", "i16", "h_u16", uint16x4_t, int16x4_t, s16] + compose: + - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n_{type[0]}", [a]]}] + - Let: [b, "{neon_type[4]}", {FnCall: ["vdup_n_{type[5]}", [b]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[0]}", [a, b]]}, '0']] + + - name: "vqrshrn{type[2]}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i16", "i8", 'h_n_s16', 'N >= 1 && N <= 8', int16x8_t, q_n_s16] + - ["i32", "i16", 's_n_s32', 'N >= 1 && N <= 16', int32x4_t, q_n_s32] + - ["i64", "i32", 'd_n_s64', 'N >= 
1 && N <= 32', int64x2_t, q_n_s64] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshrn_n{neon_type[4].noq}::", [a]]}, '0']] + + - name: "vqrshrn{type[3]}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[5]}"]] + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::", [b]]}, "{type[4]}"]] + + - name: "vqrshrn{type[0]}" + doc: "Unsigned saturating rounded shift right narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [h_n_u16, u16, u8, 'N >= 1 && N <= 8', uint16x8_t, q_n_u16, _n_u16] + - [s_n_u32, u32, u16, 'N >= 1 && N <= 16', uint32x4_t, q_n_u32, _n_u32] + - [d_n_u64, u64, u32, 'N >= 1 && N <= 32', uint64x2_t, q_n_u64, _n_u64] + compose: + - FnCall: [static_assert!, ['{type[3]}']] + - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshrn{type[6]}::", [a]]}, '0']] + + - name: 
"vqrshrn_high_n{neon_type[1].noq}" + doc: "Unsigned saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ['{type[3]}']] + - FnCall: + - simd_shuffle! + - - a + - FnCall: + - "vqrshrn_n{neon_type[1].noq}::" + - - b + - "{type[4]}" + + - name: "vqrshrun{type[0]}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [h_n_s16, "i16", "u8", 'N >= 1 && N <= 8', int16x8_t, s16] + - [s_n_s32, "i32", "u16", 'N >= 1 && N <= 16', int32x4_t, s32] + - [d_n_s64, "i64", "u32", 'N >= 1 && N <= 32', int64x2_t, s64] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - Let: + - a + - "{neon_type[4]}" + - FnCall: ["vdupq_n_{type[5]}", [a]] + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vqrshrun_n_{type[5]}::" + - - a + - '0' + + - name: "vqrshrun_high_n{neon_type[1].noq}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_shuffle! + - - a + - FnCall: + - "vqrshrun_n_{type[4]}::" + - - b + - "{type[5]}" + + - name: "vqshld_{type}" + doc: "Signed saturating shift left" + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - i64 + compose: + - LLVMLink: + name: "vqshld{type}" + links: + - link: "llvm.aarch64.neon.sqshl.{type}" + arch: aarch64,arm64ec + + - name: "vqshl{type[0]}" + doc: "Signed saturating shift left" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [b_s8, "i8", int8x8_t] + - [h_s16, "i16", int16x4_t] + - [s_s32, "i32", int32x2_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - "vqshl{neon_type[2].noq}" + - - FnCall: ["vdup_n{neon_type[2].no}", [a]] + - FnCall: ["vdup_n{neon_type[2].no}", [b]] + - 
FnCall: [simd_extract!, [c, '0']] + + - name: "vqshl{type[0]}" + doc: "Signed saturating shift left" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [b_n_s8, "i8", "3", s8] + - [h_n_s16, "i16", "4", s16] + - [s_n_s32, "i32", "5", s32] + - [d_n_s64, "i64", "6", s64] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - FnCall: + - simd_extract! + - - FnCall: + - "vqshl_n_{type[3]}::" + - - FnCall: ["vdup_n_{type[3]}", [a]] + - '0' + + - name: "vqshld_{type[0]}" + doc: "Unsigned saturating shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u64", "i64"] + compose: + - LLVMLink: + name: "vqshld{type[0]}" + links: + - link: "llvm.aarch64.neon.uqshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vqshl{type[0]}" + doc: "Unsigned saturating shift left" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [b_u8, "u8", "i8", uint8x8_t, int8x8_t] + - [h_u16, "u16", "i16", uint16x4_t, int16x4_t] + - [s_u32, "u32", "i32", uint32x2_t, int32x2_t] + compose: + - Let: + - c + - "{neon_type[3]}" + - FnCall: + - "vqshl{neon_type[3].noq}" + - - FnCall: ["vdup{neon_type[3].N}", [a]] + - FnCall: ["vdup{neon_type[4].N}", [b]] + - FnCall: [simd_extract!, [c, '0']] + + - name: "vqshl{type[0]}" + doc: "Unsigned saturating shift left" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - 
FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [b_n_u8, "u8", '3'] + - [h_n_u16, "u16", '4'] + - [s_n_u32, "u32", '5'] + - [d_n_u64, "u64", '6'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - FnCall: + - simd_extract! + - - FnCall: ["vqshl_n_{type[1]}::", [{FnCall: ["vdup_n_{type[1]}", [a]]}]] + - '0' + + - name: "vqshrnd_n_s64" + doc: "Signed saturating shift right narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i64", "i32"] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vqshrnd{type[1]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqshrn.{type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrnd_n_s64", [a, N], [], true] + + - name: "vqshrn{type[0]}" + doc: "Signed saturating shift right narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [h_n_s16, "i16", "i8", 'N >= 1 && N <= 8', s16] + - [s_n_s32, "i32", "i16", 'N >= 1 && N <= 16', s32] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vqshrn_n_{type[4]}::" + - - FnCall: ["vdupq_n_{type[4]}", [a]] + - '0' + + - name: "vqshrn{type[0]}" + doc: "Signed saturating shift right narrow" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16] + - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32] + - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64] + compose: + - FnCall: [static_assert!, ["{type[4]}"]] + - FnCall: + - simd_shuffle! + - - a + - FnCall: ["vqshrn_n_{type[6]}::", [b]] + - "{type[5]}" + + - name: "vqshrnd_n_u64" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["u64", "u32"] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vqshrnd_n_u64" + arguments: + - "a: u64" + - "n: i32" + links: + - link: "llvm.aarch64.neon.uqshrn.i32" + arch: aarch64,arm64ec + - FnCall: ["_vqshrnd_n_u64", ["a", N], [], true] + + - name: "vqshrn{type[0]}" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ['h_n_u16', "u16", "u8", 'N >= 1 && N <= 8'] + - ['s_n_u32', "u32", "u16", 'N >= 1 && N <= 16'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - "simd_extract!" + - - FnCall: + - "vqshrn_n_{type[1]}::" + - - FnCall: ["vdupq_n_{type[1]}", [a]] + - '0' + + - name: "vqshrn{type[0]}" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[4]}"]] + - FnCall: + - simd_shuffle! + - - a + - FnCall: ["vqshrn_n_{neon_type[2]}::", [b]] + - "{type[5]}" + + - name: "vqshrun{type[0]}" + doc: "Signed saturating shift right unsigned narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [h_n_s16, "i16", "u8", 'N >= 1 && N <= 8', s16] + - [s_n_s32, "i32", "u16", 'N >= 1 && N <= 16', s32] + - [d_n_s64, "i64", "u32", 'N >= 1 && N <= 32', s64] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vqshrun_n_{type[4]}::" + - - FnCall: ["vdupq_n_{type[4]}", [a]] + - '0' + + - name: "vqshrun_high_n_{neon_type[1]}" + doc: "Signed saturating shift right unsigned narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_shuffle! + - - a + - FnCall: ["vqshrun_n_{neon_type[1]}::", [b]] + - "{type[4]}" + + - name: "vsqadd{type[0]}" + doc: "Unsigned saturating accumulate of signed value" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [b_u8, "u8", "i8", s8] + - [h_u16, "u16", "i16", s16] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vsqadd_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[2]}", [b]] + - '0' + + - name: "vsqadd{type[0]}" + doc: "Unsigned saturating accumulate of signed value" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [s_u32, "u32", "i32"] + - [d_u64, "u64", "i64"] + compose: + - LLVMLink: + name: "vsqadd{type[0]}" + links: + - link: "llvm.aarch64.neon.usqadd.{type[2]}" + arch: aarch64,arm64ec + + - name: "vsqrt{neon_type.no}" + doc: "Calculates the square root of each lane." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_fsqrt, [a]] + + - name: "vsqrt{neon_type.no}" + doc: "Calculates the square root of each lane." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fsqrt, [a]] + + - name: "vsqrt{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fsqrt] + safety: safe + types: + - [f16, 'h_'] + compose: + - FnCall: [sqrtf16, [a], [], true] + + - name: "vrsqrts{type[0]}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [_f64, float64x1_t, v1f64] + - [q_f64, float64x2_t, v2f64] + compose: + - LLVMLink: + name: "vrsqrts{type[0]}" + links: + - link: "llvm.aarch64.neon.frsqrts.{type[2]}" + arch: aarch64,arm64ec + + - name: "vrsqrts{type[0]}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [s_f32, "f32"] + - [d_f64, "f64"] + compose: + - LLVMLink: + name: "vrsqrts{type[0]}" + links: + - link: "llvm.aarch64.neon.frsqrts.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrsqrts{type[0]}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-unstable-f16 + safety: safe + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: 
"vrsqrts{type[0]}" + links: + - link: "llvm.aarch64.neon.frsqrts.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrecpe{type[0]}" + doc: "Reciprocal estimate." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [_f64, float64x1_t, v1f64] + - [q_f64, float64x2_t, v2f64] + compose: + - LLVMLink: + name: "vrecpe{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpe.{type[2]}" + arch: aarch64,arm64ec + + - name: "vrecpe{type[0]}" + doc: "Reciprocal estimate." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [s_f32, "f32"] + - [d_f64, "f64"] + compose: + - LLVMLink: + name: "vrecpe{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpe.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrecpe{type[0]}" + doc: "Reciprocal estimate." 
+ arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecpe{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpe.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrecps{type[0]}" + doc: "Floating-point reciprocal step" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [_f64, float64x1_t, v1f64] + - [q_f64, float64x2_t, v2f64] + compose: + - LLVMLink: + name: "vrecps{type[0]}" + links: + - link: "llvm.aarch64.neon.frecps.{type[2]}" + arch: aarch64,arm64ec + + - name: "vrecps{type[0]}" + doc: "Floating-point reciprocal step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [s_f32, "f32"] + - [d_f64, "f64"] + compose: + - LLVMLink: + name: "vrecps{type[0]}" + links: + - link: "llvm.aarch64.neon.frecps.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrecps{type[0]}" + doc: "Floating-point reciprocal step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecps{type[0]}" + links: + - link: "llvm.aarch64.neon.frecps.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrecpx{type[0]}" + doc: "Floating-point reciprocal exponent" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]] + - 
FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [s_f32, "f32"] + - [d_f64, "f64"] + compose: + - LLVMLink: + name: "vrecpxs{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpx.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrecpx{type[0]}" + doc: "Floating-point reciprocal exponent" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecpxs{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpx.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: safe + types: + - [poly64x1_t, int64x1_t] + - [poly64x1_t, uint64x1_t] + - [int64x1_t, poly64x1_t] + - [uint64x1_t, poly64x1_t] + - [poly64x2_t, int64x2_t] + - [poly64x2_t, uint64x2_t] + - [int64x2_t, poly64x2_t] + - [uint64x2_t, poly64x2_t] + - [float64x1_t, int8x8_t] + - [float64x1_t, int16x4_t] + - [float64x1_t, int32x2_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int8x16_t] + - [float64x2_t, int16x8_t] + - [float64x2_t, int32x4_t] + - [float64x2_t, int64x2_t] + - [float64x1_t, uint8x8_t] + - [float64x1_t, uint16x4_t] + - [float64x1_t, uint32x2_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint8x16_t] + - [float64x2_t, uint16x8_t] + - [float64x2_t, uint32x4_t] + - [float64x2_t, uint64x2_t] + - [float64x1_t, poly8x8_t] + - [float64x1_t, poly16x4_t] + - [float32x2_t, poly64x1_t] + - [float64x1_t, poly64x1_t] + - [float64x2_t, poly8x16_t] + - [float64x2_t, poly16x8_t] + - [float32x4_t, poly64x2_t] + - [float64x2_t, poly64x2_t] + - [float64x2_t, p128] + - [int8x8_t, float64x1_t] + - [int16x4_t, float64x1_t] + - [int32x2_t, float64x1_t] + - [int64x1_t, float64x1_t] + - 
[int8x16_t, float64x2_t] + - [int16x8_t, float64x2_t] + - [int32x4_t, float64x2_t] + - [int64x2_t, float64x2_t] + - [poly8x8_t, float64x1_t] + - [uint16x4_t, float64x1_t] + - [uint32x2_t, float64x1_t] + - [uint64x1_t, float64x1_t] + - [poly8x16_t, float64x2_t] + - [uint16x8_t, float64x2_t] + - [uint32x4_t, float64x2_t] + - [uint64x2_t, float64x2_t] + - [uint8x8_t, float64x1_t] + - [poly16x4_t, float64x1_t] + - [poly64x1_t, float64x1_t] + - [poly64x1_t, float32x2_t] + - [uint8x16_t, float64x2_t] + - [poly16x8_t, float64x2_t] + - [poly64x2_t, float64x2_t] + - [poly64x2_t, float32x4_t] + - [p128, float64x2_t] + - [float32x2_t, float64x1_t] + - [float64x1_t, float32x2_t] + - [float32x4_t, float64x2_t] + - [float64x2_t, float32x4_t] + compose: + - FnCall: [transmute, [a]] + + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: safe + types: + - [float64x1_t, float16x4_t] + - [float16x4_t, float64x1_t] + # q + - [float64x2_t, float16x8_t] + - [float16x8_t, float64x2_t] + compose: + - FnCall: [transmute, [a]] + + + - name: "vrshld_s64" + doc: "Signed rounding shift left" + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - "i64" + compose: + - LLVMLink: + name: "vrshld_{type}" + links: + - link: "llvm.aarch64.neon.srshl.{type}" + arch: aarch64,arm64ec + + - name: "vrshld_{type[0]}" + doc: "Unsigned rounding shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u64", "i64"] + compose: + - LLVMLink: + 
name: "vrshld_{type[0]}" + links: + - link: "llvm.aarch64.neon.urshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrshrd_n_s64" + doc: "Signed rounding shift right" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - ["i64", 'N >= 1 && N <= 64', '-N as i64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: [vrshld_s64, [a, "{type[2]}"]] + + - name: "vrshrd_n_u64" + doc: "Unsigned rounding shift right" + arguments: ["a: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - "u64" + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - FnCall: ["vrshld_u64", [a, '-N as i64']] + + - name: "vrshrn_high_n_{neon_type[1]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N 
>= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_shuffle! + - - a + - FnCall: ["vrshrn_n_{neon_type[1]}::", [b]] + - "{type[4]}" + + - name: "vrsubhn_high_{neon_type[1]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - *little-endian + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - Let: + - x + - "{neon_type[0]}" + - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]] + - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]] + + - name: "vrsubhn_high_{neon_type[1]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - *big-endian + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, 
uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - Let: + - x + - "{neon_type[0]}" + - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]] + - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]] + + - name: "vcopy{neon_type[0].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int8x8_t, int8x8_t, int8x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int16x4_t, int16x4_t, int16x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int32x2_t, int32x2_t, int32x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => 
simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' 
unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] + - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] + - Identifier: ["{type[5]}", Symbol] + + - name: "vcopy{neon_type[0].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + 
LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 
as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = unsafe { 
simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + 
LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] + - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] + - Identifier: ["{type[5]}", Symbol] + - Identifier: ["{type[6]}", Symbol] + + - name: "vcopy{neon_type[0].laneq_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: 
"{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int8x16_t, int8x16_t, int8x16_t, '4', '4', ' unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => 
unreachable_unchecked(), } }'] + - [int16x8_t, int16x8_t, int16x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int32x4_t, int32x4_t, int32x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int64x2_t, int64x2_t, int64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4', ' unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 
15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 
{ 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4', ' unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, 
b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] + - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] + - Identifier: ["{type[5]}", Symbol] + + - name: "vcopy{neon_type[0].laneq_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = unsafe 
{ simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => 
simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, 
[0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] + - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] + - Identifier: ["{type[5]}", Symbol] + - Identifier: ["{type[6]}", Symbol] + + - name: "vcopyq_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int64x2_t, int64x1_t, ' let b: int64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [uint64x2_t, uint64x1_t, ' let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [poly64x2_t, poly64x1_t, ' let b: poly64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + - [float64x2_t, float64x1_t, ' let b: float64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 
1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '1']] + - FnCall: [static_assert!, ['LANE2 == 0']] + - Identifier: ['{type[2]}', Symbol] + - Identifier: ['{type[3]}', Symbol] + + - name: "vcopyq_lane_f32" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [float32x4_t, float32x2_t, ' let b: float32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, 2]] + - FnCall: [static_assert_uimm_bits!, [LANE2, 1]] + - Identifier: ["{type[2]}", Symbol] + - Identifier: ["{type[3]}", Symbol] + + - name: "vcreate_f64" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u64", float64x1_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vset_lane_f64" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f64", float64x1_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vsetq_lane_f64" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f64", float64x2_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vshld_s64" + doc: "Signed Shift left" + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - "i64" + compose: + - FnCall: + - transmute + - - FnCall: + - vshl_s64 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vshld_{type[0]}" + doc: "Unsigned Shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["u64", "i64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vshl_u64 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vshll_high_n_{neon_type[0]}" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]] + - 
FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]'] + - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - FnCall: ["vshll_n_{neon_type[2]}::", [b]] + + - name: "vshll_high_n_{neon_type[0]}" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]'] + - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - FnCall: ["vshll_n_{neon_type[2]}::", [b]] + + - name: "vshrn_high_n_{neon_type[1]}" + doc: "Shift right narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 
&& N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_shuffle! + - - a + - FnCall: ["vshrn_n_{neon_type[1]}::", [b]] + - "{type[4]}" + + - name: "vsm3partw1{neon_type.no}" + doc: "SM3PARTW1" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sm4"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw1]]}]] + - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] + safety: safe + types: + - uint32x4_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sm3partw1 + links: + - link: "llvm.aarch64.crypto.sm3partw1" + arch: aarch64,arm64ec + + - name: "vsm3partw2{neon_type.no}" + doc: "SM3PARTW2" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sm4"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw2]]}]] + - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] + safety: safe + types: + - uint32x4_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sm3partw2 + links: + - link: "llvm.aarch64.crypto.sm3partw2" + arch: aarch64,arm64ec + + - name: "vsm3ss1{neon_type.no}" + doc: "SM3SS1" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sm4"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3ss1]]}]] + - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] + 
safety: safe + types: + - uint32x4_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sm3ss1 + links: + - link: "llvm.aarch64.crypto.sm3ss1" + arch: aarch64,arm64ec + + - name: "vsm4ekey{neon_type.no}" + doc: "SM4 key" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sm4"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4ekey]]}]] + - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] + safety: safe + types: + - uint32x4_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sm4ekey + links: + - link: "llvm.aarch64.crypto.sm4ekey" + arch: aarch64,arm64ec + + - name: "vsm4e{neon_type.no}" + doc: "SM4 encode" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sm4"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4e]]}]] + - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] + safety: safe + types: + - uint32x4_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sm4e + links: + - link: "llvm.aarch64.crypto.sm4e" + arch: aarch64,arm64ec + + - name: "vrax1{neon_type.no}" + doc: "Rotate and exclusive OR" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rax1]]}]] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + safety: safe + types: + - uint64x2_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.rax1 + links: + - link: "llvm.aarch64.crypto.rax1" + arch: aarch64,arm64ec + + - name: "vsha512h{neon_type.no}" + doc: "SHA512 hash update part 1" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, 
[sha512h]]}]] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + safety: safe + types: + - uint64x2_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sha512h + links: + - link: "llvm.aarch64.crypto.sha512h" + arch: aarch64,arm64ec + + - name: "vsha512h2{neon_type.no}" + doc: "SHA512 hash update part 2" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h2]]}]] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + safety: safe + types: + - uint64x2_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sha512h2 + links: + - link: "llvm.aarch64.crypto.sha512h2" + arch: aarch64,arm64ec + + - name: "vsha512su0{neon_type.no}" + doc: "SHA512 schedule update 0" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su0]]}]] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + safety: safe + types: + - uint64x2_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sha512su0 + links: + - link: "llvm.aarch64.crypto.sha512su0" + arch: aarch64,arm64ec + + - name: "vsha512su1{neon_type.no}" + doc: "SHA512 schedule update 1" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su1]]}]] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + safety: safe + types: + - uint64x2_t + compose: + - LLVMLink: + name: llvm.aarch64.crypto.sha512su1 + links: + - link: "llvm.aarch64.crypto.sha512su1" + arch: aarch64,arm64ec + + - name: "vsm3tt{type[0]}" + doc: "{type[3]}" + arguments: ["a: {neon_type[1]}", "b: 
{neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,sm4"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, ['{type[2]}', 'IMM2 = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] + static_defs: ["const IMM2: i32"] + safety: safe + types: + - ['1aq_u32', uint32x4_t, 'sm3tt1a', 'SM3TT1A'] + - ['1bq_u32', uint32x4_t, 'sm3tt1b', 'SM3TT1B'] + - ['2aq_u32', uint32x4_t, 'sm3tt2a', 'SM3TT2A'] + - ['2bq_u32', uint32x4_t, 'sm3tt2b', 'SM3TT2B'] + compose: + - FnCall: ["static_assert_uimm_bits!", [IMM2, "2"]] + - LLVMLink: + name: "_vsm3tt{type[0]}" + arguments: + - "a: {neon_type[1]}" + - "b: {neon_type[1]}" + - "c: {neon_type[1]}" + - "n: i64" + links: + - link: "llvm.aarch64.crypto.{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vsm3tt{type[0]}" + - - "a" + - "b" + - "c" + - "IMM2 as i64" + - [] + - true + + - name: "vxarq_u64" + doc: "Exclusive OR and rotate" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, ['xar', 'IMM6 = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + static_defs: ["const IMM6: i32"] + safety: safe + types: + - uint64x2_t + compose: + - FnCall: ["static_assert_uimm_bits!", [IMM6, "6"]] + - LLVMLink: + name: "_vxarq_u64" + arguments: + - "a: {neon_type}" + - "b: {neon_type}" + - "n: i64" + links: + - link: "llvm.aarch64.crypto.xar" + arch: aarch64,arm64ec + - FnCall: + - "_vxarq_u64" + - - "a" + - "b" + - "IMM6 as i64" + - [] + - true + + - name: "vrnd32x{neon_type.no}" + doc: "Floating-point round to 32-bit integer, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = 
"neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32x]]}]] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd32x{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint32x.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd32x{neon_type.no}" + doc: "Floating-point round to 32-bit integer, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32x]]}]] + safety: safe + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd32x{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint32x.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: + - _vrnd32x_f64 + - - FnCall: [simd_extract!, [a, 0]] + + - name: "vrnd32z{neon_type.no}" + doc: "Floating-point round to 32-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32z]]}]] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd32z{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint32z.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd32z{neon_type.no}" + doc: "Floating-point round to 32-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + 
attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32z]]}]] + safety: safe + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd32z{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint32z.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: [_vrnd32z_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + + - name: "vrnd64x{neon_type.no}" + doc: "Floating-point round to 64-bit integer, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64x]]}]] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd64x{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint64x.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd64x{neon_type.no}" + doc: "Floating-point round to 64-bit integer, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64x]]}]] + safety: safe + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd64x{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint64x.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: [_vrnd64x_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + + 
- name: "vrnd64z{neon_type.no}" + doc: "Floating-point round to 64-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64z]]}]] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd64z{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint64z.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd64z{neon_type.no}" + doc: "Floating-point round to 64-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64z]]}]] + safety: safe + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd64z{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint64z.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: [_vrnd64z_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + safety: safe + types: + - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [int8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] + - [int16x4_t, '[0, 4, 2, 6]'] + - [int16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [int32x4_t, '[0, 4, 2, 6]'] + - [uint8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - 
[uint8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] + - [uint16x4_t, '[0, 4, 2, 6]'] + - [uint16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [uint32x4_t, '[0, 4, 2, 6]'] + - [poly8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [poly8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] + - [poly16x4_t, '[0, 4, 2, 6]'] + - [poly16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [float32x4_t, '[0, 4, 2, 6]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + safety: safe + types: + - [float16x4_t, '[0, 4, 2, 6]'] + - [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: safe + types: + - [int32x2_t, '[0, 2]'] + - [int64x2_t, '[0, 2]'] + - [uint32x2_t, '[0, 2]'] + - [uint64x2_t, '[0, 2]'] + - [poly64x2_t, '[0, 2]'] + - [float32x2_t, '[0, 2]'] + - [float64x2_t, '[0, 2]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + safety: safe + types: + - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 
29, 15, 31]'] + - [int16x4_t, '[1, 5, 3, 7]'] + - [int16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int32x4_t, '[1, 5, 3, 7]'] + - [uint8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [uint16x4_t, '[1, 5, 3, 7]'] + - [uint16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint32x4_t, '[1, 5, 3, 7]'] + - [poly8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [poly8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [poly16x4_t, '[1, 5, 3, 7]'] + - [poly16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [float32x4_t, '[1, 5, 3, 7]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + safety: safe + types: + - [float16x4_t, '[1, 5, 3, 7]'] + - [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: safe + types: + - [int32x2_t, '[1, 3]'] + - [int64x2_t, '[1, 3]'] + - [uint32x2_t, '[1, 3]'] + - [uint64x2_t, '[1, 3]'] + - [poly64x2_t, '[1, 3]'] + - [float32x2_t, '[1, 3]'] + - [float64x2_t, '[1, 3]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip2{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, 
{FnCall: [assert_instr, [zip2]]}]] + safety: safe + types: + - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [int16x4_t, '[2, 6, 3, 7]'] + - [int16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int32x2_t, '[1, 3]'] + - [int32x4_t, '[2, 6, 3, 7]'] + - [int64x2_t, '[1, 3]'] + - [uint8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [uint16x4_t, '[2, 6, 3, 7]'] + - [uint16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint32x2_t, '[1, 3]'] + - [uint32x4_t, '[2, 6, 3, 7]'] + - [uint64x2_t, '[1, 3]'] + - [poly8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [poly8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [poly16x4_t, '[2, 6, 3, 7]'] + - [poly16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [poly64x2_t, '[1, 3]'] + - [float32x2_t, '[1, 3]'] + - [float32x4_t, '[2, 6, 3, 7]'] + - [float64x2_t, '[1, 3]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip2{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: safe + types: + - [float16x4_t, '[2, 6, 3, 7]'] + - [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip1{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: safe + types: + - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [int8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] + - [int16x4_t, '[0, 4, 1, 
5]'] + - [int16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [int32x2_t, '[0, 2]'] + - [int32x4_t, '[0, 4, 1, 5]'] + - [int64x2_t, '[0, 2]'] + - [uint8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [uint8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] + - [uint16x4_t, '[0, 4, 1, 5]'] + - [uint16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [uint32x2_t, '[0, 2]'] + - [uint32x4_t, '[0, 4, 1, 5]'] + - [uint64x2_t, '[0, 2]'] + - [poly8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [poly8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] + - [poly16x4_t, '[0, 4, 1, 5]'] + - [poly16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [poly64x2_t, '[0, 2]'] + - [float32x2_t, '[0, 2]'] + - [float32x4_t, '[0, 4, 1, 5]'] + - [float64x2_t, '[0, 2]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + + - name: "vzip1{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: safe + types: + - [float16x4_t, '[0, 4, 1, 5]'] + - [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: safe + types: + - [int32x2_t, '[0, 2]'] + - [int64x2_t, '[0, 2]'] + - [uint32x2_t, '[0, 2]'] + - [uint64x2_t, '[0, 2]'] + - [poly64x2_t, '[0, 2]'] + - [float32x2_t, '[0, 2]'] + - [float64x2_t, '[0, 2]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: 
"{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + safety: safe + types: + - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [int8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] + - [int16x4_t, '[0, 2, 4, 6]'] + - [int16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [int32x4_t, '[0, 2, 4, 6]'] + - [uint8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [uint8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] + - [uint16x4_t, '[0, 2, 4, 6]'] + - [uint16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [uint32x4_t, '[0, 2, 4, 6] '] + - [poly8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [poly8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] + - [poly16x4_t, '[0, 2, 4, 6]'] + - [poly16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [float32x4_t, '[0, 2, 4, 6]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + safety: safe + types: + - [float16x4_t, '[0, 2, 4, 6]'] + - [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp2{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: safe + types: + - [int32x2_t, '[1, 3]'] + - [int64x2_t, '[1, 3]'] + - [uint32x2_t, '[1, 3]'] + - [uint64x2_t, '[1, 3]'] + - [poly64x2_t, '[1, 3]'] + - [float32x2_t, '[1, 3]'] + - [float64x2_t, '[1, 3]'] + compose: + - FnCall: 
["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp2{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + safety: safe + types: + - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [int16x4_t, '[1, 3, 5, 7]'] + - [int16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int32x4_t, '[1, 3, 5, 7]'] + - [uint8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [uint16x4_t, '[1, 3, 5, 7]'] + - [uint16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint32x4_t, '[1, 3, 5, 7]'] + - [poly8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [poly8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [poly16x4_t, '[1, 3, 5, 7]'] + - [poly16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [float32x4_t, '[1, 3, 5, 7]'] + compose: + - FnCall: + - "simd_shuffle!" + - - a + - b + - "{type[1]}" + + - name: "vuzp2{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + safety: safe + types: + - [float16x4_t, '[1, 3, 5, 7]'] + - [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + compose: + - FnCall: + - "simd_shuffle!" 
+ - - a + - b + - "{type[1]}" + + - name: "vabal_high_{neon_type[1]}" + doc: "Unsigned Absolute difference and Accumulate Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal]]}]] + safety: safe + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]', '[2, 3]'] + compose: + - Let: + - d + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - Let: + - e + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - Let: [f, "{neon_type[2]}", {FnCall: ["vabd_{neon_type[2]}", [d, e]]}] + - FnCall: + - simd_add + - - a + - FnCall: [simd_cast, [f]] + + - name: "vabal_high{neon_type[1].noq}" + doc: Signed Absolute difference and Accumulate Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal]]}]] + safety: safe + types: + - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t] + - [int32x4_t, int16x8_t, int16x8_t, '[4, 5, 6, 7]', int16x4_t, uint16x4_t] + - [int64x2_t, int32x4_t, int32x4_t, '[2, 3]', int32x2_t, uint32x2_t] + compose: + - Let: + - d + - "{neon_type[4]}" + - FnCall: + - simd_shuffle! + - - b + - b + - "{type[3]}" + - Let: + - e + - "{neon_type[4]}" + - FnCall: + - simd_shuffle! 
+ - - c + - c + - "{type[3]}" + - Let: + - f + - "{neon_type[4]}" + - FnCall: + - "vabd{neon_type[4].no}" + - - d + - e + - Let: + - f + - "{neon_type[5]}" + - FnCall: + - simd_cast + - - f + - FnCall: + - simd_add + - - a + - FnCall: + - simd_cast + - - f + + - name: "vqabs{neon_type.no}" + doc: Signed saturating Absolute value + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] + safety: safe + types: + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "sqabs.{neon_type}" + links: + - link: "llvm.aarch64.neon.sqabs.{neon_type}" + arch: aarch64,arm64ec + + - name: "vslid_n_{type}" + doc: Shift left and insert + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + static_defs: + - "const N: i32" + attr: + - *neon-stable + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sli, 'N = 2']]}]] + safety: safe + types: + - i64 + - u64 + compose: + - FnCall: + - "static_assert!" + - - 'N >= 0 && N <= 63' + - FnCall: + - transmute + - - FnCall: + - "vsli_n_{type}::" + - - FnCall: + - transmute + - - a + - FnCall: + - transmute + - - b + + - name: "vsrid_n_{type}" + doc: Shift right and insert + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + static_defs: + - "const N: i32" + attr: + - *neon-stable + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sri, 'N = 2']]}]] + safety: safe + types: + - i64 + - u64 + compose: + - FnCall: + - "static_assert!" 
+ - - 'N >= 1 && N <= 64' + - FnCall: + - transmute + - - FnCall: + - "vsri_n_{type}::" + - - FnCall: + - transmute + - - a + - FnCall: + - transmute + - - b + + - name: "vpmaxnm{neon_type.no}" + doc: "Floating-point Maximum Number Pairwise (vector)." + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float32x2_t + - float64x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vpmaxnm{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnmp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut f64', float64x1x2_t, float64x1_t] + - ['*mut f64', float64x2x2_t, float64x2_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x{neon_type[1].tuple}.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'a']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut f64', float64x1x3_t, float64x1_t] + - ['*mut f64', float64x2x3_t, float64x2_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: 
{neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x{neon_type[1].tuple}.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'a']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut f64', float64x1x4_t, float64x1_t] + - ['*mut f64', float64x2x4_t, float64x2_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x{neon_type[1].tuple}.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', 'a']] + + - name: "vfma{type[3]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float32x2_t, '1', '_lane_f32'] + - [float32x2_t, float32x4_t, '2', '_laneq_f32'] + - [float32x4_t, float32x2_t, '1', 'q_lane_f32'] + - [float32x4_t, float32x4_t, '2', 'q_laneq_f32'] + - [float64x2_t, float64x2_t, '1', 'q_laneq_f64'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + - name: 
"vfma{type[3]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float16x4_t, float16x4_t, '2', '_lane_f16'] + - [float16x4_t, float16x8_t, '3', '_laneq_f16'] + - [float16x8_t, float16x4_t, '2', 'q_lane_f16'] + - [float16x8_t, float16x8_t, '3', 'q_laneq_f16'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + # vfms lane f16 + - name: "vfms{type[3]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float16x4_t, float16x4_t, '2', '_lane_f16'] + - [float16x4_t, float16x8_t, '3', '_laneq_f16'] + - [float16x8_t, float16x4_t, '2', 'q_lane_f16'] + - [float16x8_t, float16x8_t, '3', 'q_laneq_f16'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + - name: "vfms{type[1]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: 
safe + types: + - ["f16", "h_f16"] + compose: + - FnCall: ["vfma{type[1]}", [a, -b, c]] + + + - name: "vfma_lane_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - float64x1_t + compose: + - FnCall: ["static_assert!", ["LANE == 0"]] + - FnCall: + - "vfma{neon_type.no}" + - - a + - b + - FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma_laneq_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: ["static_assert_uimm_bits!", ["LANE", "1"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfmaq_lane_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x2_t, float64x1_t] + compose: + - FnCall: ["static_assert!", ["LANE == 
0"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma{type[2]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f32", float32x2_t, "s_lane_f32", '1'] + - ["f32", float32x4_t, "s_laneq_f32", '2'] + - ["f64", float64x2_t, "d_laneq_f64", '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] + - FnCall: ["fma{type[0]}", [b, c, a]] + + - name: "vfmad_lane_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f64", float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] + - FnCall: [fmaf64, [b, c, a]] + + + - name: "vfma{type[1]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "h_f16"] + compose: + - FnCall: [fmaf16, [b, c, a], [], true] + + + - name: "vfmah_lane{type[2]}" + doc: "Floating-point fused 
multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f16", float16x4_t, '_f16', '2'] + - ["f16", float16x8_t, 'q_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - FnCall: ["vfmah_{type[0]}", [a, b, c]] + + - name: "vfmsh_lane{type[2]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f16", float16x4_t, '_f16', '2'] + - ["f16", float16x8_t, 'q_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - FnCall: ["vfmsh_{type[0]}", [a, b, c]] + + - name: "vfms_f64" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float64x1_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: [vfma_f64, [a, b, c]] + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: 
[cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float64x2_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: [vfmaq_f64, [a, b, c]] + + - name: "vmls{neon_type.no}" + doc: "Floating-point multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_sub, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vfms{type[3]}" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float32x2_t, '1', _lane_f32] + - [float32x2_t, float32x4_t, '2', _laneq_f32] + - [float32x4_t, float32x2_t, '1', q_lane_f32] + - [float32x4_t, float32x4_t, '2', q_laneq_f32] + - [float64x2_t, float64x2_t, '1', q_laneq_f64] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[2]}']] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfms_lane_f64" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - float64x1_t + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfms_laneq_f64" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfmsq_lane_f64" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x2_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfms{type[2]}" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - 
FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f32", float32x2_t, "s_lane_f32"] + - ["f32", float32x4_t, "s_laneq_f32"] + - ["f64", float64x1_t, "d_lane_f64"] + - ["f64", float64x2_t, "d_laneq_f64"] + compose: + - FnCall: ["vfma{type[2]}::", ['a', '-b', 'c']] + + + - name: "vceqz{neon_type[0].no}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t, 'f32x2', 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, 'f32x4', 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, 'f64', '0.0'] + - [float64x2_t, uint64x2_t, 'f64x2', 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, '{type[2]}', '{type[3]}'] + - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + + - name: "vceqz{neon_type[0].no}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t, 'f16x4', 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, 'f16x8', 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, '{type[2]}', '{type[3]}'] + - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + + - name: "vceqz{type[2]}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", "s_f32"] + - ["f64", "u64", "d_f64"] + compose: + - FnCall: + - 
simd_extract! + - - FnCall: + - "vceqz_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - '0' + + - name: "vceqz{type[2]}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", "h_f16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceqz_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - '0' + + - name: "vceqzd_{type[2]}" + doc: "Compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64", "s64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vceqz_{type[2]}" + - - FnCall: [transmute, [a]] + + - name: "vceqz{neon_type[0].no}" + doc: "Signed compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + - [poly8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - 
[poly64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [poly64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_eq + - - a + - FnCall: [transmute, [b]] + + - name: "vceqz{neon_type[0].no}" + doc: "Unsigned compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint8x16_t, uint8x16_t, u8x16, 'u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint16x4_t, uint16x4_t, u16x4, 'u16x4::new(0, 0, 0, 0)'] + - [uint16x8_t, uint16x8_t, u16x8, 'u16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint32x2_t, uint32x2_t, u32x2, 'u32x2::new(0, 0)'] + - [uint32x4_t, uint32x4_t, u32x4, 'u32x4::new(0, 0, 0, 0)'] + - [uint64x1_t, uint64x1_t, u64x1, 'u64x1::new(0)'] + - [uint64x2_t, uint64x2_t, u64x2, 'u64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_eq + - - a + - FnCall: [transmute, [b]] + + - name: "vcge{neon_type.no}" + doc: "Compare unsigned greater than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{type[0]}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - 
simd_extract! + - - FnCall: + - "vcge_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + + - name: "vcge{type[0]}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcge_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{type[0]}" + doc: "Compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["d_s64", "i64", "u64", s64] + - ["d_u64", "u64", "u64", u64] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcge_{type[3]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vclt{neon_type.no}" + doc: "Compare unsigned less than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vcltd_{type[0]}" + 
doc: "Compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s64", "i64", "u64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vclt_{type[0]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vtst{neon_type[0].no}" + doc: "Unsigned compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint64x1_t, u64x1, 'u64x1::new(0)'] + - [uint64x2_t, u64x2, 'u64x2::new(0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[1]}", "{type[2]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vcgez{neon_type[0].no}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_ge + - - a + - FnCall: [transmute, [b]] + + - name: "vcgez{type[0]}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcgez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + + - name: "vcgez{type[0]}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcgez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vclezd_s64" + doc: "Compare less than or equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: [vclez_s64, [{FnCall: [transmute, [a]]}]] + + - name: "vcgtd_{type[2]}" + doc: "Compare greater than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64", 's64'] + - ["u64", "u64", 'u64'] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcgt_{type[2]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vcgtz{neon_type[0].no}" + doc: "Compare signed greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 
'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_gt + - - a + - FnCall: [transmute, [b]] + + - name: "vcgtzd_s64" + doc: "Compare signed greater than zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vcgtz_s64 + - - FnCall: [transmute, [a]] + + - name: "vcgtz{neon_type[0].no}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_gt, [a, {FnCall: [transmute, [b]]}]] + + - name: "vcgtz{type[0]}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - 
"simd_extract!" + - - FnCall: + - "vcgtz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcgtz{type[0]}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - "simd_extract!" + - - FnCall: + - "vcgtz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, "f64"] + - [float64x2_t, "f64"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul_lane_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - 
float64x1_t + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - simd_mul + - - a + - FnCall: + - "transmute::" + - - FnCall: [simd_extract!, [b, 'LANE as u32']] + + - name: "vmulq_lane_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x2_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32, LANE as u32]']] + + - name: "vmuld_lane_f64" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f64", float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + - name: "vmul_laneq_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - simd_mul + - - a + - FnCall: + - "transmute::" + - - FnCall: [simd_extract!, [b, 'LANE as 
u32']] + + - name: "vmulq_laneq_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float64x2_t, float64x2_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']] + + + # vmulq_laneq_f16 + - name: "vmul{type[2]}{neon_type[1].no}" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float16x4_t, float16x8_t, '_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '3']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + + + - name: "vmul{type[1]}_{type[0]}" + doc: Add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: safe + types: + - [f16, 'h'] + compose: + - 'a * b' + + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature 
= "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f32", float32x2_t, "s_lane_f32", '1'] + - ["f32", float32x4_t, "s_laneq_f32", '2'] + - ["f64", float64x2_t, "d_laneq_f64", '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f16", float16x4_t, "h_lane_f16", '2'] + - ["f16", float16x8_t, "h_laneq_f16", '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + + - name: "vrsrad_n_s64" + doc: "Signed rounding shift right and accumulate." 
+ arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - "i64" + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - Let: [b, "{type}", {FnCall: ["vrshrd_n_s64::", [b]]}] + - Identifier: ['a.wrapping_add(b)', Symbol] + + - name: "vmlsl_high_n_{neon_type[1]}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int32x4_t, int16x8_t, "i16"] + - [int64x2_t, int32x4_t, "i32"] + compose: + - FnCall: ["vmlsl_high_{neon_type[1]}", [a, b, {FnCall: ["vdupq_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_high_n_{neon_type[1]}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint32x4_t, uint16x8_t, "u16"] + - [uint64x2_t, uint32x4_t, "u32"] + compose: + - FnCall: ["vmlsl_high_{neon_type[1]}", [a, b, {FnCall: ["vdupq_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_high_lane{neon_type[2].no}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: 
safe + types: + - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmlsl_high_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmlsl_high_lane{neon_type[2].no}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmlsl_high_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vclt{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: 
"{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vclt{type[2]}" + doc: "Floating-point compare less than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["f32", "u32", 's_f32'] + - ["f64", "u64", 'd_f64'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclt_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - FnCall: ["vdup_n_{type[0]}", [b]] + - '0' + + + - name: "vclt{type[2]}" + doc: "Floating-point compare less than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", 'h_f16'] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vclt_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - FnCall: ["vdup_n_{type[0]}", [b]] + - '0' + + - name: "vabdl_high_{neon_type[0]}" + doc: "Unsigned Absolute difference Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]'] + compose: + - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]}] + - Let: [d, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [c, d]]}]] + + - name: "vfms_n_f64" + doc: "Floating-point fused Multiply-subtract to accumulator(vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, "f64"] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vfmsq_n_f64" + doc: "Floating-point fused Multiply-subtract to accumulator(vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x2_t, "f64"] + compose: + - FnCall: + - "vfms{neon_type[1].no}" + - - a + - b + - FnCall: ["vdup{neon_type[1].N}", [c]] + + + - name: "vfms{neon_type[0].N}" + doc: Floating-point fused Multiply-Subtract from accumulator. 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmls] + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}" + - - c + + + - name: "vpminnm{type[0]}" + doc: "Floating-point minimum number pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['s_f32', float32x2_t, "f32"] + - ['qd_f64', float64x2_t, "f64"] + compose: + - LLVMLink: + name: "vpminnm{type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vpmaxnm{type[0]}" + doc: "Floating-point maximum number pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['s_f32', float32x2_t, "f32"] + - ['qd_f64', float64x2_t, "f64"] + compose: + - LLVMLink: + name: "vpmaxnm{type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vcled_{type[0]}" + doc: "Compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s64", "i64", "u64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcle_{type[0]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vqdmulh{neon_type[0].lane_nox}" + doc: "Vector saturating doubling 
multiply high by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [int16x4_t, int16x4_t, '2'] + - [int16x8_t, int16x4_t, '2'] + - [int32x2_t, int32x2_t, '1'] + - [int32x4_t, int32x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: + - "vqdmulh{neon_type[0].no}" + - - a + - FnCall: + - "vdup{neon_type[0].N}" + - - FnCall: [simd_extract!, [b, 'LANE as u32']] + + - name: "vqabs{type[2]}" + doc: "Signed saturating absolute value" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] + safety: safe + types: + - ["i8", "s8", 'b_s8'] + - ["i16", "s16", 'h_s16'] + compose: + - FnCall: + - "simd_extract!" 
+ - - FnCall: ["vqabs_{type[1]}", [{FnCall: ["vdup_n_{type[1]}", [a]]}]] + - '0' + + - name: "vqabs{type[1]}" + doc: "Signed saturating absolute value" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] + safety: safe + types: + - ["i32", "s_s32"] + - ["i64", "d_s64"] + compose: + - LLVMLink: + name: "vqabs{type[1]}" + links: + - link: "llvm.aarch64.neon.sqabs.{type[0]}" + arch: aarch64,arm64ec + + - name: "vmull_high_n_{neon_type[0]}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int16x8_t, "i16", int32x4_t] + - [int32x4_t, "i32", int64x2_t] + compose: + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: ["vdupq_n_{neon_type[0]}", [b]] + + - name: "vmull_high_n_{neon_type[0]}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint16x8_t, "u16", uint32x4_t] + - [uint32x4_t, "u32", uint64x2_t] + compose: + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: ["vdupq_n_{neon_type[0]}", [b]] + + - name: "vmull_high_lane{neon_type[1].no}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const 
LANE: i32'] + safety: safe + types: + - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + + - name: "vmull_high_lane{neon_type[1].no}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + + - name: "vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrsqrte{type[0]}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["s_f32", "f32"] + - ["d_f64", "f64"] + compose: + - LLVMLink: + name: "vrsqrte{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vrsqrte{type[0]}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["h_f16", "f16"] + compose: + - LLVMLink: + name: "vrsqrte{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{type[1]}" + arch: aarch64,arm64ec + + + - name: "vpminnm{neon_type.no}" + doc: "Floating-point Minimum Number Pairwise (vector)."
+ arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - float32x2_t + - float64x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vpminnm{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fminnmp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshlu{type[0]}" + doc: "Signed saturating shift left unsigned" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - [b_n_s8, i8, u8, '3', s8] + - [h_n_s16, i16, u16, '4', s16] + - [s_n_s32, i32, u32, '5', s32] + - [d_n_s64, i64, u64, '6', s64] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vqshlu_n_{type[4]}::" + - - FnCall: ["vdup_n_{type[4]}", [a]] + - '0' + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvta{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvta{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: 
"Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtmh_{type[3]}_f16(a) as i16' + + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtmh_{type[3]}_f16(a) as u16' + + - name: "vmlal_high_n_{neon_type[1]}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] 
+ return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int32x4_t, int16x8_t, "i16"] + - [int64x2_t, int32x4_t, "i32"] + compose: + - FnCall: + - "vmlal_high_{neon_type[1]}" + - - a + - b + - FnCall: ["vdupq_n_{neon_type[1]}", [c]] + + - name: "vmlal_high_n_{neon_type[1]}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint32x4_t, uint16x8_t, "u16"] + - [uint64x2_t, uint32x4_t, "u32"] + compose: + - FnCall: + - "vmlal_high_{neon_type[1]}" + - - a + - b + - FnCall: ["vdupq_n_{neon_type[1]}", [c]] + + - name: "vmlal_high_lane{neon_type[2].no}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: 
[simd_shuffle!, [c, c, '{type[4]}']]}]] + + - name: "vmlal_high_lane{neon_type[2].no}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: safe + types: + - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] + + - name: "vrsrad_n_u64" + doc: "Unsigned rounding shift right and accumulate." 
+ arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: safe + types: + - "u64" + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - Let: [b, u64, {FnCall: ["vrshrd_n_u64::", [b]]}] + - Identifier: ['a.wrapping_add(b)', Symbol] + + - name: "vcle{neon_type.no}" + doc: "Compare unsigned less than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x2x4_t, "v2i64"] + - ["*const f64", float64x1x4_t, "v1f64"] + - ["*const f64", float64x2x4_t, "v2f64"] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4r.{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as _']] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + 
types: + - ["*const u64", uint64x2x4_t, "q_dup_s64"] + compose: + - FnCall: + - transmute + - - FnCall: ["vld4{type[2]}", [{FnCall: [transmute, [a]]}]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x4_t, "q_dup_s64"] + compose: + - FnCall: + - transmute + - - FnCall: ["vld4{type[2]}", [{FnCall: [transmute, [a]]}]] + + - name: "vtbx4{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, int8x8x4_t] + compose: + - FnCall: + - "vqtbx2" + - - FnCall: [transmute, [a]] + - FnCall: + - transmute + - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]] + - FnCall: + - transmute + - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]] + - FnCall: [transmute, [c]] + + - name: "vtbx4{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, uint8x8x4_t, uint8x8_t] + - [poly8x8_t, poly8x8x4_t, uint8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vqtbx2" + - - FnCall: [transmute, [a]] + - FnCall: + - transmute + - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]] + - FnCall: + - 
transmute + - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]] + - c + + - name: "vtbl1{neon_type[0].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}'] + - [uint8x8_t, 'uint8x8_t', 'b'] + - [poly8x8_t, 'uint8x8_t', 'b'] + compose: + - FnCall: + - 'vqtbl1{neon_type[0].no}' + - - FnCall: + - 'vcombine{neon_type[0].no}' + - - a + - 'unsafe {{ crate::mem::zeroed() }}' + - Identifier: ['{type[2]}', Symbol] + + - name: "vtbl2{neon_type[1].noq}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8x2_t, 'int8x8_t'] + compose: + - FnCall: + - vqtbl1 + - - FnCall: + - transmute + - - FnCall: + - 'vcombine{neon_type[1].noq}' + - - 'a.0' + - 'a.1' + - FnCall: [transmute, [b]] + + - name: "vtbl2{neon_type[2].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8x2_t, 'uint8x8_t', 'uint8x8_t'] + - [poly8x8x2_t, 'uint8x8_t', 'poly8x8_t'] + compose: + - FnCall: + - transmute + - - FnCall: + - vqtbl1 + - - FnCall: + - transmute + - - FnCall: + - 'vcombine{neon_type[2].noq}' + - - 'a.0' + - 'a.1' + - b + + - name: "vtbl3{neon_type[1].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, 
[tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8x3_t, 'int8x8_t', 'int8x16x2'] + compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] + - FnCall: + - transmute + - - FnCall: + - vqtbl2 + - - FnCall: [transmute, ['x.0']] + - FnCall: [transmute, ['x.1']] + - FnCall: [transmute, [b]] + + - name: "vtbl3{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t'] + - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t'] + big_endian_inverse: true + compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] + - FnCall: + - transmute + - - FnCall: + - vqtbl2 + - - FnCall: [transmute, ['x.0']] + - FnCall: [transmute, ['x.1']] + - b + + - name: "vtbl4{neon_type[1].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8x4_t, 'int8x8_t', 'int8x16x2'] + compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']] + - FnCall: + - transmute + - - FnCall: + - 'vqtbl2' + - - FnCall: [transmute, ['x.0']] + - FnCall: [transmute, ['x.1']] + - FnCall: [transmute, [b]] + + - name: 
"vtbl4{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t'] + - [poly8x8x4_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t'] + big_endian_inverse: true + compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'a.3']] + - FnCall: + - transmute + - - FnCall: + - 'vqtbl2' + - - FnCall: [transmute, ['x.0']] + - FnCall: [transmute, ['x.1']] + - b + + - name: "vqtbx1{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1] + - [int8x16_t, int8x16_t, uint8x16_t, vqtbx1q] + compose: + - FnCall: ['{type[3]}', [a, b, c]] + + - name: "vqtbx1{type[4]}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, "uint8x16_t", uint8x8_t, "vqtbx1", "_u8"] + - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"] + - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"] + - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"] + compose: + - Let: + - x + - FnCall: + - transmute + - - FnCall: + - "{type[3]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + - c + - Identifier: [x, Symbol] + 
+ - name: "vtbx1{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"] + - [uint8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"] + - [poly8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"] + compose: + - FnCall: + - simd_select + - - FnCall: + - "simd_lt::<{type[4]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[3]}"]] + - FnCall: + - transmute + - - FnCall: + - "vqtbx1" + - - "transmute(a)" + - FnCall: + - transmute + - - FnCall: ["vcombine{neon_type[0].no}", [b, "crate::mem::zeroed()"]] + - "{type[2]}" + - a + + - name: "vtbx2{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, 'int8x8x2_t'] + compose: + - FnCall: + - vqtbx1 + - - FnCall: [transmute, [a]] + - FnCall: + - transmute + - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']] + - FnCall: [transmute, [c]] + + - name: "vtbx2{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, 'uint8x8x2_t', uint8x8_t] + - [poly8x8_t, 'poly8x8x2_t', uint8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vqtbx1 + - - FnCall: [transmute, [a]] + - FnCall: + - transmute + - - FnCall: 
["vcombine{neon_type[0].no}", ['b.0', 'b.1']] + - c + + - name: "vtbx3{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8'] + compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] + - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']] + - FnCall: + - transmute + - - FnCall: + - simd_select + - - FnCall: + - 'simd_lt::<{type[4]}_t, int8x8_t>' + - - FnCall: [transmute, [c]] + - FnCall: [transmute, ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vqtbx2' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ['x.0']] + - FnCall: [transmute, ['x.1']] + - FnCall: [transmute, [c]] + - a + + - name: "vtbx3{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: uint8x8_t"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8'] + - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'u8x8::splat(24)', 'poly8x8'] + big_endian_inverse: true + compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] + - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']] + - FnCall: + - transmute + - - FnCall: + - simd_select + - - FnCall: + - 'simd_lt::<{type[4]}_t, int8x8_t>' + - - FnCall: [transmute, [c]] + - FnCall: [transmute, ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vqtbx2' + - 
- FnCall: [transmute, [a]] + - FnCall: [transmute, ['x.0']] + - FnCall: [transmute, ['x.1']] + - c + - a + + - name: "vqtbl1{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t'] + - ['int8x16_t', uint8x16_t, 'vqtbl1q', 'int8x16_t'] + compose: + - FnCall: ['{type[2]}', ['a', b]] + + - name: "vqtbl1{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['uint8x16_t', uint8x8_t, 'vqtbl1', 'uint8x8_t'] + - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t'] + - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t'] + - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t'] + compose: + - Let: + - x + - FnCall: + - transmute + - - FnCall: + - '{type[2]}' + - - FnCall: [transmute, ['a']] + - b + - Identifier: [x, Symbol] + + - name: "vqtbl2{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t'] + - ['int8x16x2_t', uint8x16_t, 'vqtbl2q', 'int8x16_t'] + compose: + - FnCall: ['{type[2]}', ['a.0', 'a.1', b]] + + - name: "vqtbl2{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t'] + - ['uint8x16x2_t', uint8x16_t, 'vqtbl2q', 'uint8x16_t'] + - ['poly8x16x2_t', uint8x8_t, 'vqtbl2', 'poly8x8_t'] + - ['poly8x16x2_t', uint8x16_t, 'vqtbl2q', 'poly8x16_t'] + compose: + - FnCall: + - transmute + - - FnCall: + - '{type[2]}' + - - FnCall: [transmute, ['a.0']] + - FnCall: [transmute, ['a.1']] + - b + + - name: "vqtbx2{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2'] + - [int8x16_t, 'int8x16x2_t', uint8x16_t, 'vqtbx2q'] + compose: + - FnCall: ['{type[3]}', [a, 'b.0', 'b.1', c]] + + - name: "vqtbx2{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2'] + - [uint8x16_t, 'uint8x16x2_t', uint8x16_t, 'vqtbx2q'] + - [poly8x8_t, 'poly8x16x2_t', uint8x8_t, 'vqtbx2'] + - [poly8x16_t, 'poly8x16x2_t', uint8x16_t, 'vqtbx2q'] + compose: + - FnCall: + - transmute + - - FnCall: + - '{type[3]}' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ['b.0']] + - FnCall: [transmute, ['b.1']] + - c + + - name: "vqtbl3{neon_type[0].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + 
types: + - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3'] + - ['int8x16_t', 'int8x16x3_t', uint8x16_t, 'vqtbl3q'] + compose: + - FnCall: ['{type[3]}', ['a.0', 'a.1', 'a.2', b]] + + - name: "vqtbl3{neon_type[0].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3'] + - ['uint8x16_t','uint8x16x3_t', uint8x16_t, 'vqtbl3q'] + - ['poly8x8_t', 'poly8x16x3_t', uint8x8_t, 'vqtbl3'] + - ['poly8x16_t','poly8x16x3_t', uint8x16_t, 'vqtbl3q'] + compose: + - FnCall: + - transmute + - - FnCall: + - '{type[3]}' + - - FnCall: [transmute, ['a.0']] + - FnCall: [transmute, ['a.1']] + - FnCall: [transmute, ['a.2']] + - b + + - name: "vqtbx3{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3'] + - [int8x16_t, 'int8x16x3_t', uint8x16_t, 'vqtbx3q'] + compose: + - FnCall: ['{type[3]}', [a, 'b.0', 'b.1', 'b.2', c]] + + - name: "vqtbx3{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3'] + - [uint8x16_t, 'uint8x16x3_t', uint8x16_t, 'vqtbx3q'] + - [poly8x8_t, 'poly8x16x3_t', uint8x8_t, 'vqtbx3'] + - [poly8x16_t, 'poly8x16x3_t', uint8x16_t, 'vqtbx3q'] + compose: + - 
FnCall: + - transmute + - - FnCall: + - '{type[3]}' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ['b.0']] + - FnCall: [transmute, ['b.1']] + - FnCall: [transmute, ['b.2']] + - c + + - name: "vqtbl4{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t'] + - ['int8x16x4_t', uint8x16_t, 'vqtbl4q', 'int8x16_t'] + compose: + - FnCall: ['{type[2]}', ['a.0', 'a.1', 'a.2', 'a.3', b]] + + - name: "vqtbl4{neon_type[3].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t'] + - ['uint8x16x4_t', uint8x16_t, 'vqtbl4q', 'uint8x16_t'] + - ['poly8x16x4_t', uint8x8_t, 'vqtbl4', 'poly8x8_t'] + - ['poly8x16x4_t', uint8x16_t, 'vqtbl4q', 'poly8x16_t'] + compose: + - FnCall: + - transmute + - - FnCall: + - '{type[2]}' + - - FnCall: [transmute, ['a.0']] + - FnCall: [transmute, ['a.1']] + - FnCall: [transmute, ['a.2']] + - FnCall: [transmute, ['a.3']] + - b + + - name: "vqtbx4{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4'] + - [int8x16_t, 'int8x16x4_t', uint8x16_t, 'vqtbx4q'] + compose: + - FnCall: ['{type[3]}', [a, 'b.0', 'b.1', 'b.2', 'b.3', c]] + + - name: 
"vqtbx4{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4'] + - [uint8x16_t, 'uint8x16x4_t', uint8x16_t, 'vqtbx4q'] + - [poly8x8_t, 'poly8x16x4_t', uint8x8_t, 'vqtbx4'] + - [poly8x16_t, 'poly8x16x4_t', uint8x16_t, 'vqtbx4q'] + compose: + - FnCall: + - transmute + - - FnCall: + - '{type[3]}' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ['b.0']] + - FnCall: [transmute, ['b.1']] + - FnCall: [transmute, ['b.2']] + - FnCall: [transmute, ['b.3']] + - c + + - name: "{type[0]}" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["vqtbl1", "int8x16_t", "uint8x8_t", "int8x8_t"] + - ["vqtbl1q", "int8x16_t", "uint8x16_t", "int8x16_t"] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbl1.{neon_type[3]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["vqtbl2", "int8x16_t", "uint8x8_t", "int8x8_t"] + - ["vqtbl2q", "int8x16_t", "uint8x16_t", "int8x16_t"] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbl2.{neon_type[3]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: 
"Table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}", "d: {neon_type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t] + - ["vqtbl3q", int8x16_t, uint8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbl3.{neon_type[3]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}", "d: {neon_type[1]}", "e: {neon_type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t] + - ["vqtbl4q", int8x16_t, uint8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbl4.{neon_type[3]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [vqtbx1, "int8x8_t", "int8x16_t", "uint8x8_t"] + - [vqtbx1q, "int8x16_t", "int8x16_t", "uint8x16_t"] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbx1.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[2]}", "d: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, 
{FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [vqtbx2, "int8x8_t", "int8x16_t", "uint8x8_t"] + - [vqtbx2q, "int8x16_t", "int8x16_t", "uint8x16_t"] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbx2.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[2]}", "d: {neon_type[2]}", "e: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [vqtbx3, "int8x8_t", "int8x16_t", "uint8x8_t"] + - [vqtbx3q, "int8x16_t", "int8x16_t", "uint8x16_t"] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbx3.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "{type[0]}" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[2]}", "d: {neon_type[2]}", "e: {neon_type[2]}", "f: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [vqtbx4, "int8x8_t", "int8x16_t", "uint8x8_t"] + - [vqtbx4q, "int8x16_t", "int8x16_t", "uint8x16_t"] + compose: + - LLVMLink: + name: "_{type[0]}" + links: + - link: "llvm.aarch64.neon.tbx4.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8_t, "neon"] + - ['*const i8', int8x16_t, "neon"] + - ['*const i16', int16x4_t, "neon"] + - ['*const i16', int16x8_t, "neon"] + - ['*const i32', int32x2_t, "neon"] + - ['*const i32', int32x4_t, "neon"] + - ['*const i64', int64x1_t, "neon"] + - ['*const i64', int64x2_t, "neon"] + - ['*const u8', uint8x8_t, "neon"] + - ['*const u8', uint8x16_t, "neon"] + - ['*const u16', uint16x4_t, "neon"] + - ['*const u16', uint16x8_t, "neon"] + - ['*const u32', uint32x2_t, "neon"] + - ['*const u32', uint32x4_t, "neon"] + - ['*const u64', uint64x1_t, "neon"] + - ['*const u64', uint64x2_t, "neon"] + - ['*const p8', poly8x8_t, "neon"] + - ['*const p8', poly8x16_t, "neon"] + - ['*const p16', poly16x4_t, "neon"] + - ['*const p16', poly16x8_t, "neon"] + - ['*const p64', poly64x1_t, "neon,aes"] + - ['*const p64', poly64x2_t, "neon,aes"] + - ['*const f32', float32x2_t, "neon"] + - ['*const f32', float32x4_t, "neon"] + - ['*const f64', float64x1_t, "neon"] + - ['*const f64', float64x2_t, "neon"] + compose: + - FnCall: + - 'crate::ptr::read_unaligned' + - - MethodCall: + - ptr + - cast + - [] + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*const f16', float16x4_t, "neon,fp16"] + - ['*const f16', float16x8_t, "neon,fp16"] + compose: + - FnCall: + - 'crate::ptr::read_unaligned' + - - MethodCall: + - ptr + - cast + - [] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]] + - FnCall: [allow, ['clippy::cast_ptr_alignment']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut i8', int8x8_t, "neon"] + - ['*mut i8', int8x16_t, "neon"] + - ['*mut i16', int16x4_t, "neon"] + - ['*mut i16', int16x8_t, "neon"] + - ['*mut i32', int32x2_t, "neon"] + - ['*mut i32', int32x4_t, "neon"] + - ['*mut i64', int64x1_t, "neon"] + - ['*mut i64', int64x2_t, "neon"] + - ['*mut u8', uint8x8_t, "neon"] + - ['*mut u8', uint8x16_t, "neon"] + - ['*mut u16', uint16x4_t, "neon"] + - ['*mut u16', uint16x8_t, "neon"] + - ['*mut u32', uint32x2_t, "neon"] + - ['*mut u32', uint32x4_t, "neon"] + - ['*mut u64', uint64x1_t, "neon"] + - ['*mut u64', uint64x2_t, "neon"] + - ['*mut p8', poly8x8_t, "neon"] + - ['*mut p8', poly8x16_t, "neon"] + - ['*mut p16', poly16x4_t, "neon"] + - ['*mut p16', poly16x8_t, "neon"] + - ['*mut p64', poly64x1_t, "neon,aes"] + - ['*mut p64', poly64x2_t, "neon,aes"] + - ['*mut f32', float32x2_t, "neon"] + - ['*mut f32', float32x4_t, "neon"] + - ['*mut f64', float64x1_t, "neon"] + - ['*mut f64', float64x2_t, "neon"] + compose: + - FnCall: + - 'crate::ptr::write_unaligned' + - - MethodCall: + - ptr + - cast + - [] + - a + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]] + - FnCall: [allow, ['clippy::cast_ptr_alignment']] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4_t, "neon,fp16"] + - ['*mut f16', float16x8_t, "neon,fp16"] + compose: + - FnCall: + - 'crate::ptr::write_unaligned' + - - MethodCall: + - ptr + - cast + - [] + - a + + - name: "__crc32d" + doc: "CRC32 single round checksum for quad words (64 bits)." + arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *target-not-arm + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32x"]] }]] + - *aarch64-crc-stable + safety: safe + types: + - [u32, u64] + compose: + - LLVMLink: + name: "crc32x" + arguments: + - "crc: u32" + - "data: u64" + links: + - link: "llvm.aarch64.crc32x" + arch: aarch64,arm64ec + + - name: "__crc32cd" + doc: "CRC32-C single round checksum for quad words (64 bits)." + arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *target-not-arm + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cx"]] }]] + - *aarch64-crc-stable + safety: safe + types: + - [u32, u64] + compose: + - LLVMLink: + name: "crc32cx" + arguments: + - "crc: u32" + - "data: u64" + links: + - link: "llvm.aarch64.crc32cx" + arch: aarch64,arm64ec + + - name: "{type[0]}" + doc: "Absolute Value (wrapping)." 
+ arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [abs] + safety: safe + types: + - ['vabsd_s64', i64, i64] + - ['vabs_s64', int64x1_t, v1i64] + - ['vabsq_s64', int64x2_t, v2i64] + compose: + - LLVMLink: + name: "{type[0]}" + links: + - link: "llvm.aarch64.neon.abs.{type[2]}" + arch: aarch64,arm64ec + + - name: "vuqadd{neon_type[0].no}" + doc: "Signed saturating Accumulate of Unsigned value." + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + assert_instr: [suqadd] + safety: safe + types: + - [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vuqadd{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.suqadd.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vsqadd{neon_type[0].no}" + doc: "Unsigned saturating Accumulate of Signed value." 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + assert_instr: [usqadd] + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vsqadd{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.usqadd.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: "Add Pairwise" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - int8x16_t + - int16x8_t + - int32x4_t + - int64x2_t + compose: + - LLVMLink: + name: "vpadd{neon_type.no}" + links: + - link: "llvm.aarch64.neon.addp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{neon_type[0].no}" + doc: "Add Pairwise" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [uint8x16_t, int8x16_t] + - [uint16x8_t, int16x8_t] + - [uint32x4_t, int32x4_t] + - [uint64x2_t, int64x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vpadd{neon_type[1].no}' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vpaddd_s64" + doc: "Add pairwise" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [int64x2_t, i64] + compose: + - FnCall: + - transmute + - - FnCall: + - "vaddvq_u64" + - - FnCall: [transmute, [a]] + + - name: "vpaddd_u64" + doc: "Add pairwise" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [uint64x2_t, u64] + compose: + - FnCall: [vaddvq_u64, [a]] + + - name: "vaddv{neon_type[0].no}" + doc: "Add across 
vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addv] + safety: safe + types: + - [int8x8_t, i8] + - [int16x4_t, i16] + - [int8x16_t, i8] + - [int16x8_t, i16] + - [int32x4_t, i32] + compose: + - LLVMLink: + name: "vaddv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.saddv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddv{neon_type[0].no}" + doc: "Add across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [int32x2_t, i32] + compose: + - LLVMLink: + name: "vaddv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.saddv.i32.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddv{neon_type[0].no}" + doc: "Add across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [int64x2_t, i64] + compose: + - LLVMLink: + name: "vaddv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.saddv.i64.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddv{neon_type[0].no}" + doc: "Add across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addv] + safety: safe + types: + - [uint8x8_t, u8] + - [uint16x4_t, u16] + - [uint8x16_t, u8] + - [uint16x8_t, u16] + - [uint32x4_t, u32] + compose: + - LLVMLink: + name: "vaddv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.uaddv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddv{neon_type[0].no}" + doc: "Add across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [uint32x2_t, u32, i32] + compose: + - LLVMLink: + name: "vaddv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddv{neon_type[0].no}" + 
doc: "Add across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [addp] + safety: safe + types: + - [uint64x2_t, u64, i64] + compose: + - LLVMLink: + name: "vaddv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vaddlv{neon_type[0].no}" + doc: "Signed Add Long across Vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [saddlv] + safety: safe + types: + - [int8x8_t, i16] + - [int8x16_t, i16] + compose: + - LLVMLink: + name: "vaddlv{neon_type[0].no}" + return_type: "i32" + links: + - link: "llvm.aarch64.neon.saddlv.i32.{neon_type[0]}" + arch: aarch64,arm64ec + - Identifier: ["unsafe {{ _vaddlv{neon_type[0].no}(a) as i16 }}", Symbol] + + - name: "vaddlv{neon_type[0].no}" + doc: "Unsigned Add Long across Vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [uaddlv] + safety: safe + types: + - [uint8x8_t, u16] + - [uint8x16_t, u16] + compose: + - LLVMLink: + name: "vaddlv{neon_type[0].no}" + return_type: "i32" + links: + - link: "llvm.aarch64.neon.uaddlv.i32.{neon_type[0]}" + arch: aarch64,arm64ec + - Identifier: ["unsafe {{ _vaddlv{neon_type[0].no}(a) as u16 }}", Symbol] + + - name: "vmaxv{neon_type[0].no}" + doc: "Horizontal vector max." + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: ['{type[2]}'] + safety: safe + types: + - [int8x8_t, i8, 'smaxv'] + - [int16x4_t, i16, 'smaxv'] + - [int32x2_t, i32, 'smaxp'] + - [int8x16_t, i8, 'smaxv'] + - [int16x8_t, i16, 'smaxv'] + - [int32x4_t, i32, 'smaxv'] + compose: + - LLVMLink: + name: "vmaxv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.smaxv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmaxv{neon_type[0].no}" + doc: "Horizontal vector max." 
+ arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: ['{type[2]}'] + safety: safe + types: + - [uint8x8_t, u8, 'umaxv'] + - [uint16x4_t, u16, 'umaxv'] + - [uint32x2_t, u32, 'umaxp'] + - [uint8x16_t, u8, 'umaxv'] + - [uint16x8_t, u16, 'umaxv'] + - [uint32x4_t, u32, 'umaxv'] + compose: + - LLVMLink: + name: "vmaxv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.umaxv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmaxv{neon_type[0].no}" + doc: "Horizontal vector max." + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: ['{type[2]}'] + safety: safe + types: + - [float32x2_t, f32, 'fmaxp'] + - [float32x4_t, f32, 'fmaxv'] + - [float64x2_t, f64, 'fmaxp'] + compose: + - LLVMLink: + name: "vmaxv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.fmaxv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminv{neon_type[0].no}" + doc: "Horizontal vector min." + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: ['{type[2]}'] + safety: safe + types: + - [int8x8_t, i8, 'sminv'] + - [int16x4_t, i16, 'sminv'] + - [int32x2_t, i32, 'sminp'] + - [int8x16_t, i8, 'sminv'] + - [int16x8_t, i16, 'sminv'] + - [int32x4_t, i32, 'sminv'] + compose: + - LLVMLink: + name: "vminv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.sminv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminv{neon_type[0].no}" + doc: "Horizontal vector min." 
+ arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: ['{type[2]}'] + safety: safe + types: + - [uint8x8_t, u8, 'uminv'] + - [uint16x4_t, u16, 'uminv'] + - [uint32x2_t, u32, 'uminp'] + - [uint8x16_t, u8, 'uminv'] + - [uint16x8_t, u16, 'uminv'] + - [uint32x4_t, u32, 'uminv'] + compose: + - LLVMLink: + name: "vminv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.uminv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminv{neon_type[0].no}" + doc: "Horizontal vector min." + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: ['{type[2]}'] + safety: safe + types: + - [float32x2_t, f32, 'fminp'] + - [float32x4_t, f32, 'fminv'] + - [float64x2_t, f64, 'fminp'] + compose: + - LLVMLink: + name: "vminv{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.fminv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vpmin{neon_type.no}" + doc: "Folding minimum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: ['sminp'] + safety: safe + types: + - int8x16_t + - int16x8_t + - int32x4_t + compose: + - LLVMLink: + name: "vpmin{neon_type.no}" + links: + - link: "llvm.aarch64.neon.sminp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpmin{neon_type.no}" + doc: "Folding minimum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: ['uminp'] + safety: safe + types: + - uint8x16_t + - uint16x8_t + - uint32x4_t + compose: + - LLVMLink: + name: "vpmin{neon_type.no}" + links: + - link: "llvm.aarch64.neon.uminp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpmin{neon_type.no}" + doc: "Folding minimum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: ['fminp'] + safety: safe + types: + - 
float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vpmin{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fminp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpmax{neon_type.no}" + doc: "Folding maximum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: ['smaxp'] + safety: safe + types: + - int8x16_t + - int16x8_t + - int32x4_t + compose: + - LLVMLink: + name: "vpmax{neon_type.no}" + links: + - link: "llvm.aarch64.neon.smaxp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpmax{neon_type.no}" + doc: "Folding maximum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: ['umaxp'] + safety: safe + types: + - uint8x16_t + - uint16x8_t + - uint32x4_t + compose: + - LLVMLink: + name: "vpmax{neon_type.no}" + links: + - link: "llvm.aarch64.neon.umaxp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpmax{neon_type.no}" + doc: "Folding maximum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + assert_instr: ['fmaxp'] + safety: safe + types: + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vpmax{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmaxp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vsli{neon_type[0].N}" + doc: "Shift Left and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sli, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, 'static_assert_uimm_bits!', 'N, 3'] + - [int8x16_t, 'static_assert_uimm_bits!', 'N, 3'] + - [int16x4_t, 'static_assert_uimm_bits!', 'N, 4'] + - [int16x8_t, 'static_assert_uimm_bits!', 'N, 4'] + - [int32x2_t, 'static_assert!', 'N >= 
0 && N <= 31'] + - [int32x4_t, 'static_assert!', 'N >= 0 && N <= 31'] + - [int64x1_t, 'static_assert!', 'N >= 0 && N <= 63'] + - [int64x2_t, 'static_assert!', 'N >= 0 && N <= 63'] + compose: + - FnCall: ['{type[1]}', ['{type[2]}']] + - LLVMLink: + name: "vsli{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "b: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vsli.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vsli{neon_type[0].N}", [a, b, N], [], true] + + - name: "vsli{neon_type[0].N}" + doc: "Shift Left and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "{type[4]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sli, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, int8x8_t, 'static_assert_uimm_bits!', 'N, 3', "neon"] + - [uint8x16_t, int8x16_t, 'static_assert_uimm_bits!', 'N, 3', "neon"] + - [uint16x4_t, int16x4_t, 'static_assert_uimm_bits!', 'N, 4', "neon"] + - [uint16x8_t, int16x8_t, 'static_assert_uimm_bits!', 'N, 4', "neon"] + - [uint32x2_t, int32x2_t, 'static_assert!', 'N >= 0 && N <= 31', "neon"] + - [uint32x4_t, int32x4_t, 'static_assert!', 'N >= 0 && N <= 31', "neon"] + - [uint64x1_t, int64x1_t, 'static_assert!', 'N >= 0 && N <= 63', "neon"] + - [uint64x2_t, int64x2_t, 'static_assert!', 'N >= 0 && N <= 63', "neon"] + - [poly8x8_t, int8x8_t, 'static_assert_uimm_bits!', 'N, 3', "neon"] + - [poly8x16_t, int8x16_t, 'static_assert_uimm_bits!', 'N, 3', "neon"] + - [poly16x4_t, int16x4_t, 'static_assert_uimm_bits!', 'N, 4', "neon"] + - [poly16x8_t, int16x8_t, 'static_assert_uimm_bits!', 'N, 4', "neon"] + - [poly64x1_t, int64x1_t, 'static_assert!', 'N >= 0 && N <= 63', "neon,aes"] + - [poly64x2_t, int64x2_t, 'static_assert!', 'N >= 0 && N <= 63', "neon,aes"] + compose: + - FnCall: ['{type[2]}', ['{type[3]}']] 
+ - FnCall: + - transmute + - - FnCall: + - 'vsli{neon_type[1].N}::' + - - FnCall: + - transmute + - - a + - FnCall: + - transmute + - - b + + - name: "vsri{neon_type[0].N}" + doc: "Shift Right and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sri, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - [int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N <= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: ['static_assert!', ['{type[1]}']] + - LLVMLink: + name: "vsri{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "b: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vsri.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vsri{neon_type[0].N}", [a, b, N], [], true] + + - name: "vsri{neon_type[0].N}" + doc: "Shift Right and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "{type[3]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sri, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, int8x8_t, 'N >= 1 && N <= 8', "neon"] + - [uint8x16_t, int8x16_t, 'N >= 1 && N <= 8', "neon"] + - [uint16x4_t, int16x4_t, 'N >= 1 && N <= 16', "neon"] + - [uint16x8_t, int16x8_t, 'N >= 1 && N <= 16', "neon"] + - [uint32x2_t, int32x2_t, 'N >= 1 && N <= 32', "neon"] + - [uint32x4_t, int32x4_t, 'N >= 1 && N <= 32', "neon"] + - [uint64x1_t, int64x1_t, 'N >= 1 && N <= 64', "neon"] + - [uint64x2_t, int64x2_t, 'N >= 1 && N <= 64', "neon"] + 
- [poly8x8_t, int8x8_t, 'N >= 1 && N <= 8', "neon"] + - [poly8x16_t, int8x16_t, 'N >= 1 && N <= 8', "neon"] + - [poly16x4_t, int16x4_t, 'N >= 1 && N <= 16', "neon"] + - [poly16x8_t, int16x8_t, 'N >= 1 && N <= 16', "neon"] + - [poly64x1_t, int64x1_t, 'N >= 1 && N <= 64', "neon,aes"] + - [poly64x2_t, int64x2_t, 'N >= 1 && N <= 64', "neon,aes"] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vsri{neon_type[1].N}::' + - - FnCall: + - transmute + - - a + - FnCall: + - transmute + - - b + + - name: "vfmlal{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlal2] + safety: safe + types: + - [float32x2_t, float16x4_t, '_high_'] + - [float32x4_t, float16x8_t, 'q_high_'] + compose: + - LLVMLink: + name: "vfmlal{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlal2.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + + - name: "vfmlal{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlal2, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlal{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlal{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlal] + safety: safe + types: + - [float32x2_t, float16x4_t, '_low_'] + - [float32x4_t, float16x8_t, 'q_low_'] + compose: + - LLVMLink: + name: "vfmlal{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlal.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + + - name: "vfmlal{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlal, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlal{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlsl{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlsl2] + safety: safe + types: + - [float32x2_t, float16x4_t, '_high_'] + - [float32x4_t, float16x8_t, 'q_high_'] + compose: + - LLVMLink: + name: "vfmlsl{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vfmlsl{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlsl2, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlsl{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlsl{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlsl] + safety: safe + types: + - [float32x2_t, float16x4_t, '_low_'] + - [float32x4_t, float16x8_t, 'q_low_'] + compose: + - LLVMLink: + name: "vfmlsl{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vfmlsl{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlsl, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlsl{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + - name: "vamax{neon_type.no}" + doc: "Multi-vector floating-point absolute maximum" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,faminmax"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "_vamax{neon_type.no}" + links: + - link: "llvm.aarch64.neon.famax.{neon_type}" + arch: aarch64,arm64ec + + - name: "vamin{neon_type.no}" + doc: "Multi-vector floating-point absolute minimum" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,faminmax"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']] + safety: safe + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "_vamin{neon_type.no}" + links: + - link: "llvm.aarch64.neon.famin.{neon_type}" + 
arch: aarch64,arm64ec + + - name: "vluti2{neon_type[0].lane_nox}" + doc: "Lookup table read with 2-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + - [int8x16_t, uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + - [int16x4_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + - [int16x8_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti2{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti2.lane.{neon_type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]] + + - name: "vluti2{neon_type[0].lane_nox}" + doc: "Lookup table read with 2-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t'] + - [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t'] + - [poly8x8_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t'] + - [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t'] + - [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 
'int16x4_t'] + - [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t'] + - [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t'] + - [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti2{neon_type[4].lane_nox}::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4{neon_type[0].lane_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = {type[3]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x16_t, uint8x8_t, 'LANE == 0', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - LLVMLink: + name: "vluti4{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.lane.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4{neon_type[0].lane_nox}', [a, b, LANE]] + + - name: "vluti4{neon_type[0].lane_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x8_t, 'LANE == 0', int8x16_t] + - [poly8x16_t, uint8x8_t, 'LANE == 0', int8x16_t] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4{neon_type[3].lane_nox}::' + - - FnCall: 
[transmute, [a]] + - b + + - name: "vluti4{neon_type[0].laneq_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1'] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - LLVMLink: + name: "vluti4{neon_type[0].laneq_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.laneq.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4{neon_type[0].laneq_nox}', [a, b, LANE]] + + - name: "vluti4{neon_type[0].laneq_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t] + - [poly8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4{neon_type[3].laneq_nox}::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4q_lane_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - 
FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int16x8x2_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti4q_lane_{neon_type[0]}_x2" + arguments: + - 'a: {neon_type[2]}' + - 'b: {neon_type[2]}' + - 'c: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.lane.x2.{neon_type[2]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4q_lane_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]] + + - name: "vluti4q_lane_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint16x8x2_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t] + - [poly16x8x2_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t] + - [float16x8x2_t, uint8x8_t, float16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut,fp16"', int16x8x2_t] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4q_lane_{neon_type[5]}_x2::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4q_laneq_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int16x8x2_t, uint8x16_t, 
int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti4{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[2]}' + - 'b: {neon_type[2]}' + - 'c: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.laneq.x2.{neon_type[2]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4q_laneq_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]] + + - name: "vluti4q_laneq_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint16x8x2_t, uint8x16_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t] + - [poly16x8x2_t, uint8x16_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t] + - [float16x8x2_t, uint8x16_t, float16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut,fp16"', int16x8x2_t] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4q_laneq_{neon_type[5]}_x2::' + - - FnCall: [transmute, [a]] + - b diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml new file mode 100644 index 000000000000..118f5808f758 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -0,0 +1,15071 @@ +arch_cfgs: + - arch_name: aarch64 + target_feature: [neon] + llvm_prefix: llvm.aarch64.neon +# Generate big endian shuffles +auto_big_endian: true + +# Repeatedly used anchors +# #[stable(feature = "neon_intrinsics", since = "1.59.0")] +neon-stable: &neon-stable + FnCall: [stable, ['feature = "neon_intrinsics"', 
'since = "1.59.0"']] + +# #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +neon-cfg-arm-unstable: &neon-cfg-arm-unstable + FnCall: ['cfg_attr', ['target_arch = "arm"', {FnCall: ['unstable', ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + +# #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +neon-arm-unstable: &neon-arm-unstable + FnCall: ['unstable', ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + +# #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +neon-v7: &neon-v7 + FnCall: [cfg_attr, ['target_arch = "arm"', { FnCall: [target_feature, [ 'enable = "v7"']]} ]] + +# #[target_feature(enable = "neon,v7")] +enable-v7: &enable-v7 + FnCall: [target_feature, ['enable = "neon,v7"']] + +# #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +neon-v8: &neon-v8 + FnCall: [cfg_attr, ['target_arch = "arm"', { FnCall: [target_feature, [ 'enable = "v8"']]} ]] + +target-is-arm: &target-is-arm + FnCall: [cfg, ['target_arch = "arm"']] + +# #[cfg(not(target_arch = "arm"))] +target-not-arm: &target-not-arm + FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]] + +not-arm: ¬-arm + FnCall: [not, ['target_arch = "arm"']] + +neon-target-aarch64-arm64ec: &neon-target-aarch64-arm64ec + FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]] + +# #[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] +neon-not-arm-stable: &neon-not-arm-stable + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + +# #[cfg_attr(all(test, not(target_env = "msvc"))] +msvc-disabled: &msvc-disabled + FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]] + +# all(test, target_arch = "arm") +test-is-arm: &test-is-arm + FnCall: [all, [test, 'target_arch = "arm"']] + +# #[target_feature(enable = 
"neon,aes")] +neon-aes: &neon-aes + FnCall: [target_feature, ['enable = "neon,aes"']] + +# #[target_feature(enable = "neon,i8mm")] +neon-i8mm: &neon-i8mm + FnCall: [target_feature, ['enable = "neon,i8mm"']] + +# #[target_feature(enable = "neon,fp16")] +neon-fp16: &neon-fp16 + FnCall: [target_feature, ['enable = "neon,fp16"']] + +enable-fcma: &enable-fcma + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fcma"']] }]] + +#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] +neon-unstable-i8mm: &neon-unstable-i8mm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] + +# #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +neon-unstable-fcma: &neon-unstable-fcma + FnCall: [unstable, ['feature = "stdarch_neon_fcma"', 'issue = "117222"']] + +arm-crc-unstable: &arm-crc-unstable + FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_aarch32_crc32"', 'issue = "125085"']]}]] + +aarch64-crc-stable: &aarch64-crc-stable + FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']]}]] + +# #[unstable(feature = "stdarch_neon_f16", issue = "136306")] +neon-unstable-f16: &neon-unstable-f16 + FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] + +intrinsics: + - name: "vand{neon_type.no}" + doc: Vector bitwise and + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vand]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [and]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - uint8x8_t + - uint8x16_t + 
- uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - int64x1_t + - int64x2_t + - uint64x1_t + - uint64x2_t + compose: + - FnCall: + - simd_and + - - a + - b + + - name: "vorr{neon_type.no}" + doc: "Vector bitwise or (immediate, inclusive)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [orr]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - int64x1_t + - int64x2_t + - uint64x1_t + - uint64x2_t + compose: + - FnCall: + - simd_or + - - a + - b + + - name: "veor{neon_type.no}" + doc: Vector bitwise exclusive or (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [veor]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [eor]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - int64x1_t + - int64x2_t + - uint64x1_t + - uint64x2_t + compose: + - FnCall: + - simd_xor + - - a + - b + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sabd]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - 
int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "sabd.{neon_type}" + links: + - link: "llvm.aarch64.neon.sabd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vabds.{neon_type}" + arch: arm + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uabd]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "uabd.{neon_type}" + links: + - link: "llvm.aarch64.neon.uabd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vabdu.{neon_type}" + arch: arm + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments of Floating + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabd]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fabd.{neon_type}" + links: + - link: "llvm.arm.neon.vabds.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fabd.{neon_type}" + arch: aarch64,arm64ec + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments of Floating + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, 
[fabd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fabd.{neon_type}" + links: + - link: "llvm.arm.neon.vabds.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fabd.{neon_type}" + arch: aarch64,arm64ec + + - name: "vabdl{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - '"vabdl.{neon_type[0]}"' + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - sabdl + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, int16x8_t, uint8x8_t] + - [int16x4_t, int32x4_t, uint16x4_t] + - [int32x2_t, int64x2_t, uint32x2_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - a + - b + - FnCall: + - simd_cast + - - c + + - name: "vceq{neon_type[0].no}" + doc: "Compare bitwise Equal (vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vceq{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmeq]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, uint8x8_t, ".i8"] + - [uint8x16_t, uint8x16_t, ".i8"] + - [int8x8_t, uint8x8_t, ".i8"] + - [int8x16_t, uint8x16_t, ".i8"] + - [poly8x8_t, uint8x8_t, ".i8"] + - [poly8x16_t, uint8x16_t, ".i8"] + - [uint16x4_t, uint16x4_t, ".i16"] + - [uint16x8_t, uint16x8_t, ".i16"] + - [int16x4_t, uint16x4_t, ".i16"] + - [int16x8_t, uint16x8_t, ".i16"] + - [uint32x2_t, uint32x2_t, ".i32"] + - [uint32x4_t, 
uint32x4_t, ".i32"] + - [int32x2_t, uint32x2_t, ".i32"] + - [int32x4_t, uint32x4_t, ".i32"] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vceq{neon_type[0].no}" + doc: "Floating-point compare equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vceq.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_eq, [a, b]] + + + - name: "vceq{neon_type[0].no}" + doc: "Floating-point compare equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vceq.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vtst{neon_type[0].no}" + doc: "Signed compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtst]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmtst]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, 
i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [poly8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [poly16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[2]}", "{type[3]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabs]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vabs{type[0]}" + doc: "Floating-point absolute value" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ['h_f16', 'f16'] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vabs_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - 0 + + - name: "vcgt{neon_type[0].no}" + doc: "Compare signed greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmgt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, uint8x8_t, "s8"] + - [int8x16_t, uint8x16_t, "s8"] + - [int16x4_t, uint16x4_t, s16] + - [int16x8_t, uint16x8_t, s16] + - [int32x2_t, uint32x2_t, "s32"] + - [int32x4_t, uint32x4_t, "s32"] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type.no}" + doc: "Compare unsigned greater than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmhi]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_gt, [a, b]] + + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + 
return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_gt, [a, b]] + + + - name: "vcgtz{neon_type[0].no}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_gt, [a, {FnCall: [transmute, [b]]}]] + + - name: "vclt{neon_type[0].no}" + doc: "Compare signed less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmgt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Compare signed less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.{neon_type[0]}"']]}]] + - FnCall: 
[cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmge]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_le, [a, b]] + + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vclez{neon_type[0].no}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcle.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmle]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + + - name: "vcge{neon_type[0].no}" + doc: "Compare signed greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmge]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcls{neon_type.no}" + doc: "Count leading sign bits" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcls.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vcls{neon_type.no}" + links: + - link: "llvm.arm.neon.vcls.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.cls.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcls{neon_type[0].no}" + doc: "Count leading sign bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcls]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - 
[uint32x4_t, int32x4_t] + compose: + - FnCall: + - "vcls{neon_type[1].no}" + - - FnCall: [transmute, [a]] + + - name: "vclz{neon_type[0].no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vclz.i8"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [clz]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vclz{neon_type[1].no}" + - - FnCall: [transmute, [a]] + + - name: "vclz{neon_type[0].no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vclz{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [clz]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, '.i8'] + - [int8x16_t, '.i8'] + - [int16x4_t, '.i16'] + - [int16x8_t, '.i16'] + - [int32x2_t, '.i32'] + - [int32x4_t, '.i32'] + compose: + - FnCall: [simd_ctlz, [a]] + + - name: "vclz{neon_type[0].no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vclz{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [clz]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint32x2_t, '.i32', int32x2_t] + - [uint32x4_t, '.i32', int32x4_t] + - [uint16x4_t, '.i16', int16x4_t] + - [uint16x8_t, '.i16', int16x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vclz{neon_type[2].no}" + - - FnCall: [transmute, [a]] + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare 
greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacgt.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacgt.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacgt.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacgt.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacge.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: 
"llvm.arm.neon.vacge.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacge.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacge.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacgt.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacgt.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] 
+ + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacge.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacge.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, 
[scvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [int16x4_t, float16x4_t] + - [int16x8_t, float16x8_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [uint16x4_t, float16x4_t] + - [uint16x8_t, float16x8_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N], 
[], true] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N], [], true] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x4_t, float16x4_t] + - [uint16x8_t, float16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N], [], true] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point" + arguments: ["a: {neon_type[0]}"] + 
return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N], [], true] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N], [], true] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + 
arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N], [], true] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x4_t, float16x4_t] + - [int16x8_t, float16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N], [], true] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 
2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [float32x2_t, int32x2_t, _n_s32_f32] + - [float32x4_t, int32x4_t, q_n_s32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.arm.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [float32x2_t, uint32x2_t, _n_u32_f32] + - [float32x4_t, uint32x4_t, q_n_u32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: 
"llvm.arm.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [float32x2_t, int32x2_t, _n_s32_f32] + - [float32x4_t, int32x4_t, q_n_s32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [float32x2_t, uint32x2_t, _n_u32_f32] + - [float32x4_t, uint32x4_t, q_n_u32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N], [], true] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.8"', 'N = 4']]}]] 
+ - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.8"', 'N = 8']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 8']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as 
u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as 
u32, N as u32, N as u32, N as u32]'] + - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"', 'N = 4']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"', 'N = 4']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - 
*neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[3]}{neon_type[0]}" + doc: "Create a new vector with all lanes set to a value" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, f16, 'float16x4', '_n_'] + - [float16x8_t, f16, 'float16x8', 'q_n_'] + compose: + - "{type[2]}_t::splat(a)" + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, 
{FnCall: [assert_instr, ['"vdup.32"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [_lane_s32, int32x2_t, int32x2_t, '1', '[N as u32, N as u32]'] + - [q_lane_s32, int32x2_t, int32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[N as u32, N as u32]'] + - [q_lane_u32, uint32x2_t, uint32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]'] + - [_lane_f32, float32x2_t, float32x2_t, '1', '[N as u32, N as u32]'] + - [q_lane_f32, float32x2_t, float32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.32"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_s32, int32x4_t, int32x2_t, '2', '[N as u32, N as u32]'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_u32, uint32x4_t, uint32x2_t, '2', '[N as u32, N as u32]'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_f32, float32x4_t, float32x2_t, '2', '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, 
"{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [q_laneq_s64, int64x2_t, '1', '[N as u32, N as u32]'] + - [q_laneq_u64, uint64x2_t, '1', '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [q_lane_s64, int64x1_t, int64x2_t] + - [q_lane_u64, uint64x1_t, uint64x2_t] + compose: + - FnCall: [static_assert!, ['N == 0']] + - FnCall: [simd_shuffle!, [a, a, '[N as u32, N as u32]']] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'N = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [_lane_s64, int64x1_t] + - [_lane_u64, uint64x1_t] + 
compose: + - FnCall: [static_assert!, ['N == 0']] + - Identifier: [a, Symbol] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [_laneq_s64, int64x2_t, int64x1_t, '::'] + - [_laneq_u64, uint64x2_t, uint64x1_t, '::'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: + - "transmute{type[3]}" + - - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 7']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + - [int16x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 
1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + - [uint8x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + - [uint16x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + - [poly8x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => 
simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + - [poly16x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + - Identifier: ["{type[2]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 15']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 15']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x16_t, ' static_assert_uimm_bits!(N, 4);', 'unsafe { match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 
18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } }'] + - [uint8x16_t, ' static_assert_uimm_bits!(N, 4);', 'unsafe { match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } }'] + - [poly8x16_t, ' static_assert_uimm_bits!(N, 4);', 'unsafe { match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + - Identifier: ["{type[2]}", Symbol] + + - name: 
"vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x4_t, 'static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + - [int32x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + - [uint16x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + - [uint32x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + - [poly16x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + - [float32x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, 
b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + - Identifier: ["{type[2]}", Symbol] + + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [float16x4_t, ' static_assert_uimm_bits!(N, 2); unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 7']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: safe + types: + - [float16x8_t, ' static_assert_uimm_bits!(N, 3); unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 
8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + - [uint32x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + - [float32x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + - Identifier: ["{type[2]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x2_t, 'static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => 
simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + - [uint64x2_t, 'static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }'] + compose: + - Identifier: ["{type[1]}", Symbol] + - Identifier: ["{type[2]}", Symbol] + + - name: "vmla{neon_type[0].no}" + doc: "Multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mla]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, ".i8"] + - [int8x16_t, ".i8"] + - [uint8x8_t, ".i8"] + - [uint8x16_t, ".i8"] + - [int16x4_t, ".i16"] + - [int16x8_t, ".i16"] + - [uint16x4_t, ".i16"] + - [uint16x8_t, ".i16"] + - [int32x2_t, ".i32"] + - [int32x4_t, ".i32"] + - [uint32x2_t, ".i32"] + - [uint32x4_t, ".i32"] + compose: + - FnCall: [simd_add, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vmla{neon_type.no}" + doc: "Floating-point multiply-add to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_add, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vmlal{neon_type[1].no}" + doc: "Signed multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, 
['"vmlal.{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t, "s8"] + - [int32x4_t, int16x4_t, "s16"] + - [int64x2_t, int32x2_t, "s32"] + compose: + - FnCall: [simd_add, [a, {FnCall: ["vmull_{type[2]}", [b, c]]}]] + + - name: "vmlal_n_{type[4]}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlal.{type[4]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x4_t, int16x4_t, "i16", int32x4_t, 's16'] + - [int64x2_t, int32x2_t, "i32", int64x2_t, 's32'] + compose: + - FnCall: + - "vmlal{neon_type[1].noq}" + - - a + - b + - FnCall: ["vdup_n_{neon_type[1]}", [c]] + + - name: "vmlal_n_{type[2]}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlal.{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umlal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint32x4_t, uint16x4_t, "u16", uint32x4_t] + - [uint64x2_t, uint32x2_t, "u32", uint64x2_t] + compose: + - FnCall: + - "vmlal{neon_type[1].noq}" + - - a + - b + - FnCall: ["vdup_n_{neon_type[1]}", [c]] + + - name: "vmlal_lane{neon_type[2].no}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, 
{FnCall: [assert_instr, ['"vmlal.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlal, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [int64x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmlal_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + + - name: "vmlal_lane{neon_type[2].no}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlal.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umlal, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [uint32x4_t, uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: + - "vmlal_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, 
[c, c, '{type[5]}']] + + - name: "vmlal_{neon_type[1]}" + doc: "Unsigned multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlal.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umlal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint8x8_t] + - [uint32x4_t, uint16x4_t] + - [uint64x2_t, uint32x2_t] + compose: + - FnCall: + - simd_add + - - a + - FnCall: ["vmull_{neon_type[1]}", [b, c]] + + - name: "vmls{neon_type[0].no}" + doc: "Multiply-subtract from accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, '.i8'] + - [int8x16_t, '.i8'] + - [uint8x8_t, '.i8'] + - [uint8x16_t, '.i8'] + - [int16x4_t, ".i16"] + - [int16x8_t, ".i16"] + - [uint16x4_t, ".i16"] + - [uint16x8_t, ".i16"] + - [int32x2_t, ".i32"] + - [int32x4_t, ".i32"] + - [uint32x2_t, ".i32"] + - [uint32x4_t, ".i32"] + compose: + - FnCall: + - simd_sub + - - a + - FnCall: [simd_mul, [b, c]] + + - name: "vmlsl_{neon_type[1]}" + doc: "Signed multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlsl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t] + - [int32x4_t, int16x4_t] + - 
[int64x2_t, int32x2_t] + compose: + - FnCall: [simd_sub, [a, {FnCall: ["vmull_{neon_type[1]}", [b, c]]}]] + + - name: "vmlsl_n_{neon_type[1]}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlsl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x4_t, int16x4_t, "i16"] + - [int64x2_t, int32x2_t, "i32"] + compose: + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, {FnCall: ["vdup_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_n_{neon_type[1]}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umlsl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint32x4_t, uint16x4_t, "u16"] + - [uint64x2_t, uint32x2_t, "u32"] + compose: + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, {FnCall: ["vdup_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_lane{neon_type[2].no}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlsl, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - 
[int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmlsl_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmlsl_lane{neon_type[2].no}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smlsl, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [int64x2_t, int32x2_t, int32x2_t, '[LANE as u32, LANE as u32]', '1'] + - [int64x2_t, int32x2_t, int32x4_t, '[LANE as u32, LANE as u32]', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: + - "vmlsl_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + + - name: "vmlsl_lane{neon_type[2].no}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umlsl, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, 
uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmlsl_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmlsl_{neon_type[1]}" + doc: "Unsigned multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umlsl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint8x8_t] + - [uint32x4_t, uint16x4_t] + - [uint64x2_t, uint32x2_t] + compose: + - FnCall: [simd_sub, [a, {FnCall: ["vmull_{neon_type[1]}", [b, c]]}]] + + - name: "vneg{neon_type[0].no}" + doc: Negate + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [neg]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, 's8'] + - [int8x16_t, 's8'] + - [int16x4_t, 's16'] + - [int16x8_t, 's16'] + - [int32x2_t, 's32'] + - [int32x4_t, 's32'] + compose: + - FnCall: [simd_neg, [a]] + + - name: "vneg{neon_type[0].no}" + doc: Negate + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + 
- [float32x2_t, 'f32'] + - [float32x4_t, 'f32'] + compose: + - FnCall: [simd_neg, [a]] + + - name: "vneg{neon_type[0].no}" + doc: Negate + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, 'f16'] + - [float16x8_t, 'f16'] + compose: + - FnCall: [simd_neg, [a]] + + - name: "vqneg{neon_type[0].no}" + doc: Signed saturating negate + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqneg.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqneg]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, 's8', 'i8'] + - [int8x16_t, 's8', 'i8'] + - [int16x4_t, 's16', 'i16'] + - [int16x8_t, 's16', 'i16'] + - [int32x2_t, 's32', 'i32'] + - [int32x4_t, 's32', 'i32'] + compose: + - LLVMLink: + name: "sqneg.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.sqneg.v{neon_type[0].lane}{type[2]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vqneg.v{neon_type[0].lane}{type[2]}" + arch: arm + + - name: "vqsub{neon_type[0].no}" + doc: Saturating subtract + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqsub.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqsub]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, u8, i8] + - [uint8x16_t, u8, i8] + - [uint16x4_t, u16, i16] + - [uint16x8_t, u16, i16] + - [uint32x2_t, u32, i32] + - [uint32x4_t, u32, i32] + - [uint64x1_t, u64, i64] + - [uint64x2_t, u64, 
i64] + compose: + - LLVMLink: + name: "uqsub.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.uqsub.v{neon_type[0].lane}{type[2]}" + arch: aarch64,arm64ec + - link: "llvm.usub.sat.v{neon_type[0].lane}{type[2]}" + arch: arm + + - name: "vqsub{neon_type[0].no}" + doc: Saturating subtract + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqsub.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqsub]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, s8, i8] + - [int8x16_t, s8, i8] + - [int16x4_t, s16, i16] + - [int16x8_t, s16, i16] + - [int32x2_t, s32, i32] + - [int32x4_t, s32, i32] + - [int64x1_t, s64, i64] + - [int64x2_t, s64, i64] + compose: + - LLVMLink: + name: "sqsub.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.sqsub.v{neon_type[0].lane}{type[2]}" + arch: aarch64,arm64ec + - link: "llvm.ssub.sat.v{neon_type[0].lane}{type[2]}" + arch: arm + + - name: "vhadd{neon_type.no}" + doc: Halving add + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - '"vhadd.{neon_type}"' + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - uhadd + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "uhadd.{neon_type}" + links: + - link: "llvm.aarch64.neon.uhadd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vhaddu.{neon_type}" + arch: arm + + - name: "vhadd{neon_type.no}" + doc: Halving add + arguments: ["a: {neon_type}", 
"b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - '"vhadd.{neon_type}"' + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - shadd + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "shadd.{neon_type}" + links: + - link: "llvm.aarch64.neon.shadd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vhadds.{neon_type}" + arch: arm + + - name: "vrhadd{neon_type.no}" + doc: Rounding halving add + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vrhadd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [srhadd]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vrhadd.{neon_type}" + links: + - link: "llvm.aarch64.neon.srhadd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vrhadds.{neon_type}" + arch: arm + + - name: "vrhadd{neon_type.no}" + doc: Rounding halving add + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vrhadd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [urhadd]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: 
"vrhaddu.{neon_type}" + links: + - link: "llvm.aarch64.neon.urhadd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vrhaddu.{neon_type}" + arch: arm + + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrintn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "llvm.frinn.{neon_type}" + links: + - link: "llvm.roundeven.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vrintn.{neon_type}" + arch: arm + + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrintn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.frinn.{neon_type}" + links: + - link: "llvm.roundeven.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vrintn.{neon_type}" + arch: arm + + - name: "vqadd{neon_type.no}" + doc: Saturating add + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqadd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqadd]]}]] + - *neon-not-arm-stable + - 
*neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - uint64x1_t + - uint64x2_t + compose: + - LLVMLink: + name: "uqadd.{neon_type}" + links: + - link: "llvm.aarch64.neon.uqadd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.uadd.sat.{neon_type}" + arch: arm + + - name: "vqadd{neon_type.no}" + doc: Saturating add + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqadd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqadd]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "sqadd.{neon_type}" + links: + - link: "llvm.aarch64.neon.sqadd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.sadd.sat.{neon_type}" + arch: arm + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const f32", float32x2x2_t] + - ["*const f32", float32x4x2_t] + - ["*const f32", float32x2x3_t] + - ["*const f32", float32x4x3_t] + - ["*const f32", float32x2x4_t] + - ["*const f32", float32x4x4_t] + compose: + - LLVMLink: + name: "vld1x{neon_type[1].tuple}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0" + arch: aarch64,arm64ec + - link: 
"llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0" + arch: arm + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t] + - ["*const i8", int8x16x2_t] + - ["*const i8", int8x8x3_t] + - ["*const i8", int8x16x3_t] + - ["*const i8", int8x8x4_t] + - ["*const i8", int8x16x4_t] + - ["*const i16", int16x4x2_t] + - ["*const i16", int16x8x2_t] + - ["*const i16", int16x4x3_t] + - ["*const i16", int16x8x3_t] + - ["*const i16", int16x4x4_t] + - ["*const i16", int16x8x4_t] + - ["*const i32", int32x2x2_t] + - ["*const i32", int32x4x2_t] + - ["*const i32", int32x2x3_t] + - ["*const i32", int32x4x3_t] + - ["*const i32", int32x2x4_t] + - ["*const i32", int32x4x4_t] + - ["*const i64", int64x1x2_t] + - ["*const i64", int64x1x3_t] + - ["*const i64", int64x1x4_t] + - ["*const i64", int64x2x2_t] + - ["*const i64", int64x2x3_t] + - ["*const i64", int64x2x4_t] + compose: + - LLVMLink: + name: "ld1x{neon_type[1].tuple}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0" + arch: arm + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - 
*neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8x2_t] + - ["*const u8", uint8x16x2_t, int8x16x2_t] + - ["*const u8", uint8x8x3_t, int8x8x3_t] + - ["*const u8", uint8x16x3_t, int8x16x3_t] + - ["*const u8", uint8x8x4_t, int8x8x4_t] + - ["*const u8", uint8x16x4_t, int8x16x4_t] + - ["*const u16", uint16x4x2_t, int16x4x2_t] + - ["*const u16", uint16x8x2_t, int16x8x2_t] + - ["*const u16", uint16x4x3_t, int16x4x3_t] + - ["*const u16", uint16x8x3_t, int16x8x3_t] + - ["*const u16", uint16x4x4_t, int16x4x4_t] + - ["*const u16", uint16x8x4_t, int16x8x4_t] + - ["*const u32", uint32x2x2_t, int32x2x2_t] + - ["*const u32", uint32x4x2_t, int32x4x2_t] + - ["*const u32", uint32x2x3_t, int32x2x3_t] + - ["*const u32", uint32x4x3_t, int32x4x3_t] + - ["*const u32", uint32x2x4_t, int32x2x4_t] + - ["*const u32", uint32x4x4_t, int32x4x4_t] + - ["*const u64", uint64x1x2_t, int64x1x2_t] + - ["*const u64", uint64x1x3_t, int64x1x3_t] + - ["*const u64", uint64x1x4_t, int64x1x4_t] + - ["*const u64", uint64x2x2_t, int64x2x2_t] + - ["*const u64", uint64x2x3_t, int64x2x3_t] + - ["*const u64", uint64x2x4_t, int64x2x4_t] + - ["*const p8", poly8x8x2_t, int8x8x2_t] + - ["*const p8", poly8x8x3_t, int8x8x3_t] + - ["*const p8", poly8x8x4_t, int8x8x4_t] + - ["*const p8", poly8x16x2_t, int8x16x2_t] + - ["*const p8", poly8x16x3_t, int8x16x3_t] + - ["*const p8", poly8x16x4_t, int8x16x4_t] + - ["*const p16", poly16x4x2_t, int16x4x2_t] + - ["*const p16", poly16x4x3_t, int16x4x3_t] + - ["*const p16", poly16x4x4_t, int16x4x4_t] + - ["*const p16", poly16x8x2_t, int16x8x2_t] + - ["*const p16", poly16x8x3_t, int16x8x3_t] + - ["*const p16", poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{neon_type[2].no}" + - - FnCall: + - transmute + - - a + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: 
"{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x3_t, int64x1x3_t] + - ["*const p64", poly64x1x4_t, int64x1x4_t] + - ["*const p64", poly64x2x2_t, int64x2x2_t] + - ["*const p64", poly64x2x3_t, int64x2x3_t] + - ["*const p64", poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{neon_type[2].no}" + - - FnCall: + - transmute + - - a + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{neon_type[2].no}" + - - FnCall: + - transmute + - - a + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t] + - ["*const f16", float16x8x2_t] + - ["*const f16", float16x4x3_t] + - ["*const f16", float16x8x3_t] + - ["*const f16", float16x4x4_t] + - ["*const f16", float16x8x4_t] + compose: + - LLVMLink: + name: "vld1x{neon_type[1].tuple}.{neon_type[1]}" + 
links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0" + arch: arm + + - name: "vld1{type[2]}_{neon_type[1]}" + doc: "Load one single-element structure to one lane of one register" + arguments: ["ptr: {type[0]}", "src: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4_t, '_lane', '2'] + - ["*const f16", float16x8_t, 'q_lane', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]] + + - name: "vld1{type[2]}_{neon_type[1]}" + doc: "Load one single-element structure and replicate to all lanes of one register" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vld1"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"] + - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"] + compose: + - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, transmute({type[3]}::splat(0.0)))"] + - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]] + + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - 
*neon-arm-unstable + assert_instr: [vld2] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8] + - ["*const i16", int16x4x2_t, i16] + - ["*const i32", int32x2x2_t, i32] + - ["*const i8", int8x16x2_t, i8] + - ["*const i16", int16x8x2_t, i16] + - ["*const i32", int32x4x2_t, i32] + - ["*const f32", float32x2x2_t, f32] + - ["*const f32", float32x4x2_t, f32] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8, int8x8_t] + - ["*const i16", int16x4x2_t, i16, int16x4_t] + - ["*const i32", int32x2x2_t, i32, int32x2_t] + - ["*const i8", int8x16x2_t, i8, int8x16_t] + - ["*const i16", int16x8x2_t, i16, int16x8_t] + - ["*const i32", int32x4x2_t, i32, int32x4_t] + - ["*const f32", float32x2x2_t, f32, float32x2_t] + - ["*const f32", float32x4x2_t, f32, float32x4_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const 
{neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64, int64x1_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8x2_t] + - ["*const u16", uint16x4x2_t, int16x4x2_t] + - ["*const u32", uint32x2x2_t, int32x2x2_t] + - ["*const u8", uint8x16x2_t, int8x16x2_t] + - ["*const u16", uint16x8x2_t, int16x8x2_t] + - ["*const u32", uint32x4x2_t, int32x4x2_t] + - ["*const p8", poly8x8x2_t, int8x8x2_t] + - ["*const p16", poly16x4x2_t, int16x4x2_t] + - ["*const p8", poly8x16x2_t, int8x16x2_t] + - ["*const p16", poly16x8x2_t, int16x8x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: 
[cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: + - cfg_attr + - - test + - FnCall: + - assert_instr + - - vld2 + - "LANE = 0" + - FnCall: + - rustc_legacy_const_generics + - - "2" + - *neon-arm-unstable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8, int8x8_t, "3"] + - ["*const i16", int16x4x2_t, i16, int16x4_t, "2"] + - ["*const i32", int32x2x2_t, i32, int32x2_t, "1"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2_lane{neon_type[1].nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8_t, "3"] + - ["*const u16", uint16x4x2_t, int16x4_t, "2"] + - ["*const u32", uint32x2x2_t, int32x2_t, "1"] + - ["*const u16", uint16x8x2_t, int16x8_t, "3"] + - ["*const u32", uint32x4x2_t, int32x4_t, "2"] + - ["*const p8", poly8x8x2_t, int8x8_t, "3"] + - ["*const p16", poly16x4x2_t, int16x4_t, "2"] + - ["*const p16", poly16x8x2_t, int16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[3]}" + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: + - cfg_attr + - - test + - FnCall: + - assert_instr + - - ld2 + - "LANE = 0" + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8, int8x8_t, "3"] + - ["*const i16", int16x4x2_t, i16, int16x4_t, "2"] + - ["*const i32", int32x2x2_t, i32, int32x2_t, "1"] + - ["*const i16", int16x8x2_t, i16, int16x8_t, "3"] + - ["*const i32", int32x4x2_t, i32, int32x4_t, "2"] + - ["*const f32", float32x2x2_t, f32, float32x2_t, "2"] + - ["*const f32", float32x4x2_t, f32, float32x4_t, "2"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const i8" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "LANE as i64" + - "a as _" + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: + - cfg_attr + - - test + - FnCall: + - assert_instr + - - vld2 + - "LANE = 0" + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-arm-unstable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i16", int16x8x2_t, i16, int16x8_t, "3"] + - ["*const i32", int32x4x2_t, i32, int32x4_t, "2"] + - ["*const f32", float32x2x2_t, f32, float32x2_t, "1"] + - ["*const f32", float32x4x2_t, f32, float32x4_t, "2"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: *const i64" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vld2] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8] + - ["*const i16", int16x4x2_t, 
i16] + - ["*const i32", int32x2x2_t, i32] + - ["*const i8", int8x16x2_t, i8] + - ["*const i16", int16x8x2_t, i16] + - ["*const i32", int32x4x2_t, i32] + - ["*const f32", float32x2x2_t, f32] + - ["*const f32", float32x4x2_t, f32] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8x2_t] + - ["*const u16", uint16x4x2_t, int16x4x2_t] + - ["*const u32", uint32x2x2_t, int32x2x2_t] + - ["*const u8", uint8x16x2_t, int8x16x2_t] + - ["*const u16", uint16x8x2_t, int16x8x2_t] + - ["*const u32", uint32x4x2_t, int32x4x2_t] + - ["*const p8", poly8x8x2_t, int8x8x2_t] + - ["*const p16", poly16x4x2_t, int16x4x2_t] + - ["*const p8", poly8x16x2_t, int8x16x2_t] + - ["*const p16", poly16x8x2_t, int16x8x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: 
[neon] + types: + - ["*const u64", uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: + - cfg + - - FnCall: + - not + - - 'target_arch = "arm"' + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8] + - ["*const i16", int16x4x2_t, i16] + - ["*const i32", int32x2x2_t, i32] + - ["*const i8", int8x16x2_t, i8] + - ["*const i16", int16x8x2_t, i16] + - ["*const i32", int32x4x2_t, i32] + - ["*const f32", float32x2x2_t, f32] + - ["*const f32", float32x4x2_t, f32] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - 
*neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld2{neon_type[1].nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - *neon-fp16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - 
*target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld2', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x2_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "2" + + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x2_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "LANE as i64" + - "a as _" + + + - name: "vld3{neon_type[1].nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld3{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld3{neon_type[1].nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].nox}" + - - "a as _" + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: 
[cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld3{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld3', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x3_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld3{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "b.2" + - "LANE" + - "2" + + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x3_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "b.2" + - "LANE as i64" + - "a as _" + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to two registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, int8x8_t, i8, '3'] + - ['*const i16', int16x8x3_t, int16x8_t, i16, '4'] + - ['*const i32', int32x4x3_t, int32x4_t, i32, '2'] + - ['*const i16', int16x4x3_t, int16x4_t, i16, '2'] + - ['*const i32', int32x2x3_t, int32x2_t, i32, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld3lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - *target-not-arm + assert_instr: [ld3] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, '*const int8x8_t', i8] + - ['*const i16', int16x4x3_t, '*const int16x4_t', i16] + - ['*const i32', int32x2x3_t, '*const int32x2_t', i32] + - ['*const i8', int8x16x3_t, '*const int8x16_t', i8] 
+ - ['*const i16', int16x8x3_t, '*const int16x8_t', i16] + - ['*const i32', int32x4x3_t, '*const int32x4_t', i32] + - ['*const f32', float32x2x3_t, '*const float32x2_t', f32] + - ['*const f32', float32x4x3_t, '*const float32x4_t', f32] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - *target-not-arm + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x3_t, '*const int64x1_t', i64] + compose: + - LLVMLink: + name: "vld3{neon_type[1].nox}" + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [vld3] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, i8] + - ['*const i16', int16x4x3_t, i16] + - ['*const i32', int32x2x3_t, i32] + - ['*const i8', int8x16x3_t, i8] + - ['*const i16', int16x8x3_t, i16] + - ['*const i32', int32x4x3_t, i32] + - ['*const f32', float32x2x3_t, f32] + - ['*const f32', float32x4x3_t, f32] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].nox}', ['a as *const i8', '{neon_type[1].base_byte_size}']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three 
registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x3_t, i64] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].nox}', ['a as *const i8', '{neon_type[1].base_byte_size}']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const f32', float32x4x3_t, float32x4_t, f32, '2'] + - ['*const f32', float32x2x3_t, float32x2_t, f32, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[2].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const f32', float32x2x3_t, float32x2_t, f32, '1', '4'] + 
compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']] + + - name: "vld3{neon_type[2].lane_nox}" + doc: "Load multiple 3-element structures to two registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, int8x8_t, i8, '3', '1'] + - ['*const i16', int16x4x3_t, int16x4_t, i16, '2', '2'] + - ['*const i32', int32x2x3_t, int32x2_t, i32, '1', '4'] + - ['*const i16', int16x8x3_t, int16x8_t, i16, '3', '2'] + - ['*const i32', int32x4x3_t, int32x4_t, i32, '2', '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']] + + - name: "vld3{neon_type[2].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-arm-unstable + static_defs: ['const 
LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const f32', float32x4x3_t, float32x4_t, f32, '2', '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x3_t, int8x8x3_t, '3'] + - ['*const u16', uint16x4x3_t, int16x4x3_t, '2'] + - ['*const u32', uint32x2x3_t, int32x2x3_t, '1'] + - ['*const p8', poly8x8x3_t, int8x8x3_t, '3'] + - ['*const u16', uint16x8x3_t, int16x8x3_t, '3'] + - ['*const p16', poly16x4x3_t, int16x4x3_t, '2'] + - ['*const p16', poly16x8x3_t, int16x8x3_t, '3'] + - ['*const u32', uint32x4x3_t, int32x4x3_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x3_t, int8x8x3_t] + - ['*const u8', uint8x16x3_t, int8x16x3_t] + - ['*const u16', uint16x4x3_t, int16x4x3_t] + - ['*const u32', uint32x2x3_t, int32x2x3_t] + - ['*const u16', uint16x8x3_t, int16x8x3_t] + - ['*const u32', uint32x4x3_t, int32x4x3_t] + - ['*const p8', poly8x8x3_t, int8x8x3_t] + - ['*const p8', poly8x16x3_t, int8x16x3_t] + - ['*const p16', poly16x4x3_t, int16x4x3_t] + - ['*const p16', poly16x8x3_t, int16x8x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: [transmute, [a]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: [transmute, [a]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and 
replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*target-not-arm, *neon-stable] + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x3_t, i8] + - ["*const i16", int16x4x3_t, i16] + - ["*const i32", int32x2x3_t, i32] + - ["*const i32", int32x4x3_t, i32] + - ["*const i16", int16x8x3_t, i16] + - ["*const i8", int8x16x3_t, i8] + - ["*const i64", int64x1x3_t, i64] + - ["*const f32", float32x4x3_t, f32] + - ["*const f32", float32x2x3_t, f32] + compose: + - LLVMLink: + name: 'ld3r{neon_type[1].dup_nox}' + arguments: + - 'ptr: {type[0]}' + links: + - link: 'llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as _']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*enable-v7, *target-is-arm, *neon-arm-unstable] + assert_instr: [vld3] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x3_t, i8, '1'] + - ["*const i16", int16x4x3_t, i16, '2'] + - ["*const i32", int32x2x3_t, i32, '4'] + - ["*const i8", int8x16x3_t, i8, '1'] + - ["*const i16", int16x8x3_t, i16, '2'] + - ["*const i32", int32x4x3_t, i32, '4'] + - ["*const f32", float32x4x3_t, f32, '4'] + - ["*const f32", float32x2x3_t, f32, '4'] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].dup_nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - FnCall: 
[cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x3_t, int8x8x3_t] + - ['*const u16', uint16x4x3_t, int16x4x3_t] + - ['*const u32', uint32x2x3_t, int32x2x3_t] + - ['*const u8', uint8x16x3_t, int8x16x3_t] + - ['*const u16', uint16x8x3_t, int16x8x3_t] + - ['*const u32', uint32x4x3_t, int32x4x3_t] + - ['*const p8', poly8x8x3_t, int8x8x3_t] + - ['*const p16', poly16x4x3_t, int16x4x3_t] + - ['*const p8', poly8x16x3_t, int8x16x3_t] + - ['*const p16', poly16x8x3_t, int16x8x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].dup_nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*target-is-arm, *enable-v7, *neon-arm-unstable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x3_t, i64, '8'] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].dup_nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0' + arch: arm + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].dup_nox}' + - - FnCall: + - transmute + - - a + + - name: 
"vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].dup_nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, i8, '*const int8x8_t'] + - ['*const i32', int32x4x4_t, i32, '*const int32x4_t'] + - ['*const i16', int16x4x4_t, i16, '*const int16x4_t'] + - ['*const i32', int32x2x4_t, i32, '*const int32x2_t'] + - ['*const i8', int8x16x4_t, i8, '*const int8x16_t'] + - ['*const i16', int16x8x4_t, i16, '*const int16x8_t'] + - ['*const f32', float32x2x4_t, f32, '*const float32x2_t'] + - ['*const f32', float32x4x4_t, f32, '*const float32x4_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*target-not-arm, *neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x4_t, i64, '*const int64x1_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + 
links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, int8x8_t, i8, '3'] + - ['*const i16', int16x4x4_t, int16x4_t, i16, '2'] + - ['*const i16', int16x8x4_t, int16x8_t, i16, '3'] + - ['*const i32', int32x2x4_t, int32x2_t, i32, '1'] + - ['*const i32', int32x4x4_t, int32x4_t, i32, '2'] + - ['*const f32', float32x2x4_t, float32x2_t, f32, '1'] + - ['*const f32', float32x4x4_t, float32x4_t, f32, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vld4] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, i8, '1'] + - ['*const i16', int16x4x4_t, i16, '2'] + - ['*const i32', int32x2x4_t, i32, '4'] + - ['*const i8', int8x16x4_t, i8, '1'] + - ['*const i16', int16x8x4_t, i16, '2'] + - ['*const i32', int32x4x4_t, i32, '4'] + - ['*const f32', float32x4x4_t, f32, '4'] + - ['*const f32', 
float32x2x4_t, f32, '4'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0' + arch: arm + - FnCall: ['_vld4{neon_type[1].nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x4_t, i64, '8'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0' + arch: arm + - FnCall: ['_vld4{neon_type[1].nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x4_t, int8x8x4_t] + - ['*const u16', uint16x4x4_t, int16x4x4_t] + - ['*const u32', uint32x2x4_t, int32x2x4_t] + - ['*const u8', uint8x16x4_t, int8x16x4_t] + - ['*const u16', uint16x8x4_t, int16x8x4_t] + - ['*const u32', uint32x4x4_t, int32x4x4_t] + - ['*const p8', poly8x8x4_t, int8x8x4_t] + - ['*const p16', poly16x4x4_t, int16x4x4_t] + - ['*const p8', poly8x16x4_t, int8x16x4_t] + - ['*const p16', poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: 
{type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-arm-unstable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, int8x8_t, i8, '1', '3'] + - ['*const i16', int16x4x4_t, int16x4_t, i16, '2', '2'] + - ['*const i32', int32x2x4_t, int32x2_t, i32, '4', '1'] + - ['*const i16', int16x8x4_t, int16x8_t, i16, '2', '3'] + - ['*const i32', int32x4x4_t, int32x4_t, i32, '4', '2'] + - ['*const f32', float32x2x4_t, float32x2_t, f32, '4', '1'] + - ['*const f32', float32x4x4_t, float32x4_t, f32, '4', '2'] + compose: + - 
FnCall: [static_assert_uimm_bits!, [LANE, '{type[5]}']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[3]}.p0' + arch: arm + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', LANE, '{type[4]}']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x4_t, int8x8x4_t, '3'] + - ['*const u16', uint16x4x4_t, int16x4x4_t, '2'] + - ['*const u32', uint32x2x4_t, int32x2x4_t, '1'] + - ['*const u16', uint16x8x4_t, int16x8x4_t, '3'] + - ['*const u32', uint32x4x4_t, int32x4x4_t, '2'] + - ['*const p8', poly8x8x4_t, int8x8x4_t, '3'] + - ['*const p16', poly16x4x4_t, int16x4x4_t, '2'] + - ['*const p16', poly16x8x4_t, int16x8x4_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - ['*mut i64', int64x1_t] + - ['*mut u64', uint64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v8 + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - ['*mut p64', poly64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v8 + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - ['*mut p64', poly64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: 
[';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - ['*mut i8', int8x8_t, '3'] + - ['*mut i16', int16x4_t, '2'] + - ['*mut i32', int32x2_t, '1'] + - ['*mut i8', int8x16_t, '4'] + - ['*mut i16', int16x8_t, '3'] + - ['*mut i32', int32x4_t, '2'] + - ['*mut i64', int64x2_t, '1'] + - ['*mut u8', uint8x8_t, '3'] + - ['*mut u16', uint16x4_t, '2'] + - ['*mut u32', uint32x2_t, '1'] + - ['*mut u8', uint8x16_t, '4'] + - ['*mut u16', uint16x8_t, '3'] + - ['*mut u32', uint32x4_t, '2'] + - ['*mut u64', uint64x2_t, '1'] + - ['*mut p8', poly8x8_t, '3'] + - ['*mut p16', poly16x4_t, '2'] + - ['*mut p8', poly8x16_t, '4'] + - ['*mut p16', poly16x8_t, '3'] + - ['*mut f32', float32x2_t, '1'] + - ['*mut f32', float32x4_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + types: + - ['*mut f16', float16x4_t, '2'] + - 
['*mut f16', float16x8_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st1] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i64, int64x1x2_t, int64x1_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [i64, int64x2x2_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut {type[0]}' + links: + - link: 'llvm.aarch64.neon.st1x2.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst1{neon_type[1].no}', ['b.0', 'b.1', 'a']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st1] + types: + - [i8, int8x8x3_t, int8x8_t] + - [i16, int16x4x3_t, int16x4_t] + - [i32, int32x2x3_t, int32x2_t] + - [i64, int64x1x3_t, int64x1_t] + - [i8, int8x16x3_t, int8x16_t] + - [i16, int16x8x3_t, int16x8_t] + - [i32, int32x4x3_t, int32x4_t] + - [i64, int64x2x3_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut {type[0]}' + links: + - link: 'llvm.aarch64.neon.st1x3.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst1{neon_type[1].no}', ['b.0', 'b.1', 'b.2', 'a']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element 
structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st1] + types: + - [i8, int8x8x4_t, int8x8_t] + - [i16, int16x4x4_t, int16x4_t] + - [i32, int32x2x4_t, int32x2_t] + - [i64, int64x1x4_t, int64x1_t] + - [i8, int8x16x4_t, int8x16_t] + - [i16, int16x8x4_t, int16x8_t] + - [i32, int32x4x4_t, int32x4_t] + - [i64, int64x2x4_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut {type[0]}' + links: + - link: 'llvm.aarch64.neon.st1x4.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst1{neon_type[1].no}', ['b.0', 'b.1', 'b.2', 'b.3', 'a']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vst1] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i64, int64x1x2_t, int64x1_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [i64, int64x2x2_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x2.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x2.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vst1] + types: + - [i8, 
int8x8x3_t, int8x8_t] + - [i16, int16x4x3_t, int16x4_t] + - [i32, int32x2x3_t, int32x2_t] + - [i64, int64x1x3_t, int64x1_t] + - [i8, int8x16x3_t, int8x16_t] + - [i16, int16x8x3_t, int16x8_t] + - [i32, int32x4x3_t, int32x4_t] + - [i64, int64x2x3_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x3.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x3.p0.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [vst1] + types: + - [i8, int8x8x4_t, int8x8_t] + - [i16, int16x4x4_t, int16x4_t] + - [i32, int32x2x4_t, int32x2_t] + - [i64, int64x1x4_t, int64x1_t] + - [i8, int8x16x4_t, int8x16_t] + - [i16, int16x8x4_t, int16x8_t] + - [i32, int32x4x4_t, int32x4_t] + - [i64, int64x2x4_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [vst1] + types: + - [f32, float32x2x4_t, float32x2_t] + - [f32, float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: 
{type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst1] + types: + - [f16, float16x4x4_t, float16x4_t] + - [f16, float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v8 + - *neon-aes + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [p64, poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x2_t, int64x1_t] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 
'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [u64, uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x2_t, int64x1_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [f32, float32x2x2_t, float32x2_t] + - [f32, float32x4x2_t, float32x4_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 
'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, float16x4_t] + - [f16, float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st2]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [u8, uint8x8x2_t, int8x8x2_t] + - [u16, uint16x4x2_t, int16x4x2_t] + - [u32, uint32x2x2_t, int32x2x2_t] + - [u8, uint8x16x2_t, int8x16x2_t] + - [u16, uint16x8x2_t, int16x8x2_t] + - [u32, uint32x4x2_t, int32x4x2_t] + - [p8, poly8x8x2_t, int8x8x2_t] + - [p16, poly16x4x2_t, int16x4x2_t] + - [p8, poly8x16x2_t, int8x16x2_t] + - [p16, poly16x8x2_t, int16x8x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 
'LANE = 0']]}]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, '3', int8x8_t] + - [i16, int16x4x2_t, '2', int16x4_t] + - [i32, int32x2x2_t, '1', int32x2_t] + - [i16, int16x8x2_t, '3', int16x8_t] + - [i32, int32x4x2_t, '2', int32x4_t] + - [f32, float32x2x2_t, '1', float32x2_t] + - [f32, float32x4x2_t, '2', float32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, '2', float16x4_t] + - [f16, float16x8x2_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u8, uint8x8x2_t, int8x8x2_t, '3'] + - [u16, uint16x4x2_t, int16x4x2_t, '2'] + - [u32, uint32x2x2_t, int32x2x2_t, '1'] + - [u16, uint16x8x2_t, int16x8x2_t, '3'] + - [u32, uint32x4x2_t, int32x4x2_t, '2'] + - [p8, poly8x8x2_t, int8x8x2_t, '3'] + - [p16, poly16x4x2_t, int16x4x2_t, '2'] + - [p16, poly16x8x2_t, int16x8x2_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [vst2] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, int8x8_t, '1'] + - [i16, int16x4x2_t, int16x4_t, '2'] + - [i32, int32x2x2_t, int32x2_t, '4'] + - [i8, int8x16x2_t, int8x16_t, '1'] + - [i16, int16x8x2_t, int16x8_t, '2'] + - [i32, int32x4x2_t, int32x4_t, '4'] + - [f32, float32x2x2_t, float32x2_t, '4'] + - [f32, float32x4x2_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst2] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, float16x4_t, '2'] 
+ - [f16, float16x8x2_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, '3', int8x8_t, '1'] + - [i16, int16x4x2_t, '2', int16x4_t, '2'] + - [i32, int32x2x2_t, '1', int32x2_t, '4'] + - [i16, int16x8x2_t, '3', int16x8_t, '2'] + - [i32, int32x4x2_t, '2', int32x4_t, '4'] + - [f32, float32x4x2_t, '2', float32x4_t, '4'] + - [f32, float32x2x2_t, '1', float32x2_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2lane.v{neon_type[1].lane}{type[0]}.p0' + arch: arm + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, '2', float16x4_t, '2'] + - [f16, float16x8x2_t, 
'1', float16x8_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2lane.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x3_t, int64x1_t] + compose: + - LLVMLink: + name: 'st3.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v8 + - *neon-aes + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [p64, poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x3_t, int64x1_t] + compose: + - LLVMLink: + name: 
'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [u64, uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [u8, uint8x8x3_t, int8x8x3_t, '3'] + - [u16, uint16x4x3_t, int16x4x3_t, '2'] + - [u32, uint32x2x3_t, int32x2x3_t, '1'] + - [u16, uint16x8x3_t, int16x8x3_t, '3'] + - [u32, uint32x4x3_t, int32x4x3_t, '2'] + - [p8, poly8x8x3_t, int8x8x3_t, '3'] + - [p16, poly16x4x3_t, int16x4x3_t, '2'] + - [p16, poly16x8x3_t, int16x8x3_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures 
from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst3]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st3]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [u8, uint8x8x3_t, int8x8x3_t] + - [u16, uint16x4x3_t, int16x4x3_t] + - [u32, uint32x2x3_t, int32x2x3_t] + - [u8, uint8x16x3_t, int8x16x3_t] + - [u16, uint16x8x3_t, int16x8x3_t] + - [u32, uint32x4x3_t, int32x4x3_t] + - [p8, poly8x8x3_t, int8x8x3_t] + - [p16, poly16x4x3_t, int16x4x3_t] + - [p8, poly8x16x3_t, int8x16x3_t] + - [p16, poly16x8x3_t, int16x8x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [vst3] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, int8x8_t, '1'] + - [i16, int16x4x3_t, int16x4_t, '2'] + - [i32, int32x2x3_t, int32x2_t, '4'] + - [i8, int8x16x3_t, int8x16_t, '1'] + - [i16, int16x8x3_t, int16x8_t, '2'] + - [i32, int32x4x3_t, int32x4_t, '4'] + - [f32, float32x2x3_t, float32x2_t, '4'] + - [f32, float32x4x3_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + 
assert_instr: [vst3] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, float16x4_t, '2'] + - [f16, float16x8x3_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, '3', int8x8_t, '1'] + - [i16, int16x4x3_t, '2', int16x4_t, '2'] + - [i32, int32x2x3_t, '1', int32x2_t, '4'] + - [i16, int16x8x3_t, '3', int16x8_t, '2'] + - [i32, int32x4x3_t, '2', int32x4_t, '4'] + - [f32, float32x2x3_t, '1', float32x2_t, '4'] + - [f32, float32x4x3_t, '2', float32x4_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3lane.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - 
*neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, '2', float16x4_t, '4'] + - [f16, float16x8x3_t, '3', float16x8_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3lane.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*target-not-arm, *neon-stable] + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, int8x8_t] + - [i16, int16x4x3_t, int16x4_t] + - [i32, int32x2x3_t, int32x2_t] + - [i8, int8x16x3_t, int8x16_t] + - [i16, int16x8x3_t, int16x8_t] + - [i32, int32x4x3_t, int32x4_t] + - [f32, float32x2x3_t, float32x2_t] + - [f32, float32x4x3_t, float32x4_t] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, float16x4_t] + - [f16, float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 
'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, '3', int8x8_t] + - [i16, int16x4x3_t, '2', int16x4_t] + - [i32, int32x2x3_t, '1', int32x2_t] + - [i16, int16x8x3_t, '3', int16x8_t] + - [i32, int32x4x3_t, '2', int32x4_t] + - [f32, float32x2x3_t, '1', float32x2_t] + - [f32, float32x4x3_t, '2', float32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, '2', float16x4_t] + - [f16, float16x8x3_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i64' + - 'ptr: *mut 
i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v8 + - *neon-aes + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [p64, poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x4_t, int64x1_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x4_t, int64x1_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0' + arch: aarch64,arm64ec 
+ - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [u64, uint64x1x4_t, int64x1x3_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [u8, uint8x8x4_t, int8x8x4_t, '3'] + - [u16, uint16x4x4_t, int16x4x4_t, '2'] + - [u32, uint32x2x4_t, int32x2x4_t, '1'] + - [u16, uint16x8x4_t, int16x8x4_t, '3'] + - [u32, uint32x4x4_t, int32x4x4_t, '2'] + - [p8, poly8x8x4_t, int8x8x4_t, '3'] + - [p16, poly16x4x4_t, int16x4x4_t, '2'] + - [p16, poly16x8x4_t, int16x8x4_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst4]]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st4]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - [u8, uint8x8x4_t, int8x8x4_t] + - [u16, uint16x4x4_t, int16x4x4_t] + - [u32, uint32x2x4_t, int32x2x4_t] + - [u8, uint8x16x4_t, int8x16x4_t] + - [u16, uint16x8x4_t, int16x8x4_t] + - [u32, uint32x4x4_t, int32x4x4_t] + - [p8, poly8x8x4_t, int8x8x4_t] + - [p16, poly16x4x4_t, int16x4x4_t] + - [p8, poly8x16x4_t, int8x16x4_t] + - [p16, poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + assert_instr: [vst4] + safety: + unsafe: [neon] + types: + - [i8, int8x8x4_t, int8x8_t, '1'] + - [i16, int16x4x4_t, int16x4_t, '2'] + - [i32, int32x2x4_t, int32x2_t, '4'] + - [i8, int8x16x4_t, int8x16_t, '1'] + - [i16, int16x8x4_t, int16x8_t, '2'] + - [i32, int32x4x4_t, int32x4_t, '4'] + - [f32, float32x2x4_t, float32x2_t, '4'] + - [f32, float32x4x4_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]] + + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst4] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, float16x4_t, '2'] + - [f16, float16x8x4_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 
'vst4.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]] + + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x4_t, '3', int8x8_t, '1'] + - [i16, int16x4x4_t, '2', int16x4_t, '2'] + - [i32, int32x2x4_t, '1', int32x2_t, '4'] + - [i16, int16x8x4_t, '3', int16x8_t, '2'] + - [i32, int32x4x4_t, '2', int32x4_t, '4'] + - [f32, float32x2x4_t, '1', float32x2_t, '4'] + - [f32, float32x4x4_t, '2', float32x4_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4lane.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', 'LANE', "{type[4]}"]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, '2', float16x4_t, 
'2'] + - [f16, float16x8x4_t, '3', float16x8_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4lane.p0.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', 'LANE', "{type[4]}"]] + + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*target-not-arm, *neon-stable] + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [i8, int8x8x4_t, int8x8_t] + - [i16, int16x4x4_t, int16x4_t] + - [i32, int32x2x4_t, int32x2_t] + - [i8, int8x16x4_t, int8x16_t] + - [i16, int16x8x4_t, int16x8_t] + - [i32, int32x4x4_t, int32x4_t] + - [f32, float32x2x4_t, float32x2_t] + - [f32, float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, float16x4_t] + - [f16, float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' + arch: 
aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x4_t, '3', int8x8_t] + - [i16, int16x4x4_t, '2', int16x4_t] + - [i32, int32x2x4_t, '1', int32x2_t] + - [i16, int16x8x4_t, '3', int16x8_t] + - [i32, int32x4x4_t, '2', int32x4_t] + - [f32, float32x2x4_t, '1', float32x2_t] + - [f32, float32x4x4_t, '2', float32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, '2', float16x4_t] + - [f16, float16x8x4_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - 
link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + + - name: "vusdot{neon_type[0].no}" + doc: "Dot product vector form with unsigned and signed integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot]]}]] + - *neon-unstable-i8mm + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, uint8x8_t, int8x8_t] + - [int32x4_t, uint8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "usdot.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8" + arch: arm + + - name: "vusdot{type[0]}" + doc: "Dot product index form with unsigned and signed integers" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"] + return_type: "{neon_type[1]}" + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-i8mm + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: safe + types: + - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]'] + - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Let: + - c + - int32x2_t + - FnCall: [transmute, [c]] + - Let: + - c + - "{type[1]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: 
["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] + + - name: "vsudot{neon_type[0].lane_nox}" + doc: "Dot product index form with signed and unsigned integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-i8mm + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: safe + types: + - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t] + - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Let: + - c + - uint32x2_t + - FnCall: [transmute, [c]] + - Let: + - c + - "{type[4]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + + - name: "vmul{neon_type[1].no}" + doc: Multiply + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['.i8', int8x8_t] + - ['.i8', int8x16_t] + - ['.i16', int16x4_t] + - ['.i16', int16x8_t] + - ['.i32', int32x2_t] + - ['.i32', int32x4_t] + - ['.i8', uint8x8_t] + - ['.i8', uint8x16_t] + - ['.i16', uint16x4_t] + - ['.i16', uint16x8_t] + - ['.i32', uint32x2_t] + - ['.i32', uint32x4_t] + compose: + - FnCall: [simd_mul, [a, b]] + + - name: "vmul{neon_type[1].no}" + doc: Multiply + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: 
"{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [f32, float32x2_t] + - [f32, float32x4_t] + compose: + - FnCall: [simd_mul, [a, b]] + + + - name: "vmul{neon_type[1].no}" + doc: Multiply + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [f16, float16x4_t] + - [f16, float16x8_t] + compose: + - FnCall: [simd_mul, [a, b]] + + + - name: "vmul{neon_type[0].lane_nox}" + doc: Multiply + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: safe + types: + - [int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - 
[uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]] + + + - name: "vmul{neon_type[0].lane_nox}" + doc: Multiply + arguments: ["a: {neon_type[0]}", "v: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: safe + types: + - [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]] + + + - name: "vmul{neon_type[0].laneq_nox}" + doc: Multiply + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ["const LANE: i32"] + safety: safe + types: + - [int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as 
u32]'] + - [int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]] + + - name: "vmull{neon_type[1].no}" + doc: Signed multiply long + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmull.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ["s8", int8x8_t, int16x8_t] + - ["s16", int16x4_t, int32x4_t] + - ["s32", int32x2_t, int64x2_t] + compose: + - LLVMLink: + name: "smull.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.smull.{neon_type[2]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vmulls.{neon_type[2]}" + arch: arm + + - name: "vmull{neon_type[1].no}" + doc: "Unsigned multiply long" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmull.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ["u8", uint8x8_t, uint16x8_t] + - ["u16", uint16x4_t, uint32x4_t] + - ["u32", uint32x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "smull.{neon_type[1]}" + links: + - link: 
"llvm.aarch64.neon.umull.{neon_type[2]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vmullu.{neon_type[2]}" + arch: arm + + - name: "vmull{neon_type[1].no}" + doc: "Polynomial multiply long" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmull.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [pmull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ["p8", poly8x8_t, poly16x8_t] + compose: + - LLVMLink: + name: "pmull.{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.pmull.v8i16" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vmullp.v8i16" + arch: arm + + - name: "vmull_n{neon_type[0].no}" + doc: Vector long multiply with scalar + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vmull"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16", int32x4_t] + - [int32x2_t, "i32", int64x2_t] + compose: + - FnCall: + - "vmull{neon_type[0].no}" + - - a + - FnCall: + - "vdup_n{neon_type[0].no}" + - - b + + - name: "vmull_n{neon_type[0].no}" + doc: Vector long multiply with scalar + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vmull"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x4_t, "u16", uint32x4_t] + - [uint32x2_t, "u32", uint64x2_t] + compose: + - FnCall: + - "vmull{neon_type[0].no}" + - - a + - FnCall: + - "vdup_n{neon_type[0].no}" + - - b + + - name: 
"vfma{neon_type.no}" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - vfma + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - fmla + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_fma, [b, c, a]] + + + - name: "vfma{neon_type.no}" + doc: Floating-point fused Multiply-Add to accumulator (vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vfma]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmla]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fma, [b, c, a]] + + + - name: "vfma{neon_type[0].N}" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - vfma + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - fmla + - *neon-not-arm-stable + - 
*neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, f32] + - [float32x4_t, f32] + compose: + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}_vfp4" + - - c + + - name: "vsub{neon_type[1].no}" + doc: "Subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sub]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['.i8', int8x8_t] + - ['.i8', int8x16_t] + - ['.i16', int16x4_t] + - ['.i16', int16x8_t] + - ['.i32', int32x2_t] + - ['.i32', int32x4_t] + - ['.i8', uint8x8_t] + - ['.i8', uint8x16_t] + - ['.i16', uint16x4_t] + - ['.i16', uint16x8_t] + - ['.i32', uint32x2_t] + - ['.i32', uint32x4_t] + - ['.i64', int64x1_t] + - ['.i64', int64x2_t] + - ['.i64', uint64x1_t] + - ['.i64', uint64x2_t] + compose: + - FnCall: [simd_sub, [a, b]] + + - name: "vsub{neon_type[1].no}" + doc: "Subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['f32', float32x2_t] + - ['f32', float32x4_t] + compose: + - FnCall: [simd_sub, [a, b]] + + + - name: "vsub{neon_type[1].no}" + doc: "Subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ['f16', float16x4_t] + - ['f16', float16x8_t] + 
compose: + - FnCall: [simd_sub, [a, b]] + + + - name: "vadd{neon_type.no}" + doc: Floating-point Add (vector). + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: + - simd_add + - - a + - b + + - name: "vadd{type[0]}" + doc: Add + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ['h_f16', 'f16'] + compose: + - 'a + b' + + - name: "vadd{neon_type.no}" + doc: Bitwise exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - nop + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - nop + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - poly8x8_t + - poly16x4_t + - poly8x16_t + - poly16x8_t + - poly64x1_t + - poly64x2_t + compose: + - FnCall: + - simd_xor + - - a + - b + + - name: "vaddq_{type}" + doc: Bitwise exclusive OR + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-v7 + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - nop + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 
'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - nop + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - p128 + compose: + - Xor: + - a + - b + + - name: "vsubhn{neon_type[0].noq}" + doc: Subtract returning high narrow + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t, 'i16x8', 'i16x8::new(8, 8, 8, 8, 8, 8, 8, 8)'] + - [int32x4_t, int16x4_t, 'i32x4', 'i32x4::new(16, 16, 16, 16)'] + - [int64x2_t, int32x2_t, 'i64x2', 'i64x2::new(32, 32)'] + - [uint16x8_t, uint8x8_t, 'u16x8', 'u16x8::new(8, 8, 8, 8, 8, 8, 8, 8)'] + - [uint32x4_t, uint16x4_t, 'u32x4', 'u32x4::new(16, 16, 16, 16)'] + - [uint64x2_t, uint32x2_t, 'u64x2', 'u64x2::new(32, 32)'] + compose: + - Let: [c, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_cast + - - FnCall: + - simd_shr + - - FnCall: [simd_sub, [a, b]] + - FnCall: [transmute, [c]] + + - name: "vsubhn_high{neon_type[1].noq}" + doc: Subtract returning high narrow + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn2]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 
4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - Let: + - d + - "{neon_type[0]}" + - FnCall: ["vsubhn{neon_type[1].noq}", [b, c]] + - FnCall: [simd_shuffle!, [a, d, "{type[3]}"]] + + - name: "vhsub{neon_type[1].no}" + doc: "Signed halving subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vhsub.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uhsub]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['u8', uint8x8_t] + - ['u8', uint8x16_t] + - ['u16', uint16x4_t] + - ['u16', uint16x8_t] + - ['u32', uint32x2_t] + - ['u32', uint32x4_t] + compose: + - LLVMLink: + name: "uhsub.{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.uhsub.{neon_type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vhsubu.{neon_type[1]}" + arch: arm + + - name: "vhsub{neon_type[1].no}" + doc: "Signed halving subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vhsub.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [shsub]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['s8', int8x8_t] + - ['s8', int8x16_t] + - ['s16', int16x4_t] + - ['s16', int16x8_t] + - ['s32', int32x2_t] + - ['s32', int32x4_t] + compose: + - LLVMLink: + name: "shsub.{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.shsub.{neon_type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vhsubs.{neon_type[1]}" + arch: arm + + - name: "vsubw{neon_type[1].noq}" + doc: Signed Subtract Wide + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: 
[assert_instr, [vsubw]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ssubw]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t] + - [int32x4_t, int16x4_t] + - [int64x2_t, int32x2_t] + compose: + - FnCall: + - simd_sub + - - a + - FnCall: [simd_cast, [b]] + + - name: "vsubw{neon_type[1].noq}" + doc: Unsigned Subtract Wide + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubw]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usubw]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint8x8_t] + - [uint32x4_t, uint16x4_t] + - [uint64x2_t, uint32x2_t] + compose: + - FnCall: + - simd_sub + - - a + - FnCall: [simd_cast, [b]] + + - name: "vsubl{neon_type[0].noq}" + doc: "Signed Subtract Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ssubl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, int16x8_t] + - [int16x4_t, int32x4_t] + - [int32x2_t, int64x2_t] + compose: + - Let: + - c + - "{neon_type[1]}" + - FnCall: [simd_cast, [a]] + - Let: + - d + - "{neon_type[1]}" + - FnCall: [simd_cast, [b]] + - FnCall: [simd_sub, [c, d]] + + - name: "vsubl{neon_type[0].noq}" + doc: "Unsigned Subtract Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usubl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: 
safe + types: + - [uint8x8_t, uint16x8_t] + - [uint16x4_t, uint32x4_t] + - [uint32x2_t, uint64x2_t] + compose: + - Let: + - c + - "{neon_type[1]}" + - FnCall: [simd_cast, [a]] + - Let: + - d + - "{neon_type[1]}" + - FnCall: [simd_cast, [b]] + - FnCall: [simd_sub, [c, d]] + + - name: "vdot{neon_type[0].no}" + doc: Dot product arithmetic (vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsdot]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sdot]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, int8x8_t] + - [int32x4_t, int8x16_t] + compose: + - LLVMLink: + name: "sdot.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.arm.neon.sdot.{neon_type[0]}.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sdot.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vdot{neon_type[0].no}" + doc: Dot product arithmetic (vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint32x2_t, uint8x8_t] + - [uint32x4_t, uint8x16_t] + compose: + - LLVMLink: + name: "udot.{neon_type[0]}.{neon_type[1]}" + links: + - link: 
"llvm.arm.neon.udot.{neon_type[0]}.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.udot.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vdot{neon_type[0].lane_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsdot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]'] + - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vdot{neon_type[0].lane_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, 
['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smax]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "smax.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxs.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.smax.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umax]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "smax.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxu.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.umax.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + 
arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "smax.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxs.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmax.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vmax.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxs.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmax.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmaxnm{neon_type.no}" + doc: Floating-point Maximum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmaxnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fmaxnm.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmaxnm{neon_type.no}" + doc: Floating-point Maximum Number (vector) + arguments: 
["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmaxnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxnm.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vminnm{neon_type.no}" + doc: Floating-point Minimum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vminnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.arm.neon.vminnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smin]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "smin.{neon_type}" + links: + - link: "llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: 
"llvm.aarch64.neon.smin.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umin]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "umin.{neon_type}" + links: + - link: "llvm.arm.neon.vminu.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.umin.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fmin.{neon_type}" + links: + - link: "llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vmin{neon_type.no}" + doc: Minimum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vmin.{neon_type}" + links: + - link: "llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + + + - name: "vminnm{neon_type.no}" + 
doc: "Floating-point Minimum Number (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vminnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.arm.neon.vminnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.arm.neon.vpadd.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.arm.neon.vpadd.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vqdmull{neon_type[0].noq}" + doc: "Signed saturating doubling 
multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmull]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, int32x4_t] + - [int32x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vqdmull{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqdmull.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sqdmull.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqdmull_n{neon_type[0].no}" + doc: "Vector saturating doubling long multiply with scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmull]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmull]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16", int32x4_t] + - [int32x2_t, "i32", int64x2_t] + compose: + - FnCall: ["vqdmull{neon_type[0].noq}", [a, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]] + + - name: "vqdmull_lane_s16" + doc: "Vector saturating doubling long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmull, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x4_t, int16x4_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, 
"{type[3]}"]]}] + - FnCall: [vqdmull_s16, [a, b]] + + - name: "vqdmull_lane_s32" + doc: "Vector saturating doubling long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmull, 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, int32x2_t, int64x2_t, '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '1']] + - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [vqdmull_s32, [a, b]] + + - name: "vqdmlal{neon_type[1].noq}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x4_t, int16x4_t, int16x4_t, int32x4_t] + - [int64x2_t, int32x2_t, int32x2_t, int64x2_t] + compose: + - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull{neon_type[2].noq}", [b, c]]}]] + + - name: "vqdmlal_n{neon_type[1].noq}" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - 
[int32x4_t, int16x4_t, "i16", int32x4_t] + - [int64x2_t, int32x2_t, "i32", int64x2_t] + compose: + - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull_n{neon_type[1].noq}", [b, c]]}]] + + - name: "vqdmlal_lane_s16" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int16x4_t, int16x4_t, int32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - FnCall: [vqaddq_s32, [a, {FnCall: ["vqdmull_lane_s16::", [b, c]]}]] + + - name: "vqdmlal_lane_s32" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x2_t, int32x2_t, int32x2_t, int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '1']] + - FnCall: [vqaddq_s64, [a, {FnCall: ["vqdmull_lane_s32::", [b, c]]}]] + + - name: "vqdmlsl{neon_type[1].noq}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: 
[assert_instr, [vqdmlsl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x4_t, int16x4_t, int16x4_t, int32x4_t] + - [int64x2_t, int32x2_t, int32x2_t, int64x2_t] + compose: + - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{neon_type[1].noq}", [b, c]]}]] + + - name: "vqdmlsl{type[4]}" + doc: "Vector widening saturating doubling multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x4_t, int16x4_t, "i16", int32x4_t, '_n_s16'] + - [int64x2_t, int32x2_t, "i32", int64x2_t, '_n_s32'] + compose: + - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}", [b, c]]}]] + + - name: "vqdmlsl_lane_s16" + doc: "Vector widening saturating doubling multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x4_t, int16x4_t, int16x4_t, int32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - FnCall: [vqsubq_s32, [a, {FnCall: ["vqdmull_lane_s16::", [b, c]]}]] + + - name: "vqdmlsl_lane_s32" + doc: "Vector widening saturating doubling multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", 
"c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int64x2_t, int32x2_t, int32x2_t, int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '1']] + - FnCall: [vqsubq_s64, [a, {FnCall: ["vqdmull_lane_s32::", [b, c]]}]] + + - name: "vqdmulh{neon_type[0].no}" + doc: "Signed saturating doubling multiply returning high half" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmulh]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmulh]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, int16x4_t, int16x4_t] + - [int16x8_t, int16x8_t, int16x8_t] + - [int32x2_t, int32x2_t, int32x2_t] + - [int32x4_t, int32x4_t, int32x4_t] + compose: + - LLVMLink: + name: "vqdmulh{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqdmulh.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.sqdmulh.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vqdmulh{type[3]}" + doc: "Vector saturating doubling multiply high with scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmulh]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmulh]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16", int16x4_t, '_n_s16'] + - [int32x2_t, "i32", int32x2_t, '_n_s32'] + - [int16x8_t, "i16", int16x8_t, 'q_n_s16'] + - 
[int32x4_t, "i32", int32x4_t, 'q_n_s32'] + compose: + - Let: [b, "{neon_type[0]}", {FnCall: ["vdup{type[3]}", [b]]}] + - FnCall: ["vqdmulh{neon_type[0].no}", [a, b]] + + - name: "vqmovn{neon_type[0].noq}" + doc: "Signed saturating extract narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqmovn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqxtn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t] + - [int32x4_t, int16x4_t] + - [int64x2_t, int32x2_t] + compose: + - LLVMLink: + name: "vqmovn{neon_type[0].noq}" + links: + - link: "llvm.arm.neon.vqmovns.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sqxtn.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqmovun{neon_type[0].noq}" + doc: "Signed saturating extract unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqmovun]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqxtun]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, uint8x8_t] + - [int32x4_t, uint16x4_t] + - [int64x2_t, uint32x2_t] + compose: + - LLVMLink: + name: "vqmovun{neon_type[0].noq}" + links: + - link: "llvm.arm.neon.vqmovnsu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sqxtun.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqrdmulh{neon_type[0].no}" + doc: "Signed saturating rounding doubling multiply returning high half" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqrdmulh]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqrdmulh]]}]] + - *neon-not-arm-stable + - 
*neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, int16x4_t, int16x4_t] + - [int16x8_t, int16x8_t, int16x8_t] + - [int32x2_t, int32x2_t, int32x2_t] + - [int32x4_t, int32x4_t, int32x4_t] + compose: + - LLVMLink: + name: "vqrdmulh{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqrdmulh.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.sqrdmulh.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vqrshl{neon_type.no}" + doc: "Signed saturating rounding shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqrshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqrshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vqrshl{neon_type}" + links: + - link: "llvm.arm.neon.vqrshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.sqrshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqrshl{neon_type[0].no}" + doc: "Unsigned signed saturating rounding shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqrshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqrshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vqrshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqrshiftu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.uqrshl.{neon_type[1]}" + arch: 
aarch64,arm64ec + + - name: "vqrshrn_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrn{neon_type[0].noq}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqrshiftns.{neon_type[1]}" + arch: arm + - FnCall: ["_vqrshrn_n{neon_type[0].noq}", [a, '{type[3]}'], [], true] + + - name: "vqrshrn_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrn{neon_type[0].no}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqrshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqrshrn_n{neon_type[0].noq}", [a, N], [], true] + + - name: 
"vqrshrun_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqrshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrun_n{neon_type[0].noq}" + arguments: + - 'a: {neon_type[0]}' + - 'n: {neon_type[0]}' + links: + - link: "llvm.arm.neon.vqrshiftnsu.{neon_type[1]}" + arch: arm + - FnCall: + - "_vqrshrun_n{neon_type[0].noq}" + - - a + - "{type[3]}" + - [] + - true + + - name: "vqrshrun_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrun_n{neon_type[0].noq}" + arguments: + - 'a: {neon_type[0]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.sqrshrun.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqrshrun_n{neon_type[0].noq}", [a, 
N], [], true] + + - name: "vqshl{neon_type.no}" + doc: "Signed saturating shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vqshl{neon_type}" + links: + - link: "llvm.arm.neon.vqshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.sqshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshl{neon_type[0].N}" + doc: "Signed saturating shift left" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqshl, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, '3'] + - [int8x16_t, '3'] + - [int16x4_t, '4'] + - [int16x8_t, '4'] + - [int32x2_t, '5'] + - [int32x4_t, '5'] + - [int64x1_t, '6'] + - [int64x2_t, '6'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[1]}"]] + - FnCall: + - "vqshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[0].N}", ['N as _']] + + - name: "vqshl{neon_type[0].no}" + doc: "Unsigned saturating shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - 
[uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vqshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqshiftu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.uqshl.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqshl{neon_type[0].N}" + doc: "Unsigned saturating shift left" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqshl, N = 2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, '3', int8x8_t] + - [uint8x16_t, '3', int8x16_t] + - [uint16x4_t, '4', int16x4_t] + - [uint16x8_t, '4', int16x8_t] + - [uint32x2_t, '5', int32x2_t] + - [uint32x4_t, '5', int32x4_t] + - [uint64x1_t, '6', int64x1_t] + - [uint64x2_t, '6', int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[1]}"]] + - FnCall: + - "vqshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[2].N}", ['N as _']] + + - name: "vqshrn_n{neon_type[0].noq}" + doc: "Signed saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, 
-N as i32, -N as i32]) }'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn{neon_type[0].no}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftns.{neon_type[1]}" + arch: arm + - FnCall: ["_vqshrn_n{neon_type[0].noq}", [a, "{type[3]}"], [], true] + + - name: "vqshrn_n{neon_type[0].noq}" + doc: "Signed saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn_n{neon_type[0].noq}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrn_n{neon_type[0].noq}", [a, N], [], true] + + - name: "vqshrn_n_{neon_type[0]}" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }'] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }'] + - [uint64x2_t, 
uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftnu.{neon_type[1]}" + arch: arm + - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", "{type[3]}"], [], true] + + - name: "vqshrn_n_{neon_type[0]}" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8'] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16'] + - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn{neon_type[1].no}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.uqshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", N], [], true] + + - name: "vqshrun_n_{neon_type[0]}" + doc: "Signed saturating shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { 
int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrun_n_{neon_type[1]}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftnsu.{neon_type[1]}" + arch: arm + - FnCall: ["_vqshrun_n_{neon_type[0]}", [a, "{type[3]}"], [], true] + + - name: "vqshrun_n_{neon_type[0]}" + doc: "Signed saturating shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrun_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqshrun.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrun_n_{neon_type[0]}", [a, N], [], true] + + - name: "vrsqrts{neon_type.no}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrts]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrsqrts{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrts.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrsqrts{neon_type.no}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: 
{neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - *neon-fp16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrts]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrsqrts{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrts.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrecpe{neon_type.no}" + doc: "Reciprocal estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecpe.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrecpe{neon_type.no}" + doc: "Reciprocal estimate." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecpe.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrecps{neon_type.no}" + doc: "Floating-point reciprocal step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecps]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecps]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrecps{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecps.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecps.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrecps{neon_type.no}" + doc: "Floating-point reciprocal step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecps]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrecps{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecps.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecps.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes 
+ - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [poly64x1_t, int32x2_t] + - [poly64x1_t, uint32x2_t] + - [poly64x2_t, int32x4_t] + - [poly64x2_t, uint32x4_t] + - [p128, int64x2_t] + - [p128, uint64x2_t] + - [p128, poly64x2_t] + - [poly8x16_t, p128] + - [p128, int8x16_t] + - [p128, uint8x16_t] + - [p128, poly8x16_t] + - [int32x2_t, poly64x1_t] + - [uint32x2_t, poly64x1_t] + - [int32x4_t, poly64x2_t] + - [uint32x4_t, poly64x2_t] + - [int64x2_t, p128] + - [uint64x2_t, p128] + - [poly64x2_t, p128] + - [poly64x1_t, int16x4_t] + - [poly64x1_t, uint16x4_t] + - [poly64x1_t, poly16x4_t] + - [poly64x2_t, int16x8_t] + - [poly64x2_t, uint16x8_t] + - [poly64x2_t, poly16x8_t] + - [p128, int32x4_t] + - [p128, uint32x4_t] + - [poly16x4_t, poly64x1_t] + - [int16x4_t, poly64x1_t] + - [uint16x4_t, poly64x1_t] + - [poly16x8_t, poly64x2_t] + - [int16x8_t, poly64x2_t] + - [uint16x8_t, poly64x2_t] + - [int32x4_t, p128] + - [uint32x4_t, p128] + - [poly64x1_t, int8x8_t] + - [poly64x1_t, uint8x8_t] + - [poly64x1_t, poly8x8_t] + - [poly64x2_t, int8x16_t] + - [poly64x2_t, uint8x16_t] + - [poly64x2_t, poly8x16_t] + - [p128, int16x8_t] + - [p128, uint16x8_t] + - [p128, poly16x8_t] + - [poly8x8_t, poly64x1_t] + - [int8x8_t, poly64x1_t] + - [uint8x8_t, poly64x1_t] + - [poly8x16_t, poly64x2_t] + - [int8x16_t, poly64x2_t] + - [uint8x16_t, poly64x2_t] + - [int16x8_t, p128] + - [uint16x8_t, p128] + - [poly16x8_t, p128] + - [int8x16_t, p128] + - [uint8x16_t, p128] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [poly8x8_t, int8x8_t] + - [poly16x4_t, int16x4_t] + - [uint16x4_t, int16x4_t] + - [uint32x2_t, int32x2_t] + - [uint64x1_t, int64x1_t] + - [uint8x16_t, int8x16_t] + - [poly8x16_t, int8x16_t] + - [poly16x8_t, int16x8_t] + - [uint16x8_t, int16x8_t] + - [uint32x4_t, int32x4_t] + - [uint64x2_t, int64x2_t] + - [poly8x8_t, uint8x8_t] + - [int8x8_t, uint8x8_t] + - [poly16x4_t, uint16x4_t] + - [int16x4_t, uint16x4_t] + - [int32x2_t, uint32x2_t] + - [int64x1_t, uint64x1_t] + - [poly8x16_t, uint8x16_t] + - [int8x16_t, uint8x16_t] + - [poly16x8_t, uint16x8_t] + - [int16x8_t, uint16x8_t] + - [int32x4_t, uint32x4_t] + - [int64x2_t, uint64x2_t] + - [int8x8_t, poly8x8_t] + - [uint8x8_t, poly8x8_t] + - [int16x4_t, poly16x4_t] + - [uint16x4_t, poly16x4_t] + - [int8x16_t, poly8x16_t] + - [uint8x16_t, poly8x16_t] + - [int16x8_t, poly16x8_t] + - [uint16x8_t, poly16x8_t] + - [int16x4_t, int8x8_t] + - [uint16x4_t, int8x8_t] + - [poly16x4_t, int8x8_t] + - [int32x2_t, int16x4_t] + - [uint32x2_t, int16x4_t] + - [int64x1_t, int32x2_t] + - [uint64x1_t, int32x2_t] + - [int16x8_t, int8x16_t] + - [uint16x8_t, int8x16_t] + - [poly16x8_t, int8x16_t] + - [int32x4_t, int16x8_t] + - [uint32x4_t, int16x8_t] + - [int64x2_t, int32x4_t] + - [uint64x2_t, int32x4_t] + - [poly16x4_t, uint8x8_t] + - [int16x4_t, uint8x8_t] + - [uint16x4_t, uint8x8_t] + - [int32x2_t, uint16x4_t] + - [uint32x2_t, uint16x4_t] + - [int64x1_t, uint32x2_t] + - [uint64x1_t, uint32x2_t] + - [poly16x8_t, uint8x16_t] + - [int16x8_t, uint8x16_t] + - [uint16x8_t, uint8x16_t] + - [int32x4_t, uint16x8_t] + - [uint32x4_t, uint16x8_t] + - [int64x2_t, uint32x4_t] + - [uint64x2_t, uint32x4_t] + - [poly16x4_t, poly8x8_t] + - [int16x4_t, poly8x8_t] + - [uint16x4_t, poly8x8_t] + - [int32x2_t, poly16x4_t] + - [uint32x2_t, poly16x4_t] + - [poly16x8_t, poly8x16_t] + - [int16x8_t, 
poly8x16_t] + - [uint16x8_t, poly8x16_t] + - [int32x4_t, poly16x8_t] + - [uint32x4_t, poly16x8_t] + - [poly8x8_t, int16x4_t] + - [int8x8_t, int16x4_t] + - [uint8x8_t, int16x4_t] + - [poly16x4_t, int32x2_t] + - [int16x4_t, int32x2_t] + - [uint16x4_t, int32x2_t] + - [int32x2_t, int64x1_t] + - [uint32x2_t, int64x1_t] + - [poly8x16_t, int16x8_t] + - [int8x16_t, int16x8_t] + - [uint8x16_t, int16x8_t] + - [poly16x8_t, int32x4_t] + - [int16x8_t, int32x4_t] + - [uint16x8_t, int32x4_t] + - [int32x4_t, int64x2_t] + - [uint32x4_t, int64x2_t] + - [poly8x8_t, uint16x4_t] + - [int8x8_t, uint16x4_t] + - [uint8x8_t, uint16x4_t] + - [poly16x4_t, uint32x2_t] + - [int16x4_t, uint32x2_t] + - [uint16x4_t, uint32x2_t] + - [int32x2_t, uint64x1_t] + - [uint32x2_t, uint64x1_t] + - [poly8x16_t, uint16x8_t] + - [int8x16_t, uint16x8_t] + - [uint8x16_t, uint16x8_t] + - [poly16x8_t, uint32x4_t] + - [int16x8_t, uint32x4_t] + - [uint16x8_t, uint32x4_t] + - [int32x4_t, uint64x2_t] + - [uint32x4_t, uint64x2_t] + - [poly8x8_t, poly16x4_t] + - [int8x8_t, poly16x4_t] + - [uint8x8_t, poly16x4_t] + - [poly8x16_t, poly16x8_t] + - [int8x16_t, poly16x8_t] + - [uint8x16_t, poly16x8_t] + - [int32x2_t, int8x8_t] + - [uint32x2_t, int8x8_t] + - [int64x1_t, int16x4_t] + - [uint64x1_t, int16x4_t] + - [int32x4_t, int8x16_t] + - [uint32x4_t, int8x16_t] + - [int64x2_t, int16x8_t] + - [uint64x2_t, int16x8_t] + - [int32x2_t, uint8x8_t] + - [uint32x2_t, uint8x8_t] + - [int64x1_t, uint16x4_t] + - [uint64x1_t, uint16x4_t] + - [int32x4_t, uint8x16_t] + - [uint32x4_t, uint8x16_t] + - [int64x2_t, uint16x8_t] + - [uint64x2_t, uint16x8_t] + - [int32x2_t, poly8x8_t] + - [uint32x2_t, poly8x8_t] + - [int64x1_t, poly16x4_t] + - [uint64x1_t, poly16x4_t] + - [int32x4_t, poly8x16_t] + - [uint32x4_t, poly8x16_t] + - [int64x2_t, poly16x8_t] + - [uint64x2_t, poly16x8_t] + - [poly8x8_t, int32x2_t] + - [int8x8_t, int32x2_t] + - [uint8x8_t, int32x2_t] + - [poly16x4_t, int64x1_t] + - [int16x4_t, int64x1_t] + - [uint16x4_t, int64x1_t] + - 
[poly8x16_t, int32x4_t] + - [int8x16_t, int32x4_t] + - [uint8x16_t, int32x4_t] + - [poly16x8_t, int64x2_t] + - [int16x8_t, int64x2_t] + - [uint16x8_t, int64x2_t] + - [poly8x8_t, uint32x2_t] + - [int8x8_t, uint32x2_t] + - [uint8x8_t, uint32x2_t] + - [poly16x4_t, uint64x1_t] + - [int16x4_t, uint64x1_t] + - [uint16x4_t, uint64x1_t] + - [poly8x16_t, uint32x4_t] + - [int8x16_t, uint32x4_t] + - [uint8x16_t, uint32x4_t] + - [poly16x8_t, uint64x2_t] + - [int16x8_t, uint64x2_t] + - [uint16x8_t, uint64x2_t] + - [int64x1_t, int8x8_t] + - [uint64x1_t, int8x8_t] + - [int64x1_t, uint8x8_t] + - [uint64x1_t, uint8x8_t] + - [int64x1_t, poly8x8_t] + - [uint64x1_t, poly8x8_t] + - [int64x2_t, int8x16_t] + - [uint64x2_t, int8x16_t] + - [int64x2_t, uint8x16_t] + - [uint64x2_t, uint8x16_t] + - [int64x2_t, poly8x16_t] + - [uint64x2_t, poly8x16_t] + - [poly8x8_t, int64x1_t] + - [int8x8_t, int64x1_t] + - [uint8x8_t, int64x1_t] + - [poly8x8_t, uint64x1_t] + - [int8x8_t, uint64x1_t] + - [uint8x8_t, uint64x1_t] + - [poly8x16_t, int64x2_t] + - [int8x16_t, int64x2_t] + - [uint8x16_t, int64x2_t] + - [poly8x16_t, uint64x2_t] + - [int8x16_t, uint64x2_t] + - [uint8x16_t, uint64x2_t] + - [float32x2_t, int8x8_t] + - [float32x2_t, int16x4_t] + - [float32x2_t, int32x2_t] + - [float32x2_t, int64x1_t] + - [float32x4_t, int8x16_t] + - [float32x4_t, int16x8_t] + - [float32x4_t, int32x4_t] + - [float32x4_t, int64x2_t] + - [float32x2_t, uint8x8_t] + - [float32x2_t, uint16x4_t] + - [float32x2_t, uint32x2_t] + - [float32x2_t, uint64x1_t] + - [float32x4_t, uint8x16_t] + - [float32x4_t, uint16x8_t] + - [float32x4_t, uint32x4_t] + - [float32x4_t, uint64x2_t] + - [float32x2_t, poly8x8_t] + - [float32x2_t, poly16x4_t] + - [float32x4_t, poly8x16_t] + - [float32x4_t, poly16x8_t] + - [float32x4_t, p128] + - [int8x8_t, float32x2_t] + - [int16x4_t, float32x2_t] + - [int32x2_t, float32x2_t] + - [int64x1_t, float32x2_t] + - [int8x16_t, float32x4_t] + - [int16x8_t, float32x4_t] + - [int32x4_t, float32x4_t] + - [int64x2_t, 
float32x4_t] + - [uint8x8_t, float32x2_t] + - [uint16x4_t, float32x2_t] + - [uint32x2_t, float32x2_t] + - [uint64x1_t, float32x2_t] + - [uint8x16_t, float32x4_t] + - [uint16x8_t, float32x4_t] + - [uint32x4_t, float32x4_t] + - [uint64x2_t, float32x4_t] + - [poly8x8_t, float32x2_t] + - [poly16x4_t, float32x2_t] + - [poly8x16_t, float32x4_t] + - [poly16x8_t, float32x4_t] + - [p128, float32x4_t] + compose: + - FnCall: [transmute, [a]] + + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + # non-q + - [float32x2_t, float16x4_t] + - [poly16x4_t, float16x4_t] + - [poly8x8_t, float16x4_t] + - [int8x8_t, float16x4_t] + - [int16x4_t, float16x4_t] + - [int32x2_t, float16x4_t] + - [int64x1_t, float16x4_t] + - [uint8x8_t, float16x4_t] + - [uint16x4_t, float16x4_t] + - [uint32x2_t, float16x4_t] + - [uint64x1_t, float16x4_t] + - [float16x4_t, float32x2_t] + - [float16x4_t, poly16x4_t] + - [float16x4_t, poly8x8_t] + - [float16x4_t, int8x8_t] + - [float16x4_t, int16x4_t] + - [float16x4_t, int32x2_t] + - [float16x4_t, int64x1_t] + - [float16x4_t, uint8x8_t] + - [float16x4_t, uint16x4_t] + - [float16x4_t, uint32x2_t] + - [float16x4_t, uint64x1_t] + # q + - [float32x4_t, float16x8_t] + - [poly16x8_t, float16x8_t] + - [poly8x16_t, float16x8_t] + - [int8x16_t, float16x8_t] + - [int16x8_t, float16x8_t] + - [int32x4_t, float16x8_t] + - [int64x2_t, float16x8_t] + - [uint8x16_t, float16x8_t] + - [uint16x8_t, float16x8_t] + - [uint32x4_t, float16x8_t] + - [uint64x2_t, float16x8_t] + - [float16x8_t, float32x4_t] + - [float16x8_t, poly16x8_t] + - [float16x8_t, poly8x16_t] + - [float16x8_t, int8x16_t] + - [float16x8_t, int16x8_t] + - [float16x8_t, 
int32x4_t] + - [float16x8_t, int64x2_t] + - [float16x8_t, uint8x16_t] + - [float16x8_t, uint16x8_t] + - [float16x8_t, uint32x4_t] + - [float16x8_t, uint64x2_t] + compose: + - FnCall: [transmute, [a]] + + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [poly64x1_t, float16x4_t] + - [float16x4_t, poly64x1_t] + # q + - [poly64x2_t, float16x8_t] + - [poly128_t, float16x8_t] + - [float16x8_t, poly128_t] + - [float16x8_t, poly64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vrev64{neon_type[0].no}" + doc: Reverse elements in 64-bit doublewords + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrev64]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rev64]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, "[3, 2, 1, 0]"] + - [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"] + compose: + - FnCall: [simd_shuffle!, [a, a, "{type[1]}"]] + + - name: "vrshl{neon_type.no}" + doc: "Signed rounding shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [srshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vrshl{neon_type.no}" + links: + - link: "llvm.arm.neon.vrshifts.{neon_type}" 
+ arch: arm + - link: "llvm.aarch64.neon.srshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrshl{neon_type[0].no}" + doc: "Unsigned rounding shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [urshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vrshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vrshiftu.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.urshl.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vrshr{neon_type[0].N}" + doc: "Signed rounding shift right" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrshr, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - [int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N <= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: + - "vrshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[0].N}", ['-N as _']] + + - name: "vrshr{neon_type[0].N}" + doc: "Unsigned rounding shift right" + arguments: ["a: {neon_type[0]}"] + 
return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrshr, N = 2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [urshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [uint8x16_t, int8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [uint16x8_t, int16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, int32x2_t, 'N >= 1 && N <= 32'] + - [uint32x4_t, int32x4_t, 'N >= 1 && N <= 32'] + - [uint64x1_t, int64x1_t, 'N >= 1 && N <= 64'] + - [uint64x2_t, int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - FnCall: + - "vrshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[1].N}", ['-N as _']] + + - name: "vrshrn_n_{neon_type[0]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vrshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vrshiftn.{neon_type[1]}" + arch: arm + - FnCall: ["_vrshrn_n_{neon_type[0]}", [a, "{type[3]}"], [], true] + + - 
name: "vrshrn_n_{neon_type[0]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vrshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.rshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vrshrn_n_{neon_type[0]}", [a, N], [], true] + + - name: "vrshrn_n_{neon_type[0]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrshrn, N = 2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', s16] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', s32] + - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', s64] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - FnCall: + - transmute + - - FnCall: + - "vrshrn_n_{type[3]}::" + - - FnCall: [transmute, [a]] + + - name: "vrsra{neon_type[0].N}" + doc: "Signed rounding shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, 
{FnCall: [assert_instr, [srsra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - [int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N <= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: + - simd_add + - - a + - FnCall: ["vrshr{neon_type[0].N}::", [b]] + + - name: "vrsubhn_{neon_type[0]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsubhn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rsubhn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int16x8_t, int8x8_t] + - [int32x4_t, int32x4_t, int16x4_t] + - [int64x2_t, int64x2_t, int32x2_t] + compose: + - LLVMLink: + name: "vrsubhn_{neon_type[0]}" + links: + - link: "llvm.arm.neon.vrsubhn.{neon_type[2]}" + arch: arm + - link: "llvm.aarch64.neon.rsubhn.{neon_type[2]}" + arch: aarch64,arm64ec + + - name: "vrsubhn_{neon_type[0]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsubhn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rsubhn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint16x8_t, uint8x8_t, s16] + - [uint32x4_t, uint32x4_t, uint16x4_t, s32] + - [uint64x2_t, uint64x2_t, uint32x2_t, s64] + compose: + - FnCall: + - transmute + - 
- FnCall: + - "vrsubhn_{type[3]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vcreate_{neon_type[1]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ["u64", int8x8_t] + - ["u64", int16x4_t] + - ["u64", int32x2_t] + - ["u64", int64x1_t] + - ["u64", uint8x8_t] + - ["u64", uint16x4_t] + - ["u64", uint32x2_t] + - ["u64", uint64x1_t] + - ["u64", poly8x8_t] + - ["u64", poly16x4_t] + - ["u64", float32x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vcreate_{neon_type[1]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - ["u64", float16x4_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vcreate_p64" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ["u64", poly64x1_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vset{neon_type[1].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, 
{FnCall: [assert_instr, [nop, LANE = 0]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i8", int8x8_t, '3'] + - ["i16", int16x4_t, '2'] + - ["i32", int32x2_t, '1'] + - ["u8", uint8x8_t, '3'] + - ["u16", uint16x4_t, '2'] + - ["u32", uint32x2_t, '1'] + - ["p8", poly8x8_t, '3'] + - ["p16", poly16x4_t, '2'] + - ["i8", int8x16_t, '4'] + - ["i16", int16x8_t, '3'] + - ["i32", int32x4_t, '2'] + - ["i64", int64x2_t, '1'] + - ["u8", uint8x16_t, '4'] + - ["u16", uint16x8_t, '3'] + - ["u32", uint32x4_t, '2'] + - ["u64", uint64x2_t, '1'] + - ["p8", poly8x16_t, '4'] + - ["p16", poly16x8_t, '3'] + - ["f32", float32x2_t, '1'] + - ["f32", float32x4_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + + - name: "vset{neon_type[1].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, LANE = 0]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["f16", float16x4_t, '2'] + - ["f16", float16x8_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + + - name: "vset_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - 
FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["i64", int64x1_t, int64x1_t] + - ["u64", uint64x1_t, uint64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["p64", poly64x1_t, poly64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vsetq_lane_p64" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - ["p64", poly64x2_t, poly64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vshl{neon_type.no}" + doc: "Signed Shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - 
FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sshl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vshl{neon_type.no}" + links: + - link: "llvm.arm.neon.vshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.sshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vshl{neon_type[0].no}" + doc: "Unsigned Shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vshl]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ushl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vshiftu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.ushl.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vshll_n_s8" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshll.s8"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sshll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, int16x8_t, 'N >= 0 && N <= 8'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - FnCall: + - simd_shl + - - 
FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_s16, ['N as _']] + + - name: "vshll_n_s16" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshll.s16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sshll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x4_t, int32x4_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 16"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_s32, ['N as _']] + + - name: "vshll_n_s32" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshll.s32"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sshll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int32x2_t, int64x2_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 32"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_s64, ['N as _']] + + - name: "vshll_n_u8" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshll.u8"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ushll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, uint16x8_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 8"]] + 
- FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_u16, ['N as _']] + + - name: "vshll_n_u16" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshll.u16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ushll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x4_t, uint32x4_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 16"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_u32, ['N as _']] + + - name: "vshll_n_u32" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshll.u32"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ushll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint32x2_t, uint64x2_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 32"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_u64, ['N as _']] + + - name: "vshr{neon_type[0].N}" + doc: "Shift right" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshr.{neon_type[0]}"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, 'N >= 1 && N <= 8', 'let n: i32 = 
if N == 8 { 7 }', 'else { N };'] + - [int8x16_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { 7 }', 'else { N };'] + - [int16x4_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { 15 }', 'else { N };'] + - [int16x8_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { 15 }', 'else { N };'] + - [int32x2_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { 31 }', 'else { N };'] + - [int32x4_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { 31 }', 'else { N };'] + - [int64x1_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { 63 }', 'else { N };'] + - [int64x2_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { 63 }', 'else { N };'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - Identifier: ["{type[2]}{type[3]}", Symbol] + - FnCall: + - simd_shr + - - a + - FnCall: ["vdup{neon_type[0].N}", ['n as _']] + + - name: "vshr{neon_type[0].N}" + doc: "Shift right" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshr.{neon_type[0]}"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ushr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { return vdup_n_u8(0); }', 'else { N };'] + - [uint8x16_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { return vdupq_n_u8(0); }', 'else { N };'] + - [uint16x4_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { return vdup_n_u16(0); }', 'else { N };'] + - [uint16x8_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { return vdupq_n_u16(0); }', 'else { N };'] + - [uint32x2_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { return vdup_n_u32(0); }', 'else { N };'] + - [uint32x4_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { return vdupq_n_u32(0); }', 'else { N };'] + - [uint64x1_t, 'N >= 1 && N <= 64', 'let n: i32 = 
if N == 64 { return vdup_n_u64(0); }', 'else { N };'] + - [uint64x2_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { return vdupq_n_u64(0); }', 'else { N };'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - Identifier: ['{type[2]}{type[3]}', Symbol] + - FnCall: + - simd_shr + - - a + - FnCall: ["vdup{neon_type[0].N}", ['n as _']] + + - name: "vshrn_n_{neon_type[0]}" + doc: "Shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vshrn{type[2]}"', 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [shrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int16x8_t, int8x8_t, '.i16', 'N >= 1 && N <= 8'] + - [uint16x8_t, uint8x8_t, '.i16', 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, '.i32', 'N >= 1 && N <= 16'] + - [uint32x4_t, uint16x4_t, '.i32', 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, '.i64', 'N >= 1 && N <= 32'] + - [uint64x2_t, uint32x2_t, '.i64', 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_cast + - - FnCall: + - simd_shr + - - a + - FnCall: ["vdupq_n_{neon_type[0]}", ['N as _']] + + - name: "vsra{neon_type[0].N}" + doc: "Signed shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ssra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - 
[int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N <= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: + - simd_add + - - a + - FnCall: ["vshr{neon_type[0].N}::", [b]] + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, int8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int16x4_t, int16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [int8x16_t, int8x16x2_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]', '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [int16x8_t, int16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int32x4_t, int32x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [uint8x8_t, uint8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint16x4_t, uint16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [uint8x16_t, uint8x16x2_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]', '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [uint16x8_t, uint16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint32x4_t, uint32x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [poly8x8_t, poly8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [poly16x4_t, poly16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [poly8x16_t, poly8x16x2_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]', '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [poly16x8_t, poly16x8x2_t, '[0, 8, 2, 
10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [float32x4_t, float32x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] + - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] + - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + - name: 
"vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x16_t, int8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [int16x8_t, int16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int32x4_t, int32x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [uint8x16_t, uint8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [uint16x8_t, uint16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint32x4_t, uint32x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [poly8x16_t, poly8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [poly16x8_t, poly16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [float32x4_t, float32x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 
3]'] + - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] + - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vzip]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, int8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int16x4_t, int16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [uint8x8_t, uint8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint16x4_t, uint16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [poly8x8_t, poly8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [poly16x4_t, poly16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vzip.16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + compose: + - 
Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t, int8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int16x4_t, int16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [int8x16_t, int8x16x2_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]', '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [int16x8_t, int16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int32x4_t, int32x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [uint8x8_t, uint8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint16x4_t, uint16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [uint8x16_t, uint8x16x2_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]', '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [uint16x8_t, uint16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint32x4_t, uint32x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [poly8x8_t, poly8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [poly16x4_t, poly16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [poly8x16_t, poly8x16x2_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]', '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [poly16x8_t, poly16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [float32x4_t, float32x4x2_t, '[0, 2, 4, 6]', '[1, 
3, 5, 7]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] + - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] + - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vabal_{neon_type[1]}" + doc: "Unsigned Absolute difference and Accumulate Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: 
"{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabal.{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uabal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint8x8_t, "u8"] + - [uint32x4_t, uint16x4_t, "u16"] + - [uint64x2_t, uint32x2_t, "u32"] + compose: + - Let: [d, "{neon_type[1]}", {FnCall: ["vabd_{type[2]}", [b, c]]}] + - FnCall: [simd_add, [a, {FnCall: [simd_cast, [d]]}]] + + - name: "vabal_{neon_type[1]}" + doc: "Signed Absolute difference and Accumulate Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabal.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sabal]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t, uint8x8_t] + - [int32x4_t, int16x4_t, uint16x4_t] + - [int64x2_t, int32x2_t, uint32x2_t] + compose: + - Let: [d, "{type[1]}", {FnCall: ["vabd_{neon_type[1]}", [b, c]]}] + - Let: [e, "{type[2]}", {FnCall: ["simd_cast", [d]]}] + - FnCall: [simd_add, [a, {FnCall: [simd_cast, [e]]}]] + + - name: "vqabs{neon_type.no}" + doc: Signed saturating Absolute value + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqabs.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqabs]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "sqabs.{neon_type}" + links: + - link: "llvm.aarch64.neon.sqabs.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vqabs.{neon_type}" + 
arch: arm + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*mut u8", uint8x8x2_t, int8x8x2_t] + - ["*mut u16", uint16x4x2_t, int16x4x2_t] + - ["*mut u32", uint32x2x2_t, int32x2x2_t] + - ["*mut u64", uint64x1x2_t, int64x1x2_t] + - ["*mut u8", uint8x16x2_t, int8x16x2_t] + - ["*mut u16", uint16x8x2_t, int16x8x2_t] + - ["*mut u32", uint32x4x2_t, int32x4x2_t] + - ["*mut u64", uint64x2x2_t, int64x2x2_t] + - ["*mut u8", uint8x8x3_t, int8x8x3_t] + - ["*mut u16", uint16x4x3_t, int16x4x3_t] + - ["*mut u32", uint32x2x3_t, int32x2x3_t] + - ["*mut u64", uint64x1x3_t, int64x1x3_t] + - ["*mut u8", uint8x16x3_t, int8x16x3_t] + - ["*mut u16", uint16x8x3_t, int16x8x3_t] + - ["*mut u32", uint32x4x3_t, int32x4x3_t] + - ["*mut u64", uint64x2x3_t, int64x2x3_t] + - ["*mut u8", uint8x8x4_t, int8x8x4_t] + - ["*mut u16", uint16x4x4_t, int16x4x4_t] + - ["*mut u32", uint32x2x4_t, int32x2x4_t] + - ["*mut u64", uint64x1x4_t, int64x1x4_t] + - ["*mut u8", uint8x16x4_t, int8x16x4_t] + - ["*mut u16", uint16x8x4_t, int16x8x4_t] + - ["*mut u32", uint32x4x4_t, int32x4x4_t] + - ["*mut u64", uint64x2x4_t, int64x2x4_t] + - ["*mut p8", poly8x8x2_t, int8x8x2_t] + - ["*mut p8", poly8x8x3_t, int8x8x3_t] + - ["*mut p8", poly8x8x4_t, int8x8x4_t] + - ["*mut p8", poly8x16x2_t, int8x16x2_t] + - ["*mut p8", poly8x16x3_t, int8x16x3_t] + - ["*mut p8", poly8x16x4_t, int8x16x4_t] + - ["*mut p16", poly16x4x2_t, int16x4x2_t] + - ["*mut p16", poly16x4x3_t, int16x4x3_t] + - ["*mut p16", poly16x4x4_t, int16x4x4_t] + - ["*mut p16", poly16x8x2_t, int16x8x2_t] + - ["*mut p16", poly16x8x3_t, int16x8x3_t] + - ["*mut p16", 
poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - "vst1{neon_type[2].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*mut p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "vst1{neon_type[2].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st1]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*mut p64", poly64x1x3_t, int64x1x3_t] + - ["*mut p64", poly64x1x4_t, int64x1x4_t] + - ["*mut p64", poly64x2x2_t, int64x2x2_t] + - ["*mut p64", poly64x2x3_t, int64x2x3_t] + - ["*mut p64", poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - "vst1{neon_type[2].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - *neon-arm-unstable + safety: + unsafe: [neon] + types: + - ['*mut f32', float32x2x2_t, float32x2_t] + - ['*mut f32', float32x4x2_t, float32x4_t] + compose: + - LLVMLink: + 
name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + + # vst1_f16_x2 - arm + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4x2_t, float16x4_t] + - ['*mut f16', float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0.{neon_type[2]}" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + + # vst1_f16_x2 - aarch64 + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x2_t, float16x4_t] + - ["*mut f16", float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x2.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - 
*target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4x3_t, float16x4_t] + - ['*mut f16', float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0.{neon_type[2]}" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1', 'b.2']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-stable + safety: + unsafe: [neon] + types: + - ["*mut f32", float32x2x2_t, float32x2_t] + - ["*mut f32", float32x4x2_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x2.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-stable + safety: + unsafe: [neon] + types: + - ["*mut f32", float32x2x3_t, float32x2_t] + - ["*mut f32", float32x4x3_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x3.{neon_type[2]}.p0" + arch: 
aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x3_t, float16x4_t] + - ["*mut f16", float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x3.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-stable + safety: + unsafe: [neon] + types: + - ["*mut f32", float32x2x4_t, float32x2_t] + - ["*mut f32", float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x4.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + 
- *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x4_t, float16x4_t] + - ["*mut f16", float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x4.{neon_type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + +# - name: "vst1{neon_type[1].no}" +# doc: "Store a single-element structures to one register." +# arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] +# attr: +# - *neon-v7 +# - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst1]]}]] +# - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st1]]}]] +# - *neon-fp16 +# - *neon-unstable-f16 +# safety: +# unsafe: [neon] +# types: +# - ["*mut f16", float16x4_t] +# - ["*mut f16", float16x8_t] +# compose: +# - FnCall: [core::ptr::write_unaligned, ['ptr.cast()', a]] + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vfms]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: ["vfma{neon_type.no}", [a, b, c]] + + - name: "vmul{neon_type[0].no}" + doc: "Polynomial multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [pmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [poly8x8_t, int8x8_t] + - [poly8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "vmul{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vmulp.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.pmul.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vmls{neon_type.no}" + doc: "Floating-point multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_sub, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vcge{neon_type.no}" + doc: "Compare unsigned greater than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmhs]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - 
[float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_ge, [a, b]] + + + - name: "vcgez{neon_type[0].no}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_ge + - - a + - FnCall: [transmute, [b]] + + - name: "vclt{neon_type.no}" + doc: "Compare unsigned less than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmhi]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vtst{neon_type[0].no}" + doc: "Unsigned compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + 
return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtst]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmtst]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint8x16_t, u8x16, 'u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint16x4_t, u16x4, 'u16x4::new(0, 0, 0, 0)'] + - [uint16x8_t, u16x8, 'u16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint32x2_t, u32x2, 'u32x2::new(0, 0)'] + - [uint32x4_t, u32x4, 'u32x4::new(0, 0, 0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[1]}", "{type[2]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vshl{neon_type[0].N}" + doc: "Shift left" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vshl, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [shl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, '3'] + - [int8x16_t, '3'] + - [int16x4_t, '4'] + - [int16x8_t, '4'] + - [int32x2_t, '5'] + - [int32x4_t, '5'] + - [uint8x8_t, '3'] + - [uint8x16_t, '3'] + - [uint16x4_t, '4'] + - [uint16x8_t, '4'] + - [uint32x2_t, '5'] + - [uint32x4_t, '5'] + - [int64x1_t, '6'] + - [int64x2_t, '6'] + - [uint64x1_t, '6'] + - [uint64x2_t, '6'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[1]}"]] + - FnCall: + - simd_shl + - - a + - FnCall: ["vdup{neon_type[0].N}", ['N as _']] + + - name: "vsra{neon_type[0].N}" + doc: "Unsigned shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, 
[*test-is-arm, {FnCall: [assert_instr, [vsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, '8'] + - [uint8x16_t, '8'] + - [uint16x4_t, '16'] + - [uint16x8_t, '16'] + - [uint32x2_t, '32'] + - [uint32x4_t, '32'] + - [uint64x1_t, '64'] + - [uint64x2_t, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[1]}']] + - FnCall: + - simd_add + - - a + - FnCall: ["vshr{neon_type[0].N}::", [b]] + + - name: "vrsra{neon_type[0].N}" + doc: "Unsigned rounding shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ursra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint8x8_t, '8'] + - [uint8x16_t, '8'] + - [uint16x4_t, '16'] + - [uint16x8_t, '16'] + - [uint32x2_t, '32'] + - [uint32x4_t, '32'] + - [uint64x1_t, '64'] + - [uint64x2_t, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[1]}']] + - FnCall: + - simd_add + - - a + - FnCall: ["vrshr{neon_type[0].N}::", [b]] + + - name: "vqrshrn_n_{neon_type[0]}" + doc: "Unsigned signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as 
u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }'] + - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }'] + - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']] + - LLVMLink: + name: "vqrshrn{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqrshiftnu.{neon_type[1]}" + arch: arm + - FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a", "{type[3]}"], [], true] + + - name: "vqrshrn_n_{neon_type[0]}" + doc: "Unsigned signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [uint16x8_t, uint8x8_t, '8'] + - [uint32x4_t, uint16x4_t, '16'] + - [uint64x2_t, uint32x2_t, '32'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']] + - LLVMLink: + name: "vqrshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.uqrshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a", N], [], true] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + 
links: + - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: + - simd_cast + - - a + + - name: "vcvt_f16_{neon_type[0]}" + doc: "Floating-point convert to lower precision narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt.f16.f32]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float32x4_t, float16x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_f32_f16" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtl]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vmla{neon_type[0].N}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.i16"']]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mla]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16", int16x4_t] + - [int16x8_t, "i16", int16x8_t] + - [uint16x4_t, "u16", uint16x4_t] + - [uint16x8_t, "u16", uint16x8_t] + compose: + - FnCall: + - "vmla{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmla{neon_type[0].N}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.i32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mla]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, "i32", int32x2_t] + - [int32x4_t, "i32", int32x4_t] + - [uint32x2_t, "u32", uint32x2_t] + - [uint32x4_t, "u32", uint32x4_t] + compose: + - FnCall: + - "vmla{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmla{neon_type[0].N}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, "f32", float32x2_t] + - [float32x4_t, "f32", float32x4_t] + compose: + - FnCall: ["vmla{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [c]]}]] + + - name: "vmla{type[0]}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.i16"', 
'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mla, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmla{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmla{type[0]}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.i32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mla, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: 
i32'] + safety: safe + types: + - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmla{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmla{type[0]}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmla.f32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmla{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, 
"{type[4]}"]] + + - name: "vmls{neon_type[0].N}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.i16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16", int16x4_t] + - [int16x8_t, "i16", int16x8_t] + - [uint16x4_t, "u16", uint16x4_t] + - [uint16x8_t, "u16", uint16x8_t] + compose: + - FnCall: + - "vmls{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmls{neon_type[0].N}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.i32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int32x2_t, "i32", int32x2_t] + - [int32x4_t, "i32", int32x4_t] + - [uint32x2_t, "u32", uint32x2_t] + - [uint32x4_t, "u32", uint32x4_t] + compose: + - FnCall: + - "vmls{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmls{neon_type[0].N}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, "f32", float32x2_t] + - [float32x4_t, "f32", float32x4_t] + compose: + - FnCall: ["vmls{neon_type[0].no}", [a, b, 
{FnCall: ["vdup{neon_type[0].N}", [c]]}]] + + - name: "vmls{type[0]}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.i16"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mls, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmls{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmls{type[0]}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - 
FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.i32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mls, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmls{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmls{type[0]}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmls.f32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as 
u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmls{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16"] + - [int16x8_t, "i16"] + - [int32x2_t, "i32"] + - [int32x4_t, "i32"] + - [uint16x4_t, "u16"] + - [uint16x8_t, "u16"] + - [uint32x2_t, "u32"] + - [uint32x4_t, "u32"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, "f32"] + - [float32x4_t, "f32"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, "f16"] + - 
[float16x8_t, "f16"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float32x2_t, float32x2_t, '_lane_f32', '1', '[LANE as u32, LANE as u32]'] + - [float32x2_t, float32x4_t, '_laneq_f32', '2', '[LANE as u32, LANE as u32]'] + - [float32x4_t, float32x2_t, 'q_lane_f32', '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [float32x4_t, float32x4_t, 'q_laneq_f32', '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + + - name: "vqrdmulh{type[0]}" + doc: "Vector rounding saturating doubling multiply high by scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqrdmulh, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqrdmulh, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, 
LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - Let: [b, "{neon_type[1]}", {FnCall: [simd_shuffle!, [b, b, '{type[4]}']]}] + - FnCall: ["vqrdmulh{neon_type[1].no}", [a, b]] + + - name: "vqrdmulh{neon_type[0].N}" + doc: "Vector saturating rounding doubling multiply high with scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqrdmulh]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqrdmulh]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x4_t, "i16"] + - [int16x8_t, "i16"] + - [int32x2_t, "i32"] + - [int32x4_t, "i32"] + compose: + - FnCall: + - "vqrdmulh{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vclt{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vclt{neon_type[0].no}" + 
doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_lt, [a, b]] + + + - name: "vcltz{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vclt.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmlt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + + - name: "vabdl_{neon_type[0]}" + doc: "Unsigned Absolute difference Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabdl.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uabdl]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, uint16x8_t] + - [uint16x4_t, uint32x4_t] + - [uint32x2_t, uint64x2_t] + compose: + - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [a, b]]}]] + + - name: "vmull_lane{neon_type[1].no}" + doc: "Vector long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, 
{FnCall: [assert_instr, [vmull, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smull, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [int16x4_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x4_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32]'] + - [int32x2_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + + - name: "vmull_lane{neon_type[1].no}" + doc: "Vector long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmull, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umull, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + + - name: "vfms{neon_type[0].N}" + doc: "Floating-point fused Multiply-subtract to accumulator(vector)" + 
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vfms]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmls]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, "f32"] + - [float32x4_t, "f32"] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}_vfp4", [c]] + + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmls]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: ["vfma{neon_type.no}", [a, b, c]] + + - name: "vqdmulh{neon_type[0].laneq_nox}" + doc: "Vector saturating doubling multiply high by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmulh, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: safe + types: + - [int16x8_t, int16x8_t, '3'] + - [int16x4_t, int16x8_t, '3'] + - [int32x4_t, int32x4_t, '2'] + - [int32x2_t, int32x4_t, '2'] + compose: + - 
FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: + - "vqdmulh{neon_type[0].no}" + - - a + - FnCall: + - "vdup{neon_type[0].N}" + - - FnCall: [simd_extract!, [b, 'LANE as u32']] + + - name: "vrecpe{neon_type.no}" + doc: "Unsigned reciprocal estimate" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [urecpe]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.urecpe.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrsqrte{neon_type.no}" + doc: "Unsigned reciprocal square root estimate" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ursqrte]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.ursqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - *neon-fp16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-unstable-f16 + safety: safe + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vqshlu{neon_type[0].N}" + doc: "Signed saturating shift left unsigned" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshlu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-arm-unstable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }'] + - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }'] + - [int8x16_t, uint8x16_t, '3', 'const { 
int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }'] + - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - LLVMLink: + name: "vqshlu{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftsu.{neon_type[0]}" + arch: arm + - FnCall: ["_vqshlu{neon_type[0].N}", [a, "{type[3]}"], [], true] + + - name: "vqshlu{neon_type[0].N}" + doc: "Signed saturating shift left unsigned" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-stable + static_defs: ['const N: i32'] + safety: safe + types: + - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }'] + - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }'] + - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as 
i32, N as i32]) }'] + - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - LLVMLink: + name: "vqshlu{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.aarch64.neon.sqshlu.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshlu{neon_type[0].N}", [a, "{type[3]}"], [], true] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - FnCall: + - simd_cast + - - a + + - name: "vqmovn_{neon_type[0]}" + doc: "Unsigned saturating extract narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqmovn]]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqxtn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint8x8_t] + - [uint32x4_t, uint16x4_t] + - [uint64x2_t, uint32x2_t] + compose: + - LLVMLink: + name: "vqmovn_{neon_type[1]}" + links: + - link: "llvm.arm.neon.vqmovnu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.uqxtn.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vcle{neon_type.no}" + doc: "Compare unsigned less than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cmhs]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld4]]}]] + - *neon-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x4_t, int8x8_t, '1'] + - ["*const i16", int16x4x4_t, int16x4_t, '2'] + - ["*const i32", int32x2x4_t, int32x2_t, '4'] + - ["*const i8", int8x16x4_t, int8x16_t, '1'] + - ["*const i16", int16x8x4_t, int16x8_t, '2'] + - ["*const i32", int32x4x4_t, int32x4_t, '4'] + - ["*const f32", float32x2x4_t, float32x2_t, '4'] + - ["*const f32", float32x4x4_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.{neon_type[2]}.p0" + arch: arm + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as *const i8', "{type[3]}"]] + 
+ - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] + - *neon-stable + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x4_t, int8x8_t] + - ["*const i16", int16x4x4_t, int16x4_t] + - ["*const i32", int32x2x4_t, int32x2_t] + - ["*const i8", int8x16x4_t, int8x16_t] + - ["*const i16", int16x8x4_t, int16x8_t] + - ["*const i32", int32x4x4_t, int32x4_t] + - ["*const i64", int64x1x4_t, int64x1_t] + - ["*const f32", float32x2x4_t, float32x2_t] + - ["*const f32", float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4r.{neon_type[2]}.p0.p0" + arch: aarch64,arm64ec + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as _']] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - *neon-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x4_t] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.v1i64.p0" + arch: arm + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as *const i8', '8']] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, 
[ld4r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x4_t, int8x8x4_t] + - ["*const u16", uint16x4x4_t, int16x4x4_t] + - ["*const u32", uint32x2x4_t, int32x2x4_t] + - ["*const u8", uint8x16x4_t, int8x16x4_t] + - ["*const u16", uint16x8x4_t, int16x8x4_t] + - ["*const u32", uint32x4x4_t, int32x4x4_t] + - ["*const p8", poly8x8x4_t, int8x8x4_t] + - ["*const p16", poly16x4x4_t, int16x4x4_t] + - ["*const p8", poly8x16x4_t, int8x16x4_t] + - ["*const p16", poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - "transmute" + - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - "transmute" + - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4r]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - "transmute" + - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] + + - name: "vld1{type[0]}" + visibility: private + doc: "Load multiple 
single-element structures to one, two, three, or four registers" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{neon_type[3]}" + attr: + - *target-is-arm + - *enable-v7 + # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - *neon-arm-unstable + safety: + unsafe: [neon] + types: + - ["_v8i8", "*const i8", "i32", "int8x8_t"] + - ["q_v16i8", "*const i8", "i32", "int8x16_t"] + - ["_v4i16", "*const i8", "i32", "int16x4_t"] + - ["q_v8i16", "*const i8", "i32", "int16x8_t"] + - ["_v2i32", "*const i8", "i32", "int32x2_t"] + - ["q_v4i32", "*const i8", "i32", "int32x4_t"] + - ["_v1i64", "*const i8", "i32", "int64x1_t"] + - ["q_v2i64", "*const i8", "i32", "int64x2_t"] + - ["_v2f32", "*const i8", "i32", "float32x2_t"] + - ["q_v4f32", "*const i8", "i32", "float32x4_t"] + compose: + - LLVMLink: + name: "vld1.{type[0]}" + links: + - link: "llvm.arm.neon.vld1.{neon_type[3]}" + arch: arm + - FnCall: ["_vld1{type[0]}", [a, b]] + + + - name: "vld1{type[0]}" + visibility: private + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{neon_type[3]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["_v4f16", "*const i8", "i32", "float16x4_t"] + - ["q_v8f16", "*const i8", "i32", "float16x8_t"] + compose: + - LLVMLink: + name: "vld1.{type[0]}" + links: + - link: "llvm.arm.neon.vld1.{neon_type[3]}" + arch: arm + - FnCall: ["_vld1{type[0]}", [a, b]] + + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + types: + - ['*const i8', int8x8_t, '"vld1.8"', 'crate::mem::align_of::() as i32', '_v8i8'] + - ['*const i8', int8x16_t, '"vld1.8"', 'crate::mem::align_of::() as i32', 'q_v16i8'] + - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::() as i32', '_v4i16'] + - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::() as i32', 'q_v8i16'] + - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::() as i32', '_v2i32'] + - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::() as i32', 'q_v4i32'] + - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::() as i32', '_v1i64'] + - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::() as i32', 'q_v2i64'] + compose: + - FnCall: + - "vld1{type[4]}" + - - 'ptr as *const i8' + - '{type[3]}' + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + safety: + unsafe: [neon] + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "{type[3]}"']] + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + types: + - ['*const u8', uint8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v8i8'] + - ['*const u8', uint8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v16i8'] + - ['*const u16', uint16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4i16'] + - ['*const u16', uint16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8i16'] + - ['*const u32', uint32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v2i32'] + - ['*const u32', uint32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v4i32'] + - ['*const u64', uint64x1_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v1i64'] + - ['*const u64', uint64x2_t, '"vld1.64"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*const p8', poly8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v8i8'] + - ['*const p8', poly8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v16i8'] + - ['*const p16', poly16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4i16'] + - ['*const p16', poly16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8i16'] + - ['*const p64', poly64x2_t, '"vld1.64"', 'neon,aes', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v2f32'] + - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v4f32'] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{type[5]}" + - - 'ptr as *const i8' + - '{type[4]}' + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four 
registers." + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + safety: + unsafe: [neon] + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "{type[3]}"']] + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + types: + - ['*const f16', float16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4f16'] + - ['*const f16', float16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8f16'] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{type[5]}" + - - 'ptr as *const i8' + - '{type[4]}' + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers." + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-aes + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vldr']]}]] + types: + - ['*const p64', poly64x1_t] + compose: + # Inlining seems broken for 'fn vld1_v1i64', this "fixes" it + - Let: [a, '*const i8', 'ptr as *const i8'] + - Let: [b, i32, 'crate::mem::align_of::() as i32'] + - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; }} transmute(_vld1_v1i64(a, b))' + + - name: "vtbx1" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - "int8x8_t" + compose: + - LLVMLink: + name: "vtbx1" + links: + - link: "llvm.arm.neon.vtbx1" + arch: arm + + - name: "vtbx1_s8" + doc: "Extended table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + 
assert_instr: [vtbx] + safety: safe + types: + - int8x8_t + compose: + - FnCall: [vtbx1, [a, b, c]] + + - name: "vtbx1{neon_type.no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: uint8x8_t"] + return_type: "{neon_type}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - uint8x8_t + - poly8x8_t + compose: + - FnCall: + - transmute + - - FnCall: + - vtbx1 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + - FnCall: [transmute, [c]] + + - name: "vtbx2" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}"] + return_type: "{neon_type}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - "int8x8_t" + compose: + - LLVMLink: + name: "vtbx2" + links: + - link: "llvm.arm.neon.vtbx2" + arch: arm + + - name: "vtbx2_s8" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - [int8x8_t, int8x8x2_t] + compose: + - FnCall: [vtbx2, [a, 'b.0', 'b.1', c]] + + - name: "vtbx2{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - [uint8x8_t, uint8x8x2_t, uint8x8_t] + - [poly8x8_t, poly8x8x2_t, uint8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vtbx2 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ['b.0']] + - FnCall: [transmute, ['b.1']] + - FnCall: [transmute, [c]] + + - name: "vtbx3" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", 
"c: {neon_type}", "d: {neon_type}", "e: {neon_type}"] + return_type: "{neon_type}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - "int8x8_t" + compose: + - LLVMLink: + name: "vtbx3" + links: + - link: "llvm.arm.neon.vtbx3" + arch: arm + + - name: "vtbx3_s8" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - [int8x8_t, int8x8x3_t] + compose: + - FnCall: [vtbx3, [a, 'b.0', 'b.1', 'b.2', c]] + + - name: "vtbx3{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - [uint8x8_t, uint8x8x3_t, uint8x8_t] + - [poly8x8_t, poly8x8x3_t, uint8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vtbx3 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ['b.0']] + - FnCall: [transmute, ['b.1']] + - FnCall: [transmute, ['b.2']] + - FnCall: [transmute, [c]] + + - name: "vtbx4" + visibility: private + doc: "Extended table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}", "e: {neon_type}", "f: {neon_type}"] + return_type: "{neon_type}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - "int8x8_t" + compose: + - LLVMLink: + name: "vtbx4" + links: + - link: "llvm.arm.neon.vtbx4" + arch: arm + + - name: "vtbx4{neon_type[0].noq}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - 
["uint8x8_t", "uint8x8x4_t", "uint8x8_t"] + - ["poly8x8_t", "poly8x8x4_t", "uint8x8_t"] + compose: + - FnCall: + - "transmute" + - - FnCall: + - vtbx4 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, ["b.0"]] + - FnCall: [transmute, ["b.1"]] + - FnCall: [transmute, ["b.2"]] + - FnCall: [transmute, ["b.3"]] + - FnCall: [transmute, [c]] + + - name: "vtbx4{neon_type[0].noq}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + assert_instr: [vtbx] + safety: safe + types: + - ["int8x8_t", "int8x8x4_t"] + big_endian_inverse: true + compose: + - FnCall: + - vtbx4 + - - a + - FnCall: [transmute, ["b.0"]] + - FnCall: [transmute, ["b.1"]] + - FnCall: [transmute, ["b.2"]] + - FnCall: [transmute, ["b.3"]] + - c + + - name: "vld4{neon_type[1].nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld4{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld4{neon_type[1].nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", 
float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].nox}" + - - "a as _" + + - name: "vld4{neon_type[1].dup_nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld4{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld4{neon_type[1].dup_nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4r.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld4', 'LANE = 0']]}]] + 
- FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x4_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "d: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld4{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "b.2" + - "b.3" + - "LANE" + - "2" + + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x4_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "d: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "b.2" + - "b.3" + - "LANE as i64" + - "a as _" + + - name: "vcombine{neon_type[0].noq}" + doc: Join two smaller vectors into a single larger vector + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [float32x2_t, float32x4_t, '[0, 1, 2, 3]'] + - [poly8x8_t, poly8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [poly16x4_t, poly16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int8x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [int64x1_t, int64x2_t, '[0, 1]'] + - [uint8x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint32x4_t, '[0, 1, 2, 3]'] + - [uint64x1_t, uint64x2_t, '[0, 1]'] + - [poly64x1_t, poly64x2_t, '[0, 1]'] + compose: + - FnCall: [simd_shuffle!, [a, b, '{type[2]}']] + + - name: "vaeseq_u8" + doc: "AES single round encryption." 
+ arguments: ["data: {neon_type}", "key: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "aes"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, [aese]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - uint8x16_t + compose: + - LLVMLink: + name: "vaeseq_u8" + links: + - link: "llvm.aarch64.crypto.aese" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.aese" + arch: arm + + - name: "vaesdq_u8" + doc: "AES single round encryption." + arguments: ["data: {neon_type}", "key: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "aes"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, [aesd]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - uint8x16_t + compose: + - LLVMLink: + name: "vaesdq_u8" + links: + - link: "llvm.aarch64.crypto.aesd" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.aesd" + arch: arm + + - name: "vaesmcq_u8" + doc: "AES mix columns." + arguments: ["data: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "aes"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint8x16_t, "aesmc"] + compose: + - LLVMLink: + name: "vaesmcq_u8" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vaesimcq_u8" + doc: "AES inverse mix columns." 
+ arguments: ["data: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "aes"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint8x16_t, "aesimc"] + compose: + - LLVMLink: + name: "vaesimcq_u8" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha1h_u32" + doc: "SHA1 fixed rotate." + arguments: ["hash_e: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [u32, "sha1h"] + compose: + - LLVMLink: + name: "vsha1h_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha1cq_u32" + doc: "SHA1 hash update accelerator, choose." 
+ arguments: ["hash_abcd: {neon_type[2]}", "hash_e: {type[0]}", "wk: {neon_type[2]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [u32, "sha1c", "uint32x4_t"] + compose: + - LLVMLink: + name: "vsha1cq_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha1mq_u32" + doc: "SHA1 hash update accelerator, majority" + arguments: ["hash_abcd: {neon_type[2]}", "hash_e: {type[0]}", "wk: {neon_type[2]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [u32, "sha1m", "uint32x4_t"] + compose: + - LLVMLink: + name: "vsha1mq_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha1pq_u32" + doc: "SHA1 hash update accelerator, parity" + arguments: ["hash_abcd: {neon_type[2]}", "hash_e: {type[0]}", "wk: {neon_type[2]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [u32, "sha1p", "uint32x4_t"] + compose: + - LLVMLink: + name: "vsha1pq_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" 
+ arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha1su0q_u32" + doc: "SHA1 schedule update accelerator, first part." + arguments: ["w0_3: {neon_type[0]}", "w4_7: {neon_type[0]}", "w8_11: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint32x4_t, "sha1su0"] + compose: + - LLVMLink: + name: "vsha1su0q_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha1su1q_u32" + doc: "SHA1 schedule update accelerator, second part." + arguments: ["tw0_3: {neon_type[0]}", "w12_15: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint32x4_t, "sha1su1"] + compose: + - LLVMLink: + name: "vsha1su0q_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha256hq_u32" + doc: "SHA1 schedule update accelerator, first part." 
+ arguments: ["hash_abcd: {neon_type[0]}", "hash_efgh: {neon_type[0]}", "wk: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint32x4_t, "sha256h"] + compose: + - LLVMLink: + name: "vsha256hq_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha256h2q_u32" + doc: "SHA1 schedule update accelerator, upper part." + arguments: ["hash_abcd: {neon_type[0]}", "hash_efgh: {neon_type[0]}", "wk: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint32x4_t, "sha256h2"] + compose: + - LLVMLink: + name: "vsha256h2q_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha256su0q_u32" + doc: "SHA256 schedule update accelerator, first part." 
+ arguments: ["w0_3: {neon_type[0]}", "w4_7: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint32x4_t, "sha256su0"] + compose: + - LLVMLink: + name: "vsha256su0q_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "vsha256su1q_u32" + doc: "SHA256 schedule update accelerator, second part." + arguments: ["tw0_3: {neon_type[0]}", "w8_11: {neon_type[0]}", "w12_15: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "sha2"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]] + - *neon-cfg-arm-unstable + - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] + safety: safe + types: + - [uint32x4_t, "sha256su1"] + compose: + - LLVMLink: + name: "vsha256su1q_u32" + links: + - link: "llvm.aarch64.crypto.{type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.{type[1]}" + arch: arm + + - name: "__crc32b" + doc: "CRC32 single round checksum for bytes (8 bits)." 
+ arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32b"]] }]] + - *arm-crc-unstable + - *aarch64-crc-stable + safety: safe + types: + - [u32, u8] + compose: + - LLVMLink: + name: "crc32b" + arguments: + - "crc: u32" + - "data: u32" + links: + - link: "llvm.aarch64.crc32b" + arch: aarch64,arm64ec + - link: "llvm.arm.crc32b" + arch: arm + - FnCall: ["___crc32b", ["crc", "data as u32"], [], true] + + - name: "__crc32h" + doc: "CRC32 single round checksum for bytes (16 bits)." + arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32h"]] }]] + - *arm-crc-unstable + - *aarch64-crc-stable + safety: safe + types: + - [u32, u16] + compose: + - LLVMLink: + name: "crc32h" + arguments: + - "crc: u32" + - "data: u32" + links: + - link: "llvm.aarch64.crc32h" + arch: aarch64,arm64ec + - link: "llvm.arm.crc32h" + arch: arm + - FnCall: ["___crc32h", ["crc", "data as u32"], [], true] + + - name: "__crc32w" + doc: "CRC32 single round checksum for bytes (32 bits)." + arguments: ["crc: {type}", "data: {type}"] + return_type: "{type}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32w"]] }]] + - *arm-crc-unstable + - *aarch64-crc-stable + safety: safe + types: + - u32 + compose: + - LLVMLink: + name: "crc32w" + links: + - link: "llvm.aarch64.crc32w" + arch: aarch64,arm64ec + - link: "llvm.arm.crc32w" + arch: arm + + - name: "__crc32cb" + doc: "CRC32-C single round checksum for bytes (8 bits)." 
+ arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cb"]] }]] + - *arm-crc-unstable + - *aarch64-crc-stable + safety: safe + types: + - [u32, u8] + compose: + - LLVMLink: + name: "crc32cb" + arguments: + - "crc: u32" + - "data: u32" + links: + - link: "llvm.aarch64.crc32cb" + arch: aarch64,arm64ec + - link: "llvm.arm.crc32cb" + arch: arm + - FnCall: ["___crc32cb", ["crc", "data as u32"], [], true] + + - name: "__crc32ch" + doc: "CRC32-C single round checksum for bytes (16 bits)." + arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32ch"]] }]] + - *arm-crc-unstable + - *aarch64-crc-stable + safety: safe + types: + - [u32, u16] + compose: + - LLVMLink: + name: "crc32ch" + arguments: + - "crc: u32" + - "data: u32" + links: + - link: "llvm.aarch64.crc32ch" + arch: aarch64,arm64ec + - link: "llvm.arm.crc32ch" + arch: arm + - FnCall: ["___crc32ch", ["crc", "data as u32"], [], true] + + - name: "__crc32cw" + doc: "CRC32-C single round checksum for bytes (32 bits)." + arguments: ["crc: {type}", "data: {type}"] + return_type: "{type}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *neon-v8 + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cw"]] }]] + - *arm-crc-unstable + - *aarch64-crc-stable + safety: safe + types: + - u32 + compose: + - LLVMLink: + name: "crc32cw" + links: + - link: "llvm.aarch64.crc32cw" + arch: aarch64,arm64ec + - link: "llvm.arm.crc32cw" + arch: arm + + - name: "__crc32d" + doc: "CRC32 single round checksum for quad words (64 bits)." 
+ arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *target-is-arm + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32w"]] }]] + - *arm-crc-unstable + safety: safe + types: + - [u32, u64] + compose: + # As the call to `__crc32` does not get inlined, we define an LLVM binding + # here, which is the same as above, and call it directly which results + # in the correct instructions being generated + - Let: [b, u32, '(data & 0xFFFFFFFF) as u32'] + - Let: [c, u32, '(data >> 32) as u32'] + - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: u32, data: u32) -> u32;}} unsafe {{ ___crc32w(___crc32w(crc, b), c) }}' + + - name: "__crc32cd" + doc: "CRC32-C single round checksum for quad words (64 bits)." + arguments: ["crc: {type[0]}", "data: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "crc"']] + - *target-is-arm + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cw"]] }]] + - *arm-crc-unstable + safety: safe + types: + - [u32, u64] + compose: + - Let: [b, u32, '(data & 0xFFFFFFFF) as u32'] + - Let: [c, u32, '(data >> 32) as u32'] + - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: u32, data: u32) -> u32;}} unsafe {{ ___crc32cw(___crc32cw(crc, b), c) }}' + + - name: "vabs{neon_type.no}" + doc: "Absolute value (wrapping)." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [abs]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int16x4_t + - int32x2_t + - int8x16_t + - int16x8_t + - int32x4_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.abs.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vabs.{neon_type}" + arch: arm + + - name: "vpmin{neon_type.no}" + doc: "Folding minimum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sminp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int16x4_t + - int32x2_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.sminp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpmins.{neon_type}" + arch: arm + + - name: "vpmin{neon_type.no}" + doc: "Folding minimum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uminp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint16x4_t + - uint32x2_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.uminp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpminu.{neon_type}" + arch: arm + + - name: "vpmin{neon_type.no}" + doc: "Folding minimum of adjacent pairs" + arguments: ["a: 
{neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fminp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpmins.{neon_type}" + arch: arm + + - name: "vpmax{neon_type.no}" + doc: "Folding maximum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smaxp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int16x4_t + - int32x2_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.smaxp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpmaxs.{neon_type}" + arch: arm + + - name: "vpmax{neon_type.no}" + doc: "Folding maximum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umaxp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - uint8x8_t + - uint16x4_t + - uint32x2_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.umaxp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpmaxu.{neon_type}" + arch: arm + + - name: "vpmax{neon_type.no}" + doc: "Folding maximum of adjacent pairs" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + 
- *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - float32x2_t + compose: + - LLVMLink: + name: "vabs{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmaxp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpmaxs.{neon_type}" + arch: arm + + - name: "vraddhn{neon_type[0].noq}" + doc: "Rounding Add returning High Narrow." + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int16x8_t, int8x8_t, 'vraddhn.i16'] + - [int32x4_t, int16x4_t, 'vraddhn.i32'] + - [int64x2_t, int32x2_t, 'vraddhn.i64'] + compose: + - LLVMLink: + name: "vraddhn{neon_type[0].noq}" + links: + - link: "llvm.aarch64.neon.raddhn.{neon_type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vraddhn.{neon_type[1]}" + arch: arm + + - name: "vraddhn{neon_type[0].noq}" + doc: "Rounding Add returning High Narrow." 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint16x8_t, uint8x8_t, 'vraddhn.i16', int16x8_t] + - [uint32x4_t, uint16x4_t, 'vraddhn.i32', int32x4_t] + - [uint64x2_t, uint32x2_t, 'vraddhn.i64', int64x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vraddhn{neon_type[3].noq}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vraddhn_high{neon_type[1].noq}" + doc: "Rounding Add returning High Narrow (high half)." + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t, '[0, 1, 2, 3]'] + compose: + - Let: + - x + - "{neon_type[0]}" + - FnCall: + - transmute + - - FnCall: + - "vraddhn{neon_type[4].noq}" + - - FnCall: [transmute, [b]] + - FnCall: [transmute, [c]] + - FnCall: ["simd_shuffle!", [a, x, '{type[5]}']] + + - name: "vraddhn_high{neon_type[1].noq}" + doc: "Rounding Add returning High Narrow (high half)." 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64', '[0, 1, 2, 3]'] + compose: + - Let: + - x + - FnCall: + - "vraddhn{neon_type[1].noq}" + - - b + - c + - FnCall: ["simd_shuffle!", [a, x, '{type[4]}']] + + - name: "vpadd{neon_type.no}" + doc: "Add pairwise." + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [addp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - int8x8_t + - int16x4_t + - int32x2_t + compose: + - LLVMLink: + name: "vpadd{neon_type.no}" + links: + - link: "llvm.aarch64.neon.addp.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpadd.{neon_type}" + arch: arm + + - name: "vpadd{neon_type[0].no}" + doc: "Add pairwise." 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [addp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [uint16x4_t, int16x4_t] + - [uint32x2_t, int32x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vpadd{neon_type[1].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + # This was not publically exposed + - name: "priv_vpadal{neon_type[1].no}" + visibility: private + doc: "Signed Add and Accumulate Long Pairwise." + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + - *neon-cfg-arm-unstable + types: + - [int16x4_t, int8x8_t, '"vpadal.s8"'] + - [int32x2_t, int16x4_t, '"vpadal.s16"'] + - [int64x1_t, int32x2_t, '"vpadal.s32"'] + - [int16x8_t, int8x16_t, '"vpadal.s8"'] + - [int32x4_t, int16x8_t, '"vpadal.s16"'] + - [int64x2_t, int32x4_t, '"vpadal.s32"'] + compose: + - LLVMLink: + name: "vpadal{neon_type[1].no}" + links: + - link: "llvm.arm.neon.vpadals.{neon_type[0]}.{neon_type[1]}" + arch: arm + + # This was not publically exposed + - name: "priv_vpadal{neon_type[1].no}" + visibility: private + doc: "Signed Add and Accumulate Long Pairwise." 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + - *neon-cfg-arm-unstable + types: + - [uint16x4_t, uint8x8_t , '"vpadal.u8"'] + - [uint32x2_t, uint16x4_t, '"vpadal.u16"'] + - [uint64x1_t, uint32x2_t, '"vpadal.u32"'] + - [uint16x8_t, uint8x16_t, '"vpadal.u8"'] + - [uint32x4_t, uint16x8_t, '"vpadal.u16"'] + - [uint64x2_t, uint32x4_t, '"vpadal.u32"'] + compose: + - LLVMLink: + name: "vpadal{neon_type[1].no}" + links: + - link: "llvm.arm.neon.vpadalu.{neon_type[0]}.{neon_type[1]}" + arch: arm + + - name: "vpaddl{neon_type[0].no}" + doc: "Signed Add and Accumulate Long Pairwise." + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [saddlp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - [int8x8_t, int16x4_t , '"vpaddl.s8"'] + - [int16x4_t, int32x2_t, '"vpaddl.s16"'] + - [int32x2_t, int64x1_t, '"vpaddl.s32"'] + - [int8x16_t, int16x8_t, '"vpaddl.s8"'] + - [int16x8_t, int32x4_t, '"vpaddl.s16"'] + - [int32x4_t, int64x2_t, '"vpaddl.s32"'] + compose: + - LLVMLink: + name: "vpaddl{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.saddlp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpaddls.{neon_type[1]}.{neon_type[0]}" + arch: arm + + - name: "vpaddl{neon_type[0].no}" + doc: "Unsigned Add and Accumulate Long Pairwise." 
+ arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uaddlp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - [uint8x8_t, uint16x4_t , '"vpaddl.u8"'] + - [uint16x4_t, uint32x2_t, '"vpaddl.u16"'] + - [uint32x2_t, uint64x1_t, '"vpaddl.u32"'] + - [uint8x16_t, uint16x8_t, '"vpaddl.u8"'] + - [uint16x8_t, uint32x4_t, '"vpaddl.u16"'] + - [uint32x4_t, uint64x2_t, '"vpaddl.u32"'] + compose: + - LLVMLink: + name: "vpaddl{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.uaddlp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vpaddlu.{neon_type[1]}.{neon_type[0]}" + arch: arm + + - name: "vpadal{neon_type[1].no}" + doc: "Signed Add and Accumulate Long Pairwise." + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sadalp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - [int16x4_t, int8x8_t, 'vpadal.s8', 'let x: int16x4_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_s8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddl_s8(b), a);}'] + - [int32x2_t, int16x4_t, 'vpadal.s16', 'let x: int32x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_s16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddl_s16(b), a);}'] + - [int64x1_t, int32x2_t, 'vpadal.s32', 'let x: int64x1_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_s32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddl_s32(b), a);}'] + - [int16x8_t, int8x16_t, 'vpadal.s8', 'let 
x: int16x8_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_s8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddlq_s8(b), a);}'] + - [int32x4_t, int16x8_t, 'vpadal.s16', 'let x: int32x4_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_s16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddlq_s16(b), a);}'] + - [int64x2_t, int32x4_t, 'vpadal.s32', 'let x: int64x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_s32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddlq_s32(b), a);}'] + compose: + - Identifier: ['{type[3]}', Symbol] + - Identifier: [x, Symbol] + + - name: "vpadal{neon_type[1].no}" + doc: "Unsigned Add and Accumulate Long Pairwise." + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uadalp]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - [uint16x4_t, uint8x8_t, 'vpadal.u8', 'let x: uint16x4_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_u8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddl_u8(b), a);}'] + - [uint32x2_t, uint16x4_t, 'vpadal.u16', 'let x: uint32x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_u16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddl_u16(b), a);}'] + - [uint64x1_t, uint32x2_t, 'vpadal.u32', 'let x: uint64x1_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_u32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddl_u32(b), a);}'] + - [uint16x8_t, uint8x16_t, 'vpadal.u8', 'let x: uint16x8_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_u8(a, b); } #[cfg(any(target_arch = "aarch64", 
target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddlq_u8(b), a);}'] + - [uint32x4_t, uint16x8_t, 'vpadal.u16', 'let x: uint32x4_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_u16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddlq_u16(b), a);}'] + - [uint64x2_t, uint32x4_t, 'vpadal.u32', 'let x: uint64x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_u32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe { x = simd_add(vpaddlq_u32(b), a);}'] + compose: + - Identifier: ['{type[3]}', Symbol] + - Identifier: [x, Symbol] + + - name: "vcnt{neon_type.no}" + doc: "Population count per byte." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + safety: safe + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcnt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cnt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - int8x8_t + - int8x16_t + compose: + - FnCall: [simd_ctpop, [a]] + + - name: "vcnt{neon_type[0].no}" + doc: "Population count per byte." 
+ arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcnt]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cnt]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [poly8x8_t, int8x8_t] + - [poly8x16_t, int8x16_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcnt{neon_type[1].no}" + - - FnCall: + - transmute + - - a + + - name: "vmmla{neon_type[0].no}" + doc: "8-bit integer matrix multiply-accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smmla]]}]] + - *neon-unstable-i8mm + - *neon-cfg-arm-unstable + types: + - [int32x4_t, int8x16_t] + compose: + - LLVMLink: + name: "vmmla{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.smmla.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.smmla.{neon_type[0]}.{neon_type[1]}" + arch: arm + + - name: "vmmla{neon_type[0].no}" + doc: "8-bit integer matrix multiply-accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ummla]]}]] + - *neon-unstable-i8mm + - *neon-cfg-arm-unstable + types: + - [uint32x4_t, uint8x16_t] + compose: + - LLVMLink: + name: "vmmla{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.ummla.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.ummla.{neon_type[0]}.{neon_type[1]}" + arch: 
arm + + - name: "vusmmla{neon_type[0].no}" + doc: "Unsigned and signed 8-bit integer matrix multiply-accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usmmla]]}]] + - *neon-unstable-i8mm + - *neon-cfg-arm-unstable + types: + - [int32x4_t, uint8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "vmmla{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.usmmla.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.usmmla.{neon_type[0]}.{neon_type[1]}" + arch: arm + + - name: "vtbl1" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - int8x8_t + compose: + - LLVMLink: + name: "vtbl1" + links: + - link: "llvm.arm.neon.vtbl1" + arch: arm + + - name: "vtbl1_s8" + doc: "Table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - int8x8_t + compose: + - FnCall: [vtbl1, [a, b]] + + - name: "vtbl1{neon_type[0].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: uint8x8_t"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - [uint8x8_t, uint8x8_t] + - [poly8x8_t, poly8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vtbl1 + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vtbl2" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: 
"{neon_type}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - int8x8_t + compose: + - LLVMLink: + name: "vtbl2" + links: + - link: "llvm.arm.neon.vtbl2" + arch: arm + + - name: "vtbl2_s8" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - [int8x8x2_t, int8x8_t] + compose: + - FnCall: [vtbl2, ['a.0', 'a.1', b]] + + - name: "vtbl2{neon_type[1].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: uint8x8_t"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - [uint8x8x2_t, uint8x8_t] + - [poly8x8x2_t, poly8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vtbl2 + - - FnCall: [transmute, ['a.0']] + - FnCall: [transmute, ['a.1']] + - FnCall: [transmute, [b]] + + - name: "vtbl3" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}"] + return_type: "{neon_type}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - int8x8_t + compose: + - LLVMLink: + name: "vtbl3" + links: + - link: "llvm.arm.neon.vtbl3" + arch: arm + + - name: "vtbl3_s8" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - [int8x8x3_t, int8x8_t] + compose: + - FnCall: [vtbl3, ['a.0', 'a.1', 'a.2', b]] + + - name: "vtbl3{neon_type[1].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: uint8x8_t"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: 
+ - [uint8x8x3_t, uint8x8_t] + - [poly8x8x3_t, poly8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vtbl3 + - - FnCall: [transmute, ['a.0']] + - FnCall: [transmute, ['a.1']] + - FnCall: [transmute, ['a.2']] + - FnCall: [transmute, [b]] + + - name: "vtbl4" + visibility: private + doc: "Table look-up" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}", "e: {neon_type}"] + return_type: "{neon_type}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - int8x8_t + compose: + - LLVMLink: + name: "vtbl4" + links: + - link: "llvm.arm.neon.vtbl4" + arch: arm + + - name: "vtbl4_s8" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - [int8x8x4_t, int8x8_t] + compose: + - FnCall: [vtbl4, ['a.0', 'a.1', 'a.2', 'a.3', b]] + + - name: "vtbl4{neon_type[1].no}" + doc: "Table look-up" + arguments: ["a: {neon_type[0]}", "b: uint8x8_t"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + assert_instr: [vtbl] + types: + - [uint8x8x4_t, uint8x8_t] + - [poly8x8x4_t, poly8x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - vtbl4 + - - FnCall: [transmute, ['a.0']] + - FnCall: [transmute, ['a.1']] + - FnCall: [transmute, ['a.2']] + - FnCall: [transmute, ['a.3']] + - FnCall: [transmute, [b]] + + - name: "vst1{type[0]}" + visibility: private + doc: "Store multiple single-element structures from one, two, three, or four registers." 
+ arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]] + types: + - ['_v8i8', '* const i8', int8x8_t, i32, '8'] + - ['q_v16i8', '* const i8', int8x16_t, i32, '8'] + - ['_v4i16', '* const i8', int16x4_t, i32, '16'] + - ['q_v8i16', '* const i8', int16x8_t, i32, '16'] + - ['_v2i32', '* const i8', int32x2_t, i32, '32'] + - ['q_v4i32', '* const i8', int32x4_t, i32, '32'] + - ['_v1i64', '* const i8', int64x1_t, i32, '64'] + - ['q_v2i64', '* const i8', int64x2_t, i32, '64'] + - ['_v2f32', '* const i8', float32x2_t, i32, '32'] + - ['q_v4f32', '* const i8', float32x4_t, i32, '32'] + compose: + - LLVMLink: + name: "_vst1{type[0]}" + links: + - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" + arch: arm + + - name: "vst1{type[0]}" + visibility: private + doc: "Store multiple single-element structures from one, two, three, or four registers." + arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]] + types: + - ['_v4f16', '* const i8', float16x4_t, i32, '16'] + - ['q_v8f16', '* const i8', float16x8_t, i32, '16'] + compose: + - LLVMLink: + name: "_vst1{type[0]}" + links: + - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" + arch: arm + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]] + types: + - ['*mut i8', int8x8_t, '8', 'a', 'crate::mem::align_of::() as i32', '_v8i8'] + - ['*mut i8', int8x16_t, '8', 'a', 'crate::mem::align_of::() as i32', 'q_v16i8'] + - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::() as i32', '_v4i16'] + - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::() as i32', 'q_v8i16'] + - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::() as i32', '_v2i32'] + - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::() as i32', 'q_v4i32'] + - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::() as i32', '_v1i64'] + - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*mut u8', uint8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v8i8'] + - ['*mut u8', uint8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v16i8'] + - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4i16'] + - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8i16'] + - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v2i32'] + - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v4i32'] + - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v1i64'] + - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*mut p8', poly8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v8i8'] + - ['*mut p8', poly8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v16i8'] + - ['*mut p16', poly16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4i16'] + - ['*mut p16', poly16x8_t, '16', 
'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8i16'] + - ['*mut p64', poly64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v1i64'] + - ['*mut p64', poly64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v2f32'] + - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v4f32'] + compose: + - FnCall: + - "vst1{type[5]}" + - - 'ptr as *const i8' + - '{type[3]}' + - '{type[4]}' + + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." + arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]] + types: + - ['*mut f16', float16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4f16'] + - ['*mut f16', float16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8f16'] + compose: + - FnCall: + - "vst1{type[5]}" + - - 'ptr as *const i8' + - '{type[3]}' + - '{type[4]}' + + + - name: "vshiftins{type[0]}" + visibility: private + doc: "Shift Right and Insert (immediate)" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + types: + - ['_v8i8', "int8x8_t", '8'] + - ['_v16i8', 'int8x16_t', '8'] + - ['_v4i16', 'int16x4_t', '16'] + - ['_v8i16', 'int16x8_t', '16'] + - ['_v2i32', 'int32x2_t', '32'] + - ['_v4i32', 'int32x4_t', '32'] + - ['_v1i64', 'int64x1_t', '64'] + - ['_v2i64', 'int64x2_t', '64'] + compose: + - LLVMLink: + name: "_vshiftins{type[0]}" + links: + - link: "llvm.arm.neon.vshiftins.{neon_type[1]}" + arch: arm + + - name: "vsri{neon_type[0].N}" + doc: "Shift Right and Insert (immediate)" 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "{type[1]}"']] + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsri.{type[2]}"', 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const N: i32'] + types: + - [uint8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] + - [uint8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] + - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] + - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] + - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N'] + - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N'] + - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] + - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] + - [poly8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] + - [poly8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] + - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] + - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] + ## These live in ./crates/core_arch/src/arm/neon.rs + #- [poly64x1_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] + #- [poly64x2_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] + compose: + - FnCall: ["static_assert!", ['{type[3]}']] + - FnCall: + - 'transmute' + - - FnCall: + - "vshiftins_{type[4]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + - FnCall: ["{type[5]}", 
["{type[6]}"]] + + - name: "vsri{neon_type[0].N}" + doc: "Shift Right and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + static_defs: ['const N: i32'] + attr: + - *enable-v7 + - *target-is-arm + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsri.{type[1]}"', 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + safety: safe + types: + - [int8x8_t, '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] + - [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] + - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] + - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] + - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N as i32'] + - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N as i32'] + - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] + - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] + compose: + - FnCall: ["static_assert!", ['{type[2]}']] + - FnCall: + - "vshiftins_{type[3]}" + - - a + - b + - FnCall: ["{type[4]}", ["{type[5]}"]] + + - name: "vsli{neon_type[0].N}" + doc: "Shift Left and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "{type[1]}"']] + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsli.{type[2]}"', 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const N: i32'] + types: + - [uint8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] + - [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] + - [uint16x4_t, "neon,v7", '16', 
'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] + - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] + - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N as i32'] + - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N as i32'] + - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] + - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] + - [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] + - [poly8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] + - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] + - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] + ## These live in ./crates/core_arch/src/arm/neon.rs + #- [poly64x1_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] + #- [poly64x2_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] + compose: + - FnCall: ["{type[3]}", ['{type[4]}']] + - FnCall: + - 'transmute' + - - FnCall: + - "vshiftins_{type[5]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + - FnCall: ["{type[6]}", ["{type[7]}"]] + + - name: "vsli{neon_type[0].N}" + doc: "Shift Left and Insert (immediate)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + safety: safe + attr: + - *target-is-arm + - *enable-v7 + - *neon-arm-unstable + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsli.{type[1]}"', 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] 
+ static_defs: ['const N: i32'] + types: + - [int8x8_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] + - [int8x16_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] + - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] + - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] + - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N'] + - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N'] + - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] + - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] + compose: + - FnCall: ["{type[2]}", ['{type[3]}']] + - FnCall: + - "vshiftins_{type[4]}" + - - a + - b + - FnCall: ["{type[5]}", ["{type[6]}"]] + + - name: "vcombine{neon_type[0].no}" + doc: Join two smaller vectors into a single larger vector + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: safe + types: + - [float16x4_t, float16x8_t] + compose: + - FnCall: [simd_shuffle!, [a, b, '[0, 1, 2, 3, 4, 5, 6, 7]']] + + - name: "vget_{type[2]}_{neon_type[0]}" + doc: Duplicate vector element to vector + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: safe + types: + - [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"] + - [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"] + compose: + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + + - name: "vget{type[2]}" + doc: Duplicate vector element to scalar + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *neon-fp16 + - FnCall: [cfg_attr, 
[*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["1"]] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: safe + types: + - [float16x4_t, f16, '_lane_f16', '2'] + - [float16x8_t, f16, 'q_lane_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: [simd_extract!, [a, "LANE as u32"]] + + - name: "vmov{neon_type[0].N}" + doc: "Duplicate element to vector" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: safe + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: ["vdup{neon_type[0].N}", [a]] + + - name: "{type[0]}" + doc: "Load one single-element structure to one lane of one register." 
+ arguments: ["ptr: {type[1]}", "src: {neon_type[2]}"] + return_type: "{neon_type[2]}" + static_defs: ['const LANE: i32'] + attr: + - *neon-v7 + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ["{type[3]}", 'LANE = {type[4]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[5]}', 'LANE = {type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vld1_lane_s8', '*const i8', 'int8x8_t', '"vld1.8"', '7', 'ld1', 'static_assert_uimm_bits!', 'LANE, 3'] + - ['vld1_lane_u8', '*const u8', 'uint8x8_t', '"vld1.8"', '7', 'ld1', 'static_assert_uimm_bits!', 'LANE, 3'] + - ['vld1_lane_p8', '*const p8', 'poly8x8_t', '"vld1.8"', '7', 'ld1', 'static_assert_uimm_bits!', 'LANE, 3'] + - ['vld1q_lane_s8', '*const i8', 'int8x16_t', '"vld1.8"', '15', 'ld1', 'static_assert_uimm_bits!', 'LANE, 4'] + - ['vld1q_lane_u8', '*const u8', 'uint8x16_t', '"vld1.8"', '15', 'ld1', 'static_assert_uimm_bits!', 'LANE, 4'] + - ['vld1q_lane_p8', '*const p8', 'poly8x16_t', '"vld1.8"', '15', 'ld1', 'static_assert_uimm_bits!', 'LANE, 4'] + - ['vld1_lane_s16', '*const i16', 'int16x4_t', '"vld1.16"', '3', 'ld1', 'static_assert_uimm_bits!', 'LANE, 2'] + - ['vld1_lane_u16', '*const u16', 'uint16x4_t', '"vld1.16"', '3', 'ld1', 'static_assert_uimm_bits!', 'LANE, 2'] + - ['vld1_lane_p16', '*const p16', 'poly16x4_t', '"vld1.16"', '3', 'ld1', 'static_assert_uimm_bits!', 'LANE, 2'] + - ['vld1q_lane_s16', '*const i16', 'int16x8_t', '"vld1.16"', '7', 'ld1', 'static_assert_uimm_bits!', 'LANE, 3'] + - ['vld1q_lane_u16', '*const u16', 'uint16x8_t', '"vld1.16"', '7', 'ld1', 'static_assert_uimm_bits!', 'LANE, 3'] + - ['vld1q_lane_p16', '*const p16', 'poly16x8_t', '"vld1.16"', '7', 'ld1', 'static_assert_uimm_bits!', 'LANE, 3'] + - ['vld1_lane_s32', '*const i32', 'int32x2_t', '"vld1.32"', '1', 'ld1', 'static_assert_uimm_bits!', 'LANE, 1'] + - ['vld1_lane_u32', '*const 
u32', 'uint32x2_t', '"vld1.32"', '1', 'ld1', 'static_assert_uimm_bits!', 'LANE, 1'] + - ['vld1_lane_f32', '*const f32', 'float32x2_t', '"vld1.32"', '1', 'ld1', 'static_assert_uimm_bits!', 'LANE, 1'] + - ['vld1q_lane_s32', '*const i32', 'int32x4_t', '"vld1.32"', '3', 'ld1', 'static_assert_uimm_bits!', 'LANE, 2'] + - ['vld1q_lane_u32', '*const u32', 'uint32x4_t', '"vld1.32"', '3', 'ld1', 'static_assert_uimm_bits!', 'LANE, 2'] + - ['vld1q_lane_f32', '*const f32', 'float32x4_t', '"vld1.32"', '3', 'ld1', 'static_assert_uimm_bits!', 'LANE, 2'] + - ['vld1_lane_s64', '*const i64', 'int64x1_t', 'vldr', '0', 'ldr', 'static_assert!', 'LANE == 0'] + - ['vld1_lane_u64', '*const u64', 'uint64x1_t', 'vldr', '0', 'ldr', 'static_assert!', 'LANE == 0'] + - ['vld1q_lane_s64', '*const i64', 'int64x2_t', 'vldr', '1', 'ld1', 'static_assert_uimm_bits!', 'LANE, 1'] + - ['vld1q_lane_u64', '*const u64', 'uint64x2_t', 'vldr', '1', 'ld1', 'static_assert_uimm_bits!', 'LANE, 1'] + compose: + - FnCall: ["{type[6]}", ["{type[7]}"]] + - FnCall: [simd_insert!, [src, 'LANE as u32', '*ptr']] + + - name: "{type[0]}" + doc: "Load one single-element structure to one lane of one register." 
+ arguments: ["ptr: {type[1]}", "src: {neon_type[2]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-aes + - *neon-v7 + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ["{type[3]}", 'LANE = {type[4]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[5]}', 'LANE = {type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['vld1_lane_p64', '*const p64', 'poly64x1_t', 'vldr', '0', 'ldr', 'static_assert!', 'LANE == 0'] + - ['vld1q_lane_p64', '*const p64', 'poly64x2_t', 'vldr', '1', 'ld1', 'static_assert_uimm_bits!', 'LANE, 1'] + compose: + - FnCall: ["{type[6]}", ["{type[7]}"]] + - FnCall: [simd_insert!, [src, 'LANE as u32', '*ptr']] + + - name: "{type[0]}" + doc: "Load one single-element structure and Replicate to all lanes (of one register)." + arguments: ["ptr: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ["{type[3]}"]] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vld1_dup_s64', '*const i64', 'int64x1_t', 'vldr', 'ldr', 'let x: int64x1_t; #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = crate::core_arch::aarch64::vld1_s64(ptr); } #[cfg(target_arch = "arm")] { x = crate::core_arch::arm::vld1_s64(ptr); }'] + - ['vld1_dup_u64', '*const u64', 'uint64x1_t', 'vldr', 'ldr', 'let x: uint64x1_t; #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = crate::core_arch::aarch64::vld1_u64(ptr); } #[cfg(target_arch = "arm")] { x = crate::core_arch::arm::vld1_u64(ptr); }'] + compose: + - Identifier: ['{type[5]}', Symbol] + - Identifier: [x, Symbol] + + - name: "{type[0]}" + doc: "Load one single-element structure and 
Replicate to all lanes (of one register)." + arguments: ["ptr: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-aes + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ["{type[3]}"]] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vld1_dup_p64', '*const p64', 'poly64x1_t', 'vldr', 'ldr', 'let x: poly64x1_t; #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = crate::core_arch::aarch64::vld1_p64(ptr); } #[cfg(target_arch = "arm")] { x = crate::core_arch::arm::vld1_p64(ptr); }'] + compose: + - Identifier: ['{type[5]}', Symbol] + - Identifier: [x, Symbol] + + - name: "{type[0]}" + doc: "Load one single-element structure and Replicate to all lanes (of one register)." + arguments: ["ptr: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-aes + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ["{type[3]}"]] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vld1q_dup_p64', '*const p64', 'poly64x2_t', 'vldr', 'ld1r', 'vld1q_lane_p64::<0>', 'u64x2::splat(0)', '[0, 0]'] + compose: + - Let: + - x + - FnCall: + - '{type[5]}' + - - ptr + - FnCall: [transmute, ['{type[6]}']] + - FnCall: ['simd_shuffle!', [x, x, '{type[7]}']] + + - name: "{type[0]}" + doc: "Load one single-element structure and Replicate to all lanes (of one register)." 
+ arguments: ["ptr: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_s8::<0>', 'i8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]'] + - ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_u8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]'] + - ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_p8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]'] + + - ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_s8::<0>', 'i8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]'] + - ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_u8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]'] + - ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_p8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]'] + + - ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_s16::<0>', 'i16x4::splat(0)', '[0, 0, 0, 0]'] + - ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_u16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]'] + - ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_p16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]'] + + - ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_s16::<0>', 'i16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]'] + - ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_u16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]'] + - ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_p16::<0>', 'u16x8::splat(0)', '[0, 0, 
0, 0, 0, 0, 0, 0]'] + + - ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_s32::<0>', 'i32x2::splat(0)', '[0, 0]'] + - ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_u32::<0>', 'u32x2::splat(0)', '[0, 0]'] + - ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_f32::<0>', 'f32x2::splat(0.0)', '[0, 0]'] + + - ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_s32::<0>', 'i32x4::splat(0)', '[0, 0, 0, 0]'] + - ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_u32::<0>', 'u32x4::splat(0)', '[0, 0, 0, 0]'] + - ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_f32::<0>', 'f32x4::splat(0.0)', '[0, 0, 0, 0]'] + + - ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1', 'vld1q_lane_s64::<0>', 'i64x2::splat(0)', '[0, 0]'] + - ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1', 'vld1q_lane_u64::<0>', 'u64x2::splat(0)', '[0, 0]'] + compose: + - Let: + - x + - FnCall: + - '{type[5]}' + - - ptr + - FnCall: [transmute, ['{type[6]}']] + - FnCall: ['simd_shuffle!', [x, x, '{type[7]}']] + + - name: "{type[0]}" + doc: "Absolute difference and accumulate (64-bit)" + arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}', 'c: {neon_type[1]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[2]}"']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[3]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaba_s8', 'int8x8_t', 'vaba.s8', 'saba', 'vabd_s8'] + - ['vaba_u8', 'uint8x8_t', 'vaba.u8', 'uaba', 'vabd_u8'] + - ['vaba_s16', 'int16x4_t', 'vaba.s16', 'saba', 'vabd_s16'] + - ['vaba_u16', 'uint16x4_t', 'vaba.u16', 'uaba', 'vabd_u16'] + - ['vaba_s32', 'int32x2_t', 'vaba.s32', 'saba', 'vabd_s32'] + - ['vaba_u32', 'uint32x2_t', 'vaba.u32', 'uaba', 'vabd_u32'] + compose: 
+ - FnCall: + - 'simd_add' + - - a + - FnCall: ['{type[4]}', [b, c]] + + - name: "{type[0]}" + doc: "Absolute difference and accumulate (128-bit)" + arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}', 'c: {neon_type[1]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[2]}"']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[3]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vabaq_s8', 'int8x16_t', 'vaba.s8', 'saba', 'vabdq_s8'] + - ['vabaq_u8', 'uint8x16_t', 'vaba.u8', 'uaba', 'vabdq_u8'] + - ['vabaq_s16', 'int16x8_t', 'vaba.s16', 'saba', 'vabdq_s16'] + - ['vabaq_u16', 'uint16x8_t', 'vaba.u16', 'uaba', 'vabdq_u16'] + - ['vabaq_s32', 'int32x4_t', 'vaba.s32', 'saba', 'vabdq_s32'] + - ['vabaq_u32', 'uint32x4_t', 'vaba.u32', 'uaba', 'vabdq_u32'] + compose: + - FnCall: + - 'simd_add' + - - a + - FnCall: ['{type[4]}', [b, c]] + + - name: "{type[0]}" + doc: "Vector add." 
+ arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[2]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[3]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vadd_s8', 'int8x8_t', 'vadd', 'add'] + - ['vaddq_s8', 'int8x16_t', 'vadd', 'add'] + - ['vadd_s16', 'int16x4_t', 'vadd', 'add'] + - ['vaddq_s16', 'int16x8_t', 'vadd', 'add'] + - ['vadd_s32', 'int32x2_t', 'vadd', 'add'] + - ['vaddq_s32', 'int32x4_t', 'vadd', 'add'] + - ['vaddq_s64', 'int64x2_t', 'vadd', 'add'] + - ['vadd_f32', 'float32x2_t', 'vadd', 'fadd'] + - ['vaddq_f32', 'float32x4_t', 'vadd', 'fadd'] + - ['vadd_u8', 'uint8x8_t', 'vadd', 'add'] + - ['vaddq_u8', 'uint8x16_t', 'vadd', 'add'] + - ['vadd_u16', 'uint16x4_t', 'vadd', 'add'] + - ['vaddq_u16', 'uint16x8_t', 'vadd', 'add'] + - ['vadd_u32', 'uint32x2_t', 'vadd', 'add'] + - ['vaddq_u32', 'uint32x4_t', 'vadd', 'add'] + - ['vaddq_u64', 'uint64x2_t', 'vadd', 'add'] + compose: + - FnCall: ['simd_add', [a, b]] + + - name: "{type[0]}" + doc: "Add Long (vector)." 
+ arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}'] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaddl_s8', 'int8x8_t', 'int16x8_t', 'vaddl', 'saddl'] + - ['vaddl_s16', 'int16x4_t', 'int32x4_t', 'vaddl', 'saddl'] + - ['vaddl_s32', 'int32x2_t', 'int64x2_t', 'vaddl', 'saddl'] + - ['vaddl_u8', 'uint8x8_t', 'uint16x8_t', 'vaddl', 'uaddl'] + - ['vaddl_u16', 'uint16x4_t', 'uint32x4_t', 'vaddl', 'uaddl'] + - ['vaddl_u32', 'uint32x2_t', 'uint64x2_t', 'vaddl', 'uaddl'] + compose: + - Let: + - a + - '{neon_type[2]}' + - FnCall: [simd_cast, [a]] + - Let: + - b + - '{neon_type[2]}' + - FnCall: [simd_cast, [b]] + - FnCall: ['simd_add', [a, b]] + + - name: "{type[0]}" + doc: "Signed Add Long (vector, high half)." + arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}'] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2', 'int16x4_t', '[4, 5, 6, 7]'] + - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2', 'int32x2_t', '[2, 3]'] + - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2', 'uint16x4_t', '[4, 5, 6, 7]'] + - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2', 'uint32x2_t', '[2, 3]'] + compose: + - Let: + - a + - 
'{neon_type[5]}' + - FnCall: ['simd_shuffle!', [a, a, '{type[6]}']] + - Let: + - b + - '{neon_type[5]}' + - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']] + - Let: [a, '{neon_type[2]}', {FnCall: [simd_cast, [a]]}] + - Let: [b, '{neon_type[2]}', {FnCall: [simd_cast, [b]]}] + - FnCall: [simd_add, [a, b]] + + - name: "{type[0]}" + doc: "Add Wide" + arguments: ['a: {neon_type[1]}', 'b: {neon_type[2]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaddw_s8', 'int16x8_t', 'int8x8_t', 'vaddw', 'saddw'] + - ['vaddw_s16', 'int32x4_t', 'int16x4_t', 'vaddw', 'saddw'] + - ['vaddw_s32', 'int64x2_t', 'int32x2_t', 'vaddw', 'saddw'] + - ['vaddw_u8', 'uint16x8_t', 'uint8x8_t', 'vaddw', 'uaddw'] + - ['vaddw_u16', 'uint32x4_t', 'uint16x4_t', 'vaddw', 'uaddw'] + - ['vaddw_u32', 'uint64x2_t', 'uint32x2_t', 'vaddw', 'uaddw'] + compose: + - Let: + - b + - '{neon_type[1]}' + - FnCall: ['simd_cast', [b]] + - FnCall: [simd_add, [a, b]] + + - name: "{type[0]}" + doc: "Add Wide (high half)." 
+ arguments: ['a: {neon_type[1]}', 'b: {neon_type[2]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t', '[4, 5, 6, 7]'] + - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t', '[2, 3]'] + - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t', '[4, 5, 6, 7]'] + - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t', '[2, 3]'] + compose: + - Let: + - b + - '{neon_type[5]}' + - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']] + - Let: + - b + - '{neon_type[1]}' + - FnCall: ['simd_cast', [b]] + - FnCall: [simd_add, [a, b]] + + - name: "{type[0]}" + doc: "Add returning High Narrow." 
+ arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}'] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vaddhn']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['addhn']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaddhn_s16', 'int16x8_t', 'int8x8_t', 'int16x8_t::splat(8)'] + - ['vaddhn_s32', 'int32x4_t', 'int16x4_t', 'int32x4_t::splat(16)'] + - ['vaddhn_s64', 'int64x2_t', 'int32x2_t', 'int64x2_t::splat(32)'] + - ['vaddhn_u16', 'uint16x8_t', 'uint8x8_t', 'uint16x8_t::splat(8)'] + - ['vaddhn_u32', 'uint32x4_t', 'uint16x4_t', 'uint32x4_t::splat(16)'] + - ['vaddhn_u64', 'uint64x2_t', 'uint32x2_t', 'uint64x2_t::splat(32)'] + compose: + - FnCall: + - simd_cast + - - FnCall: + - simd_shr + - - FnCall: + - simd_add + - - a + - b + - '{type[3]}' + + - name: "{type[0]}" + doc: "Add returning High Narrow (high half)." + arguments: ['r: {neon_type[1]}', 'a: {neon_type[2]}', 'b: {neon_type[2]}'] + return_type: "{neon_type[3]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vaddhn']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['addhn2']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)', '[0, 1, 2, 3]'] + - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - 
['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)', '[0, 1, 2, 3]'] + compose: + - Let: + - x + - FnCall: + - simd_cast + - - FnCall: + - simd_shr + - - FnCall: + - simd_add + - - a + - b + - '{type[4]}' + - FnCall: ['simd_shuffle!', [r, x, '{type[5]}']] + + - name: "{type[0]}" + doc: "Vector narrow integer." + arguments: ['a: {neon_type[1]}'] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vmovn']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['xtn']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vmovn_s16', 'int16x8_t', 'int8x8_t'] + - ['vmovn_s32', 'int32x4_t', 'int16x4_t'] + - ['vmovn_s64', 'int64x2_t', 'int32x2_t'] + - ['vmovn_u16', 'uint16x8_t', 'uint8x8_t'] + - ['vmovn_u32', 'uint32x4_t', 'uint16x4_t'] + - ['vmovn_u64', 'uint64x2_t', 'uint32x2_t'] + compose: + - FnCall: [simd_cast, [a]] + + - name: "{type[0]}" + doc: "Vector long move." + arguments: ['a: {neon_type[1]}'] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vmovl']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[3]}']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vmovl_s8', 'int8x8_t', 'int16x8_t', 'sxtl'] + - ['vmovl_s16', 'int16x4_t', 'int32x4_t', 'sxtl'] + - ['vmovl_s32', 'int32x2_t', 'int64x2_t', 'sxtl'] + - ['vmovl_u8', 'uint8x8_t', 'uint16x8_t', 'uxtl'] + - ['vmovl_u16', 'uint16x4_t', 'uint32x4_t', 'uxtl'] + - ['vmovl_u32', 'uint32x2_t', 'uint64x2_t', 'uxtl'] + compose: + - FnCall: [simd_cast, [a]] + + - name: "{type[0]}" + doc: "Vector bitwise not." 
+ arguments: ['a: {neon_type[1]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vmvn']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['mvn']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vmvn_s8', 'int8x8_t', 'int8x8_t::splat(-1)'] + - ['vmvnq_s8', 'int8x16_t', 'int8x16_t::splat(-1)'] + - ['vmvn_s16', 'int16x4_t', 'int16x4_t::splat(-1)'] + - ['vmvnq_s16', 'int16x8_t', 'int16x8_t::splat(-1)'] + - ['vmvn_s32', 'int32x2_t', 'int32x2_t::splat(-1)'] + - ['vmvnq_s32', 'int32x4_t', 'int32x4_t::splat(-1)'] + - ['vmvn_u8', 'uint8x8_t', 'uint8x8_t::splat(255)'] + - ['vmvnq_u8', 'uint8x16_t', 'uint8x16_t::splat(255)'] + - ['vmvn_u16', 'uint16x4_t', 'uint16x4_t::splat(65_535)'] + - ['vmvnq_u16', 'uint16x8_t', 'uint16x8_t::splat(65_535)'] + - ['vmvn_u32', 'uint32x2_t', 'uint32x2_t::splat(4_294_967_295)'] + - ['vmvnq_u32', 'uint32x4_t', 'uint32x4_t::splat(4_294_967_295)'] + - ['vmvn_p8', 'poly8x8_t', 'poly8x8_t::splat(255)'] + - ['vmvnq_p8', 'poly8x16_t', 'poly8x16_t::splat(255)'] + compose: + - Let: [b, '{type[2]}'] + - FnCall: [simd_xor, [a, b]] + + - name: "{type[0]}" + doc: "Vector bitwise bit clear." 
+ arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbic']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['bic']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vbic_s8', 'int8x8_t', 'int8x8_t::splat(-1)'] + - ['vbic_s16', 'int16x4_t', 'int16x4_t::splat(-1)'] + - ['vbic_s32', 'int32x2_t', 'int32x2_t::splat(-1)'] + - ['vbic_s64', 'int64x1_t', 'int64x1_t::splat(-1)'] + - ['vbicq_s8', 'int8x16_t', 'int8x16_t::splat(-1)'] + - ['vbicq_s16', 'int16x8_t', 'int16x8_t::splat(-1)'] + - ['vbicq_s32', 'int32x4_t', 'int32x4_t::splat(-1)'] + - ['vbicq_s64', 'int64x2_t', 'int64x2_t::splat(-1)'] + compose: + - Let: [c, '{type[2]}'] + - FnCall: + - simd_and + - - FnCall: [simd_xor, [b, c]] + - a + + - name: "{type[0]}" + doc: "Vector bitwise bit clear." + arguments: ['a: {neon_type[1]}', 'b: {neon_type[1]}'] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbic']] } ]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['bic']]}] ] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vbic_u8', 'uint8x8_t', 'int8x8_t::splat(-1)'] + - ['vbic_u16', 'uint16x4_t', 'int16x4_t::splat(-1)'] + - ['vbic_u32', 'uint32x2_t', 'int32x2_t::splat(-1)'] + - ['vbic_u64', 'uint64x1_t', 'int64x1_t::splat(-1)'] + - ['vbicq_u8', 'uint8x16_t', 'int8x16_t::splat(-1)'] + - ['vbicq_u16', 'uint16x8_t', 'int16x8_t::splat(-1)'] + - ['vbicq_u32', 'uint32x4_t', 'int32x4_t::splat(-1)'] + - ['vbicq_u64', 'uint64x2_t', 'int64x2_t::splat(-1)'] + compose: + - Let: [c, '{type[2]}'] + - FnCall: + - simd_and + - - FnCall: + - simd_xor + - - b + - FnCall: [transmute, [c]] + - a + + - name: "{type[0]}" + doc: "Bitwise Select." 
+ arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[2]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbsl']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['bsl']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vbsl_s8', 'uint8x8_t', 'int8x8_t', 'int8x8_t::splat(-1)'] + - ['vbsl_s16', 'uint16x4_t', 'int16x4_t', 'int16x4_t::splat(-1)'] + - ['vbsl_s32', 'uint32x2_t', 'int32x2_t', 'int32x2_t::splat(-1)'] + - ['vbsl_s64', 'uint64x1_t', 'int64x1_t', 'int64x1_t::splat(-1)'] + - ['vbsl_f32', 'uint32x2_t', 'float32x2_t', 'int32x2_t::splat(-1)'] + - ['vbslq_f32', 'uint32x4_t', 'float32x4_t', 'int32x4_t::splat(-1)'] + - ['vbsl_p8', 'uint8x8_t', 'poly8x8_t', 'int8x8_t::splat(-1)'] + - ['vbsl_p16', 'uint16x4_t', 'poly16x4_t', 'int16x4_t::splat(-1)'] + - ['vbslq_s8', 'uint8x16_t', 'int8x16_t', 'int8x16_t::splat(-1)'] + - ['vbslq_s16', 'uint16x8_t', 'int16x8_t', 'int16x8_t::splat(-1)'] + - ['vbslq_s32', 'uint32x4_t', 'int32x4_t', 'int32x4_t::splat(-1)'] + - ['vbslq_s64', 'uint64x2_t', 'int64x2_t', 'int64x2_t::splat(-1)'] + - ['vbslq_p8', 'uint8x16_t', 'poly8x16_t', 'int8x16_t::splat(-1)'] + - ['vbslq_p16', 'uint16x8_t', 'poly16x8_t', 'int16x8_t::splat(-1)'] + compose: + - Let: [not, '{type[3]}'] + - FnCall: + - transmute + - - FnCall: + - simd_or + - - FnCall: + - simd_and + - - a + - FnCall: [transmute, [b]] + - FnCall: + - simd_and + - - FnCall: + - simd_xor + - - a + - FnCall: [transmute, [not]] + - FnCall: [transmute, [c]] + + - name: "{type[0]}" + doc: "Bitwise Select." 
+ arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[2]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-fp16 + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbsl']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['bsl']]}]] + - *neon-unstable-f16 + safety: safe + types: + - ['vbslq_f16', 'uint16x8_t', 'float16x8_t', 'int16x8_t::splat(-1)'] + - ['vbsl_f16', 'uint16x4_t', 'float16x4_t', 'int16x4_t::splat(-1)'] + compose: + - Let: [not, '{type[3]}'] + - FnCall: + - transmute + - - FnCall: + - simd_or + - - FnCall: + - simd_and + - - a + - FnCall: [transmute, [b]] + - FnCall: + - simd_and + - - FnCall: + - simd_xor + - - a + - FnCall: [transmute, [not]] + - FnCall: [transmute, [c]] + + - name: "{type[0]}" + doc: "Bitwise Select." + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbsl']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['bsl']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vbslq_u8', 'uint8x16_t', 'int8x16_t::splat(-1)'] + - ['vbslq_u16', 'uint16x8_t', 'int16x8_t::splat(-1)'] + - ['vbslq_u32', 'uint32x4_t', 'int32x4_t::splat(-1)'] + - ['vbslq_u64', 'uint64x2_t', 'int64x2_t::splat(-1)'] + - ['vbsl_u8', 'uint8x8_t', 'int8x8_t::splat(-1)'] + - ['vbsl_u16', 'uint16x4_t', 'int16x4_t::splat(-1)'] + - ['vbsl_u32', 'uint32x2_t', 'int32x2_t::splat(-1)'] + - ['vbsl_u64', 'uint64x1_t', 'int64x1_t::splat(-1)'] + compose: + - Let: [not, '{type[2]}'] + - FnCall: + - transmute + - - FnCall: + - simd_or + - - FnCall: [simd_and, [a, b]] + - FnCall: + - simd_and + - - FnCall: + - simd_xor + - - a + - FnCall: [transmute, [not]] + - c + + - name: "{type[0]}" + doc: "Vector bitwise inclusive OR NOT" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: 
"{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vorn']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['orn']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vorn_s8', 'int8x8_t', 'int8x8_t::splat(-1)'] + - ['vornq_s8', 'int8x16_t', 'int8x16_t::splat(-1)'] + - ['vorn_s16', 'int16x4_t', 'int16x4_t::splat(-1)'] + - ['vornq_s16', 'int16x8_t', 'int16x8_t::splat(-1)'] + - ['vorn_s32', 'int32x2_t', 'int32x2_t::splat(-1)'] + - ['vornq_s32', 'int32x4_t', 'int32x4_t::splat(-1)'] + - ['vorn_s64', 'int64x1_t', 'int64x1_t::splat(-1)'] + - ['vornq_s64', 'int64x2_t', 'int64x2_t::splat(-1)'] + compose: + - Let: [c, '{type[2]}'] + - FnCall: + - simd_or + - - FnCall: [simd_xor, [b, c]] + - a + + - name: "{type[0]}" + doc: "Vector bitwise inclusive OR NOT" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vorn']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['orn']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vorn_u8', 'uint8x8_t', 'int8x8_t::splat(-1)'] + - ['vornq_u8', 'uint8x16_t', 'int8x16_t::splat(-1)'] + - ['vorn_u16', 'uint16x4_t', 'int16x4_t::splat(-1)'] + - ['vornq_u16', 'uint16x8_t', 'int16x8_t::splat(-1)'] + - ['vorn_u32', 'uint32x2_t', 'int32x2_t::splat(-1)'] + - ['vornq_u32', 'uint32x4_t', 'int32x4_t::splat(-1)'] + - ['vorn_u64', 'uint64x1_t', 'int64x1_t::splat(-1)'] + - ['vornq_u64', 'uint64x2_t', 'int64x2_t::splat(-1)'] + compose: + - Let: [c, '{type[2]}'] + - FnCall: + - simd_or + - - FnCall: + - simd_xor + - - b + - FnCall: [transmute, [c]] + - a + + - name: "{type[0]}" + doc: "Move vector element to general-purpose register" + arguments: ["v: {neon_type[1]}"] + return_type: "{type[2]}" + safety: safe + static_defs: ['const IMM5: i32'] + attr: 
+ - *neon-v7 + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'IMM5 = {type[3]}']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - ['vget_lane_s8', 'int8x8_t', 'i8', '2', 'IMM5, 3', 'IMM5 as u32'] + - ['vget_lane_u8', 'uint8x8_t', 'u8', '2', 'IMM5, 3', 'IMM5 as u32'] + - ['vget_lane_p8', 'poly8x8_t', 'p8', '2', 'IMM5, 3', 'IMM5 as u32'] + - ['vgetq_lane_s8', 'int8x16_t', 'i8', '2', 'IMM5, 4', 'IMM5 as u32'] + - ['vgetq_lane_u8', 'uint8x16_t', 'u8', '2', 'IMM5, 4', 'IMM5 as u32'] + - ['vgetq_lane_p8', 'poly8x16_t', 'p8', '2', 'IMM5, 4', 'IMM5 as u32'] + - ['vget_lane_u16', 'uint16x4_t', 'u16', '2', 'IMM5, 2', 'IMM5 as u32'] + - ['vget_lane_s16', 'int16x4_t', 'i16', '2', 'IMM5, 2', 'IMM5 as u32'] + - ['vget_lane_p16', 'poly16x4_t', 'p16', '2', 'IMM5, 2', 'IMM5 as u32'] + - ['vgetq_lane_u16', 'uint16x8_t', 'u16', '2', 'IMM5, 3', 'IMM5 as u32'] + - ['vgetq_lane_s16', 'int16x8_t', 'i16', '2', 'IMM5, 3', 'IMM5 as u32'] + - ['vgetq_lane_p16', 'poly16x8_t', 'p16', '2', 'IMM5, 3', 'IMM5 as u32'] + - ['vget_lane_u32', 'uint32x2_t', 'u32', '1', 'IMM5, 1', 'IMM5 as u32'] + - ['vget_lane_s32', 'int32x2_t', 'i32', '1', 'IMM5, 1', 'IMM5 as u32'] + - ['vgetq_lane_u32', 'uint32x4_t', 'u32', '2', 'IMM5, 2', 'IMM5 as u32'] + - ['vgetq_lane_s32', 'int32x4_t', 'i32', '2', 'IMM5, 2', 'IMM5 as u32'] + - ['vget_lane_f32', 'float32x2_t', 'f32', '1', 'IMM5, 1', 'IMM5 as u32'] + - ['vgetq_lane_f32', 'float32x4_t', 'f32', '1', 'IMM5, 2', 'IMM5 as u32'] + - ['vgetq_lane_p64', 'poly64x2_t', 'p64', '1', 'IMM5, 1', 'IMM5 as u32'] + - ['vgetq_lane_s64', 'int64x2_t', 'i64', '1', 'IMM5, 1', 'IMM5 as u32'] + - ['vgetq_lane_u64', 'uint64x2_t', 'u64', '1', 'IMM5, 2', 'IMM5 as u32'] + compose: + - FnCall: ['static_assert_uimm_bits!', ['{type[4]}']] + - FnCall: ['simd_extract!', [v, '{type[5]}']] + + - name: "{type[0]}" + doc: "Move vector element to general-purpose register" + arguments: ["v: {neon_type[1]}"] + return_type: 
"{type[2]}" + safety: safe + static_defs: ['const IMM5: i32'] + attr: + - *neon-v7 + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'IMM5 = 0']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + types: + - ['vget_lane_u64', 'uint64x1_t', 'u64', '0'] + - ['vget_lane_p64', 'poly64x1_t', 'p64', 'IMM5 as u32'] + - ['vget_lane_s64', 'int64x1_t', 'i64', 'IMM5 as u32'] + compose: + - FnCall: ['static_assert!', ['IMM5 == 0']] + - FnCall: ['simd_extract!', [v, '{type[3]}']] + + # Private vfp4 version used by FMA intriniscs because LLVM does + # not inline the non-vfp4 version in vfp4 functions. + - name: "{type[0]}" + visibility: private + doc: "Duplicate vector element to vector or scalar" + arguments: ["value: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"vdup.32"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['dup']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vdup_n_f32_vfp4', 'f32', 'float32x2_t', 'float32x2_t::splat(value)'] + - ['vdupq_n_f32_vfp4', 'f32', 'float32x4_t', 'float32x4_t::splat(value)'] + compose: + - Identifier: ['{type[3]}', Symbol] + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['{type[4]}']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'unsafe { int64x1_t([simd_extract!(a, 1)]) }'] + - ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'unsafe { 
uint64x1_t([simd_extract!(a, 1)]) }'] + compose: + - Identifier: ['{type[5]}', Symbol] + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'unsafe { int64x1_t([simd_extract!(a, 0)]) }'] + - ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'unsafe { uint64x1_t([simd_extract!(a, 0)]) }'] + compose: + - Identifier: ['{type[3]}', Symbol] + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['{type[4]}']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vget_high_s8', 'int8x16_t', 'int8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vget_high_u8', 'uint8x16_t', 'uint8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vget_high_p8', 'poly8x16_t', 'poly8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - ['vget_high_s16', 'int16x8_t', 'int16x4_t', 'vmov', 'ext', '[4, 5, 6, 7]'] + - ['vget_high_u16', 'uint16x8_t', 'uint16x4_t', 'vmov', 'ext', '[4, 5, 6, 7]'] + - ['vget_high_p16', 'poly16x8_t', 'poly16x4_t', 'vmov', 'ext', '[4, 5, 6, 7]'] + - ['vget_high_s32', 'int32x4_t', 'int32x2_t', 'vmov', 'ext', '[2, 3]'] + - ['vget_high_u32', 'uint32x4_t', 'uint32x2_t', 'vmov', 'ext', '[2, 3]'] + - ['vget_high_f32', 'float32x4_t', 'float32x2_t', 'vmov', 'ext', '[2, 3]'] + compose: + - FnCall: ['simd_shuffle!', [a, a, '{type[5]}']] + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["a: {type[1]}"] + return_type: 
"{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vget_low_s8', 'int8x16_t', 'int8x8_t', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - ['vget_low_u8', 'uint8x16_t', 'uint8x8_t','[0, 1, 2, 3, 4, 5, 6, 7]'] + - ['vget_low_p8', 'poly8x16_t', 'poly8x8_t','[0, 1, 2, 3, 4, 5, 6, 7]'] + - ['vget_low_s16', 'int16x8_t', 'int16x4_t', '[0, 1, 2, 3]'] + - ['vget_low_u16', 'uint16x8_t', 'uint16x4_t', '[0, 1, 2, 3]'] + - ['vget_low_p16', 'poly16x8_t', 'poly16x4_t', '[0, 1, 2, 3]'] + - ['vget_low_s32', 'int32x4_t', 'int32x2_t', '[0, 1]'] + - ['vget_low_f32', 'float32x4_t', 'float32x2_t', '[0, 1]'] + - ['vget_low_u32', 'uint32x4_t', 'uint32x2_t', '[0, 1]'] + compose: + - FnCall: ['simd_shuffle!', [a, a, '{type[3]}']] + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["value: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['{type[4]}']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vdupq_n_s8', 'i8', 'int8x16_t', 'vdup.8', 'dup', 'int8x16_t::splat(value)'] + - ['vdupq_n_s16', 'i16', 'int16x8_t', 'vdup.16', 'dup', 'int16x8_t::splat(value)'] + - ['vdupq_n_s32', 'i32', 'int32x4_t', 'vdup.32', 'dup', 'int32x4_t::splat(value)'] + - ['vdupq_n_s64', 'i64', 'int64x2_t', 'vmov', 'dup', 'int64x2_t::splat(value)'] + - ['vdupq_n_u8', 'u8', 'uint8x16_t', 'vdup.8', 'dup', 'uint8x16_t::splat(value)'] + - ['vdupq_n_u16', 'u16', 'uint16x8_t', 'vdup.16', 'dup', 'uint16x8_t::splat(value)'] + - ['vdupq_n_u32', 'u32', 'uint32x4_t', 'vdup.32', 'dup', 'uint32x4_t::splat(value)'] + - ['vdupq_n_f32', 'f32', 'float32x4_t', 'vdup.32', 'dup', 'float32x4_t::splat(value)'] + - ['vdupq_n_u64', 'u64', 'uint64x2_t', 'vmov', 'dup', 
'uint64x2_t::splat(value)'] + - ['vdupq_n_p8', 'p8', 'poly8x16_t', 'vdup.8', 'dup', 'poly8x16_t::splat(value)'] + - ['vdupq_n_p16', 'p16', 'poly16x8_t', 'vdup.16', 'dup', 'poly16x8_t::splat(value)'] + - ['vdup_n_s8', 'i8', 'int8x8_t', 'vdup.8', 'dup', 'int8x8_t::splat(value)'] + - ['vdup_n_s16', 'i16', 'int16x4_t', 'vdup.16', 'dup', 'int16x4_t::splat(value)'] + - ['vdup_n_s32', 'i32', 'int32x2_t', 'vdup.32', 'dup', 'int32x2_t::splat(value)'] + - ['vdup_n_s64', 'i64', 'int64x1_t', 'vmov', 'fmov', 'int64x1_t::splat(value)'] + - ['vdup_n_u8', 'u8', 'uint8x8_t', 'vdup.8', 'dup', 'uint8x8_t::splat(value)'] + - ['vdup_n_u16', 'u16', 'uint16x4_t', 'vdup.16', 'dup', 'uint16x4_t::splat(value)'] + - ['vdup_n_u32', 'u32', 'uint32x2_t', 'vdup.32', 'dup', 'uint32x2_t::splat(value)'] + - ['vdup_n_f32', 'f32', 'float32x2_t', 'vdup.32', 'dup', 'float32x2_t::splat(value)'] + - ['vdup_n_u64', 'u64', 'uint64x1_t', 'vmov', 'fmov', 'uint64x1_t::splat(value)'] + - ['vdup_n_p8', 'p8', 'poly8x8_t', 'vdup.8', 'dup', 'poly8x8_t::splat(value)'] + - ['vdup_n_p16', 'p16', 'poly16x4_t', 'vdup.16', 'dup', 'poly16x4_t::splat(value)'] + compose: + - Identifier: ['{type[5]}', Symbol] + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["value: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['{type[4]}']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vmov_n_s8', 'i8', 'int8x8_t', 'vdup.8', 'dup', 'vdup_n_s8'] + - ['vmov_n_s16', 'i16', 'int16x4_t', 'vdup.16', 'dup', 'vdup_n_s16'] + - ['vmov_n_s32', 'i32', 'int32x2_t', 'vdup.32', 'dup', 'vdup_n_s32'] + - ['vmov_n_s64', 'i64', 'int64x1_t', 'vmov', 'fmov', 'vdup_n_s64'] + - ['vmov_n_u8', 'u8', 'uint8x8_t', 'vdup.8', 'dup', 'vdup_n_u8'] + - ['vmov_n_u16', 'u16', 'uint16x4_t', 'vdup.16', 'dup', 
'vdup_n_u16'] + - ['vmov_n_u32', 'u32', 'uint32x2_t', 'vdup.32', 'dup', 'vdup_n_u32'] + - ['vmov_n_u64', 'u64', 'uint64x1_t', 'vmov', 'fmov', 'vdup_n_u64'] + - ['vmov_n_p8', 'p8', 'poly8x8_t', 'vdup.8', 'dup', 'vdup_n_p8'] + - ['vmov_n_p16', 'p16', 'poly16x4_t', 'vdup.16', 'dup', 'vdup_n_p16'] + - ['vmov_n_f32', 'f32', 'float32x2_t', 'vdup.32', 'dup', 'vdup_n_f32'] + - ['vmovq_n_s8', 'i8', 'int8x16_t', 'vdup.8', 'dup', 'vdupq_n_s8'] + - ['vmovq_n_s16', 'i16', 'int16x8_t', 'vdup.16', 'dup', 'vdupq_n_s16'] + - ['vmovq_n_s32', 'i32', 'int32x4_t', 'vdup.32', 'dup', 'vdupq_n_s32'] + - ['vmovq_n_s64', 'i64', 'int64x2_t', 'vmov', 'dup', 'vdupq_n_s64'] + - ['vmovq_n_u8', 'u8', 'uint8x16_t', 'vdup.8', 'dup', 'vdupq_n_u8'] + - ['vmovq_n_u16', 'u16', 'uint16x8_t', 'vdup.16', 'dup', 'vdupq_n_u16'] + - ['vmovq_n_u32', 'u32', 'uint32x4_t', 'vdup.32', 'dup', 'vdupq_n_u32'] + - ['vmovq_n_u64', 'u64', 'uint64x2_t', 'vmov', 'dup', 'vdupq_n_u64'] + - ['vmovq_n_p8', 'p8', 'poly8x16_t', 'vdup.8', 'dup', 'vdupq_n_p8'] + - ['vmovq_n_p16', 'p16', 'poly16x8_t', 'vdup.16', 'dup', 'vdupq_n_p16'] + - ['vmovq_n_f32', 'f32', 'float32x4_t', 'vdup.32', 'dup', 'vdupq_n_f32'] + compose: + - FnCall: ['{type[5]}', [value]] + + - name: "{type[0]}" + doc: "Store SIMD&FP register (immediate offset)" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['nop']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['nop']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vldrq_p128', '* const p128', 'p128'] + compose: + - Identifier: ['*a', Symbol] + + - name: "{type[0]}" + doc: "Store SIMD&FP register (immediate offset)" + arguments: ["a: {type[1]}", "b: {type[2]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['nop']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, 
['nop']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: + unsafe: [neon] + types: + - ['vstrq_p128', '* mut p128', 'p128'] + compose: + - Identifier: ['*a = b', Symbol] + + - name: "{type[0]}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[1]}", "_b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['nop', 'N = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['nop', 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ['vext_s64', 'int64x1_t'] + - ['vext_u64', 'uint64x1_t'] + compose: + - FnCall: ['static_assert!', ['N == 0']] + - Identifier: ['a', Symbol] + + - name: "{type[0]}" + doc: "Reversing vector elements (swap endianness)" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['{type[3]}']]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - ['vrev16_s8', 'int8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]'] + - ['vrev16q_s8', 'int8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]'] + - ['vrev16_u8', 'uint8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]'] + - ['vrev16q_u8', 'uint8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]'] + - ['vrev16_p8', 'poly8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]'] + - ['vrev16q_p8', 'poly8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]'] + - ['vrev32_s8', 'int8x8_t', 'vrev32.8', 'rev32', '[3, 2, 1, 0, 7, 6, 5, 4]'] + - ['vrev32q_s8', 'int8x16_t', 'vrev32.8', 'rev32', '[3, 2, 1, 
0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]'] + - ['vrev32_u8', 'uint8x8_t', 'vrev32.8', 'rev32', '[3, 2, 1, 0, 7, 6, 5, 4]'] + - ['vrev32q_u8', 'uint8x16_t', 'vrev32.8', 'rev32', '[3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]'] + - ['vrev32_p8', 'poly8x8_t', 'vrev32.8', 'rev32', '[3, 2, 1, 0, 7, 6, 5, 4]'] + - ['vrev32q_p8', 'poly8x16_t', 'vrev32.8', 'rev32', '[3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]'] + - ['vrev32_s16', 'int16x4_t', 'vrev32.16', 'rev32', '[1, 0, 3, 2]'] + - ['vrev32q_s16', 'int16x8_t', 'vrev32.16', 'rev32', '[1, 0, 3, 2, 5, 4, 7, 6]'] + - ['vrev32_u16', 'uint16x4_t', 'vrev32.16', 'rev32', '[1, 0, 3, 2]'] + - ['vrev32q_u16', 'uint16x8_t', 'vrev32.16', 'rev32', '[1, 0, 3, 2, 5, 4, 7, 6]'] + - ['vrev32_p16', 'poly16x4_t', 'vrev32.16', 'rev32', '[1, 0, 3, 2]'] + - ['vrev32q_p16', 'poly16x8_t', 'vrev32.16', 'rev32', '[1, 0, 3, 2, 5, 4, 7, 6]'] + - ['vrev64_s8', 'int8x8_t', 'vrev64.8', 'rev64', '[7, 6, 5, 4, 3, 2, 1, 0]'] + - ['vrev64q_s8', 'int8x16_t', 'vrev64.8', 'rev64', '[7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]'] + - ['vrev64_u8', 'uint8x8_t', 'vrev64.8', 'rev64', '[7, 6, 5, 4, 3, 2, 1, 0]'] + - ['vrev64q_u8', 'uint8x16_t', 'vrev64.8', 'rev64', '[7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]'] + - ['vrev64_p8', 'poly8x8_t', 'vrev64.8', 'rev64', '[7, 6, 5, 4, 3, 2, 1, 0]'] + - ['vrev64q_p8', 'poly8x16_t', 'vrev64.8', 'rev64', '[7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]'] + - ['vrev64_s16', 'int16x4_t', 'vrev64.16', 'rev64', '[3, 2, 1, 0]'] + - ['vrev64q_s16', 'int16x8_t', 'vrev64.16', 'rev64', '[3, 2, 1, 0, 7, 6, 5, 4]'] + - ['vrev64_u16', 'uint16x4_t', 'vrev64.16', 'rev64', '[3, 2, 1, 0]'] + - ['vrev64q_u16', 'uint16x8_t', 'vrev64.16', 'rev64', '[3, 2, 1, 0, 7, 6, 5, 4]'] + - ['vrev64_p16', 'poly16x4_t', 'vrev64.16', 'rev64', '[3, 2, 1, 0]'] + - ['vrev64q_p16', 'poly16x8_t', 'vrev64.16', 'rev64', '[3, 2, 1, 0, 7, 6, 5, 4]'] + - ['vrev64_s32', 'int32x2_t', 'vrev64.32', 'rev64', '[1, 0]'] + - 
['vrev64q_s32', 'int32x4_t', 'vrev64.32', 'rev64', '[1, 0, 3, 2]'] + - ['vrev64_u32', 'uint32x2_t', 'vrev64.32', 'rev64', '[1, 0]'] + - ['vrev64q_u32', 'uint32x4_t', 'vrev64.32', 'rev64', '[1, 0, 3, 2]'] + - ['vrev64_f32', 'float32x2_t', 'vrev64.32', 'rev64', '[1, 0]'] + - ['vrev64q_f32', 'float32x4_t', 'vrev64.32', 'rev64', '[1, 0, 3, 2]'] + compose: + - FnCall: ['simd_shuffle!', [a, a, '{type[4]}']] diff --git a/library/stdarch/crates/stdarch-gen-arm/src/assert_instr.rs b/library/stdarch/crates/stdarch-gen-arm/src/assert_instr.rs new file mode 100644 index 000000000000..799b3379a851 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/assert_instr.rs @@ -0,0 +1,372 @@ +use proc_macro2::TokenStream; +use quote::{ToTokens, TokenStreamExt, format_ident, quote}; +use serde::de::{self, MapAccess, Visitor}; +use serde::{Deserialize, Deserializer, Serialize, ser::SerializeSeq}; +use std::fmt; + +use crate::{ + context::{self, Context}, + typekinds::{BaseType, BaseTypeKind}, + wildstring::WildString, +}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum InstructionAssertion { + Basic(WildString), + WithArgs(WildString, WildString), +} + +impl InstructionAssertion { + fn build(&mut self, ctx: &Context) -> context::Result { + match self { + InstructionAssertion::Basic(ws) => ws.build_acle(ctx.local), + InstructionAssertion::WithArgs(ws, args_ws) => [ws, args_ws] + .into_iter() + .try_for_each(|ws| ws.build_acle(ctx.local)), + } + } +} + +impl ToTokens for InstructionAssertion { + fn to_tokens(&self, tokens: &mut TokenStream) { + let instr = format_ident!( + "{}", + match self { + Self::Basic(instr) => instr, + Self::WithArgs(instr, _) => instr, + } + .to_string() + ); + tokens.append_all(quote! { #instr }); + + if let Self::WithArgs(_, args) = self { + let ex: TokenStream = args + .to_string() + .parse() + .expect("invalid instruction assertion arguments expression given"); + tokens.append_all(quote! 
{, #ex}) + } + } +} + +// Asserts that the given instruction is present for the intrinsic of the associated type bitsize. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(remote = "Self")] +pub struct InstructionAssertionMethodForBitsize { + pub default: InstructionAssertion, + pub byte: Option, + pub halfword: Option, + pub word: Option, + pub doubleword: Option, +} + +impl InstructionAssertionMethodForBitsize { + fn build(&mut self, ctx: &Context) -> context::Result { + if let Some(ref mut byte) = self.byte { + byte.build(ctx)? + } + if let Some(ref mut halfword) = self.halfword { + halfword.build(ctx)? + } + if let Some(ref mut word) = self.word { + word.build(ctx)? + } + if let Some(ref mut doubleword) = self.doubleword { + doubleword.build(ctx)? + } + self.default.build(ctx) + } +} + +impl Serialize for InstructionAssertionMethodForBitsize { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + InstructionAssertionMethodForBitsize { + default: InstructionAssertion::Basic(instr), + byte: None, + halfword: None, + word: None, + doubleword: None, + } => serializer.serialize_str(&instr.to_string()), + InstructionAssertionMethodForBitsize { + default: InstructionAssertion::WithArgs(instr, args), + byte: None, + halfword: None, + word: None, + doubleword: None, + } => { + let mut seq = serializer.serialize_seq(Some(2))?; + seq.serialize_element(&instr.to_string())?; + seq.serialize_element(&args.to_string())?; + seq.end() + } + _ => InstructionAssertionMethodForBitsize::serialize(self, serializer), + } + } +} + +impl<'de> Deserialize<'de> for InstructionAssertionMethodForBitsize { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct IAMVisitor; + + impl<'de> Visitor<'de> for IAMVisitor { + type Value = InstructionAssertionMethodForBitsize; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("array, string or map") + } + + fn 
visit_str(self, value: &str) -> Result + where + E: de::Error, + { + Ok(InstructionAssertionMethodForBitsize { + default: InstructionAssertion::Basic(value.parse().map_err(E::custom)?), + byte: None, + halfword: None, + word: None, + doubleword: None, + }) + } + + fn visit_seq
(self, mut seq: A) -> Result + where + A: de::SeqAccess<'de>, + { + use serde::de::Error; + let make_err = + || Error::custom("invalid number of arguments passed to assert_instruction"); + let instruction = seq.next_element()?.ok_or_else(make_err)?; + let args = seq.next_element()?.ok_or_else(make_err)?; + + if let Some(true) = seq.size_hint().map(|len| len > 0) { + Err(make_err()) + } else { + Ok(InstructionAssertionMethodForBitsize { + default: InstructionAssertion::WithArgs(instruction, args), + byte: None, + halfword: None, + word: None, + doubleword: None, + }) + } + } + + fn visit_map(self, map: M) -> Result + where + M: MapAccess<'de>, + { + InstructionAssertionMethodForBitsize::deserialize( + de::value::MapAccessDeserializer::new(map), + ) + } + } + + deserializer.deserialize_any(IAMVisitor) + } +} + +/// Asserts that the given instruction is present for the intrinsic of the associated type. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(remote = "Self")] +pub struct InstructionAssertionMethod { + /// Instruction for integer intrinsics + pub default: InstructionAssertionMethodForBitsize, + /// Instruction for floating-point intrinsics (optional) + #[serde(default)] + pub float: Option, + /// Instruction for unsigned integer intrinsics (optional) + #[serde(default)] + pub unsigned: Option, +} + +impl InstructionAssertionMethod { + pub(crate) fn build(&mut self, ctx: &Context) -> context::Result { + if let Some(ref mut float) = self.float { + float.build(ctx)? + } + if let Some(ref mut unsigned) = self.unsigned { + unsigned.build(ctx)? 
+ } + self.default.build(ctx) + } +} + +impl Serialize for InstructionAssertionMethod { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + InstructionAssertionMethod { + default: + InstructionAssertionMethodForBitsize { + default: InstructionAssertion::Basic(instr), + byte: None, + halfword: None, + word: None, + doubleword: None, + }, + float: None, + unsigned: None, + } => serializer.serialize_str(&instr.to_string()), + InstructionAssertionMethod { + default: + InstructionAssertionMethodForBitsize { + default: InstructionAssertion::WithArgs(instr, args), + byte: None, + halfword: None, + word: None, + doubleword: None, + }, + float: None, + unsigned: None, + } => { + let mut seq = serializer.serialize_seq(Some(2))?; + seq.serialize_element(&instr.to_string())?; + seq.serialize_element(&args.to_string())?; + seq.end() + } + _ => InstructionAssertionMethod::serialize(self, serializer), + } + } +} + +impl<'de> Deserialize<'de> for InstructionAssertionMethod { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct IAMVisitor; + + impl<'de> Visitor<'de> for IAMVisitor { + type Value = InstructionAssertionMethod; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("array, string or map") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + Ok(InstructionAssertionMethod { + default: InstructionAssertionMethodForBitsize { + default: InstructionAssertion::Basic(value.parse().map_err(E::custom)?), + byte: None, + halfword: None, + word: None, + doubleword: None, + }, + float: None, + unsigned: None, + }) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: de::SeqAccess<'de>, + { + use serde::de::Error; + let make_err = + || Error::custom("invalid number of arguments passed to assert_instruction"); + let instruction = seq.next_element()?.ok_or_else(make_err)?; + let args = 
seq.next_element()?.ok_or_else(make_err)?; + + if let Some(true) = seq.size_hint().map(|len| len > 0) { + Err(make_err()) + } else { + Ok(InstructionAssertionMethod { + default: InstructionAssertionMethodForBitsize { + default: InstructionAssertion::WithArgs(instruction, args), + byte: None, + halfword: None, + word: None, + doubleword: None, + }, + float: None, + unsigned: None, + }) + } + } + + fn visit_map(self, map: M) -> Result + where + M: MapAccess<'de>, + { + InstructionAssertionMethod::deserialize(de::value::MapAccessDeserializer::new(map)) + } + } + + deserializer.deserialize_any(IAMVisitor) + } +} + +#[derive(Debug)] +pub struct InstructionAssertionsForBaseType<'a>( + pub &'a Vec, + pub &'a Option<&'a BaseType>, +); + +impl<'a> ToTokens for InstructionAssertionsForBaseType<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.0.iter().for_each( + |InstructionAssertionMethod { + default, + float, + unsigned, + }| { + let kind = self.1.map(|ty| ty.kind()); + let instruction = match (kind, float, unsigned) { + (None, float, unsigned) if float.is_some() || unsigned.is_some() => { + unreachable!( + "cannot determine the base type kind for instruction assertion: {self:#?}") + } + (Some(BaseTypeKind::Float), Some(float), _) => float, + (Some(BaseTypeKind::UInt), _, Some(unsigned)) => unsigned, + _ => default, + }; + + let bitsize = self.1.and_then(|ty| ty.get_size().ok()); + let instruction = match (bitsize, instruction) { + ( + Some(8), + InstructionAssertionMethodForBitsize { + byte: Some(byte), .. + }, + ) => byte, + ( + Some(16), + InstructionAssertionMethodForBitsize { + halfword: Some(halfword), + .. + }, + ) => halfword, + ( + Some(32), + InstructionAssertionMethodForBitsize { + word: Some(word), .. + }, + ) => word, + ( + Some(64), + InstructionAssertionMethodForBitsize { + doubleword: Some(doubleword), + .. + }, + ) => doubleword, + (_, InstructionAssertionMethodForBitsize { default, .. }) => default, + }; + + tokens.append_all(quote! 
{ #[cfg_attr(test, assert_instr(#instruction))]}) + }, + ); + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/big_endian.rs b/library/stdarch/crates/stdarch-gen-arm/src/big_endian.rs new file mode 100644 index 000000000000..b982ff53ec3d --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/big_endian.rs @@ -0,0 +1,184 @@ +use crate::expression::LetVariant; +use crate::wildstring::WildStringPart; +use crate::{ + expression::{Expression, IdentifierType}, + typekinds::*, + wildstring::WildString, +}; + +/// Simplifies creating a string that can be used in an Expression, as Expression +/// expects all strings to be `WildString` +fn create_single_wild_string(name: &str) -> WildString { + WildString(vec![WildStringPart::String(name.to_string())]) +} + +/// Creates an Identifier with name `name` with no wildcards. This, for example, +/// can be used to create variables, function names or arbitrary input. Is is +/// extremely flexible. +pub fn create_symbol_identifier(arbitrary_string: &str) -> Expression { + let identifier_name = create_single_wild_string(arbitrary_string); + Expression::Identifier(identifier_name, IdentifierType::Symbol) +} + +/// To compose the simd_shuffle! 
call we need: +/// - simd_shuffle!(, , ) +/// +/// Here we are creating a string version of the `` that can be used as an +/// Expression Identifier +/// +/// In textual form `a: int32x4_t` which has 4 lanes would generate: +/// ``` +/// [0, 1, 2, 3] +/// ``` +fn create_array(lanes: u32) -> Option { + match lanes { + 1 => None, /* Makes no sense to shuffle an array of size 1 */ + 2 => Some("[1, 0]".to_string()), + 3 => Some("[2, 1, 0]".to_string()), + 4 => Some("[3, 2, 1, 0]".to_string()), + 8 => Some("[7, 6, 5, 4, 3, 2, 1, 0]".to_string()), + 16 => Some("[15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]".to_string()), + _ => panic!("Incorrect vector number of vector lanes: {lanes}"), + } +} + +/// Creates: `let : = ` +pub fn create_let_variable( + variable_name: &str, + type_kind: &TypeKind, + expression: Expression, +) -> Expression { + let identifier_name = create_single_wild_string(variable_name); + Expression::Let(LetVariant::WithType( + identifier_name, + type_kind.clone(), + Box::new(expression), + )) +} + +pub fn create_mut_let_variable( + variable_name: &str, + type_kind: &TypeKind, + expression: Expression, +) -> Expression { + let identifier_name = create_single_wild_string(variable_name); + Expression::Let(LetVariant::MutWithType( + identifier_name, + type_kind.clone(), + Box::new(expression), + )) +} + +pub fn type_has_tuple(type_kind: &TypeKind) -> bool { + if let TypeKind::Vector(vector_type) = type_kind { + vector_type.tuple_size().is_some() + } else { + false + } +} + +pub fn make_variable_mutable(variable_name: &str, type_kind: &TypeKind) -> Expression { + let mut_variable = format!("let mut {variable_name}: {type_kind} = {variable_name}"); + let identifier_name = create_single_wild_string(&mut_variable); + Expression::Identifier(identifier_name, IdentifierType::Symbol) +} + +/// For creating shuffle calls, accepts function pointers for formatting for tuple +/// types and types without a tuple +/// +/// Example: +/// +/// `a: int32x4_t` with 
formatting function `create_shuffle_call_fmt` creates: +/// ``` +/// simd_shuffle!(a, a, [0, 1, 2, 3]) +/// ``` +/// +/// `a: int32x4x2_t` creates: +/// ``` +/// a.0 = simd_shuffle!(a.0, a.0, [0, 1, 2, 3]) +/// a.1 = simd_shuffle!(a.1, a.1, [0, 1, 2, 3]) +/// ``` +fn create_shuffle_internal( + variable_name: &String, + type_kind: &TypeKind, + fmt_tuple: fn(variable_name: &String, idx: u32, array_lanes: &String) -> String, + fmt: fn(variable_name: &String, type_kind: &TypeKind, array_lanes: &String) -> String, +) -> Option { + let TypeKind::Vector(vector_type) = type_kind else { + return None; + }; + + let lane_count = vector_type.lanes(); + let array_lanes = create_array(lane_count)?; + + let tuple_count = vector_type.tuple_size().map_or_else(|| 0, |t| t.to_int()); + + if tuple_count > 0 { + let capacity_estimate: usize = + tuple_count as usize * (lane_count as usize + ((variable_name.len() + 2) * 3)); + let mut string_builder = String::with_capacity(capacity_estimate); + + /* .idx = simd_shuffle!(.idx, .idx, []) */ + for idx in 0..tuple_count { + let formatted = fmt_tuple(variable_name, idx, &array_lanes); + string_builder += formatted.as_str(); + } + Some(create_symbol_identifier(&string_builder)) + } else { + /* Generate a list of shuffles for each tuple */ + let expression = fmt(variable_name, type_kind, &array_lanes); + Some(create_symbol_identifier(&expression)) + } +} + +fn create_assigned_tuple_shuffle_call_fmt( + variable_name: &String, + idx: u32, + array_lanes: &String, +) -> String { + format!( + "{variable_name}.{idx} = unsafe {{ simd_shuffle!({variable_name}.{idx}, {variable_name}.{idx}, {array_lanes}) }};\n" + ) +} + +fn create_assigned_shuffle_call_fmt( + variable_name: &String, + type_kind: &TypeKind, + array_lanes: &String, +) -> String { + format!( + "let {variable_name}: {type_kind} = unsafe {{ simd_shuffle!({variable_name}, {variable_name}, {array_lanes}) }}" + ) +} + +fn create_shuffle_call_fmt( + variable_name: &String, + _type_kind: 
&TypeKind, + array_lanes: &String, +) -> String { + format!("simd_shuffle!({variable_name}, {variable_name}, {array_lanes})") +} + +/// Create a `simd_shuffle!(<...>, [...])` call, where the output is stored +/// in a variable named `variable_name` +pub fn create_assigned_shuffle_call( + variable_name: &String, + type_kind: &TypeKind, +) -> Option { + create_shuffle_internal( + variable_name, + type_kind, + create_assigned_tuple_shuffle_call_fmt, + create_assigned_shuffle_call_fmt, + ) +} + +/// Create a `simd_shuffle!(<...>, [...])` call +pub fn create_shuffle_call(variable_name: &String, type_kind: &TypeKind) -> Option { + create_shuffle_internal( + variable_name, + type_kind, + create_assigned_tuple_shuffle_call_fmt, + create_shuffle_call_fmt, + ) +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/context.rs b/library/stdarch/crates/stdarch-gen-arm/src/context.rs new file mode 100644 index 000000000000..9b8eb8e8b9bf --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/context.rs @@ -0,0 +1,274 @@ +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use crate::{ + expression::Expression, + input::{InputSet, InputType}, + intrinsic::{Constraint, Intrinsic, Signature}, + matching::SizeMatchable, + predicate_forms::PredicateForm, + typekinds::{ToRepr, TypeKind}, + wildcards::Wildcard, + wildstring::WildString, +}; + +/// Maximum SVE vector size +const SVE_VECTOR_MAX_SIZE: u32 = 2048; +/// Vector register size +const VECTOR_REG_SIZE: u32 = 128; + +/// Generator result +pub type Result = std::result::Result; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArchitectureSettings { + #[serde(alias = "arch")] + pub arch_name: String, + pub target_feature: Vec, + #[serde(alias = "llvm_prefix")] + pub llvm_link_prefix: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GlobalContext { + pub arch_cfgs: Vec, + #[serde(default)] + pub uses_neon_types: bool, + + /// Should the 
yaml file automagically generate big endian shuffling + #[serde(default)] + pub auto_big_endian: Option, + + /// Should all LLVM wrappers convert their arguments to a signed type + #[serde(default)] + pub auto_llvm_sign_conversion: bool, +} + +/// Context of an intrinsic group +#[derive(Debug, Clone, Default)] +pub struct GroupContext { + /// LLVM links to target input sets + pub links: HashMap, +} + +#[derive(Debug, Clone, Copy)] +pub enum VariableType { + Argument, + Internal, +} + +#[derive(Debug, Clone)] +pub struct LocalContext { + pub signature: Signature, + + pub input: InputSet, + + pub substitutions: HashMap, + pub variables: HashMap, +} + +impl LocalContext { + pub fn new(input: InputSet, original: &Intrinsic) -> LocalContext { + LocalContext { + signature: original.signature.clone(), + input, + substitutions: HashMap::new(), + variables: HashMap::new(), + } + } + + pub fn provide_type_wildcard(&self, wildcard: &Wildcard) -> Result { + let err = || { + format!( + "provide_type_wildcard() wildcard {{{wildcard}}} not found for {}", + &self.signature.name.to_string() + ) + }; + + /* If the type is already a vector then we can just return the vector */ + let make_neon = |tuple_size| { + move |ty| match ty { + TypeKind::Vector(_) => Ok(ty), + _ => TypeKind::make_vector(ty, false, tuple_size), + } + }; + let make_sve = |tuple_size| move |ty| TypeKind::make_vector(ty, true, tuple_size); + + match wildcard { + Wildcard::Type(idx) => self.input.typekind(*idx).ok_or_else(err), + Wildcard::NEONType(idx, tuple_size, _) => self + .input + .typekind(*idx) + .ok_or_else(|| { + dbg!("{:?}", &self); + err() + }) + .and_then(make_neon(*tuple_size)), + Wildcard::SVEType(idx, tuple_size) => self + .input + .typekind(*idx) + .ok_or_else(err) + .and_then(make_sve(*tuple_size)), + Wildcard::Predicate(idx) => self.input.typekind(*idx).map_or_else( + || { + if idx.is_none() && self.input.types_len() == 1 { + Err(err()) + } else { + Err(format!( + "there is no type at index {} to 
infer the predicate from", + idx.unwrap_or(0) + )) + } + }, + |ref ty| TypeKind::make_predicate_from(ty), + ), + Wildcard::MaxPredicate => self + .input + .iter() + .filter_map(|arg| arg.typekind()) + .max_by(|x, y| { + x.base_type() + .and_then(|bt| bt.get_size().ok()) + .unwrap_or(0) + .cmp(&y.base_type().and_then(|bt| bt.get_size().ok()).unwrap_or(0)) + }) + .map_or_else( + || Err("there are no types available to infer the predicate from".to_string()), + TypeKind::make_predicate_from, + ), + Wildcard::Scale(w, as_ty) => { + let mut ty = self.provide_type_wildcard(w)?; + if let Some(vty) = ty.vector_mut() { + let base_ty = if let Some(w) = as_ty.wildcard() { + *self.provide_type_wildcard(w)?.base_type().unwrap() + } else { + *as_ty.base_type().unwrap() + }; + vty.cast_base_type_as(base_ty) + } + Ok(ty) + } + _ => Err(err()), + } + } + + pub fn provide_substitution_wildcard(&self, wildcard: &Wildcard) -> Result { + let err = || Err(format!("wildcard {{{wildcard}}} not found")); + + match wildcard { + Wildcard::SizeLiteral(idx) => self.input.typekind(*idx) + .map_or_else(err, |ty| Ok(ty.size_literal())), + Wildcard::Size(idx) => self.input.typekind(*idx) + .map_or_else(err, |ty| Ok(ty.size())), + Wildcard::SizeMinusOne(idx) => self.input.typekind(*idx) + .map_or_else(err, |ty| Ok((ty.size().parse::().unwrap()-1).to_string())), + Wildcard::SizeInBytesLog2(idx) => self.input.typekind(*idx) + .map_or_else(err, |ty| Ok(ty.size_in_bytes_log2())), + Wildcard::NVariant if !self.substitutions.contains_key(wildcard) => Ok(String::new()), + Wildcard::TypeKind(idx, opts) => { + self.input.typekind(*idx) + .map_or_else(err, |ty| { + let literal = if let Some(opts) = opts { + #[allow(clippy::obfuscated_if_else)] + opts.contains(ty.base_type().map(|bt| *bt.kind()).ok_or_else(|| { + format!("cannot retrieve a type literal out of {ty}") + })?) 
+ .then(|| ty.type_kind()) + .unwrap_or_default() + } else { + ty.type_kind() + }; + Ok(literal) + }) + } + Wildcard::PredicateForms(_) => self + .input + .iter() + .find_map(|arg| { + if let InputType::PredicateForm(pf) = arg { + Some(pf.get_suffix().to_string()) + } else { + None + } + }) + .ok_or_else(|| unreachable!("attempting to render a predicate form wildcard, but no predicate form was compiled for it")), + _ => self + .substitutions + .get(wildcard) + .map_or_else(err, |s| Ok(s.clone())), + } + } + + pub fn make_assertion_from_constraint(&self, constraint: &Constraint) -> Result { + match constraint { + Constraint::AnyI32 { + variable, + any_values, + } => { + let where_ex = any_values + .iter() + .map(|value| format!("{variable} == {value}")) + .join(" || "); + Ok(Expression::MacroCall("static_assert".to_string(), where_ex)) + } + Constraint::RangeI32 { + variable, + range: SizeMatchable::Matched(range), + } => Ok(Expression::MacroCall( + "static_assert_range".to_string(), + format!( + "{variable}, {min}, {max}", + min = range.start(), + max = range.end() + ), + )), + Constraint::SVEMaxElems { + variable, + sve_max_elems_type: ty, + } + | Constraint::VecMaxElems { + variable, + vec_max_elems_type: ty, + } => { + if !self.input.is_empty() { + let higher_limit = match constraint { + Constraint::SVEMaxElems { .. } => SVE_VECTOR_MAX_SIZE, + Constraint::VecMaxElems { .. } => VECTOR_REG_SIZE, + _ => unreachable!(), + }; + + let max = ty.base_type() + .map(|ty| ty.get_size()) + .transpose()? 
+ .map_or_else( + || Err(format!("can't make an assertion out of constraint {self:?}: no valid type is present")), + |bitsize| Ok(higher_limit / bitsize - 1))?; + Ok(Expression::MacroCall( + "static_assert_range".to_string(), + format!("{variable}, 0, {max}"), + )) + } else { + Err(format!( + "can't make an assertion out of constraint {self:?}: no types are being used" + )) + } + } + _ => unreachable!("constraints were not built successfully!"), + } + } + + pub fn predicate_form(&self) -> Option<&PredicateForm> { + self.input.iter().find_map(|arg| arg.predicate_form()) + } + + pub fn n_variant_op(&self) -> Option<&WildString> { + self.input.iter().find_map(|arg| arg.n_variant_op()) + } +} + +pub struct Context<'ctx> { + pub local: &'ctx mut LocalContext, + pub group: &'ctx mut GroupContext, + pub global: &'ctx GlobalContext, +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/expression.rs b/library/stdarch/crates/stdarch-gen-arm/src/expression.rs new file mode 100644 index 000000000000..56c94602fff9 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/expression.rs @@ -0,0 +1,618 @@ +use itertools::Itertools; +use lazy_static::lazy_static; +use proc_macro2::{Literal, Punct, Spacing, TokenStream}; +use quote::{ToTokens, TokenStreamExt, format_ident, quote}; +use regex::Regex; +use serde::de::{self, MapAccess, Visitor}; +use serde::{Deserialize, Deserializer, Serialize}; +use std::fmt; +use std::str::FromStr; + +use crate::intrinsic::Intrinsic; +use crate::wildstring::WildStringPart; +use crate::{ + context::{self, Context, VariableType}, + intrinsic::{Argument, LLVMLink, StaticDefinition}, + matching::{MatchKindValues, MatchSizeValues}, + typekinds::{BaseType, BaseTypeKind, TypeKind}, + wildcards::Wildcard, + wildstring::WildString, +}; + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub enum IdentifierType { + Variable, + Symbol, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum LetVariant { + 
Basic(WildString, Box), + WithType(WildString, TypeKind, Box), + MutWithType(WildString, TypeKind, Box), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FnCall( + /// Function pointer + pub Box, + /// Function arguments + pub Vec, + /// Function turbofish arguments + #[serde(default)] + pub Vec, + /// Function requires unsafe wrapper + #[serde(default)] + pub bool, +); + +impl FnCall { + pub fn new_expression(fn_ptr: Expression, arguments: Vec) -> Expression { + FnCall(Box::new(fn_ptr), arguments, Vec::new(), false).into() + } + + pub fn new_unsafe_expression(fn_ptr: Expression, arguments: Vec) -> Expression { + FnCall(Box::new(fn_ptr), arguments, Vec::new(), true).into() + } + + pub fn is_llvm_link_call(&self, llvm_link_name: &str) -> bool { + self.is_expected_call(llvm_link_name) + } + + pub fn is_target_feature_call(&self) -> bool { + self.is_expected_call("target_feature") + } + + pub fn is_expected_call(&self, fn_call_name: &str) -> bool { + if let Expression::Identifier(fn_name, IdentifierType::Symbol) = self.0.as_ref() { + fn_name.to_string() == fn_call_name + } else { + false + } + } + + pub fn pre_build(&mut self, ctx: &mut Context) -> context::Result { + self.0.pre_build(ctx)?; + self.1 + .iter_mut() + .chain(self.2.iter_mut()) + .try_for_each(|ex| ex.pre_build(ctx)) + } + + pub fn build(&mut self, intrinsic: &Intrinsic, ctx: &mut Context) -> context::Result { + self.0.build(intrinsic, ctx)?; + self.1 + .iter_mut() + .chain(self.2.iter_mut()) + .try_for_each(|ex| ex.build(intrinsic, ctx)) + } +} + +impl ToTokens for FnCall { + fn to_tokens(&self, tokens: &mut TokenStream) { + let FnCall(fn_ptr, arguments, turbofish, _requires_unsafe_wrapper) = self; + + fn_ptr.to_tokens(tokens); + + if !turbofish.is_empty() { + tokens.append_all(quote! {::<#(#turbofish),*>}); + } + + tokens.append_all(quote! 
{ (#(#arguments),*) }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(remote = "Self", deny_unknown_fields)] +pub enum Expression { + /// (Re)Defines a variable + Let(LetVariant), + /// Performs a variable assignment operation + Assign(String, Box), + /// Performs a macro call + MacroCall(String, String), + /// Performs a function call + FnCall(FnCall), + /// Performs a method call. The following: + /// `MethodCall: ["$object", "to_string", []]` + /// is tokenized as: + /// `object.to_string()`. + MethodCall(Box, String, Vec), + /// Symbol identifier name, prepend with a `$` to treat it as a scope variable + /// which engages variable tracking and enables inference. + /// E.g. `my_function_name` for a generic symbol or `$my_variable` for + /// a variable. + Identifier(WildString, IdentifierType), + /// Constant signed integer number expression + IntConstant(i32), + /// Constant floating point number expression + FloatConstant(f32), + /// Constant boolean expression, either `true` or `false` + BoolConstant(bool), + /// Array expression + Array(Vec), + + // complex expressions + /// Makes an LLVM link. + /// + /// It stores the link's function name in the wildcard `{llvm_link}`, for use in + /// subsequent expressions. 
+ LLVMLink(LLVMLink), + /// Casts the given expression to the specified (unchecked) type + CastAs(Box, String), + /// Returns the LLVM `undef` symbol + SvUndef, + /// Multiplication + Multiply(Box, Box), + /// Xor + Xor(Box, Box), + /// Converts the specified constant to the specified type's kind + ConvertConst(TypeKind, i32), + /// Yields the given type in the Rust representation + Type(TypeKind), + + MatchSize(TypeKind, MatchSizeValues>), + MatchKind(TypeKind, MatchKindValues>), +} + +impl Expression { + pub fn pre_build(&mut self, ctx: &mut Context) -> context::Result { + match self { + Self::FnCall(fn_call) => fn_call.pre_build(ctx), + Self::MethodCall(cl_ptr_ex, _, arg_exs) => { + cl_ptr_ex.pre_build(ctx)?; + arg_exs.iter_mut().try_for_each(|ex| ex.pre_build(ctx)) + } + Self::Let( + LetVariant::Basic(_, ex) + | LetVariant::WithType(_, _, ex) + | LetVariant::MutWithType(_, _, ex), + ) => ex.pre_build(ctx), + Self::CastAs(ex, _) => ex.pre_build(ctx), + Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { + lhs.pre_build(ctx)?; + rhs.pre_build(ctx) + } + Self::MatchSize(match_ty, values) => { + *self = *values.get(match_ty, ctx.local)?.to_owned(); + self.pre_build(ctx) + } + Self::MatchKind(match_ty, values) => { + *self = *values.get(match_ty, ctx.local)?.to_owned(); + self.pre_build(ctx) + } + _ => Ok(()), + } + } + + pub fn build(&mut self, intrinsic: &Intrinsic, ctx: &mut Context) -> context::Result { + match self { + Self::LLVMLink(link) => link.build_and_save(ctx), + Self::Identifier(identifier, id_type) => { + identifier.build_acle(ctx.local)?; + + if let IdentifierType::Variable = id_type { + ctx.local + .variables + .get(&identifier.to_string()) + .map(|_| ()) + .ok_or_else(|| format!("invalid variable {identifier} being referenced")) + } else { + Ok(()) + } + } + Self::FnCall(fn_call) => { + fn_call.build(intrinsic, ctx)?; + + #[allow(clippy::collapsible_if)] + if let Some(llvm_link_name) = ctx.local.substitutions.get(&Wildcard::LLVMLink) { + if 
fn_call.is_llvm_link_call(llvm_link_name) { + *self = intrinsic + .llvm_link() + .expect("got LLVMLink wildcard without a LLVM link in `compose`") + .apply_conversions_to_call(fn_call.clone(), ctx)? + } + } + + Ok(()) + } + Self::MethodCall(cl_ptr_ex, _, arg_exs) => { + cl_ptr_ex.build(intrinsic, ctx)?; + arg_exs + .iter_mut() + .try_for_each(|ex| ex.build(intrinsic, ctx)) + } + Self::Let(variant) => { + let (var_name, ex, ty) = match variant { + LetVariant::Basic(var_name, ex) => (var_name, ex, None), + LetVariant::WithType(var_name, ty, ex) + | LetVariant::MutWithType(var_name, ty, ex) => { + if let Some(w) = ty.wildcard() { + ty.populate_wildcard(ctx.local.provide_type_wildcard(w)?)?; + } + (var_name, ex, Some(ty.to_owned())) + } + }; + + var_name.build_acle(ctx.local)?; + ctx.local.variables.insert( + var_name.to_string(), + ( + ty.unwrap_or_else(|| TypeKind::Custom("unknown".to_string())), + VariableType::Internal, + ), + ); + ex.build(intrinsic, ctx) + } + Self::CastAs(ex, _) => ex.build(intrinsic, ctx), + Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { + lhs.build(intrinsic, ctx)?; + rhs.build(intrinsic, ctx) + } + Self::ConvertConst(ty, num) => { + if let Some(w) = ty.wildcard() { + *ty = ctx.local.provide_type_wildcard(w)? + } + + if let Some(BaseType::Sized(BaseTypeKind::Float, _)) = ty.base() { + *self = Expression::FloatConstant(*num as f32) + } else { + *self = Expression::IntConstant(*num) + } + Ok(()) + } + Self::Type(ty) => { + if let Some(w) = ty.wildcard() { + *ty = ctx.local.provide_type_wildcard(w)? + } + + Ok(()) + } + _ => Ok(()), + } + } + + /// True if the expression requires an `unsafe` context in a safe function. + /// + /// The classification is somewhat fuzzy, based on actual usage (e.g. empirical function names) + /// rather than a full parse. This is a reasonable approach because mistakes here will usually + /// be caught at build time: + /// + /// - Missing an `unsafe` is a build error. 
+ /// - An unnecessary `unsafe` is a warning, made into an error by the CI's `-D warnings`. + /// + /// This **panics** if it encounters an expression that shouldn't appear in a safe function at + /// all (such as `SvUndef`). + pub fn requires_unsafe_wrapper(&self, ctx_fn: &str) -> bool { + match self { + // The call will need to be unsafe, but the declaration does not. + Self::LLVMLink(..) => false, + // Identifiers, literals and type names are never unsafe. + Self::Identifier(..) => false, + Self::IntConstant(..) => false, + Self::FloatConstant(..) => false, + Self::BoolConstant(..) => false, + Self::Type(..) => false, + Self::ConvertConst(..) => false, + // Nested structures that aren't inherently unsafe, but could contain other expressions + // that might be. + Self::Assign(_var, exp) => exp.requires_unsafe_wrapper(ctx_fn), + Self::Let( + LetVariant::Basic(_, exp) + | LetVariant::WithType(_, _, exp) + | LetVariant::MutWithType(_, _, exp), + ) => exp.requires_unsafe_wrapper(ctx_fn), + Self::Array(exps) => exps.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)), + Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { + lhs.requires_unsafe_wrapper(ctx_fn) || rhs.requires_unsafe_wrapper(ctx_fn) + } + Self::CastAs(exp, _ty) => exp.requires_unsafe_wrapper(ctx_fn), + // Functions and macros can be unsafe, but can also contain other expressions. 
+ Self::FnCall(FnCall(fn_exp, args, turbo_args, requires_unsafe_wrapper)) => { + let fn_name = fn_exp.to_string(); + fn_exp.requires_unsafe_wrapper(ctx_fn) + || fn_name.starts_with("_sv") + || fn_name.starts_with("simd_") + || fn_name.ends_with("transmute") + || args.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)) + || turbo_args + .iter() + .any(|exp| exp.requires_unsafe_wrapper(ctx_fn)) + || *requires_unsafe_wrapper + } + Self::MethodCall(exp, fn_name, args) => match fn_name.as_str() { + // `as_signed` and `as_unsigned` are unsafe because they're trait methods with + // target features to allow use on feature-dependent types (such as SVE vectors). + // We can safely wrap them here. + "as_signed" => true, + "as_unsigned" => true, + _ => { + exp.requires_unsafe_wrapper(ctx_fn) + || args.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)) + } + }, + // We only use macros to check const generics (using static assertions). + Self::MacroCall(_name, _args) => false, + // Materialising uninitialised values is always unsafe, and we avoid it in safe + // functions. + Self::SvUndef => panic!("Refusing to wrap unsafe SvUndef in safe function '{ctx_fn}'."), + // Variants that aren't tokenised. We shouldn't encounter these here. + Self::MatchKind(..) => { + unimplemented!("The unsafety of {self:?} cannot be determined in '{ctx_fn}'.") + } + Self::MatchSize(..) => { + unimplemented!("The unsafety of {self:?} cannot be determined in '{ctx_fn}'.") + } + } + } + + /// Determine if an expression is a `static_assert<...>` function call. 
+ pub fn is_static_assert(&self) -> bool { + match self { + Expression::FnCall(fn_call) => match fn_call.0.as_ref() { + Expression::Identifier(wild_string, _) => { + if let WildStringPart::String(function_name) = &wild_string.0[0] { + function_name.starts_with("static_assert") + } else { + false + } + } + _ => panic!("Badly defined function call: {fn_call:?}"), + }, + _ => false, + } + } + + /// Determine if an espression is a LLVM binding + pub fn is_llvm_link(&self) -> bool { + matches!(self, Expression::LLVMLink(_)) + } +} + +impl FromStr for Expression { + type Err = String; + + fn from_str(s: &str) -> Result { + lazy_static! { + static ref MACRO_RE: Regex = + Regex::new(r"^(?P[\w\d_]+)!\((?P.*?)\);?$").unwrap(); + } + + if s == "SvUndef" { + Ok(Expression::SvUndef) + } else if MACRO_RE.is_match(s) { + let c = MACRO_RE.captures(s).unwrap(); + let ex = c["ex"].to_string(); + let _: TokenStream = ex + .parse() + .map_err(|e| format!("could not parse macro call expression: {e:#?}"))?; + Ok(Expression::MacroCall(c["name"].to_string(), ex)) + } else { + let (s, id_type) = if let Some(varname) = s.strip_prefix('$') { + (varname, IdentifierType::Variable) + } else { + (s, IdentifierType::Symbol) + }; + let identifier = s.trim().parse()?; + Ok(Expression::Identifier(identifier, id_type)) + } + } +} + +impl From for Expression { + fn from(fn_call: FnCall) -> Self { + Expression::FnCall(fn_call) + } +} + +impl From for Expression { + fn from(ws: WildString) -> Self { + Expression::Identifier(ws, IdentifierType::Symbol) + } +} + +impl From<&Argument> for Expression { + fn from(a: &Argument) -> Self { + Expression::Identifier(a.name.to_owned(), IdentifierType::Variable) + } +} + +impl TryFrom<&StaticDefinition> for Expression { + type Error = String; + + fn try_from(sd: &StaticDefinition) -> Result { + match sd { + StaticDefinition::Constant(imm) => Ok(imm.into()), + StaticDefinition::Generic(t) => t.parse(), + } + } +} + +impl fmt::Display for Expression { + fn fmt(&self, 
f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Identifier(identifier, kind) => { + write!( + f, + "{}{identifier}", + matches!(kind, IdentifierType::Variable) + .then_some("$") + .unwrap_or_default() + ) + } + Self::MacroCall(name, expression) => { + write!(f, "{name}!({expression})") + } + _ => Err(fmt::Error), + } + } +} + +impl ToTokens for Expression { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Self::Let(LetVariant::Basic(var_name, exp)) => { + let var_ident = format_ident!("{}", var_name.to_string()); + tokens.append_all(quote! { let #var_ident = #exp }) + } + Self::Let(LetVariant::WithType(var_name, ty, exp)) => { + let var_ident = format_ident!("{}", var_name.to_string()); + tokens.append_all(quote! { let #var_ident: #ty = #exp }) + } + Self::Let(LetVariant::MutWithType(var_name, ty, exp)) => { + let var_ident = format_ident!("{}", var_name.to_string()); + tokens.append_all(quote! { let mut #var_ident: #ty = #exp }) + } + Self::Assign(var_name, exp) => { + /* If we are dereferencing a variable to assign a value \ + * the 'format_ident!' macro does not like the asterix */ + let var_name_str: &str; + + if let Some(ch) = var_name.chars().nth(0) { + /* Manually append the asterix and split out the rest of + * the variable name */ + if ch == '*' { + tokens.append(Punct::new('*', Spacing::Alone)); + var_name_str = &var_name[1..var_name.len()]; + } else { + var_name_str = var_name.as_str(); + } + } else { + /* Should not be reached as you cannot have a variable + * without a name */ + panic!("Invalid variable name, must be at least one character") + } + + let var_ident = format_ident!("{}", var_name_str); + tokens.append_all(quote! { #var_ident = #exp }) + } + Self::MacroCall(name, ex) => { + let name = format_ident!("{name}"); + let ex: TokenStream = ex.parse().unwrap(); + tokens.append_all(quote! 
{ #name!(#ex) }) + } + Self::FnCall(fn_call) => fn_call.to_tokens(tokens), + Self::MethodCall(exp, fn_name, args) => { + let fn_ident = format_ident!("{}", fn_name); + tokens.append_all(quote! { #exp.#fn_ident(#(#args),*) }) + } + Self::Identifier(identifier, _) => { + assert!( + !identifier.has_wildcards(), + "expression {self:#?} was not built before calling to_tokens" + ); + identifier + .to_string() + .parse::() + .unwrap_or_else(|_| panic!("invalid syntax: {self:?}")) + .to_tokens(tokens); + } + Self::IntConstant(n) => tokens.append(Literal::i32_unsuffixed(*n)), + Self::FloatConstant(n) => tokens.append(Literal::f32_unsuffixed(*n)), + Self::BoolConstant(true) => tokens.append(format_ident!("true")), + Self::BoolConstant(false) => tokens.append(format_ident!("false")), + Self::Array(vec) => tokens.append_all(quote! { [ #(#vec),* ] }), + Self::LLVMLink(link) => link.to_tokens(tokens), + Self::CastAs(ex, ty) => { + let ty: TokenStream = ty.parse().expect("invalid syntax"); + tokens.append_all(quote! { #ex as #ty }) + } + Self::SvUndef => tokens.append_all(quote! { simd_reinterpret(()) }), + Self::Multiply(lhs, rhs) => tokens.append_all(quote! { #lhs * #rhs }), + Self::Xor(lhs, rhs) => tokens.append_all(quote! { #lhs ^ #rhs }), + Self::Type(ty) => ty.to_tokens(tokens), + _ => unreachable!("{self:?} cannot be converted to tokens."), + } + } +} + +impl Serialize for Expression { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Self::IntConstant(v) => serializer.serialize_i32(*v), + Self::FloatConstant(v) => serializer.serialize_f32(*v), + Self::BoolConstant(v) => serializer.serialize_bool(*v), + Self::Identifier(..) => serializer.serialize_str(&self.to_string()), + Self::MacroCall(..) 
=> serializer.serialize_str(&self.to_string()), + _ => Expression::serialize(self, serializer), + } + } +} + +impl<'de> Deserialize<'de> for Expression { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct CustomExpressionVisitor; + + impl<'de> Visitor<'de> for CustomExpressionVisitor { + type Value = Expression; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("integer, float, boolean, string or map") + } + + fn visit_bool(self, v: bool) -> Result + where + E: de::Error, + { + Ok(Expression::BoolConstant(v)) + } + + fn visit_i64(self, v: i64) -> Result + where + E: de::Error, + { + Ok(Expression::IntConstant(v as i32)) + } + + fn visit_u64(self, v: u64) -> Result + where + E: de::Error, + { + Ok(Expression::IntConstant(v as i32)) + } + + fn visit_f64(self, v: f64) -> Result + where + E: de::Error, + { + Ok(Expression::FloatConstant(v as f32)) + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + FromStr::from_str(value).map_err(de::Error::custom) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: de::SeqAccess<'de>, + { + let arr = std::iter::from_fn(|| seq.next_element::().transpose()) + .try_collect()?; + Ok(Expression::Array(arr)) + } + + fn visit_map(self, map: M) -> Result + where + M: MapAccess<'de>, + { + // `MapAccessDeserializer` is a wrapper that turns a `MapAccess` + // into a `Deserializer`, allowing it to be used as the input to T's + // `Deserialize` implementation. T then deserializes itself using + // the entries from the map visitor. 
+ Expression::deserialize(de::value::MapAccessDeserializer::new(map)) + } + } + + deserializer.deserialize_any(CustomExpressionVisitor) + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs b/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs new file mode 100644 index 000000000000..26c156ae178a --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs @@ -0,0 +1,231 @@ +use std::fmt::{self}; + +/* This file is acting as a bridge between the old neon types and how they + * have a fairly complex way of picking suffixes and the new world. If possible + * it would be good to clean this up. At least it is self contained and the + * logic simple */ +use crate::typekinds::{BaseType, BaseTypeKind, TypeKind, VectorType}; +use serde::{Deserialize, Serialize}; + +use std::str::FromStr; + +#[allow(clippy::enum_variant_names)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Deserialize, Serialize)] +pub enum SuffixKind { + Normal, + Base, + NoQ, + NSuffix, + NoQNSuffix, + DupNox, + Dup, + /* Get the number of lanes or panic if there are not any Lanes */ + Lane, + Rot270, + Rot270Lane, + Rot270LaneQ, + Rot180, + Rot180Lane, + Rot180LaneQ, + Rot90, + Rot90Lane, + Rot90LaneQ, + /* Force the type to be unsigned */ + Unsigned, + Tuple, + NoX, + BaseByteSize, + LaneNoX, + LaneQNoX, +} + +pub fn type_to_size(str_type: &str) -> i32 { + match str_type { + "int8x8_t" | "int8x16_t" | "i8" | "s8" | "uint8x8_t" | "uint8x16_t" | "u8" + | "poly8x8_t" | "poly8x16_t" => 8, + "int16x4_t" | "int16x8_t" | "i16" | "s16" | "uint16x4_t" | "uint16x8_t" | "u16" + | "float16x4_t" | "float16x8_t" | "_f16" | "poly16x4_t" | "poly16x8_t" => 16, + "int32x2_t" | "int32x4_t" | "i32" | "s32" | "uint32x2_t" | "uint32x4_t" | "u32" + | "float32x2_t" | "float32x4_t" | "f32" => 32, + "int64x1_t" | "int64x2_t" | "i64" | "s64" | "uint64x1_t" | "uint64x2_t" | "u64" + | "float64x1_t" | "float64x2_t" | "f64" | "poly64x1_t" | "poly64x2_t" | "p64" => 64, + "p128" => 128, + _ 
=> panic!("unknown type: {str_type}"), + } +} + +fn neon_get_base_and_char(ty: &VectorType) -> (u32, char, bool) { + let lanes = ty.lanes(); + match ty.base_type() { + BaseType::Sized(BaseTypeKind::Float, size) => (*size, 'f', *size * lanes == 128), + BaseType::Sized(BaseTypeKind::Int, size) => (*size, 's', *size * lanes == 128), + BaseType::Sized(BaseTypeKind::UInt, size) => (*size, 'u', *size * lanes == 128), + BaseType::Sized(BaseTypeKind::Poly, size) => (*size, 'p', *size * lanes == 128), + _ => panic!("Unhandled {ty:?}"), + } +} + +/* @TODO + * for the chained enum types we can safely delete them as we can index the + * types array */ +pub fn make_neon_suffix(type_kind: TypeKind, suffix_kind: SuffixKind) -> String { + match type_kind { + TypeKind::Vector(ty) => { + let tuple_size = ty.tuple_size().map_or(0, |t| t.to_int()); + let (base_size, prefix_char, requires_q) = neon_get_base_and_char(&ty); + let prefix_q = if requires_q { "q" } else { "" }; + let lanes = ty.lanes(); + match suffix_kind { + SuffixKind::Normal => { + let mut str_suffix: String = format!("{prefix_q}_{prefix_char}{base_size}"); + if tuple_size > 0 { + str_suffix.push_str("_x"); + str_suffix.push_str(tuple_size.to_string().as_str()); + } + str_suffix + } + SuffixKind::NSuffix => { + format!("{prefix_q}_n_{prefix_char}{base_size}") + } + + SuffixKind::NoQ => format!("_{prefix_char}{base_size}"), + SuffixKind::NoQNSuffix => format!("_n{prefix_char}{base_size}"), + + SuffixKind::Unsigned => { + let t = type_kind.to_string(); + if t.starts_with("u") { + return t; + } + format!("u{t}") + } + SuffixKind::Lane => { + if lanes == 0 { + panic!("type {type_kind} has no lanes!") + } else { + format!("{lanes}") + } + } + SuffixKind::Tuple => { + if tuple_size == 0 { + panic!("type {type_kind} has no lanes!") + } else { + format!("{tuple_size}") + } + } + SuffixKind::Base => base_size.to_string(), + SuffixKind::NoX => { + format!("{prefix_q}_{prefix_char}{base_size}") + } + SuffixKind::Dup => { + let mut 
str_suffix: String = format!("{prefix_q}_dup_{prefix_char}{base_size}"); + if tuple_size > 0 { + str_suffix.push_str("_x"); + str_suffix.push_str(tuple_size.to_string().as_str()); + } + str_suffix + } + SuffixKind::DupNox => { + format!("{prefix_q}_dup_{prefix_char}{base_size}") + } + SuffixKind::LaneNoX => { + format!("{prefix_q}_lane_{prefix_char}{base_size}") + } + SuffixKind::LaneQNoX => { + format!("{prefix_q}_laneq_{prefix_char}{base_size}") + } + SuffixKind::Rot270 => { + format!("{prefix_q}_rot270_{prefix_char}{base_size}") + } + SuffixKind::Rot270Lane => { + format!("{prefix_q}_rot270_lane_{prefix_char}{base_size}") + } + SuffixKind::Rot270LaneQ => { + format!("{prefix_q}_rot270_laneq_{prefix_char}{base_size}") + } + SuffixKind::Rot180 => { + format!("{prefix_q}_rot180_{prefix_char}{base_size}") + } + SuffixKind::Rot180Lane => { + format!("{prefix_q}_rot180_lane_{prefix_char}{base_size}") + } + SuffixKind::Rot180LaneQ => { + format!("{prefix_q}_rot180_laneq_{prefix_char}{base_size}") + } + SuffixKind::Rot90 => { + format!("{prefix_q}_rot90_{prefix_char}{base_size}") + } + SuffixKind::Rot90Lane => { + format!("{prefix_q}_rot90_lane_{prefix_char}{base_size}") + } + SuffixKind::Rot90LaneQ => { + format!("{prefix_q}_rot90_laneq_{prefix_char}{base_size}") + } + SuffixKind::BaseByteSize => format!("{}", base_size / 8), + } + } + _ => panic!("Cannot only make neon vector types suffixed"), + } +} + +impl FromStr for SuffixKind { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "no" => Ok(SuffixKind::Normal), + "noq" => Ok(SuffixKind::NoQ), + "N" => Ok(SuffixKind::NSuffix), + "noq_N" => Ok(SuffixKind::NoQNSuffix), + "dup_nox" => Ok(SuffixKind::DupNox), + "dup" => Ok(SuffixKind::Dup), + "lane" => Ok(SuffixKind::Lane), + "base" => Ok(SuffixKind::Base), + "tuple" => Ok(SuffixKind::Tuple), + "rot270" => Ok(SuffixKind::Rot270), + "rot270_lane" => Ok(SuffixKind::Rot270Lane), + "rot270_laneq" => Ok(SuffixKind::Rot270LaneQ), + "rot90" => 
Ok(SuffixKind::Rot90), + "rot90_lane" => Ok(SuffixKind::Rot90Lane), + "rot90_laneq" => Ok(SuffixKind::Rot90LaneQ), + "rot180" => Ok(SuffixKind::Rot180), + "rot180_lane" => Ok(SuffixKind::Rot180LaneQ), + "rot180_laneq" => Ok(SuffixKind::Rot180LaneQ), + "u" => Ok(SuffixKind::Unsigned), + "nox" => Ok(SuffixKind::NoX), + "base_byte_size" => Ok(SuffixKind::BaseByteSize), + "lane_nox" => Ok(SuffixKind::LaneNoX), + "laneq_nox" => Ok(SuffixKind::LaneQNoX), + _ => Err(format!("unknown suffix type: {s}")), + } + } +} + +impl fmt::Display for SuffixKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SuffixKind::Normal => write!(f, "normal"), + SuffixKind::NoQ => write!(f, "NoQ"), + SuffixKind::NSuffix => write!(f, "NSuffix"), + SuffixKind::NoQNSuffix => write!(f, "NoQNSuffix"), + SuffixKind::DupNox => write!(f, "DupNox"), + SuffixKind::Dup => write!(f, "Dup",), + SuffixKind::Lane => write!(f, "Lane"), + SuffixKind::LaneNoX => write!(f, "LaneNoX"), + SuffixKind::LaneQNoX => write!(f, "LaneQNoX"), + SuffixKind::Base => write!(f, "Base"), + SuffixKind::Rot270 => write!(f, "Rot270",), + SuffixKind::Rot270Lane => write!(f, "Rot270Lane"), + SuffixKind::Rot270LaneQ => write!(f, "Rot270LaneQ"), + SuffixKind::Rot90 => write!(f, "Rot90",), + SuffixKind::Rot90Lane => write!(f, "Rot90Lane"), + SuffixKind::Rot90LaneQ => write!(f, "Rot90LaneQ"), + SuffixKind::Rot180 => write!(f, "Rot180",), + SuffixKind::Rot180Lane => write!(f, "Rot180Lane"), + SuffixKind::Rot180LaneQ => write!(f, "Rot180LaneQ"), + SuffixKind::Unsigned => write!(f, "Unsigned"), + SuffixKind::Tuple => write!(f, "Tuple"), + SuffixKind::NoX => write!(f, "NoX"), + SuffixKind::BaseByteSize => write!(f, "BaseByteSize"), + } + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/input.rs b/library/stdarch/crates/stdarch-gen-arm/src/input.rs new file mode 100644 index 000000000000..adefbf3215b5 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/input.rs @@ -0,0 +1,433 
@@ +use itertools::Itertools; +use serde::{Deserialize, Deserializer, Serialize, de}; + +use crate::{ + context::{self, GlobalContext}, + intrinsic::Intrinsic, + predicate_forms::{PredicateForm, PredicationMask, PredicationMethods}, + typekinds::TypeKind, + wildstring::WildString, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum InputType { + /// PredicateForm variant argument + #[serde(skip)] // Predicate forms have their own dedicated deserialization field. Skip. + PredicateForm(PredicateForm), + /// Operand from which to generate an N variant + #[serde(skip)] + NVariantOp(Option), + /// TypeKind variant argument + Type(TypeKind), +} + +impl InputType { + /// Optionally unwraps as a PredicateForm. + pub fn predicate_form(&self) -> Option<&PredicateForm> { + match self { + InputType::PredicateForm(pf) => Some(pf), + _ => None, + } + } + + /// Optionally unwraps as a mutable PredicateForm + pub fn predicate_form_mut(&mut self) -> Option<&mut PredicateForm> { + match self { + InputType::PredicateForm(pf) => Some(pf), + _ => None, + } + } + + /// Optionally unwraps as a TypeKind. 
+ pub fn typekind(&self) -> Option<&TypeKind> { + match self { + InputType::Type(ty) => Some(ty), + _ => None, + } + } + + /// Optionally unwraps as a NVariantOp + pub fn n_variant_op(&self) -> Option<&WildString> { + match self { + InputType::NVariantOp(Some(op)) => Some(op), + _ => None, + } + } +} + +impl PartialOrd for InputType { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for InputType { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + use std::cmp::Ordering::*; + + match (self, other) { + (InputType::PredicateForm(pf1), InputType::PredicateForm(pf2)) => pf1.cmp(pf2), + (InputType::Type(ty1), InputType::Type(ty2)) => ty1.cmp(ty2), + + (InputType::NVariantOp(None), InputType::NVariantOp(Some(..))) => Less, + (InputType::NVariantOp(Some(..)), InputType::NVariantOp(None)) => Greater, + (InputType::NVariantOp(_), InputType::NVariantOp(_)) => Equal, + + (InputType::Type(..), InputType::PredicateForm(..)) => Less, + (InputType::PredicateForm(..), InputType::Type(..)) => Greater, + + (InputType::Type(..), InputType::NVariantOp(..)) => Less, + (InputType::NVariantOp(..), InputType::Type(..)) => Greater, + + (InputType::PredicateForm(..), InputType::NVariantOp(..)) => Less, + (InputType::NVariantOp(..), InputType::PredicateForm(..)) => Greater, + } + } +} + +mod many_or_one { + use serde::{Deserialize, Serialize, de::Deserializer, ser::Serializer}; + + pub fn serialize(vec: &Vec, serializer: S) -> Result + where + T: Serialize, + S: Serializer, + { + if vec.len() == 1 { + vec.first().unwrap().serialize(serializer) + } else { + vec.serialize(serializer) + } + } + + pub fn deserialize<'de, T, D>(deserializer: D) -> Result, D::Error> + where + T: Deserialize<'de>, + D: Deserializer<'de>, + { + #[derive(Debug, Clone, Serialize, Deserialize)] + #[serde(untagged)] + enum ManyOrOne { + Many(Vec), + One(T), + } + + match ManyOrOne::deserialize(deserializer)? 
{ + ManyOrOne::Many(vec) => Ok(vec), + ManyOrOne::One(val) => Ok(vec![val]), + } + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub struct InputSet(#[serde(with = "many_or_one")] Vec); + +impl InputSet { + pub fn get(&self, idx: usize) -> Option<&InputType> { + self.0.get(idx) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn iter(&self) -> impl Iterator + '_ { + self.0.iter() + } + + pub fn iter_mut(&mut self) -> impl Iterator + '_ { + self.0.iter_mut() + } + + pub fn into_iter(self) -> impl Iterator + Clone { + self.0.into_iter() + } + + pub fn types_len(&self) -> usize { + self.iter().filter_map(|arg| arg.typekind()).count() + } + + pub fn typekind(&self, idx: Option) -> Option { + let types_len = self.types_len(); + self.get(idx.unwrap_or(0)).and_then(move |arg: &InputType| { + if (idx.is_none() && types_len != 1) || (idx.is_some() && types_len == 1) { + None + } else { + arg.typekind().cloned() + } + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct InputSetEntry(#[serde(with = "many_or_one")] Vec); + +impl InputSetEntry { + pub fn new(input: Vec) -> Self { + Self(input) + } + + pub fn get(&self, idx: usize) -> Option<&InputSet> { + self.0.get(idx) + } +} + +fn validate_types<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let v: Vec = Vec::deserialize(deserializer)?; + + let mut it = v.iter(); + if let Some(first) = it.next() { + it.try_fold(first, |last, cur| { + if last.0.len() == cur.0.len() { + Ok(cur) + } else { + Err("the length of the InputSets and the product lists must match".to_string()) + } + }) + .map_err(de::Error::custom)?; + } + + Ok(v) +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct IntrinsicInput { + #[serde(default)] + #[serde(deserialize_with = "validate_types")] + pub types: Vec, + + #[serde(flatten)] + pub predication_methods: PredicationMethods, + + /// Generates 
a _n variant where the specified operand is a primitive type + /// that requires conversion to an SVE one. The `{_n}` wildcard is required + /// in the intrinsic's name, otherwise an error will be thrown. + #[serde(default)] + pub n_variant_op: WildString, +} + +impl IntrinsicInput { + /// Extracts all the possible variants as an iterator. + pub fn variants( + &self, + intrinsic: &Intrinsic, + ) -> context::Result + '_> { + let mut top_product = vec![]; + + if !self.types.is_empty() { + top_product.push( + self.types + .iter() + .flat_map(|ty_in| { + ty_in + .0 + .iter() + .map(|v| v.clone().into_iter()) + .multi_cartesian_product() + }) + .collect_vec(), + ) + } + + if let Ok(mask) = PredicationMask::try_from(&intrinsic.signature.name) { + top_product.push( + PredicateForm::compile_list(&mask, &self.predication_methods)? + .into_iter() + .map(|pf| vec![InputType::PredicateForm(pf)]) + .collect_vec(), + ) + } + + if !self.n_variant_op.is_empty() { + top_product.push(vec![ + vec![InputType::NVariantOp(None)], + vec![InputType::NVariantOp(Some(self.n_variant_op.to_owned()))], + ]) + } + + let it = top_product + .into_iter() + .map(|v| v.into_iter()) + .multi_cartesian_product() + .filter(|set| !set.is_empty()) + .map(|set| InputSet(set.into_iter().flatten().collect_vec())); + Ok(it) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GeneratorInput { + #[serde(flatten)] + pub ctx: GlobalContext, + pub intrinsics: Vec, +} + +#[cfg(test)] +mod tests { + use crate::{ + input::*, + predicate_forms::{DontCareMethod, ZeroingMethod}, + }; + + #[test] + fn test_empty() { + let str = r#"types: []"#; + let input: IntrinsicInput = serde_yaml::from_str(str).expect("failed to parse"); + let mut variants = input.variants(&Intrinsic::default()).unwrap().into_iter(); + assert_eq!(variants.next(), None); + } + + #[test] + fn test_product() { + let str = r#"types: +- [f64, f32] +- [i64, [f64, f32]] +"#; + let input: IntrinsicInput = 
serde_yaml::from_str(str).expect("failed to parse"); + let mut intrinsic = Intrinsic::default(); + intrinsic.signature.name = "test_intrinsic{_mx}".parse().unwrap(); + let mut variants = input.variants(&intrinsic).unwrap().into_iter(); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("f64".parse().unwrap()), + InputType::Type("f32".parse().unwrap()), + InputType::PredicateForm(PredicateForm::Merging), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("f64".parse().unwrap()), + InputType::Type("f32".parse().unwrap()), + InputType::PredicateForm(PredicateForm::DontCare(DontCareMethod::AsMerging)), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::Type("f64".parse().unwrap()), + InputType::PredicateForm(PredicateForm::Merging), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::Type("f64".parse().unwrap()), + InputType::PredicateForm(PredicateForm::DontCare(DontCareMethod::AsMerging)), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::Type("f32".parse().unwrap()), + InputType::PredicateForm(PredicateForm::Merging), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::Type("f32".parse().unwrap()), + InputType::PredicateForm(PredicateForm::DontCare(DontCareMethod::AsMerging)), + ])), + ); + assert_eq!(variants.next(), None); + } + + #[test] + fn test_n_variant() { + let str = r#"types: +- [f64, f32] +n_variant_op: op2 +"#; + let input: IntrinsicInput = serde_yaml::from_str(str).expect("failed to parse"); + let mut variants = input.variants(&Intrinsic::default()).unwrap().into_iter(); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("f64".parse().unwrap()), + InputType::Type("f32".parse().unwrap()), + 
InputType::NVariantOp(None), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("f64".parse().unwrap()), + InputType::Type("f32".parse().unwrap()), + InputType::NVariantOp(Some("op2".parse().unwrap())), + ])) + ); + assert_eq!(variants.next(), None) + } + + #[test] + fn test_invalid_length() { + let str = r#"types: [i32, [[u64], [u32]]]"#; + serde_yaml::from_str::(str).expect_err("failure expected"); + } + + #[test] + fn test_invalid_predication() { + let str = "types: []"; + let input: IntrinsicInput = serde_yaml::from_str(str).expect("failed to parse"); + let mut intrinsic = Intrinsic::default(); + intrinsic.signature.name = "test_intrinsic{_mxz}".parse().unwrap(); + input + .variants(&intrinsic) + .map(|v| v.collect_vec()) + .expect_err("failure expected"); + } + + #[test] + fn test_invalid_predication_mask() { + "test_intrinsic{_mxy}" + .parse::() + .expect_err("failure expected"); + "test_intrinsic{_}" + .parse::() + .expect_err("failure expected"); + } + + #[test] + fn test_zeroing_predication() { + let str = r#"types: [i64] +zeroing_method: { drop: inactive }"#; + let input: IntrinsicInput = serde_yaml::from_str(str).expect("failed to parse"); + let mut intrinsic = Intrinsic::default(); + intrinsic.signature.name = "test_intrinsic{_mxz}".parse().unwrap(); + let mut variants = input.variants(&intrinsic).unwrap(); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::PredicateForm(PredicateForm::Merging), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::PredicateForm(PredicateForm::DontCare(DontCareMethod::AsZeroing)), + ])) + ); + assert_eq!( + variants.next(), + Some(InputSet(vec![ + InputType::Type("i64".parse().unwrap()), + InputType::PredicateForm(PredicateForm::Zeroing(ZeroingMethod::Drop { + drop: "inactive".parse().unwrap() + })), + ])) + ); + assert_eq!(variants.next(), None) + } +} diff 
--git a/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs new file mode 100644 index 000000000000..efaa9e141889 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs @@ -0,0 +1,1851 @@ +use itertools::Itertools; +use proc_macro2::{Delimiter, Group, Punct, Spacing, TokenStream}; +use quote::{ToTokens, TokenStreamExt, format_ident, quote}; +use serde::{Deserialize, Serialize}; +use serde_with::{DeserializeFromStr, SerializeDisplay}; +use std::collections::{HashMap, HashSet}; +use std::fmt::{self}; +use std::num::ParseIntError; +use std::ops::RangeInclusive; +use std::str::FromStr; + +use crate::assert_instr::InstructionAssertionsForBaseType; +use crate::big_endian::{ + create_assigned_shuffle_call, create_let_variable, create_mut_let_variable, + create_shuffle_call, create_symbol_identifier, make_variable_mutable, type_has_tuple, +}; +use crate::context::{GlobalContext, GroupContext}; +use crate::input::{InputSet, InputSetEntry}; +use crate::predicate_forms::{DontCareMethod, PredicateForm, PredicationMask, ZeroingMethod}; +use crate::{ + assert_instr::InstructionAssertionMethod, + context::{self, ArchitectureSettings, Context, LocalContext, VariableType}, + expression::{Expression, FnCall, IdentifierType}, + fn_suffix::{SuffixKind, type_to_size}, + input::IntrinsicInput, + matching::{KindMatchable, SizeMatchable}, + typekinds::*, + wildcards::Wildcard, + wildstring::WildString, +}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum SubstitutionType { + MatchSize(SizeMatchable), + MatchKind(KindMatchable), +} + +impl SubstitutionType { + pub fn get(&mut self, ctx: &LocalContext) -> context::Result { + match self { + Self::MatchSize(smws) => { + smws.perform_match(ctx)?; + Ok(smws.as_ref().clone()) + } + Self::MatchKind(kmws) => { + kmws.perform_match(ctx)?; + Ok(kmws.as_ref().clone()) + } + } + } +} + +/// Mutability level +#[derive(Debug, Clone, Copy, 
PartialEq, Eq, Hash)] +pub enum AccessLevel { + /// Immutable + R, + /// Mutable + RW, +} + +/// Function signature argument. +/// +/// Prepend the `mut` keyword for a mutable argument. Separate argument name +/// and type with a semicolon `:`. Usage examples: +/// - Mutable argument: `mut arg1: *u64` +/// - Immutable argument: `arg2: u32` +#[derive(Debug, Clone, SerializeDisplay, DeserializeFromStr)] +pub struct Argument { + /// Argument name + pub name: WildString, + /// Mutability level + pub rw: AccessLevel, + /// Argument type + pub kind: TypeKind, +} + +impl Argument { + pub fn populate_variables(&self, vars: &mut HashMap) { + vars.insert( + self.name.to_string(), + (self.kind.clone(), VariableType::Argument), + ); + } +} + +impl FromStr for Argument { + type Err = String; + + fn from_str(s: &str) -> Result { + let mut it = s.splitn(2, ':').map(::trim); + if let Some(mut lhs) = it.next().map(|s| s.split_whitespace()) { + let lhs_len = lhs.clone().count(); + match (lhs_len, lhs.next(), it.next()) { + (2, Some("mut"), Some(kind)) => Ok(Argument { + name: lhs.next().unwrap().parse()?, + rw: AccessLevel::RW, + kind: kind.parse()?, + }), + (2, Some(ident), _) => Err(format!("invalid {ident:#?} keyword")), + (1, Some(name), Some(kind)) => Ok(Argument { + name: name.parse()?, + rw: AccessLevel::R, + kind: kind.parse()?, + }), + _ => Err(format!("invalid argument `{s}` provided")), + } + } else { + Err(format!("invalid argument `{s}` provided")) + } + } +} + +impl fmt::Display for Argument { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let AccessLevel::RW = &self.rw { + write!(f, "mut ")?; + } + + write!(f, "{}: {}", self.name, self.kind) + } +} + +impl ToTokens for Argument { + fn to_tokens(&self, tokens: &mut TokenStream) { + if let AccessLevel::RW = &self.rw { + tokens.append(format_ident!("mut")) + } + + let (name, kind) = (format_ident!("{}", self.name.to_string()), &self.kind); + tokens.append_all(quote! 
{ #name: #kind }) + } +} + +/// Static definition part of the signature. It may evaluate to a constant +/// expression with e.g. `const imm: u64`, or a generic `T: Into`. +#[derive(Debug, Clone, SerializeDisplay, DeserializeFromStr)] +pub enum StaticDefinition { + /// Constant expression + Constant(Argument), + /// Generic type + Generic(String), +} + +impl StaticDefinition { + pub fn as_variable(&self) -> Option<(String, (TypeKind, VariableType))> { + match self { + StaticDefinition::Constant(arg) => Some(( + arg.name.to_string(), + (arg.kind.clone(), VariableType::Argument), + )), + StaticDefinition::Generic(..) => None, + } + } +} + +impl FromStr for StaticDefinition { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.trim() { + s if s.starts_with("const ") => Ok(StaticDefinition::Constant(s[6..].trim().parse()?)), + s => Ok(StaticDefinition::Generic(s.to_string())), + } + } +} + +impl fmt::Display for StaticDefinition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + StaticDefinition::Constant(arg) => write!(f, "const {arg}"), + StaticDefinition::Generic(generic) => write!(f, "{generic}"), + } + } +} + +impl ToTokens for StaticDefinition { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(match self { + StaticDefinition::Constant(arg) => quote! { const #arg }, + StaticDefinition::Generic(generic) => { + let generic: TokenStream = generic.parse().expect("invalid Rust code"); + quote! { #generic } + } + }) + } +} + +/// Function constraints +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum Constraint { + /// Asserts that the given variable equals to any of the given integer values + AnyI32 { + variable: String, + any_values: Vec, + }, + /// WildString version of RangeI32. If the string values given for the range + /// are valid, this gets built into a RangeI32. 
+ RangeWildstring { + variable: String, + range: (WildString, WildString), + }, + /// Asserts that the given variable's value falls in the specified range + RangeI32 { + variable: String, + range: SizeMatchable>, + }, + /// Asserts that the number of elements/lanes does not exceed the 2048-bit SVE constraint + SVEMaxElems { + variable: String, + sve_max_elems_type: TypeKind, + }, + /// Asserts that the number of elements/lanes does not exceed the 128-bit register constraint + VecMaxElems { + variable: String, + vec_max_elems_type: TypeKind, + }, +} + +impl Constraint { + fn variable(&self) -> &str { + match self { + Constraint::AnyI32 { variable, .. } + | Constraint::RangeWildstring { variable, .. } + | Constraint::RangeI32 { variable, .. } + | Constraint::SVEMaxElems { variable, .. } + | Constraint::VecMaxElems { variable, .. } => variable, + } + } + pub fn build(&mut self, ctx: &Context) -> context::Result { + if let Self::RangeWildstring { + variable, + range: (min, max), + } = self + { + min.build_acle(ctx.local)?; + max.build_acle(ctx.local)?; + let min = min.to_string(); + let max = max.to_string(); + let min: i32 = min + .parse() + .map_err(|_| format!("the minimum value `{min}` is not a valid number"))?; + let max: i32 = max + .parse() + .or_else(|_| Ok(type_to_size(max.as_str()))) + .map_err(|_: ParseIntError| { + format!("the maximum value `{max}` is not a valid number") + })?; + *self = Self::RangeI32 { + variable: variable.to_owned(), + range: SizeMatchable::Matched(RangeInclusive::new(min, max)), + } + } + + #[allow(clippy::collapsible_if)] + if let Self::SVEMaxElems { + sve_max_elems_type: ty, + .. + } + | Self::VecMaxElems { + vec_max_elems_type: ty, + .. + } = self + { + if let Some(w) = ty.wildcard() { + ty.populate_wildcard(ctx.local.provide_type_wildcard(w)?)?; + } + } + + if let Self::RangeI32 { range, .. 
} = self { + range.perform_match(ctx.local)?; + } + + let variable = self.variable(); + ctx.local + .variables + .contains_key(variable) + .then_some(()) + .ok_or_else(|| format!("cannot build constraint, could not find variable {variable}")) + } +} + +/// Function signature +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Signature { + /// Function name + pub name: WildString, + /// List of function arguments, leave unset or empty for no arguments + pub arguments: Vec, + + /// Function return type, leave unset for void + pub return_type: Option, + + /// For some neon intrinsics we want to modify the suffix of the function name + pub suffix_type: Option, + + /// List of static definitions, leave unset of empty if not required + #[serde(default)] + pub static_defs: Vec, + + /// **Internal use only.** + /// Condition for which the ultimate function is specific to predicates. + #[serde(skip)] + pub is_predicate_specific: bool, + + /// **Internal use only.** + /// Setting this property will trigger the signature builder to convert any `svbool*_t` to `svbool_t` in the input and output. 
+ #[serde(skip)] + pub predicate_needs_conversion: bool, +} + +impl Signature { + pub fn drop_argument(&mut self, arg_name: &WildString) -> Result<(), String> { + if let Some(idx) = self + .arguments + .iter() + .position(|arg| arg.name.to_string() == arg_name.to_string()) + { + self.arguments.remove(idx); + Ok(()) + } else { + Err(format!("no argument {arg_name} found to drop")) + } + } + + pub fn build(&mut self, ctx: &LocalContext) -> context::Result { + if self.name_has_neon_suffix() { + self.name.build_neon_intrinsic_signature(ctx)?; + } else { + self.name.build_acle(ctx)?; + } + + #[allow(clippy::collapsible_if)] + if let Some(ref mut return_type) = self.return_type { + if let Some(w) = return_type.clone().wildcard() { + return_type.populate_wildcard(ctx.provide_type_wildcard(w)?)?; + } + } + + self.arguments + .iter_mut() + .try_for_each(|arg| arg.name.build_acle(ctx))?; + + self.arguments + .iter_mut() + .filter_map(|arg| { + arg.kind + .clone() + .wildcard() + .map(|w| (&mut arg.kind, w.clone())) + }) + .try_for_each(|(ty, w)| ty.populate_wildcard(ctx.provide_type_wildcard(&w)?)) + } + + pub fn fn_name(&self) -> WildString { + self.name.replace(['[', ']'], "") + } + + pub fn doc_name(&self) -> String { + self.name.to_string() + } + + fn name_has_neon_suffix(&self) -> bool { + for part in self.name.wildcards() { + let has_suffix = match part { + Wildcard::NEONType(_, _, suffix_type) => suffix_type.is_some(), + _ => false, + }; + + if has_suffix { + return true; + } + } + false + } +} + +impl ToTokens for Signature { + fn to_tokens(&self, tokens: &mut TokenStream) { + let name_ident = format_ident!("{}", self.fn_name().to_string()); + let arguments = self + .arguments + .clone() + .into_iter() + .map(|mut arg| { + if arg.kind.vector().is_some_and(|ty| ty.base_type().is_bool()) + && self.predicate_needs_conversion + { + arg.kind = TypeKind::Vector(VectorType::make_predicate_from_bitsize(8)) + } + arg + }) + .collect_vec(); + let static_defs = 
&self.static_defs; + tokens.append_all(quote! { fn #name_ident<#(#static_defs),*>(#(#arguments),*) }); + + if let Some(ref return_type) = self.return_type { + if return_type + .vector() + .is_some_and(|ty| ty.base_type().is_bool()) + && self.predicate_needs_conversion + { + tokens.append_all(quote! { -> svbool_t }) + } else { + tokens.append_all(quote! { -> #return_type }) + } + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LLVMLinkAttribute { + /// Either one architecture or a comma separated list of architectures with NO spaces + pub arch: String, + pub link: WildString, +} + +impl ToTokens for LLVMLinkAttribute { + fn to_tokens(&self, tokens: &mut TokenStream) { + let LLVMLinkAttribute { arch, link } = self; + let link = link.to_string(); + + // For example: + // + // #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i16")] + // + // #[cfg_attr( + // any(target_arch = "aarch64", target_arch = "arm64ec"), + // link_name = "llvm.aarch64.neon.suqadd.i32" + // )] + + let mut cfg_attr_cond = TokenStream::new(); + let mut single_arch = true; + for arch in arch.split(',') { + if !cfg_attr_cond.is_empty() { + single_arch = false; + cfg_attr_cond.append(Punct::new(',', Spacing::Alone)); + } + cfg_attr_cond.append_all(quote! { target_arch = #arch }); + } + assert!(!cfg_attr_cond.is_empty()); + if !single_arch { + cfg_attr_cond = quote! { any( #cfg_attr_cond ) }; + } + tokens.append_all(quote! { + #[cfg_attr(#cfg_attr_cond, link_name = #link)] + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LLVMLink { + /// LLVM link function name without namespace and types, + /// e.g. 
`st1` in `llvm.aarch64.sve.st1.nxv4i32` + pub name: WildString, + + /// LLVM link signature arguments, leave unset if it inherits from intrinsic's signature + pub arguments: Option>, + /// LLVM link signature return type, leave unset if it inherits from intrinsic's signature + pub return_type: Option, + + /// **This will be set automatically if not set** + /// Attribute LLVM links for the function. First element is the architecture it targets, + /// second element is the LLVM link itself. + pub links: Option>, + + /// **Internal use only. Do not set.** + /// Generated signature from these `arguments` and/or `return_type` if set, and the intrinsic's signature. + #[serde(skip)] + pub signature: Option>, +} + +impl LLVMLink { + pub fn resolve(&self, cfg: &ArchitectureSettings) -> String { + if self.name.starts_with("llvm") { + self.name.to_string() + } else { + format!("{}.{}", cfg.llvm_link_prefix, self.name) + } + } + + pub fn build_and_save(&mut self, ctx: &mut Context) -> context::Result { + self.build(ctx)?; + + // Save LLVM link to the group context + ctx.global.arch_cfgs.iter().for_each(|cfg| { + ctx.group + .links + .insert(self.resolve(cfg), ctx.local.input.clone()); + }); + + Ok(()) + } + + pub fn build(&mut self, ctx: &mut Context) -> context::Result { + let mut sig_name = ctx.local.signature.name.clone(); + sig_name.prepend_str("_"); + + let argv = self + .arguments + .clone() + .unwrap_or_else(|| ctx.local.signature.arguments.clone()); + + let mut sig = Signature { + name: sig_name, + arguments: argv, + return_type: self + .return_type + .clone() + .or_else(|| ctx.local.signature.return_type.clone()), + suffix_type: None, + static_defs: vec![], + is_predicate_specific: ctx.local.signature.is_predicate_specific, + predicate_needs_conversion: false, + }; + + sig.build(ctx.local)?; + self.name.build(ctx.local, TypeRepr::LLVMMachine)?; + + // Add link function name to context + ctx.local + .substitutions + .insert(Wildcard::LLVMLink, 
sig.fn_name().to_string()); + + self.signature = Some(Box::new(sig)); + + if let Some(ref mut links) = self.links { + links.iter_mut().for_each(|ele| { + ele.link + .build(ctx.local, TypeRepr::LLVMMachine) + .expect("Failed to transform to LLVMMachine representation"); + }); + } else { + self.links = Some( + ctx.global + .arch_cfgs + .iter() + .map(|cfg| LLVMLinkAttribute { + arch: cfg.arch_name.to_owned(), + link: self.resolve(cfg).into(), + }) + .collect_vec(), + ); + } + + Ok(()) + } + + /// Alters all the unsigned types from the signature. This is required where + /// a signed and unsigned variant require the same binding to an exposed + /// LLVM instrinsic. + pub fn sanitise_uints(&mut self) { + let transform = |tk: &mut TypeKind| { + if let Some(BaseType::Sized(BaseTypeKind::UInt, size)) = tk.base_type() { + *tk.base_type_mut().unwrap() = BaseType::Sized(BaseTypeKind::Int, *size) + } + }; + + if let Some(sig) = self.signature.as_mut() { + for arg in sig.arguments.iter_mut() { + transform(&mut arg.kind); + } + + sig.return_type.as_mut().map(transform); + } + } + + /// Make a function call to the LLVM link + pub fn make_fn_call(&self, intrinsic_sig: &Signature) -> context::Result { + let link_sig = self.signature.as_ref().ok_or_else(|| { + "cannot derive the LLVM link call, as it does not hold a valid function signature" + .to_string() + })?; + + if intrinsic_sig.arguments.len() != link_sig.arguments.len() { + return Err( + "cannot derive the LLVM link call, the number of arguments does not match" + .to_string(), + ); + } + + let call_args = intrinsic_sig + .arguments + .iter() + .zip(link_sig.arguments.iter()) + .map(|(intrinsic_arg, link_arg)| { + // Could also add a type check... 
+ if intrinsic_arg.name == link_arg.name { + Ok(Expression::Identifier( + intrinsic_arg.name.to_owned(), + IdentifierType::Variable, + )) + } else { + Err("cannot derive the LLVM link call, the arguments do not match".to_string()) + } + }) + .try_collect()?; + + Ok(FnCall::new_unsafe_expression( + link_sig.fn_name().into(), + call_args, + )) + } + + /// Given a FnCall, apply all the predicate and unsigned conversions as required. + pub fn apply_conversions_to_call( + &self, + mut fn_call: FnCall, + ctx: &Context, + ) -> context::Result { + use BaseType::{Sized, Unsized}; + use BaseTypeKind::{Bool, UInt}; + use VariableType::Argument; + + let convert = + |method: &str, ex| Expression::MethodCall(Box::new(ex), method.to_string(), vec![]); + + fn_call.1 = fn_call + .1 + .into_iter() + .map(|arg| -> context::Result { + if let Expression::Identifier(ref var_name, IdentifierType::Variable) = arg { + let (kind, scope) = ctx + .local + .variables + .get(&var_name.to_string()) + .ok_or_else(|| format!("invalid variable {var_name:?} being referenced"))?; + + match (scope, kind.base_type()) { + (Argument, Some(Sized(Bool, bitsize))) if *bitsize != 8 => { + Ok(convert("into", arg)) + } + (Argument, Some(Sized(UInt, _) | Unsized(UInt))) => { + if ctx.global.auto_llvm_sign_conversion { + Ok(convert("as_signed", arg)) + } else { + Ok(arg) + } + } + _ => Ok(arg), + } + } else { + Ok(arg) + } + }) + .try_collect()?; + + let return_type_conversion = if !ctx.global.auto_llvm_sign_conversion { + None + } else { + self.signature + .as_ref() + .and_then(|sig| sig.return_type.as_ref()) + .and_then(|ty| { + if let Some(Sized(Bool, bitsize)) = ty.base_type() { + (*bitsize != 8).then_some(Bool) + } else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() { + Some(UInt) + } else { + None + } + }) + }; + + let fn_call = Expression::FnCall(fn_call); + match return_type_conversion { + Some(Bool) => Ok(convert("into", fn_call)), + Some(UInt) => Ok(convert("as_unsigned", fn_call)), + _ 
=> Ok(fn_call), + } + } +} + +impl ToTokens for LLVMLink { + fn to_tokens(&self, tokens: &mut TokenStream) { + assert!( + self.signature.is_some() && self.links.is_some(), + "expression {self:#?} was not built before calling to_tokens" + ); + + let signature = self.signature.as_ref().unwrap(); + let links = self.links.as_ref().unwrap(); + tokens.append_all(quote! { + unsafe extern "unadjusted" { + #(#links)* + #signature; + } + }) + } +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FunctionVisibility { + #[default] + Public, + Private, +} + +/// Whether to generate a load/store test, and which typeset index +/// represents the data type of the load/store target address +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Test { + #[default] + #[serde(skip)] + None, // Covered by `intrinsic-test` + Load(usize), + Store(usize), +} + +impl Test { + pub fn get_typeset_index(&self) -> Option { + match *self { + Test::Load(n) => Some(n), + Test::Store(n) => Some(n), + _ => None, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Safety { + Safe, + Unsafe(Vec), +} + +impl Safety { + /// Return `Ok(Safety::Safe)` if safety appears reasonable for the given `intrinsic`'s name and + /// prototype. Otherwise, return `Err()` with a suitable diagnostic. 
+ fn safe_checked(intrinsic: &Intrinsic) -> Result { + let name = intrinsic.signature.doc_name(); + if name.starts_with("sv") { + let handles_pointers = intrinsic + .signature + .arguments + .iter() + .any(|arg| matches!(arg.kind, TypeKind::Pointer(..))); + if name.starts_with("svld") + || name.starts_with("svst") + || name.starts_with("svprf") + || name.starts_with("svundef") + || handles_pointers + { + let doc = intrinsic.doc.as_ref().map(|s| s.to_string()); + let doc = doc.as_deref().unwrap_or("..."); + Err(format!( + "`{name}` has no safety specification, but it looks like it should be unsafe. \ + Consider specifying (un)safety explicitly: + + - name: {name} + doc: {doc} + safety: + unsafe: + - ... + ... +" + )) + } else { + Ok(Self::Safe) + } + } else { + Err(format!( + "Safety::safe_checked() for non-SVE intrinsic: {name}" + )) + } + } + + fn is_safe(&self) -> bool { + match self { + Self::Safe => true, + Self::Unsafe(..) => false, + } + } + + fn is_unsafe(&self) -> bool { + !self.is_safe() + } + + fn has_doc_comments(&self) -> bool { + match self { + Self::Safe => false, + Self::Unsafe(v) => !v.is_empty(), + } + } + + fn doc_comments(&self) -> &[UnsafetyComment] { + match self { + Self::Safe => &[], + Self::Unsafe(v) => v.as_slice(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum UnsafetyComment { + Custom(String), + Uninitialized, + PointerOffset(GovernedBy), + PointerOffsetVnum(GovernedBy), + Dereference(GovernedBy), + UnpredictableOnFault, + NonTemporal, + Neon, + NoProvenance(String), +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum GovernedBy { + #[default] + Predicated, + PredicatedNonFaulting, + PredicatedFirstFaulting, +} + +impl fmt::Display for GovernedBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Predicated => write!(f, " (governed by `pg`)"), + Self::PredicatedNonFaulting => write!( 
+ f, + " (governed by `pg`, the first-fault register (`FFR`) \ + and non-faulting behaviour)" + ), + Self::PredicatedFirstFaulting => write!( + f, + " (governed by `pg`, the first-fault register (`FFR`) \ + and first-faulting behaviour)" + ), + } + } +} + +impl fmt::Display for UnsafetyComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Custom(s) => s.fmt(f), + Self::Neon => write!(f, "Neon instrinsic unsafe"), + Self::Uninitialized => write!( + f, + "This creates an uninitialized value, and may be unsound (like \ + [`core::mem::uninitialized`])." + ), + Self::PointerOffset(gov) => write!( + f, + "[`pointer::offset`](pointer#method.offset) safety constraints must \ + be met for the address calculation for each active element{gov}." + ), + Self::PointerOffsetVnum(gov) => write!( + f, + "[`pointer::offset`](pointer#method.offset) safety constraints must \ + be met for the address calculation for each active element{gov}. \ + In particular, note that `vnum` is scaled by the vector \ + length, `VL`, which is not known at compile time." + ), + Self::Dereference(gov) => write!( + f, + "This dereferences and accesses the calculated address for each \ + active element{gov}." + ), + Self::NonTemporal => write!( + f, + "Non-temporal accesses have special memory ordering rules, and \ + [explicit barriers may be required for some applications]\ + (https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)." + ), + Self::NoProvenance(arg) => write!( + f, + "Addresses passed in `{arg}` lack provenance, so this is similar to using a \ + `usize as ptr` cast (or [`core::ptr::from_exposed_addr`]) on each lane before \ + using it." + ), + Self::UnpredictableOnFault => write!( + f, + "Result lanes corresponding to inactive FFR lanes (either before or as a result \ + of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of \ + predication. 
Refer to architectural documentation for details." + ), + } + } +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Intrinsic { + #[serde(default)] + pub visibility: FunctionVisibility, + #[serde(default)] + pub doc: Option, + #[serde(flatten)] + pub signature: Signature, + /// Function sequential composition + pub compose: Vec, + /// Input to generate the intrinsic against. Leave empty if the intrinsic + /// does not have any variants. + /// Specific variants contain one InputSet + #[serde(flatten, default)] + pub input: IntrinsicInput, + #[serde(default)] + pub constraints: Vec, + /// Additional target features to add to the global settings + #[serde(default)] + pub target_features: Vec, + /// Should the intrinsic be `unsafe`? By default, the generator will try to guess from the + /// prototype, but it errs on the side of `unsafe`, and prints a warning in that case. + #[serde(default)] + pub safety: Option, + #[serde(default)] + pub substitutions: HashMap, + /// List of the only indices in a typeset that require conversion to signed + /// when deferring unsigned intrinsics to signed. (optional, default + /// behaviour is all unsigned types are converted to signed) + #[serde(default)] + pub defer_to_signed_only_indices: HashSet, + pub assert_instr: Option>, + /// Whether we should generate a test for this intrinsic + #[serde(default)] + pub test: Test, + /// Primary base type, used for instruction assertion. 
+ #[serde(skip)] + pub base_type: Option, + /// Attributes for the function + pub attr: Option>, + /// Big endian variant for composing, this gets populated internally + #[serde(skip)] + pub big_endian_compose: Vec, + /// Big endian sometimes needs the bits inverted in a way that cannot be + /// automatically detected + #[serde(default)] + pub big_endian_inverse: Option, +} + +impl Intrinsic { + pub fn llvm_link(&self) -> Option<&LLVMLink> { + self.compose.iter().find_map(|ex| { + if let Expression::LLVMLink(llvm_link) = ex { + Some(llvm_link) + } else { + None + } + }) + } + + pub fn llvm_link_mut(&mut self) -> Option<&mut LLVMLink> { + self.compose.iter_mut().find_map(|ex| { + if let Expression::LLVMLink(llvm_link) = ex { + Some(llvm_link) + } else { + None + } + }) + } + + pub fn generate_variants(&self, global_ctx: &GlobalContext) -> context::Result> { + let wrap_err = |err| format!("{}: {err}", self.signature.name); + + let mut group_ctx = GroupContext::default(); + self.input + .variants(self) + .map_err(wrap_err)? 
+ .map(|input| { + self.generate_variant(input.clone(), &mut group_ctx, global_ctx) + .map_err(wrap_err) + .map(|variant| (variant, input)) + }) + .collect::>>() + .and_then(|mut variants| { + variants.sort_by_cached_key(|(_, input)| input.to_owned()); + + if variants.is_empty() { + let standalone_variant = self + .generate_variant(InputSet::default(), &mut group_ctx, global_ctx) + .map_err(wrap_err)?; + + Ok(vec![standalone_variant]) + } else { + Ok(variants + .into_iter() + .map(|(variant, _)| variant) + .collect_vec()) + } + }) + } + + pub fn generate_variant( + &self, + input: InputSet, + group_ctx: &mut GroupContext, + global_ctx: &GlobalContext, + ) -> context::Result { + let mut variant = self.clone(); + + variant.input.types = vec![InputSetEntry::new(vec![input.clone()])]; + + let mut local_ctx = LocalContext::new(input, self); + let mut ctx = Context { + local: &mut local_ctx, + group: group_ctx, + global: global_ctx, + }; + + variant.pre_build(&mut ctx)?; + + match ctx.local.predicate_form().cloned() { + Some(PredicateForm::DontCare(method)) => { + variant.compose = variant.generate_dont_care_pass_through(&mut ctx, method)? + } + Some(PredicateForm::Zeroing(method)) => { + variant.compose = variant.generate_zeroing_pass_through(&mut ctx, method)? + } + _ => { + for idx in 0..variant.compose.len() { + let mut ex = variant.compose[idx].clone(); + ex.build(&variant, &mut ctx)?; + variant.compose[idx] = ex; + } + } + }; + + if variant.attr.is_none() && variant.assert_instr.is_none() { + panic!( + "Error: {} is missing both 'attr' and 'assert_instr' fields. 
You must either manually declare the attributes using the 'attr' field or use 'assert_instr'!", + variant.signature.name + ); + } + + if variant.attr.is_some() { + let attr: &Vec = &variant.attr.clone().unwrap(); + let mut expanded_attr: Vec = Vec::new(); + for mut ex in attr.iter().cloned() { + ex.build(&variant, &mut ctx)?; + expanded_attr.push(ex); + } + variant.attr = Some(expanded_attr); + } + + variant.post_build(&mut ctx)?; + + /* If we should generate big endian we shall do so. It's possible + * we may not want to in some instances */ + if ctx.global.auto_big_endian.unwrap_or(false) { + self.generate_big_endian(&mut variant); + } + + if let Some(n_variant_op) = ctx.local.n_variant_op().cloned() { + variant.generate_n_variant(n_variant_op, &mut ctx) + } else { + Ok(variant) + } + } + + /// Add a big endian implementation + fn generate_big_endian(&self, variant: &mut Intrinsic) { + /* We can't always blindly reverse the bits only in certain conditions + * do we need a different order - thus this allows us to have the + * ability to do so without having to play codegolf with the yaml AST */ + let should_reverse = { + if let Some(should_reverse) = variant.big_endian_inverse { + should_reverse + } else if variant.compose.len() == 1 { + match &variant.compose[0] { + Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute", + _ => false, + } + } else { + false + } + }; + + if !should_reverse { + return; + } + + let mut big_endian_expressions: Vec = Vec::new(); + + /* We cannot assign `a.0 = ` directly to a function parameter so + * need to make them mutable */ + for function_parameter in &variant.signature.arguments { + if type_has_tuple(&function_parameter.kind) { + /* We do not want to be creating a `mut` variant if the type + * has one lane. 
If it has one lane that means it does not need + * shuffling */ + #[allow(clippy::collapsible_if)] + if let TypeKind::Vector(vector_type) = &function_parameter.kind { + if vector_type.lanes() == 1 { + continue; + } + } + + let mutable_variable = make_variable_mutable( + &function_parameter.name.to_string(), + &function_parameter.kind, + ); + big_endian_expressions.push(mutable_variable); + } + } + + /* Possibly shuffle the vectors */ + for function_parameter in &variant.signature.arguments { + if let Some(shuffle_call) = create_assigned_shuffle_call( + &function_parameter.name.to_string(), + &function_parameter.kind, + ) { + big_endian_expressions.push(shuffle_call); + } + } + + if !big_endian_expressions.is_empty() { + Vec::reserve( + &mut variant.big_endian_compose, + big_endian_expressions.len() + variant.compose.len(), + ); + let mut expression = &variant.compose[0]; + let needs_reordering = expression.is_static_assert() || expression.is_llvm_link(); + + /* We want to keep the asserts and llvm links at the start of + * the new big_endian_compose vector that we are creating */ + if needs_reordering { + let mut expression_idx = 0; + while expression.is_static_assert() || expression.is_llvm_link() { + /* Add static asserts and llvm links to the start of the + * vector */ + variant.big_endian_compose.push(expression.clone()); + expression_idx += 1; + expression = &variant.compose[expression_idx]; + } + + /* Add the big endian specific expressions */ + variant.big_endian_compose.extend(big_endian_expressions); + + /* Add the rest of the expressions */ + for i in expression_idx..variant.compose.len() { + variant.big_endian_compose.push(variant.compose[i].clone()); + } + } else { + /* If we do not need to reorder anything then immediately add + * the expressions from the big_endian_expressions and + * concatinate the compose vector */ + variant.big_endian_compose.extend(big_endian_expressions); + variant + .big_endian_compose + 
.extend(variant.compose.iter().cloned()); + } + } + + /* If we have a return type, there is a possibility we want to generate + * a shuffle call */ + if let Some(return_type) = &variant.signature.return_type { + let return_value = variant + .compose + .last() + .expect("Cannot define a return type with an empty function body"); + + /* If we do not create a shuffle call we do not need modify the + * return value and append to the big endian ast array. A bit confusing + * as in code we are making the final call before caputuring the return + * value of the intrinsic that has been called.*/ + let ret_val_name = "ret_val".to_string(); + if let Some(simd_shuffle_call) = create_shuffle_call(&ret_val_name, return_type) { + /* There is a possibility that the funcion arguments did not + * require big endian treatment, thus we need to now add the + * original function body before appending the return value.*/ + if variant.big_endian_compose.is_empty() { + variant + .big_endian_compose + .extend(variant.compose.iter().cloned()); + } + + /* Now we shuffle the return value - we are creating a new + * return value for the intrinsic. */ + let return_value_variable = if type_has_tuple(return_type) { + create_mut_let_variable(&ret_val_name, return_type, return_value.clone()) + } else { + create_let_variable(&ret_val_name, return_type, return_value.clone()) + }; + + /* Remove the last item which will be the return value */ + variant.big_endian_compose.pop(); + variant.big_endian_compose.push(return_value_variable); + variant.big_endian_compose.push(simd_shuffle_call); + if type_has_tuple(return_type) { + /* We generated `tuple_count` number of calls to shuffle + * re-assigning each tuple however those generated calls do + * not make the parent function return. 
So we add the return + * value here */ + variant + .big_endian_compose + .push(create_symbol_identifier(&ret_val_name)); + } + } + } + } + + /// Implement a "zeroing" (_z) method by calling an existing "merging" (_m) method, as required. + fn generate_zeroing_pass_through( + &mut self, + ctx: &mut Context, + method: ZeroingMethod, + ) -> context::Result> { + PredicationMask::try_from(&ctx.local.signature.name) + .ok() + .filter(|mask| mask.has_merging()) + .ok_or_else(|| format!("cannot generate zeroing passthrough for {}, no merging predicate form is specified", self.signature.name))?; + + // Determine the function to pass through to. + let mut target_ctx = ctx.local.clone(); + // Change target function predicate form to merging + *target_ctx.input.iter_mut() + .find_map(|arg| arg.predicate_form_mut()) + .expect("failed to generate zeroing pass through, could not find predicate form in the InputSet") = PredicateForm::Merging; + + let mut sig = target_ctx.signature.clone(); + sig.build(&target_ctx)?; + + let args_as_expressions = |arg: &Argument| -> context::Result { + let arg_name = arg.name.to_string(); + match &method { + ZeroingMethod::Drop { drop } if arg_name == drop.to_string() => { + Ok(PredicateForm::make_zeroinitializer(&arg.kind)) + } + ZeroingMethod::Select { select } if arg_name == select.to_string() => { + let pg = sig + .arguments + .iter() + .find_map(|arg| match arg.kind.vector() { + Some(ty) if ty.base_type().is_bool() => Some(arg.name.clone()), + _ => None, + }) + .ok_or_else(|| { + format!("cannot generate zeroing passthrough for {}, no predicate found in the signature for zero selection", self.signature.name) + })?; + Ok(PredicateForm::make_zeroselector( + pg, + select.clone(), + &arg.kind, + )) + } + _ => Ok(arg.into()), + } + }; + + let name: Expression = sig.fn_name().into(); + let args: Vec = sig + .arguments + .iter() + .map(args_as_expressions) + .try_collect()?; + let statics: Vec = sig + .static_defs + .iter() + .map(|sd| sd.try_into()) 
+ .try_collect()?; + let mut call: Expression = FnCall(Box::new(name), args, statics, false).into(); + call.build(self, ctx)?; + Ok(vec![call]) + } + + /// Implement a "don't care" (_x) method by calling an existing "merging" (_m). + fn generate_dont_care_pass_through( + &mut self, + ctx: &mut Context, + method: DontCareMethod, + ) -> context::Result> { + PredicationMask::try_from(&ctx.local.signature.name).and_then(|mask| match method { + DontCareMethod::AsMerging if mask.has_merging() => Ok(()), + DontCareMethod::AsZeroing if mask.has_zeroing() => Ok(()), + _ => Err(format!( + "cannot generate don't care passthrough for {}, no {} predicate form is specified", + self.signature.name, + match method { + DontCareMethod::AsMerging => "merging", + DontCareMethod::AsZeroing => "zeroing", + _ => unreachable!(), + } + )), + })?; + + // Determine the function to pass through to. + let mut target_ctx = ctx.local.clone(); + // Change target function predicate form to merging + *target_ctx.input.iter_mut() + .find_map(|arg| arg.predicate_form_mut()) + .expect("failed to generate don't care passthrough, could not find predicate form in the InputSet") = PredicateForm::Merging; + + let mut sig = target_ctx.signature.clone(); + sig.build(&target_ctx)?; + + // We might need to drop an argument for a zeroing pass-through. + let drop = match (method, &self.input.predication_methods.zeroing_method) { + (DontCareMethod::AsZeroing, Some(ZeroingMethod::Drop { drop })) => Some(drop), + _ => None, + }; + + let name: Expression = sig.fn_name().into(); + let args: Vec = sig + .arguments + .iter() + .map(|arg| { + if Some(arg.name.to_string()) == drop.as_ref().map(|v| v.to_string()) { + // This argument is present in the _m form, but missing from the _x form. Clang + // typically replaces these with an uninitialised vector, but to avoid + // materialising uninitialised values in Rust, we instead merge with a known + // vector. This usually results in the same code generation. 
+ // TODO: In many cases, it'll be better to use an unpredicated (or zeroing) form. + sig.arguments + .iter() + .filter(|&other| arg.name.to_string() != other.name.to_string()) + .find_map(|other| { + arg.kind.express_reinterpretation_from(&other.kind, other) + }) + .unwrap_or_else(|| PredicateForm::make_zeroinitializer(&arg.kind)) + } else { + arg.into() + } + }) + .collect(); + let statics: Vec = sig + .static_defs + .iter() + .map(|sd| sd.try_into()) + .try_collect()?; + let mut call: Expression = FnCall(Box::new(name), args, statics, false).into(); + call.build(self, ctx)?; + Ok(vec![call]) + } + + /// Implement a "_n" variant based on the given operand + fn generate_n_variant( + &self, + mut n_variant_op: WildString, + ctx: &mut Context, + ) -> context::Result { + let mut variant = self.clone(); + + n_variant_op.build_acle(ctx.local)?; + + let n_op_arg_idx = variant + .signature + .arguments + .iter_mut() + .position(|arg| arg.name.to_string() == n_variant_op.to_string()) + .ok_or_else(|| { + format!( + "cannot generate `_n` variant for {}, operand `{n_variant_op}` not found", + variant.signature.name + ) + })?; + + let has_n_wildcard = ctx + .local + .signature + .name + .wildcards() + .any(|w| matches!(w, Wildcard::NVariant)); + + if !has_n_wildcard { + return Err(format!( + "cannot generate `_n` variant for {}, no wildcard {{_n}} was specified in the intrinsic's name", + variant.signature.name + )); + } + + // Build signature + variant.signature = ctx.local.signature.clone(); + if let Some(pf) = ctx.local.predicate_form() { + // WARN: this may break in the future according to the underlying implementation + // Drops unwanted arguments if needed (required for the collection of arguments to pass to the function) + pf.post_build(&mut variant)?; + } + + let sig = &mut variant.signature; + + ctx.local + .substitutions + .insert(Wildcard::NVariant, "_n".to_owned()); + + let arg_kind = &mut sig.arguments.get_mut(n_op_arg_idx).unwrap().kind; + *arg_kind = match 
arg_kind { + TypeKind::Wildcard(Wildcard::SVEType(idx, None)) => { + TypeKind::Wildcard(Wildcard::Type(*idx)) + } + _ => { + return Err(format!( + "cannot generate `_n` variant for {}, the given operand is not a valid SVE type", + variant.signature.name + )); + } + }; + + sig.build(ctx.local)?; + + // Build compose + let name: Expression = self.signature.fn_name().into(); + let args: Vec = sig + .arguments + .iter() + .enumerate() + .map(|(idx, arg)| { + let ty = arg.kind.acle_notation_repr(); + if idx == n_op_arg_idx { + FnCall::new_expression( + WildString::from(format!("svdup_n_{ty}")).into(), + vec![arg.into()], + ) + } else { + arg.into() + } + }) + .collect(); + let statics: Vec = sig + .static_defs + .iter() + .map(|sd| sd.try_into()) + .try_collect()?; + let mut call: Expression = FnCall(Box::new(name), args, statics, false).into(); + call.build(self, ctx)?; + + variant.compose = vec![call]; + variant.signature.predicate_needs_conversion = true; + + Ok(variant) + } + + fn pre_build(&mut self, ctx: &mut Context) -> context::Result { + self.substitutions + .iter_mut() + .try_for_each(|(k, v)| -> context::Result { + let mut ws = v.get(ctx.local)?; + ws.build_acle(ctx.local)?; + ctx.local + .substitutions + .insert(Wildcard::Custom(k.to_owned()), ws.to_string()); + Ok(()) + })?; + + self.signature.build(ctx.local)?; + + if self.safety.is_none() { + self.safety = match Safety::safe_checked(self) { + Ok(safe) => Some(safe), + Err(err) => { + eprintln!("{err}"); + return Err(format!( + "Refusing to infer unsafety for {name}", + name = self.signature.doc_name() + )); + } + } + } + + if let Some(doc) = &mut self.doc { + doc.build_acle(ctx.local)? 
+ } + + // Add arguments to variable tracking + self.signature + .arguments + .iter() + .for_each(|arg| arg.populate_variables(&mut ctx.local.variables)); + + // Add constant expressions to variable tracking + self.signature + .static_defs + .iter() + .filter_map(StaticDefinition::as_variable) + .for_each(|(var_name, var_properties)| { + ctx.local.variables.insert(var_name, var_properties); + }); + + // Pre-build compose expressions + for idx in 0..self.compose.len() { + let mut ex = self.compose[idx].clone(); + ex.pre_build(ctx)?; + self.compose[idx] = ex; + } + + if !ctx.local.input.is_empty() { + // We simplify the LLVM link transmute logic by deferring to a variant employing the same LLVM link where possible + if let Some(link) = self.compose.iter().find_map(|ex| match ex { + Expression::LLVMLink(link) => Some(link), + _ => None, + }) { + let mut link = link.clone(); + link.build(ctx)?; + + for cfg in ctx.global.arch_cfgs.iter() { + let expected_link = link.resolve(cfg); + if let Some(target_inputset) = ctx.group.links.get(&expected_link) { + self.defer_to_existing_llvm_link(ctx.local, target_inputset)?; + break; + } + } + } + } + + if let Some(ref mut assert_instr) = self.assert_instr { + assert_instr.iter_mut().try_for_each(|ai| ai.build(ctx))?; + } + + // Prepend constraint assertions + self.constraints.iter_mut().try_for_each(|c| c.build(ctx))?; + let assertions: Vec<_> = self + .constraints + .iter() + .map(|c| ctx.local.make_assertion_from_constraint(c)) + .try_collect()?; + self.compose.splice(0..0, assertions); + + Ok(()) + } + + fn post_build(&mut self, ctx: &mut Context) -> context::Result { + if let Some(Expression::LLVMLink(link)) = self.compose.last() { + let mut fn_call = link.make_fn_call(&self.signature)?; + // Required to inject conversions + fn_call.build(self, ctx)?; + self.compose.push(fn_call) + } + + if let Some(llvm_link) = self.llvm_link_mut() { + /* Turn all Rust unsigned types into signed if required */ + if 
ctx.global.auto_llvm_sign_conversion { + llvm_link.sanitise_uints(); + } + } + + if let Some(predicate_form) = ctx.local.predicate_form() { + predicate_form.post_build(self)? + } + + // Set for ToTokens to display a generic svbool_t + self.signature.predicate_needs_conversion = true; + + // Set base type kind for instruction assertion + self.base_type = ctx + .local + .input + .get(0) + .and_then(|arg| arg.typekind()) + .and_then(|ty| ty.base_type()) + .cloned(); + + // Add global target features + self.target_features = ctx + .global + .arch_cfgs + .iter() + .flat_map(|cfg| cfg.target_feature.clone()) + .chain(self.target_features.clone()) + .collect_vec(); + + Ok(()) + } + + fn defer_to_existing_llvm_link( + &mut self, + ctx: &LocalContext, + target_inputset: &InputSet, + ) -> context::Result { + let mut target_ctx = ctx.clone(); + target_ctx.input = target_inputset.clone(); + + let mut target_signature = target_ctx.signature.clone(); + target_signature.build(&target_ctx)?; + + let drop_var = if let Some(pred) = ctx.predicate_form().cloned() { + match pred { + PredicateForm::Zeroing(ZeroingMethod::Drop { drop }) => Some(drop), + PredicateForm::DontCare(DontCareMethod::AsZeroing) => { + if let Some(ZeroingMethod::Drop { drop }) = + self.input.predication_methods.zeroing_method.to_owned() + { + Some(drop) + } else { + None + } + } + _ => None, + } + } else { + None + }; + + let call_method = + |ex, method: &str| Expression::MethodCall(Box::new(ex), method.to_string(), vec![]); + let as_unsigned = |ex| call_method(ex, "as_unsigned"); + let as_signed = |ex| call_method(ex, "as_signed"); + let convert_if_required = |w: Option<&Wildcard>, from: &InputSet, to: &InputSet, ex| { + if let Some(w) = w { + if let Some(dest_idx) = w.get_typeset_index() { + let from_type = from.get(dest_idx); + let to_type = to.get(dest_idx); + + if from_type != to_type { + let from_base_type = from_type + .and_then(|in_arg| in_arg.typekind()) + .and_then(|ty| ty.base_type()) + .map(|bt| 
bt.kind()); + let to_base_type = to_type + .and_then(|in_arg| in_arg.typekind()) + .and_then(|ty| ty.base_type()) + .map(|bt| bt.kind()); + + match (from_base_type, to_base_type) { + // Use AsSigned for uint -> int + (Some(BaseTypeKind::UInt), Some(BaseTypeKind::Int)) => as_signed(ex), + (Some(BaseTypeKind::Int), Some(BaseTypeKind::Int)) => ex, + // Use AsUnsigned for int -> uint + (Some(BaseTypeKind::Int), Some(BaseTypeKind::UInt)) => as_unsigned(ex), + (Some(BaseTypeKind::Float), Some(BaseTypeKind::Float)) => ex, + (Some(BaseTypeKind::UInt), Some(BaseTypeKind::UInt)) => ex, + (Some(BaseTypeKind::Poly), Some(BaseTypeKind::Poly)) => ex, + + (None, None) => ex, + _ => unreachable!( + "unsupported conversion case from {from_base_type:?} to {to_base_type:?} hit" + ), + } + } else { + ex + } + } else { + ex + } + } else { + ex + } + }; + + let args = ctx + .signature + .arguments + .iter() + .filter_map(|arg| { + let var = Expression::Identifier(arg.name.to_owned(), IdentifierType::Variable); + if drop_var.as_ref().map(|v| v.to_string()) != Some(arg.name.to_string()) { + Some(convert_if_required( + arg.kind.wildcard(), + &ctx.input, + target_inputset, + var, + )) + } else { + None + } + }) + .collect_vec(); + + let turbofish = self + .signature + .static_defs + .iter() + .map(|def| { + let name = match def { + StaticDefinition::Constant(Argument { name, .. 
}) => name.to_string(), + StaticDefinition::Generic(name) => name.to_string(), + }; + Expression::Identifier(name.into(), IdentifierType::Symbol) + }) + .collect_vec(); + + let ret_wildcard = ctx + .signature + .return_type + .as_ref() + .and_then(|t| t.wildcard()); + let call = FnCall( + Box::new(target_signature.fn_name().into()), + args, + turbofish, + false, + ) + .into(); + + self.compose = vec![convert_if_required( + ret_wildcard, + target_inputset, + &ctx.input, + call, + )]; + + Ok(()) + } +} + +/// Some intrinsics require a little endian and big endian implementation, others +/// do not +enum Endianness { + Little, + Big, + NA, +} + +/// Based on the endianess will create the appropriate intrinsic, or simply +/// create the desired intrinsic without any endianess +fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut TokenStream) { + let signature = &intrinsic.signature; + let fn_name = signature.fn_name().to_string(); + let target_feature = intrinsic.target_features.join(","); + let safety = intrinsic + .safety + .as_ref() + .expect("safety should be determined during `pre_build`"); + + if let Some(doc) = &intrinsic.doc { + let mut doc = vec![doc.to_string()]; + + doc.push(format!("[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/{})", &signature.doc_name())); + + if safety.has_doc_comments() { + doc.push("## Safety".to_string()); + for comment in safety.doc_comments() { + doc.push(format!(" * {comment}")); + } + } else { + assert!( + safety.is_safe(), + "{fn_name} is both public and unsafe, and so needs safety documentation" + ); + } + + tokens.append_all(quote! { #(#[doc = #doc])* }); + } else { + assert!( + matches!(intrinsic.visibility, FunctionVisibility::Private), + "{fn_name} needs to be private, or to have documentation." + ); + assert!( + !safety.has_doc_comments(), + "{fn_name} needs a documentation section for its safety comments." + ); + } + + tokens.append_all(quote! 
{ #[inline] }); + + match endianness { + Endianness::Little => tokens.append_all(quote! { #[cfg(target_endian = "little")] }), + Endianness::Big => tokens.append_all(quote! { #[cfg(target_endian = "big")] }), + Endianness::NA => {} + }; + + let expressions = match endianness { + Endianness::Little | Endianness::NA => &intrinsic.compose, + Endianness::Big => &intrinsic.big_endian_compose, + }; + + /* If we have manually defined attributes on the block of yaml with + * 'attr:' we want to add them */ + if let Some(attr) = &intrinsic.attr { + /* Scan to see if we have defined `FnCall: [target_feature, ['']]`*/ + if !has_target_feature_attr(attr) { + /* If not add the default one that is defined at the top of + * the yaml file. This does mean we scan the attributes vector + * twice, once to see if the `target_feature` exists and again + * to actually append the tokens. We could impose that the + * `target_feature` call has to be the first argument of the + * `attr` block */ + tokens.append_all(quote! { + #[target_feature(enable = #target_feature)] + }); + } + + /* Target feature will get added here */ + let attr_expressions = &mut attr.iter().peekable(); + for ex in attr_expressions { + let mut inner = TokenStream::new(); + ex.to_tokens(&mut inner); + tokens.append(Punct::new('#', Spacing::Alone)); + tokens.append(Group::new(Delimiter::Bracket, inner)); + } + } else { + tokens.append_all(quote! { + #[target_feature(enable = #target_feature)] + }); + } + + #[allow(clippy::collapsible_if)] + if let Some(assert_instr) = &intrinsic.assert_instr { + if !assert_instr.is_empty() { + InstructionAssertionsForBaseType(assert_instr, &intrinsic.base_type.as_ref()) + .to_tokens(tokens) + } + } + + match &intrinsic.visibility { + FunctionVisibility::Public => tokens.append_all(quote! { pub }), + FunctionVisibility::Private => {} + } + if safety.is_unsafe() { + tokens.append_all(quote! { unsafe }); + } + tokens.append_all(quote! 
{ #signature }); + + // If the intrinsic function is explicitly unsafe, we populate `body_default_safety` with + // the implementation. No explicit unsafe blocks are required. + // + // If the intrinsic is safe, we fill `body_default_safety` until we encounter an expression + // that requires an unsafe wrapper, then switch to `body_unsafe`. Since the unsafe + // operation (e.g. memory access) is typically the last step, this tends to minimises the + // amount of unsafe code required. + let mut body_default_safety = TokenStream::new(); + let mut body_unsafe = TokenStream::new(); + let mut body_current = &mut body_default_safety; + for (pos, ex) in expressions.iter().with_position() { + if safety.is_safe() && ex.requires_unsafe_wrapper(&fn_name) { + body_current = &mut body_unsafe; + } + ex.to_tokens(body_current); + let is_last = matches!(pos, itertools::Position::Last | itertools::Position::Only); + let is_llvm_link = matches!(ex, Expression::LLVMLink(_)); + if !is_last && !is_llvm_link { + body_current.append(Punct::new(';', Spacing::Alone)); + } + } + let mut body = body_default_safety; + if !body_unsafe.is_empty() { + body.append_all(quote! 
{ unsafe { #body_unsafe } }); + } + + tokens.append(Group::new(Delimiter::Brace, body)); +} + +impl ToTokens for Intrinsic { + fn to_tokens(&self, tokens: &mut TokenStream) { + if !self.big_endian_compose.is_empty() { + for i in 0..2 { + match i { + 0 => create_tokens(self, Endianness::Little, tokens), + 1 => create_tokens(self, Endianness::Big, tokens), + _ => panic!("Currently only little and big endian exist"), + } + } + } else { + create_tokens(self, Endianness::NA, tokens); + } + } +} + +fn has_target_feature_attr(attrs: &[Expression]) -> bool { + attrs.iter().any(|attr| { + if let Expression::FnCall(fn_call) = attr { + fn_call.is_target_feature_call() + } else { + false + } + }) +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs b/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs new file mode 100644 index 000000000000..5cf39b2e11ae --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs @@ -0,0 +1,822 @@ +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::str::FromStr; + +use crate::format_code; +use crate::input::InputType; +use crate::intrinsic::Intrinsic; +use crate::typekinds::BaseType; +use crate::typekinds::{ToRepr, TypeKind}; + +use itertools::Itertools; +use lazy_static::lazy_static; +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; + +// Number of vectors in our buffers - the maximum tuple size, 4, plus 1 as we set the vnum +// argument to 1. 
+const NUM_VECS: usize = 5; +// The maximum vector length (in bits) +const VL_MAX_BITS: usize = 2048; +// The maximum vector length (in bytes) +const VL_MAX_BYTES: usize = VL_MAX_BITS / 8; +// The maximum number of elements in each vector type +const LEN_F32: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_F64: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_I8: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_I16: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_I32: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_I64: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_U8: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_U16: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_U32: usize = VL_MAX_BYTES / core::mem::size_of::(); +const LEN_U64: usize = VL_MAX_BYTES / core::mem::size_of::(); + +/// `load_intrinsics` and `store_intrinsics` is a vector of intrinsics +/// variants, while `out_path` is a file to write to. +pub fn generate_load_store_tests( + load_intrinsics: Vec, + store_intrinsics: Vec, + out_path: Option<&PathBuf>, +) -> Result<(), String> { + let output = match out_path { + Some(out) => { + Box::new(File::create(out).map_err(|e| format!("couldn't create tests file: {e}"))?) 
+ as Box + } + None => Box::new(std::io::stdout()) as Box, + }; + let mut used_stores = vec![false; store_intrinsics.len()]; + let tests: Vec<_> = load_intrinsics + .iter() + .map(|load| { + let store_candidate = load + .signature + .fn_name() + .to_string() + .replace("svld1s", "svst1") + .replace("svld1u", "svst1") + .replace("svldnt1s", "svstnt1") + .replace("svldnt1u", "svstnt1") + .replace("svld", "svst") + .replace("gather", "scatter"); + + let store_index = store_intrinsics + .iter() + .position(|i| i.signature.fn_name().to_string() == store_candidate); + if let Some(i) = store_index { + used_stores[i] = true; + } + + generate_single_test( + load.clone(), + store_index.map(|i| store_intrinsics[i].clone()), + ) + }) + .try_collect()?; + + assert!( + used_stores.into_iter().all(|b| b), + "Not all store tests have been paired with a load. Consider generating specifc store-only tests" + ); + + let preamble = + TokenStream::from_str(&PREAMBLE).map_err(|e| format!("Preamble is invalid: {e}"))?; + // Only output manual tests for the SVE set + let manual_tests = match &load_intrinsics[0].target_features[..] { + [s] if s == "sve" => TokenStream::from_str(&MANUAL_TESTS) + .map_err(|e| format!("Manual tests are invalid: {e}"))?, + _ => quote!(), + }; + format_code( + output, + format!( + "// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen-arm/spec/sve` and run the following command to re-generate +// this file: +// +// ``` +// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec +// ``` +{}", + quote! 
{ #preamble #(#tests)* #manual_tests } + ), + ) + .map_err(|e| format!("couldn't write tests: {e}")) +} + +/// A test looks like this: +/// ``` +/// let data = [scalable vector]; +/// +/// let mut storage = [0; N]; +/// +/// store_intrinsic([true_predicate], storage.as_mut_ptr(), data); +/// [test contents of storage] +/// +/// let loaded == load_intrinsic([true_predicate], storage.as_ptr()) +/// assert!(loaded == data); +/// ``` +/// We intialise our data such that the value stored matches the index it's stored to. +/// By doing this we can validate scatters by checking that each value in the storage +/// array is either 0 or the same as its index. +fn generate_single_test( + load: Intrinsic, + store: Option, +) -> Result { + let chars = LdIntrCharacteristics::new(&load)?; + let fn_name = load.signature.fn_name().to_string(); + + #[allow(clippy::collapsible_if)] + if let Some(ty) = &chars.gather_bases_type { + if ty.base_type().unwrap().get_size() == Ok(32) + && chars.gather_index_type.is_none() + && chars.gather_offset_type.is_none() + { + // We lack a way to ensure data is in the bottom 32 bits of the address space + println!("Skipping test for {fn_name}"); + return Ok(quote!()); + } + } + + if fn_name.starts_with("svldff1") && fn_name.contains("gather") { + // TODO: We can remove this check when first-faulting gathers are fixed in CI's QEMU + // https://gitlab.com/qemu-project/qemu/-/issues/1612 + println!("Skipping test for {fn_name}"); + return Ok(quote!()); + } + + let fn_ident = format_ident!("{fn_name}"); + let test_name = format_ident!( + "test_{fn_name}{}", + if let Some(ref store) = store { + format!("_with_{}", store.signature.fn_name()) + } else { + String::new() + } + ); + + let load_type = &chars.load_type; + let acle_type = load_type.acle_notation_repr(); + + // If there's no return type, fallback to the load type for things that depend on it + let ret_type = &load + .signature + .return_type + .as_ref() + .and_then(TypeKind::base_type) + 
.unwrap_or(load_type); + + let pred_fn = format_ident!("svptrue_b{}", load_type.size()); + + let load_type_caps = load_type.rust_repr().to_uppercase(); + let data_array = format_ident!("{load_type_caps}_DATA"); + + let size_fn = format_ident!("svcnt{}", ret_type.size_literal()); + + let rust_ret_type = ret_type.rust_repr(); + let assert_fn = format_ident!("assert_vector_matches_{rust_ret_type}"); + + // Use vnum=1, so adjust all values by one vector length + let (length_call, vnum_arg) = if chars.vnum { + if chars.is_prf { + (quote!(), quote!(, 1)) + } else { + (quote!(let len = #size_fn() as usize;), quote!(, 1)) + } + } else { + (quote!(), quote!()) + }; + + let (bases_load, bases_arg) = if let Some(ty) = &chars.gather_bases_type { + // Bases is a vector of (sometimes 32-bit) pointers + // When we combine bases with an offset/index argument, we load from the data arrays + // starting at 1 + let base_ty = ty.base_type().unwrap(); + let rust_type = format_ident!("{}", base_ty.rust_repr()); + let index_fn = format_ident!("svindex_{}", base_ty.acle_notation_repr()); + let size_in_bytes = chars.load_type.get_size().unwrap() / 8; + + if base_ty.get_size().unwrap() == 32 { + // Treat bases as a vector of offsets here - we don't test this without an offset or + // index argument + ( + Some(quote!( + let bases = #index_fn(0, #size_in_bytes.try_into().unwrap()); + )), + quote!(, bases), + ) + } else { + // Treat bases as a vector of pointers + let base_fn = format_ident!("svdup_n_{}", base_ty.acle_notation_repr()); + let data_array = if store.is_some() { + format_ident!("storage") + } else { + format_ident!("{}_DATA", chars.load_type.rust_repr().to_uppercase()) + }; + + let add_fn = format_ident!("svadd_{}_x", base_ty.acle_notation_repr()); + ( + Some(quote! 
{ + let bases = #base_fn(#data_array.as_ptr() as #rust_type); + let offsets = #index_fn(0, #size_in_bytes.try_into().unwrap()); + let bases = #add_fn(#pred_fn(), bases, offsets); + }), + quote!(, bases), + ) + } + } else { + (None, quote!()) + }; + + let index_arg = if let Some(ty) = &chars.gather_index_type { + let rust_type = format_ident!("{}", ty.rust_repr()); + if chars + .gather_bases_type + .as_ref() + .and_then(TypeKind::base_type) + .map_or(Err(String::new()), BaseType::get_size) + .unwrap() + == 32 + { + // Let index be the base of the data array + let data_array = if store.is_some() { + format_ident!("storage") + } else { + format_ident!("{}_DATA", chars.load_type.rust_repr().to_uppercase()) + }; + let size_in_bytes = chars.load_type.get_size().unwrap() / 8; + quote!(, #data_array.as_ptr() as #rust_type / (#size_in_bytes as #rust_type) + 1) + } else { + quote!(, 1.try_into().unwrap()) + } + } else { + quote!() + }; + + let offset_arg = if let Some(ty) = &chars.gather_offset_type { + let size_in_bytes = chars.load_type.get_size().unwrap() / 8; + if chars + .gather_bases_type + .as_ref() + .and_then(TypeKind::base_type) + .map_or(Err(String::new()), BaseType::get_size) + .unwrap() + == 32 + { + // Let offset be the base of the data array + let rust_type = format_ident!("{}", ty.rust_repr()); + let data_array = if store.is_some() { + format_ident!("storage") + } else { + format_ident!("{}_DATA", chars.load_type.rust_repr().to_uppercase()) + }; + quote!(, #data_array.as_ptr() as #rust_type + #size_in_bytes as #rust_type) + } else { + quote!(, #size_in_bytes.try_into().unwrap()) + } + } else { + quote!() + }; + + let (offsets_load, offsets_arg) = if let Some(ty) = &chars.gather_offsets_type { + // Offsets is a scalable vector of per-element offsets in bytes. 
We re-use the contiguous + // data for this, then multiply to get indices + let offsets_fn = format_ident!("svindex_{}", ty.base_type().unwrap().acle_notation_repr()); + let size_in_bytes = chars.load_type.get_size().unwrap() / 8; + ( + Some(quote! { + let offsets = #offsets_fn(0, #size_in_bytes.try_into().unwrap()); + }), + quote!(, offsets), + ) + } else { + (None, quote!()) + }; + + let (indices_load, indices_arg) = if let Some(ty) = &chars.gather_indices_type { + // There's no need to multiply indices by the load type width + let base_ty = ty.base_type().unwrap(); + let indices_fn = format_ident!("svindex_{}", base_ty.acle_notation_repr()); + ( + Some(quote! { + let indices = #indices_fn(0, 1); + }), + quote! {, indices}, + ) + } else { + (None, quote!()) + }; + + let ptr = if chars.gather_bases_type.is_some() { + quote!() + } else if chars.is_prf { + quote!(, I64_DATA.as_ptr()) + } else { + quote!(, #data_array.as_ptr()) + }; + + let tuple_len = &chars.tuple_len; + let expecteds = if chars.is_prf { + // No return value for prefetches + vec![] + } else { + (0..*tuple_len) + .map(|i| get_expected_range(i, &chars)) + .collect() + }; + let asserts: Vec<_> = + if *tuple_len > 1 { + let svget = format_ident!("svget{tuple_len}_{acle_type}"); + expecteds.iter().enumerate().map(|(i, expected)| { + quote! (#assert_fn(#svget::<{ #i as i32 }>(loaded), #expected);) + }).collect() + } else { + expecteds + .iter() + .map(|expected| quote! 
(#assert_fn(loaded, #expected);)) + .collect() + }; + + let function = if chars.is_prf { + if fn_name.contains("gather") && fn_name.contains("base") && !fn_name.starts_with("svprf_") + { + // svprf(b|h|w|d)_gather base intrinsics do not have a generic type parameter + quote!(#fn_ident::<{ svprfop::SV_PLDL1KEEP }>) + } else { + quote!(#fn_ident::<{ svprfop::SV_PLDL1KEEP }, i64>) + } + } else { + quote!(#fn_ident) + }; + + let octaword_guard = if chars.replicate_width == Some(256) { + let msg = format!("Skipping {test_name} due to SVE vector length"); + quote! { + if svcntb() < 32 { + println!(#msg); + return; + } + } + } else { + quote!() + }; + + let feats = load.target_features.join(","); + + if let Some(store) = store { + let data_init = if *tuple_len == 1 { + quote!(#(#expecteds)*) + } else { + let create = format_ident!("svcreate{tuple_len}_{acle_type}"); + quote!(#create(#(#expecteds),*)) + }; + let input = store.input.types.first().unwrap().get(0).unwrap(); + let store_type = input + .get(store.test.get_typeset_index().unwrap()) + .and_then(InputType::typekind) + .and_then(TypeKind::base_type) + .unwrap(); + + let store_type = format_ident!("{}", store_type.rust_repr()); + let storage_len = NUM_VECS * VL_MAX_BITS / chars.load_type.get_size()? as usize; + let store_fn = format_ident!("{}", store.signature.fn_name().to_string()); + let load_type = format_ident!("{}", chars.load_type.rust_repr()); + let (store_ptr, store_mut_ptr) = if chars.gather_bases_type.is_none() { + ( + quote!(, storage.as_ptr() as *const #load_type), + quote!(, storage.as_mut_ptr()), + ) + } else { + (quote!(), quote!()) + }; + let args = quote!(#pred_fn() #store_ptr #vnum_arg #bases_arg #offset_arg #index_arg #offsets_arg #indices_arg); + let call = if chars.uses_ffr { + // Doing a normal load first maximises the number of elements our ff/nf test loads + let non_ffr_fn_name = format_ident!( + "{}", + fn_name + .replace("svldff1", "svld1") + .replace("svldnf1", "svld1") + ); + quote! 
{ + svsetffr(); + let _ = #non_ffr_fn_name(#args); + let loaded = #function(#args); + } + } else { + // Note that the FFR must be set for all tests as the assert functions mask against it + quote! { + svsetffr(); + let loaded = #function(#args); + } + }; + + Ok(quote! { + #[simd_test(enable = #feats)] + unsafe fn #test_name() { + #octaword_guard + #length_call + let mut storage = [0 as #store_type; #storage_len]; + let data = #data_init; + #bases_load + #offsets_load + #indices_load + + #store_fn(#pred_fn() #store_mut_ptr #vnum_arg #bases_arg #offset_arg #index_arg #offsets_arg #indices_arg, data); + for (i, &val) in storage.iter().enumerate() { + assert!(val == 0 as #store_type || val == i as #store_type); + } + + #call + #(#asserts)* + + } + }) + } else { + let args = quote!(#pred_fn() #ptr #vnum_arg #bases_arg #offset_arg #index_arg #offsets_arg #indices_arg); + let call = if chars.uses_ffr { + // Doing a normal load first maximises the number of elements our ff/nf test loads + let non_ffr_fn_name = format_ident!( + "{}", + fn_name + .replace("svldff1", "svld1") + .replace("svldnf1", "svld1") + ); + quote! { + svsetffr(); + let _ = #non_ffr_fn_name(#args); + let loaded = #function(#args); + } + } else { + // Note that the FFR must be set for all tests as the assert functions mask against it + quote! { + svsetffr(); + let loaded = #function(#args); + } + }; + Ok(quote! 
{ + #[simd_test(enable = #feats)] + unsafe fn #test_name() { + #octaword_guard + #bases_load + #offsets_load + #indices_load + #call + #length_call + + #(#asserts)* + } + }) + } +} + +/// Assumes chars.ret_type is not None +fn get_expected_range(tuple_idx: usize, chars: &LdIntrCharacteristics) -> proc_macro2::TokenStream { + // vnum=1 + let vnum_adjust = if chars.vnum { quote!(len+) } else { quote!() }; + + let bases_adjust = + (chars.gather_index_type.is_some() || chars.gather_offset_type.is_some()) as usize; + + let tuple_len = chars.tuple_len; + let size = chars + .ret_type + .as_ref() + .and_then(TypeKind::base_type) + .unwrap_or(&chars.load_type) + .get_size() + .unwrap() as usize; + + if chars.replicate_width == Some(128) { + // svld1rq + let ty_rust = format_ident!( + "{}", + chars + .ret_type + .as_ref() + .unwrap() + .base_type() + .unwrap() + .rust_repr() + ); + let args: Vec<_> = (0..(128 / size)).map(|i| quote!(#i as #ty_rust)).collect(); + let dup = format_ident!( + "svdupq_n_{}", + chars.ret_type.as_ref().unwrap().acle_notation_repr() + ); + quote!(#dup(#(#args,)*)) + } else if chars.replicate_width == Some(256) { + // svld1ro - we use two interleaved svdups to create a repeating 256-bit pattern + let ty_rust = format_ident!( + "{}", + chars + .ret_type + .as_ref() + .unwrap() + .base_type() + .unwrap() + .rust_repr() + ); + let ret_acle = chars.ret_type.as_ref().unwrap().acle_notation_repr(); + let args: Vec<_> = (0..(128 / size)).map(|i| quote!(#i as #ty_rust)).collect(); + let args2: Vec<_> = ((128 / size)..(256 / size)) + .map(|i| quote!(#i as #ty_rust)) + .collect(); + let dup = format_ident!("svdupq_n_{ret_acle}"); + let interleave = format_ident!("svtrn1q_{ret_acle}"); + quote!(#interleave(#dup(#(#args,)*), #dup(#(#args2,)*))) + } else { + let start = bases_adjust + tuple_idx; + if chars + .ret_type + .as_ref() + .unwrap() + .base_type() + .unwrap() + .is_float() + { + // Use svcvt to create a linear sequence of floats + let cvt_fn = 
format_ident!("svcvt_f{size}_s{size}_x"); + let pred_fn = format_ident!("svptrue_b{size}"); + let svindex_fn = format_ident!("svindex_s{size}"); + quote! { #cvt_fn(#pred_fn(), #svindex_fn((#vnum_adjust #start).try_into().unwrap(), #tuple_len.try_into().unwrap()))} + } else { + let ret_acle = chars.ret_type.as_ref().unwrap().acle_notation_repr(); + let svindex = format_ident!("svindex_{ret_acle}"); + quote!(#svindex((#vnum_adjust #start).try_into().unwrap(), #tuple_len.try_into().unwrap())) + } + } +} + +struct LdIntrCharacteristics { + // The data type to load from (not necessarily the data type returned) + load_type: BaseType, + // The data type to return (None for unit) + ret_type: Option, + // The size of tuple to load/store + tuple_len: usize, + // Whether a vnum argument is present + vnum: bool, + // Is the intrinsic first/non-faulting? + uses_ffr: bool, + // Is it a prefetch? + is_prf: bool, + // The size of data loaded with svld1ro/q intrinsics + replicate_width: Option, + // Scalable vector of pointers to load from + gather_bases_type: Option, + // Scalar offset, paired with bases + gather_offset_type: Option, + // Scalar index, paired with bases + gather_index_type: Option, + // Scalable vector of offsets + gather_offsets_type: Option, + // Scalable vector of indices + gather_indices_type: Option, +} + +impl LdIntrCharacteristics { + fn new(intr: &Intrinsic) -> Result { + let input = intr.input.types.first().unwrap().get(0).unwrap(); + let load_type = input + .get(intr.test.get_typeset_index().unwrap()) + .and_then(InputType::typekind) + .and_then(TypeKind::base_type) + .unwrap(); + + let ret_type = intr.signature.return_type.clone(); + + let name = intr.signature.fn_name().to_string(); + let tuple_len = name + .chars() + .find(|c| c.is_numeric()) + .and_then(|c| c.to_digit(10)) + .unwrap_or(1) as usize; + + let uses_ffr = name.starts_with("svldff") || name.starts_with("svldnf"); + + let is_prf = name.starts_with("svprf"); + + let replicate_width = if 
name.starts_with("svld1ro") { + Some(256) + } else if name.starts_with("svld1rq") { + Some(128) + } else { + None + }; + + let get_ty_of_arg = |name: &str| { + intr.signature + .arguments + .iter() + .find(|a| a.name.to_string() == name) + .map(|a| a.kind.clone()) + }; + + let gather_bases_type = get_ty_of_arg("bases"); + let gather_offset_type = get_ty_of_arg("offset"); + let gather_index_type = get_ty_of_arg("index"); + let gather_offsets_type = get_ty_of_arg("offsets"); + let gather_indices_type = get_ty_of_arg("indices"); + + Ok(LdIntrCharacteristics { + load_type: *load_type, + ret_type, + tuple_len, + vnum: name.contains("vnum"), + uses_ffr, + is_prf, + replicate_width, + gather_bases_type, + gather_offset_type, + gather_index_type, + gather_offsets_type, + gather_indices_type, + }) + } +} + +lazy_static! { + static ref PREAMBLE: String = format!( + r#"#![allow(unused)] + +use super::*; +use std::boxed::Box; +use std::convert::{{TryFrom, TryInto}}; +use std::sync::LazyLock; +use std::vec::Vec; +use stdarch_test::simd_test; + +static F32_DATA: LazyLock<[f32; {LEN_F32} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_F32} * {NUM_VECS}) + .map(|i| i as f32) + .collect::>() + .try_into() + .expect("f32 data incorrectly initialised") +}}); +static F64_DATA: LazyLock<[f64; {LEN_F64} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_F64} * {NUM_VECS}) + .map(|i| i as f64) + .collect::>() + .try_into() + .expect("f64 data incorrectly initialised") +}}); +static I8_DATA: LazyLock<[i8; {LEN_I8} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_I8} * {NUM_VECS}) + .map(|i| ((i + 128) % 256 - 128) as i8) + .collect::>() + .try_into() + .expect("i8 data incorrectly initialised") +}}); +static I16_DATA: LazyLock<[i16; {LEN_I16} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_I16} * {NUM_VECS}) + .map(|i| i as i16) + .collect::>() + .try_into() + .expect("i16 data incorrectly initialised") +}}); +static I32_DATA: LazyLock<[i32; {LEN_I32} * {NUM_VECS}]> = LazyLock::new(|| {{ + 
(0..{LEN_I32} * {NUM_VECS}) + .map(|i| i as i32) + .collect::>() + .try_into() + .expect("i32 data incorrectly initialised") +}}); +static I64_DATA: LazyLock<[i64; {LEN_I64} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_I64} * {NUM_VECS}) + .map(|i| i as i64) + .collect::>() + .try_into() + .expect("i64 data incorrectly initialised") +}}); +static U8_DATA: LazyLock<[u8; {LEN_U8} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_U8} * {NUM_VECS}) + .map(|i| i as u8) + .collect::>() + .try_into() + .expect("u8 data incorrectly initialised") +}}); +static U16_DATA: LazyLock<[u16; {LEN_U16} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_U16} * {NUM_VECS}) + .map(|i| i as u16) + .collect::>() + .try_into() + .expect("u16 data incorrectly initialised") +}}); +static U32_DATA: LazyLock<[u32; {LEN_U32} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_U32} * {NUM_VECS}) + .map(|i| i as u32) + .collect::>() + .try_into() + .expect("u32 data incorrectly initialised") +}}); +static U64_DATA: LazyLock<[u64; {LEN_U64} * {NUM_VECS}]> = LazyLock::new(|| {{ + (0..{LEN_U64} * {NUM_VECS}) + .map(|i| i as u64) + .collect::>() + .try_into() + .expect("u64 data incorrectly initialised") +}}); + +#[target_feature(enable = "sve")] +fn assert_vector_matches_f32(vector: svfloat32_t, expected: svfloat32_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b32(), defined)); + let cmp = svcmpne_f32(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_f64(vector: svfloat64_t, expected: svfloat64_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b64(), defined)); + let cmp = svcmpne_f64(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_i8(vector: svint8_t, expected: svint8_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b8(), defined)); + let cmp = svcmpne_s8(defined, vector, expected); + 
assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_i16(vector: svint16_t, expected: svint16_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b16(), defined)); + let cmp = svcmpne_s16(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_i32(vector: svint32_t, expected: svint32_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b32(), defined)); + let cmp = svcmpne_s32(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_i64(vector: svint64_t, expected: svint64_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b64(), defined)); + let cmp = svcmpne_s64(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_u8(vector: svuint8_t, expected: svuint8_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b8(), defined)); + let cmp = svcmpne_u8(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_u16(vector: svuint16_t, expected: svuint16_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b16(), defined)); + let cmp = svcmpne_u16(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_u32(vector: svuint32_t, expected: svuint32_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b32(), defined)); + let cmp = svcmpne_u32(defined, vector, expected); + assert!(!svptest_any(defined, cmp)) +}} + +#[target_feature(enable = "sve")] +fn assert_vector_matches_u64(vector: svuint64_t, expected: svuint64_t) {{ + let defined = svrdffr(); + assert!(svptest_first(svptrue_b64(), defined)); + let cmp = svcmpne_u64(defined, vector, expected); + assert!(!svptest_any(defined, 
cmp)) +}} +"# + ); +} + +lazy_static! { + static ref MANUAL_TESTS: String = format!( + "#[simd_test(enable = \"sve\")] +unsafe fn test_ffr() {{ + svsetffr(); + let ffr = svrdffr(); + assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svindex_u8(1, 0)); + let pred = svdupq_n_b8(true, false, true, false, true, false, true, false, + true, false, true, false, true, false, true, false); + svwrffr(pred); + let ffr = svrdffr_z(svptrue_b8()); + assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svdup_n_u8_z(pred, 1)); +}} +" + ); +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/main.rs b/library/stdarch/crates/stdarch-gen-arm/src/main.rs new file mode 100644 index 000000000000..9bf7d0981deb --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/main.rs @@ -0,0 +1,311 @@ +#![feature(pattern)] + +mod assert_instr; +mod big_endian; +mod context; +mod expression; +mod fn_suffix; +mod input; +mod intrinsic; +mod load_store_tests; +mod matching; +mod predicate_forms; +mod typekinds; +mod wildcards; +mod wildstring; + +use intrinsic::Test; +use itertools::Itertools; +use quote::quote; +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use walkdir::WalkDir; + +fn main() -> Result<(), String> { + parse_args() + .into_iter() + .map(|(filepath, out)| { + File::open(&filepath) + .map(|f| (f, filepath, out)) + .map_err(|e| format!("could not read input file: {e}")) + }) + .map(|res| { + let (file, filepath, out) = res?; + serde_yaml::from_reader(file) + .map(|input: input::GeneratorInput| (input, filepath, out)) + .map_err(|e| format!("could not parse input file: {e}")) + }) + .collect::, _>>()? 
+ .into_iter() + .map(|(input, filepath, out)| { + let intrinsics = input.intrinsics.into_iter() + .map(|intrinsic| { + intrinsic.generate_variants(&input.ctx) + }) + .try_collect() + .map(|mut vv: Vec<_>| { + vv.sort_by_cached_key(|variants| { + variants.first().map_or_else(String::default, |variant| { + variant.signature.fn_name().to_string() + }) + }); + vv.into_iter().flatten().collect_vec() + })?; + + if filepath.ends_with("sve.spec.yml") || filepath.ends_with("sve2.spec.yml") { + let loads = intrinsics.iter() + .filter_map(|i| { + if matches!(i.test, Test::Load(..)) { + Some(i.clone()) + } else { + None + } + }).collect(); + let stores = intrinsics.iter() + .filter_map(|i| { + if matches!(i.test, Test::Store(..)) { + Some(i.clone()) + } else { + None + } + }).collect(); + load_store_tests::generate_load_store_tests(loads, stores, out.as_ref().map(|o| make_tests_filepath(&filepath, o)).as_ref())?; + } + + Ok(( + input::GeneratorInput { + intrinsics, + ctx: input.ctx, + }, + filepath, + out, + )) + }) + .try_for_each( + |result: context::Result<(input::GeneratorInput, PathBuf, Option)>| -> context::Result { + let (generated, filepath, out) = result?; + + let w = match out { + Some(out) => Box::new( + File::create(make_output_filepath(&filepath, &out)) + .map_err(|e| format!("could not create output file: {e}"))?, + ) as Box, + None => Box::new(std::io::stdout()) as Box, + }; + + generate_file(generated, w) + .map_err(|e| format!("could not generate output file: {e}")) + }, + ) +} + +fn parse_args() -> Vec<(PathBuf, Option)> { + let mut args_it = std::env::args().skip(1); + assert!( + 1 <= args_it.len() && args_it.len() <= 2, + "Usage: cargo run -p stdarch-gen-arm -- INPUT_DIR [OUTPUT_DIR]\n\ + where:\n\ + - INPUT_DIR contains a tree like: INPUT_DIR//.spec.yml\n\ + - OUTPUT_DIR is a directory like: crates/core_arch/src/" + ); + + let in_path = Path::new(args_it.next().unwrap().as_str()).to_path_buf(); + assert!( + in_path.exists() && in_path.is_dir(), + "invalid 
path {in_path:#?} given" + ); + + let out_dir = if let Some(dir) = args_it.next() { + let out_path = Path::new(dir.as_str()).to_path_buf(); + assert!( + out_path.exists() && out_path.is_dir(), + "invalid path {out_path:#?} given" + ); + Some(out_path) + } else { + std::env::current_exe() + .map(|mut f| { + f.pop(); + f.push("../../crates/core_arch/src/"); + f.exists().then_some(f) + }) + .ok() + .flatten() + }; + + WalkDir::new(in_path) + .into_iter() + .filter_map(Result::ok) + .filter(|f| f.file_type().is_file()) + .map(|f| (f.into_path(), out_dir.clone())) + .collect() +} + +fn generate_file( + generated_input: input::GeneratorInput, + mut out: Box, +) -> std::io::Result<()> { + write!( + out, + r#"// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen-arm/spec/` and run the following command to re-generate this file: +// +// ``` +// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec +// ``` +#![allow(improper_ctypes)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +use super::*;{uses_neon} + +"#, + uses_neon = if generated_input.ctx.uses_neon_types { + "\nuse crate::core_arch::arch::aarch64::*;" + } else { + "" + }, + )?; + let intrinsics = generated_input.intrinsics; + format_code(out, quote! { #(#intrinsics)* })?; + Ok(()) +} + +pub fn format_code( + mut output: impl std::io::Write, + input: impl std::fmt::Display, +) -> std::io::Result<()> { + let proc = Command::new("rustfmt") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + write!(proc.stdin.as_ref().unwrap(), "{input}")?; + output.write_all(proc.wait_with_output()?.stdout.as_slice()) +} + +/// Derive an output file path from an input file path and an output directory. +/// +/// `in_filepath` is expected to have a structure like: +/// ...//.spec.yml +/// +/// The resulting output path will have a structure like: +/// ///generated.rs +/// +/// Panics if the resulting name is empty, or if file_name() is not UTF-8. 
+fn make_output_filepath(in_filepath: &Path, out_dirpath: &Path) -> PathBuf { + make_filepath(in_filepath, out_dirpath, |_name: &str| { + "generated.rs".to_owned() + }) +} + +fn make_tests_filepath(in_filepath: &Path, out_dirpath: &Path) -> PathBuf { + make_filepath(in_filepath, out_dirpath, |name: &str| { + format!("ld_st_tests_{name}.rs") + }) +} + +fn make_filepath String>( + in_filepath: &Path, + out_dirpath: &Path, + name_formatter: F, +) -> PathBuf { + let mut parts = in_filepath.components().rev().map(|f| { + f.as_os_str() + .to_str() + .expect("Inputs must have valid, UTF-8 file_name()") + }); + let yml = parts.next().expect("Not enough input path elements."); + let feature = parts.next().expect("Not enough input path elements."); + + let arch = yml + .strip_suffix(".yml") + .expect("Expected .yml file input.") + .strip_suffix(".spec") + .expect("Expected .spec.yml file input."); + if arch.is_empty() { + panic!("Extended ARCH.spec.yml file input."); + } + + let mut output = out_dirpath.to_path_buf(); + output.push(arch); + output.push(feature); + output.push(name_formatter(arch)); + output +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn infer_output_file() { + macro_rules! t { + ($src:expr, $outdir:expr, $dst:expr, $ldst:expr) => { + let src: PathBuf = $src.iter().collect(); + let outdir: PathBuf = $outdir.iter().collect(); + let dst: PathBuf = $dst.iter().collect(); + let ldst: PathBuf = $ldst.iter().collect(); + assert_eq!(make_output_filepath(&src, &outdir), dst); + assert_eq!(make_tests_filepath(&src, &outdir), ldst); + }; + } + // Documented usage. 
+ t!( + ["FEAT", "ARCH.spec.yml"], + [""], + ["ARCH", "FEAT", "generated.rs"], + ["ARCH", "FEAT", "ld_st_tests_ARCH.rs"] + ); + t!( + ["x", "y", "FEAT", "ARCH.spec.yml"], + ["out"], + ["out", "ARCH", "FEAT", "generated.rs"], + ["out", "ARCH", "FEAT", "ld_st_tests_ARCH.rs"] + ); + t!( + ["p", "q", "FEAT", "ARCH.spec.yml"], + ["a", "b"], + ["a", "b", "ARCH", "FEAT", "generated.rs"], + ["a", "b", "ARCH", "FEAT", "ld_st_tests_ARCH.rs"] + ); + // Extra extensions get treated as part of the stem. + t!( + ["FEAT", "ARCH.variant.spec.yml"], + ["out"], + ["out", "ARCH.variant", "FEAT", "generated.rs"], + ["out", "ARCH.variant", "FEAT", "ld_st_tests_ARCH.variant.rs"] + ); + } + + #[test] + #[should_panic] + fn infer_output_file_no_stem() { + let src = PathBuf::from("FEAT/.spec.yml"); + make_output_filepath(&src, Path::new("")); + } + + #[test] + #[should_panic] + fn infer_output_file_no_feat() { + let src = PathBuf::from("ARCH.spec.yml"); + make_output_filepath(&src, Path::new("")); + } + + #[test] + #[should_panic] + fn infer_output_file_ldst_no_stem() { + let src = PathBuf::from("FEAT/.spec.yml"); + make_tests_filepath(&src, Path::new("")); + } + + #[test] + #[should_panic] + fn infer_output_file_ldst_no_feat() { + let src = PathBuf::from("ARCH.spec.yml"); + make_tests_filepath(&src, Path::new("")); + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/matching.rs b/library/stdarch/crates/stdarch-gen-arm/src/matching.rs new file mode 100644 index 000000000000..0c4806204282 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/matching.rs @@ -0,0 +1,170 @@ +use proc_macro2::TokenStream; +use quote::ToTokens; +use serde::{Deserialize, Serialize}; +use std::fmt; + +use crate::context::{self, LocalContext}; +use crate::typekinds::{BaseType, BaseTypeKind, TypeKind}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct MatchSizeValues { + pub default: T, + pub byte: Option, + pub halfword: Option, + pub doubleword: 
Option, +} + +impl MatchSizeValues { + pub fn get(&mut self, ty: &TypeKind, ctx: &LocalContext) -> context::Result<&T> { + let base_ty = if let Some(w) = ty.wildcard() { + ctx.provide_type_wildcard(w)? + } else { + ty.clone() + }; + + if let BaseType::Sized(_, bitsize) = base_ty.base_type().unwrap() { + match (bitsize, &self.byte, &self.halfword, &self.doubleword) { + (64, _, _, Some(v)) | (16, _, Some(v), _) | (8, Some(v), _, _) => Ok(v), + _ => Ok(&self.default), + } + } else { + Err(format!("cannot match bitsize to unsized type {ty:?}!")) + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct MatchKindValues { + pub default: T, + pub float: Option, + pub unsigned: Option, +} + +impl MatchKindValues { + pub fn get(&mut self, ty: &TypeKind, ctx: &LocalContext) -> context::Result<&T> { + let base_ty = if let Some(w) = ty.wildcard() { + ctx.provide_type_wildcard(w)? + } else { + ty.clone() + }; + + match ( + base_ty.base_type().unwrap().kind(), + &self.float, + &self.unsigned, + ) { + (BaseTypeKind::Float, Some(v), _) | (BaseTypeKind::UInt, _, Some(v)) => Ok(v), + _ => Ok(&self.default), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged, deny_unknown_fields)] +pub enum SizeMatchable { + Matched(T), + Unmatched { + match_size: Option, + #[serde(flatten)] + values: MatchSizeValues>, + }, +} + +impl SizeMatchable { + pub fn perform_match(&mut self, ctx: &LocalContext) -> context::Result { + match self { + Self::Unmatched { + match_size: None, + values: MatchSizeValues { default, .. 
}, + } => *self = Self::Matched(*default.to_owned()), + Self::Unmatched { + match_size: Some(ty), + values, + } => *self = Self::Matched(*values.get(ty, ctx)?.to_owned()), + _ => {} + } + Ok(()) + } +} + +impl AsRef for SizeMatchable { + fn as_ref(&self) -> &T { + if let SizeMatchable::Matched(v) = self { + v + } else { + panic!("no match for {self:?} was performed"); + } + } +} + +impl AsMut for SizeMatchable { + fn as_mut(&mut self) -> &mut T { + if let SizeMatchable::Matched(v) = self { + v + } else { + panic!("no match for {self:?} was performed"); + } + } +} + +impl ToTokens for SizeMatchable { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.as_ref().to_tokens(tokens) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged, deny_unknown_fields)] +pub enum KindMatchable { + Matched(T), + Unmatched { + match_kind: Option, + #[serde(flatten)] + values: MatchKindValues>, + }, +} + +impl KindMatchable { + pub fn perform_match(&mut self, ctx: &LocalContext) -> context::Result { + match self { + Self::Unmatched { + match_kind: None, + values: MatchKindValues { default, .. 
}, + } => *self = Self::Matched(*default.to_owned()), + Self::Unmatched { + match_kind: Some(ty), + values, + } => *self = Self::Matched(*values.get(ty, ctx)?.to_owned()), + _ => {} + } + Ok(()) + } +} + +impl AsRef for KindMatchable { + fn as_ref(&self) -> &T { + if let KindMatchable::Matched(v) = self { + v + } else { + panic!("no match for {self:?} was performed"); + } + } +} + +impl AsMut for KindMatchable { + fn as_mut(&mut self) -> &mut T { + if let KindMatchable::Matched(v) = self { + v + } else { + panic!("no match for {self:?} was performed"); + } + } +} + +impl ToTokens for KindMatchable { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.as_ref().to_tokens(tokens) + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/predicate_forms.rs b/library/stdarch/crates/stdarch-gen-arm/src/predicate_forms.rs new file mode 100644 index 000000000000..02789bf7eb0b --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/predicate_forms.rs @@ -0,0 +1,249 @@ +use serde::{Deserialize, Serialize}; +use serde_with::{DeserializeFromStr, SerializeDisplay}; +use std::fmt; +use std::str::FromStr; + +use crate::context; +use crate::expression::{Expression, FnCall, IdentifierType}; +use crate::intrinsic::Intrinsic; +use crate::typekinds::{ToRepr, TypeKind}; +use crate::wildcards::Wildcard; +use crate::wildstring::WildString; + +const ZEROING_SUFFIX: &str = "_z"; +const MERGING_SUFFIX: &str = "_m"; +const DONT_CARE_SUFFIX: &str = "_x"; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ZeroingMethod { + /// Drop the specified argument and replace it with a zeroinitializer + Drop { drop: WildString }, + /// Apply zero selection to the specified variable when zeroing + Select { select: WildString }, +} + +impl PartialOrd for ZeroingMethod { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for ZeroingMethod { + fn cmp(&self, _: &Self) -> std::cmp::Ordering { + 
std::cmp::Ordering::Equal + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub enum DontCareMethod { + #[default] + Inferred, + AsZeroing, + AsMerging, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct PredicationMethods { + /// Zeroing method, if the zeroing predicate form is used + #[serde(default)] + pub zeroing_method: Option, + /// Don't care method, if the don't care predicate form is used + #[serde(default)] + pub dont_care_method: DontCareMethod, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub enum PredicateForm { + /// Enables merging predicate form + Merging, + /// Enables "don't care" predicate form. + DontCare(DontCareMethod), + /// Enables zeroing predicate form. If LLVM zeroselection is performed, then + /// set the `select` field to the variable that gets set. Otherwise set the + /// `drop` field if the zeroinitializer replaces a predicate when merging. + Zeroing(ZeroingMethod), +} + +impl PredicateForm { + pub fn get_suffix(&self) -> &'static str { + match self { + PredicateForm::Zeroing { .. } => ZEROING_SUFFIX, + PredicateForm::Merging => MERGING_SUFFIX, + PredicateForm::DontCare { .. 
} => DONT_CARE_SUFFIX, + } + } + + pub fn make_zeroinitializer(ty: &TypeKind) -> Expression { + FnCall::new_expression( + format!("svdup_n_{}", ty.acle_notation_repr()) + .parse() + .unwrap(), + vec![if ty.base_type().unwrap().is_float() { + Expression::FloatConstant(0.0) + } else { + Expression::IntConstant(0) + }], + ) + } + + pub fn make_zeroselector(pg_var: WildString, op_var: WildString, ty: &TypeKind) -> Expression { + FnCall::new_expression( + format!("svsel_{}", ty.acle_notation_repr()) + .parse() + .unwrap(), + vec![ + Expression::Identifier(pg_var, IdentifierType::Variable), + Expression::Identifier(op_var, IdentifierType::Variable), + Self::make_zeroinitializer(ty), + ], + ) + } + + pub fn post_build(&self, intrinsic: &mut Intrinsic) -> context::Result { + // Drop the argument + match self { + PredicateForm::Zeroing(ZeroingMethod::Drop { drop: drop_var }) => { + intrinsic.signature.drop_argument(drop_var)? + } + PredicateForm::DontCare(DontCareMethod::AsZeroing) => { + if let ZeroingMethod::Drop { drop } = intrinsic + .input + .predication_methods + .zeroing_method + .to_owned() + .ok_or_else(|| { + "DontCareMethod::AsZeroing without zeroing method.".to_string() + })? + { + intrinsic.signature.drop_argument(&drop)? + } + } + _ => {} + } + + Ok(()) + } + + fn infer_dont_care(mask: &PredicationMask, methods: &PredicationMethods) -> PredicateForm { + let method = if methods.dont_care_method == DontCareMethod::Inferred { + if mask.has_zeroing() + && matches!(methods.zeroing_method, Some(ZeroingMethod::Drop { .. 
})) + { + DontCareMethod::AsZeroing + } else { + DontCareMethod::AsMerging + } + } else { + methods.dont_care_method + }; + + PredicateForm::DontCare(method) + } + + pub fn compile_list( + mask: &PredicationMask, + methods: &PredicationMethods, + ) -> context::Result> { + let mut forms = Vec::new(); + + if mask.has_merging() { + forms.push(PredicateForm::Merging) + } + + if mask.has_dont_care() { + forms.push(Self::infer_dont_care(mask, methods)) + } + + if mask.has_zeroing() { + if let Some(method) = methods.zeroing_method.to_owned() { + forms.push(PredicateForm::Zeroing(method)) + } else { + return Err( + "cannot create a zeroing variant without a zeroing method specified!" + .to_string(), + ); + } + } + + Ok(forms) + } +} + +#[derive( + Debug, Clone, Copy, Default, PartialEq, Eq, Hash, DeserializeFromStr, SerializeDisplay, +)] +pub struct PredicationMask { + /// Merging + m: bool, + /// Don't care + x: bool, + /// Zeroing + z: bool, +} + +impl PredicationMask { + pub fn has_merging(&self) -> bool { + self.m + } + + pub fn has_dont_care(&self) -> bool { + self.x + } + + pub fn has_zeroing(&self) -> bool { + self.z + } +} + +impl FromStr for PredicationMask { + type Err = String; + + fn from_str(s: &str) -> Result { + let mut result = Self::default(); + for kind in s.bytes() { + match kind { + b'm' => result.m = true, + b'x' => result.x = true, + b'z' => result.z = true, + _ => { + return Err(format!( + "unknown predicate form modifier: {}", + char::from(kind) + )); + } + } + } + + if result.m || result.x || result.z { + Ok(result) + } else { + Err("invalid predication mask".to_string()) + } + } +} + +impl fmt::Display for PredicationMask { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.m.then(|| write!(f, "m")).transpose()?; + self.x.then(|| write!(f, "x")).transpose()?; + self.z.then(|| write!(f, "z")).transpose().map(|_| ()) + } +} + +impl TryFrom<&WildString> for PredicationMask { + type Error = String; + + fn try_from(value: &WildString) 
-> Result { + value + .wildcards() + .find_map(|w| { + if let Wildcard::PredicateForms(mask) = w { + Some(*mask) + } else { + None + } + }) + .ok_or_else(|| "no predicate forms were specified in the name".to_string()) + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/typekinds.rs b/library/stdarch/crates/stdarch-gen-arm/src/typekinds.rs new file mode 100644 index 000000000000..7c697cb7c0c4 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/typekinds.rs @@ -0,0 +1,1051 @@ +use lazy_static::lazy_static; +use proc_macro2::TokenStream; +use quote::{ToTokens, TokenStreamExt, quote}; +use regex::Regex; +use serde_with::{DeserializeFromStr, SerializeDisplay}; +use std::fmt; +use std::str::FromStr; + +use crate::context; +use crate::expression::{Expression, FnCall}; +use crate::intrinsic::AccessLevel; +use crate::wildcards::Wildcard; + +const VECTOR_FULL_REGISTER_SIZE: u32 = 128; +const VECTOR_HALF_REGISTER_SIZE: u32 = VECTOR_FULL_REGISTER_SIZE / 2; + +#[derive(Debug, Clone, Copy)] +pub enum TypeRepr { + C, + Rust, + LLVMMachine, + ACLENotation, + Size, + SizeLiteral, + TypeKind, + SizeInBytesLog2, +} + +pub trait ToRepr { + fn repr(&self, repr: TypeRepr) -> String; + + fn c_repr(&self) -> String { + self.repr(TypeRepr::C) + } + + fn rust_repr(&self) -> String { + self.repr(TypeRepr::Rust) + } + + fn llvm_machine_repr(&self) -> String { + self.repr(TypeRepr::LLVMMachine) + } + + fn acle_notation_repr(&self) -> String { + self.repr(TypeRepr::ACLENotation) + } + + fn size(&self) -> String { + self.repr(TypeRepr::Size) + } + + fn size_literal(&self) -> String { + self.repr(TypeRepr::SizeLiteral) + } + + fn type_kind(&self) -> String { + self.repr(TypeRepr::TypeKind) + } + + fn size_in_bytes_log2(&self) -> String { + self.repr(TypeRepr::SizeInBytesLog2) + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)] +pub struct TypeKindOptions { + f: bool, + s: bool, + u: bool, + p: bool, +} + +impl TypeKindOptions { + pub fn contains(&self, 
kind: BaseTypeKind) -> bool { + match kind { + BaseTypeKind::Float => self.f, + BaseTypeKind::Int => self.s, + BaseTypeKind::UInt => self.u, + BaseTypeKind::Poly => self.p, + BaseTypeKind::Bool => false, + } + } +} + +impl FromStr for TypeKindOptions { + type Err = String; + + fn from_str(s: &str) -> Result { + let mut result = Self::default(); + for kind in s.bytes() { + match kind { + b'f' => result.f = true, + b's' => result.s = true, + b'u' => result.u = true, + b'p' => result.p = true, + _ => { + return Err(format!("unknown type kind: {}", char::from(kind))); + } + } + } + Ok(result) + } +} + +impl fmt::Display for TypeKindOptions { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.f.then(|| write!(f, "f")).transpose()?; + self.s.then(|| write!(f, "s")).transpose()?; + self.u.then(|| write!(f, "u")).transpose().map(|_| ()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BaseTypeKind { + Float, + Int, + UInt, + Bool, + Poly, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BaseType { + Sized(BaseTypeKind, u32), + Unsized(BaseTypeKind), +} + +#[derive( + Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, SerializeDisplay, DeserializeFromStr, +)] +pub enum VectorTupleSize { + Two, + Three, + Four, +} + +impl VectorTupleSize { + pub fn to_int(self) -> u32 { + match self { + Self::Two => 2, + Self::Three => 3, + Self::Four => 4, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct VectorType { + base_type: BaseType, + lanes: u32, + is_scalable: bool, + tuple_size: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, SerializeDisplay, DeserializeFromStr)] +pub enum TypeKind { + Vector(VectorType), + Base(BaseType), + Pointer(Box, AccessLevel), + Custom(String), + Wildcard(Wildcard), +} + +impl TypeKind { + pub fn base_type(&self) -> Option<&BaseType> { + match self { + Self::Vector(t) => Some(t.base_type()), + 
Self::Pointer(t, _) => t.base_type(), + Self::Base(t) => Some(t), + Self::Wildcard(..) => None, + Self::Custom(..) => None, + } + } + + pub fn base_type_mut(&mut self) -> Option<&mut BaseType> { + match self { + Self::Vector(t) => Some(t.base_type_mut()), + Self::Pointer(t, _) => t.base_type_mut(), + Self::Base(t) => Some(t), + Self::Wildcard(..) => None, + Self::Custom(..) => None, + } + } + + pub fn populate_wildcard(&mut self, type_kind: TypeKind) -> context::Result { + match self { + Self::Wildcard(..) => *self = type_kind, + Self::Pointer(t, _) => t.populate_wildcard(type_kind)?, + _ => return Err("no wildcard available to populate".to_string()), + } + Ok(()) + } + + pub fn base(&self) -> Option<&BaseType> { + match self { + Self::Base(ty) => Some(ty), + Self::Pointer(tk, _) => tk.base(), + Self::Vector(ty) => Some(&ty.base_type), + _ => None, + } + } + + pub fn vector(&self) -> Option<&VectorType> { + match self { + Self::Vector(ty) => Some(ty), + _ => None, + } + } + + pub fn vector_mut(&mut self) -> Option<&mut VectorType> { + match self { + Self::Vector(ty) => Some(ty), + _ => None, + } + } + + pub fn wildcard(&self) -> Option<&Wildcard> { + match self { + Self::Wildcard(w) => Some(w), + Self::Pointer(w, _) => w.wildcard(), + _ => None, + } + } + + pub fn make_predicate_from(ty: &TypeKind) -> context::Result { + Ok(TypeKind::Vector(VectorType::make_predicate_from_bitsize( + ty.base_type() + .ok_or_else(|| format!("cannot infer predicate from type {ty}"))? 
+ .get_size() + .map_err(|_| format!("cannot infer predicate from unsized type {ty}"))?, + ))) + } + + pub fn make_vector( + from: TypeKind, + is_scalable: bool, + tuple_size: Option, + ) -> context::Result { + from.base().cloned().map_or_else( + || Err(format!("cannot make a vector type out of {from}!")), + |base| { + let vt = VectorType::make_from_base(base, is_scalable, tuple_size); + Ok(TypeKind::Vector(vt)) + }, + ) + } + + /// Return a new expression that converts the provided `expr` from type `other` to `self`. + /// + /// Conversions are bitwise over the whole value, like `transmute`, though `transmute` + /// itself is only used as a last resort. + /// + /// This can fail (returning `None`) due to incompatible types, and many conversions are simply + /// unimplemented. + pub fn express_reinterpretation_from( + &self, + other: &TypeKind, + expr: impl Into, + ) -> Option { + if self == other { + Some(expr.into()) + } else if let (Some(self_vty), Some(other_vty)) = (self.vector(), other.vector()) { + if self_vty.is_scalable + && self_vty.tuple_size.is_none() + && other_vty.is_scalable + && other_vty.tuple_size.is_none() + { + // Plain scalable vectors. + use BaseTypeKind::*; + match (self_vty.base_type, other_vty.base_type) { + (BaseType::Sized(Int, self_size), BaseType::Sized(UInt, other_size)) + if self_size == other_size => + { + Some(Expression::MethodCall( + Box::new(expr.into()), + "as_signed".parse().unwrap(), + vec![], + )) + } + (BaseType::Sized(UInt, self_size), BaseType::Sized(Int, other_size)) + if self_size == other_size => + { + Some(Expression::MethodCall( + Box::new(expr.into()), + "as_unsigned".parse().unwrap(), + vec![], + )) + } + ( + BaseType::Sized(Float | Int | UInt, _), + BaseType::Sized(Float | Int | UInt, _), + ) => Some(FnCall::new_expression( + // Conversions between float and (u)int, or where the lane size changes. 
+ "simd_reinterpret".parse().unwrap(), + vec![expr.into()], + )), + _ => None, + } + } else { + // Tuples and fixed-width vectors. + None + } + } else { + // Scalar types. + None + } + } +} + +impl FromStr for TypeKind { + type Err = String; + + fn from_str(s: &str) -> Result { + Ok(match s { + s if s.starts_with('{') && s.ends_with('}') => { + Self::Wildcard(s[1..s.len() - 1].trim().parse()?) + } + s if s.starts_with('*') => { + let mut split = s[1..].split_whitespace(); + let (ty, rw) = match (split.clone().count(), split.next(), split.next()) { + (2, Some("mut"), Some(ty)) => (ty, AccessLevel::RW), + (2, Some("const"), Some(ty)) => (ty, AccessLevel::R), + (1, Some(ty), None) => (ty, AccessLevel::R), + _ => return Err(format!("invalid pointer type {s:#?} given")), + }; + Self::Pointer(Box::new(ty.parse()?), rw) + } + _ => s + .parse::() + .map(TypeKind::Vector) + .or_else(|_| s.parse::().map(TypeKind::Base)) + .unwrap_or_else(|_| TypeKind::Custom(s.to_string())), + }) + } +} + +impl fmt::Display for TypeKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Vector(ty) => write!(f, "{ty}"), + Self::Pointer(ty, _) => write!(f, "{ty}"), + Self::Base(ty) => write!(f, "{ty}"), + Self::Wildcard(w) => write!(f, "{{{w}}}"), + Self::Custom(s) => write!(f, "{s}"), + } + } +} + +impl ToRepr for TypeKind { + fn repr(&self, repr: TypeRepr) -> String { + match self { + Self::Vector(ty) => ty.repr(repr), + Self::Pointer(ty, _) => ty.repr(repr), + Self::Base(ty) => ty.repr(repr), + Self::Wildcard(w) => format!("{w}"), + Self::Custom(s) => s.to_string(), + } + } +} + +impl ToTokens for TypeKind { + fn to_tokens(&self, tokens: &mut TokenStream) { + if let Self::Pointer(_, rw) = self { + tokens.append_all(match rw { + AccessLevel::RW => quote! { *mut }, + AccessLevel::R => quote! 
{ *const }, + }) + } + + tokens.append_all( + self.to_string() + .parse::() + .expect("invalid syntax"), + ) + } +} + +impl PartialOrd for TypeKind { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl From<&TypeKind> for usize { + fn from(ty: &TypeKind) -> Self { + match ty { + TypeKind::Base(_) => 1, + TypeKind::Pointer(_, _) => 2, + TypeKind::Vector(_) => 3, + TypeKind::Custom(_) => 4, + TypeKind::Wildcard(_) => 5, + } + } +} + +impl Ord for TypeKind { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + use std::cmp::Ordering::*; + + let self_int: usize = self.into(); + let other_int: usize = other.into(); + + if self_int == other_int { + match (self, other) { + (TypeKind::Base(ty1), TypeKind::Base(ty2)) => ty1.cmp(ty2), + (TypeKind::Pointer(ty1, _), TypeKind::Pointer(ty2, _)) => ty1.cmp(ty2), + (TypeKind::Vector(vt1), TypeKind::Vector(vt2)) => vt1.cmp(vt2), + (TypeKind::Custom(s1), TypeKind::Custom(s2)) => s1.cmp(s2), + (TypeKind::Wildcard(..), TypeKind::Wildcard(..)) => Equal, + _ => unreachable!(), + } + } else { + self_int.cmp(&other_int) + } + } +} + +impl VectorType { + pub fn base_type(&self) -> &BaseType { + &self.base_type + } + + pub fn base_type_mut(&mut self) -> &mut BaseType { + &mut self.base_type + } + + fn sanitise_lanes( + mut base_type: BaseType, + lanes: Option, + ) -> Result<(BaseType, u32), String> { + let lanes = match (base_type, lanes) { + (BaseType::Sized(BaseTypeKind::Bool, lanes), None) => { + base_type = BaseType::Sized(BaseTypeKind::Bool, VECTOR_FULL_REGISTER_SIZE / lanes); + lanes + } + (BaseType::Unsized(BaseTypeKind::Bool), None) => { + base_type = BaseType::Sized(BaseTypeKind::Bool, 8); + 16 + } + (BaseType::Sized(_, size), None) => VECTOR_FULL_REGISTER_SIZE / size, + (BaseType::Sized(_, size), Some(lanes)) => match size * lanes { + VECTOR_FULL_REGISTER_SIZE | VECTOR_HALF_REGISTER_SIZE => lanes, + _ => return Err("invalid number of lanes".to_string()), + }, + _ => return Err("cannot infer 
number of lanes".to_string()), + }; + + Ok((base_type, lanes)) + } + + pub fn make_from_base( + base_ty: BaseType, + is_scalable: bool, + tuple_size: Option, + ) -> VectorType { + #[allow(clippy::collapsible_if)] + if is_scalable { + if let BaseType::Sized(BaseTypeKind::Bool, size) = base_ty { + return Self::make_predicate_from_bitsize(size); + } + } + + let (base_type, lanes) = Self::sanitise_lanes(base_ty, None).unwrap(); + + VectorType { + base_type, + lanes, + is_scalable, + tuple_size, + } + } + + pub fn make_predicate_from_bitsize(size: u32) -> VectorType { + VectorType { + base_type: BaseType::Sized(BaseTypeKind::Bool, size), + lanes: (VECTOR_FULL_REGISTER_SIZE / size), + is_scalable: true, + tuple_size: None, + } + } + + pub fn cast_base_type_as(&mut self, ty: BaseType) { + self.base_type = ty + } + + pub fn lanes(&self) -> u32 { + self.lanes + } + + pub fn tuple_size(&self) -> Option { + self.tuple_size + } +} + +impl FromStr for VectorType { + type Err = String; + + fn from_str(s: &str) -> Result { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"^(?:(?:sv(?P(?:uint|int|bool|float)(?:\d+)?))|(?:(?P(?:uint|int|bool|poly|float)(?:\d+)?)x(?P(?:\d+)?)))(?:x(?P2|3|4))?_t$").unwrap(); + } + + if let Some(c) = RE.captures(s) { + let (base_type, lanes) = Self::sanitise_lanes( + c.name("sv_ty") + .or_else(|| c.name("ty")) + .map(<&str>::from) + .map(BaseType::from_str) + .unwrap()?, + c.name("lanes") + .map(<&str>::from) + .map(u32::from_str) + .transpose() + .unwrap(), + ) + .map_err(|e| format!("invalid {s:#?} vector type: {e}"))?; + + let tuple_size = c + .name("tuple_size") + .map(<&str>::from) + .map(VectorTupleSize::from_str) + .transpose() + .unwrap(); + + Ok(VectorType { + base_type, + is_scalable: c.name("sv_ty").is_some(), + lanes, + tuple_size, + }) + } else { + Err(format!("invalid vector type {s:#?} given")) + } + } +} + +impl ToRepr for VectorType { + fn repr(&self, repr: TypeRepr) -> String { + let make_llvm_repr = |show_unsigned| { + format!( + "{}v{}{}", + if self.is_scalable { "nx" } else { "" }, + self.lanes * (self.tuple_size.map(usize::from).unwrap_or(1) as u32), + match self.base_type { + BaseType::Sized(BaseTypeKind::UInt, size) if show_unsigned => + format!("u{size}"), + _ => self.base_type.llvm_machine_repr(), + } + ) + }; + + if matches!(repr, TypeRepr::ACLENotation) { + self.base_type.acle_notation_repr() + } else if matches!(repr, TypeRepr::LLVMMachine) { + make_llvm_repr(false) + } else if self.is_scalable { + match (self.base_type, self.lanes, self.tuple_size) { + (BaseType::Sized(BaseTypeKind::Bool, _), 16, _) => "svbool_t".to_string(), + (BaseType::Sized(BaseTypeKind::Bool, _), lanes, _) => format!("svbool{lanes}_t"), + (BaseType::Sized(_, size), lanes, _) + if VECTOR_FULL_REGISTER_SIZE != (size * lanes) => + { + // Special internal type case + make_llvm_repr(true) + } + (ty, _, None) => format!("sv{}_t", ty.c_repr()), + (ty, _, Some(tuple_size)) => format!("sv{}x{tuple_size}_t", ty.c_repr()), + } + } else { + match self.tuple_size { + 
Some(tuple_size) => format!( + "{}x{}x{}_t", + self.base_type.c_repr(), + self.lanes, + tuple_size + ), + None => format!("{}x{}_t", self.base_type.c_repr(), self.lanes), + } + } + } +} + +impl fmt::Display for VectorType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.c_repr()) + } +} + +impl From for usize { + fn from(t: VectorTupleSize) -> Self { + match t { + VectorTupleSize::Two => 2, + VectorTupleSize::Three => 3, + VectorTupleSize::Four => 4, + } + } +} + +impl FromStr for VectorTupleSize { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "2" => Ok(Self::Two), + "3" => Ok(Self::Three), + "4" => Ok(Self::Four), + _ => Err(format!("invalid vector tuple size `{s}` provided")), + } + } +} + +impl TryFrom for VectorTupleSize { + type Error = String; + + fn try_from(value: usize) -> Result { + match value { + 2 => Ok(Self::Two), + 3 => Ok(Self::Three), + 4 => Ok(Self::Four), + _ => Err(format!("invalid vector tuple size `{value}` provided")), + } + } +} + +impl fmt::Display for VectorTupleSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", usize::from(*self)) + } +} + +impl FromStr for BaseTypeKind { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "float" | "f" => Ok(Self::Float), + "int" | "i" => Ok(Self::Int), + "uint" | "u" => Ok(Self::UInt), + "poly" | "p" => Ok(Self::Poly), + "bool" | "b" => Ok(Self::Bool), + _ => Err(format!("no match for {s}")), + } + } +} + +impl ToRepr for BaseTypeKind { + fn repr(&self, repr: TypeRepr) -> String { + match (repr, self) { + (TypeRepr::C, Self::Float) => "float", + (TypeRepr::C, Self::Int) => "int", + (TypeRepr::C, Self::UInt) => "uint", + (TypeRepr::C, Self::Poly) => "poly", + (TypeRepr::Rust | TypeRepr::LLVMMachine | TypeRepr::ACLENotation, Self::Float) => "f", + (TypeRepr::Rust, Self::Int) | (TypeRepr::LLVMMachine, Self::Int | Self::UInt) => "i", + (TypeRepr::Rust | TypeRepr::ACLENotation, Self::UInt) => 
"u", + (TypeRepr::Rust | TypeRepr::LLVMMachine | TypeRepr::ACLENotation, Self::Poly) => "p", + (TypeRepr::ACLENotation, Self::Int) => "s", + (TypeRepr::ACLENotation, Self::Bool) => "b", + (_, Self::Bool) => "bool", + _ => { + unreachable!("no base type kind available for representation {repr:?}") + } + } + .to_string() + } +} + +impl fmt::Display for BaseTypeKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.c_repr()) + } +} + +impl BaseType { + pub fn get_size(&self) -> Result { + match self { + Self::Sized(_, size) => Ok(*size), + _ => Err(format!("unexpected invalid base type given {self:#?}")), + } + } + + pub fn kind(&self) -> &BaseTypeKind { + match self { + BaseType::Sized(kind, _) | BaseType::Unsized(kind) => kind, + } + } + + pub fn is_bool(&self) -> bool { + self.kind() == &BaseTypeKind::Bool + } + + pub fn is_float(&self) -> bool { + self.kind() == &BaseTypeKind::Float + } +} + +impl FromStr for BaseType { + type Err = String; + + fn from_str(s: &str) -> Result { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"^(?P[a-zA-Z]+)(?P\d+)?(_t)?$").unwrap(); + } + + if let Some(c) = RE.captures(s) { + let kind = c["kind"].parse()?; + let size = c + .name("size") + .map(<&str>::from) + .map(u32::from_str) + .transpose() + .unwrap(); + match size { + Some(size) => Ok(Self::Sized(kind, size)), + None => Ok(Self::Unsized(kind)), + } + } else { + Err(format!("failed to parse type `{s}`")) + } + } +} + +impl ToRepr for BaseType { + fn repr(&self, repr: TypeRepr) -> String { + use BaseType::*; + use BaseTypeKind::*; + use TypeRepr::*; + match (self, &repr) { + (Sized(Bool, _) | Unsized(Bool), LLVMMachine) => "i1".to_string(), + (Sized(_, size), SizeLiteral) if *size == 8 => "b".to_string(), + (Sized(_, size), SizeLiteral) if *size == 16 => "h".to_string(), + (Sized(_, size), SizeLiteral) if *size == 32 => "w".to_string(), + (Sized(_, size), SizeLiteral) if *size == 64 => "d".to_string(), + (Sized(_, size), SizeLiteral) if *size == 128 => "q".to_string(), + (_, SizeLiteral) => unreachable!("cannot represent {self:#?} as size literal"), + (Sized(Float, _) | Unsized(Float), TypeKind) => "f".to_string(), + (Sized(Int, _) | Unsized(Int), TypeKind) => "s".to_string(), + (Sized(UInt, _) | Unsized(UInt), TypeKind) => "u".to_string(), + (Sized(_, size), Size) => size.to_string(), + (Sized(_, size), SizeInBytesLog2) => { + assert!(size.is_power_of_two() && *size >= 8); + (size >> 3).trailing_zeros().to_string() + } + (Sized(kind, size), _) => format!("{}{size}", kind.repr(repr)), + (Unsized(kind), _) => kind.repr(repr), + } + } +} + +impl fmt::Display for BaseType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.rust_repr()) + } +} + +#[cfg(test)] +mod tests { + use crate::typekinds::*; + + #[test] + fn test_predicate() { + assert_eq!( + "svbool_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::Bool, 8), + is_scalable: true, + lanes: 16, + tuple_size: None + }) + ); + } + 
+ #[test] + fn test_llvm_internal_predicate() { + assert_eq!( + "svbool4_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::Bool, 32), + is_scalable: true, + lanes: 4, + tuple_size: None + }) + ); + } + + #[test] + fn test_llvm_internal_predicate_llvm() { + assert_eq!( + "svbool4_t".parse::().unwrap().llvm_machine_repr(), + "nxv4i1" + ); + } + + #[test] + fn test_llvm_internal_predicate_acle() { + assert_eq!( + "svbool4_t" + .parse::() + .unwrap() + .acle_notation_repr(), + "b32" + ); + } + + #[test] + fn test_predicate_from_bitsize() { + let pg = VectorType::make_predicate_from_bitsize(32); + assert_eq!(pg.acle_notation_repr(), "b32"); + assert_eq!(pg, "svbool4_t".parse().unwrap()); + assert_eq!(pg.lanes, 4); + assert_eq!(pg.base_type, BaseType::Sized(BaseTypeKind::Bool, 32)); + } + + #[test] + fn test_scalable_single() { + assert_eq!( + "svuint8_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::UInt, 8), + is_scalable: true, + lanes: 16, + tuple_size: None + }) + ); + } + + #[test] + fn test_scalable_tuple() { + assert_eq!( + "svint64x3_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::Int, 64), + is_scalable: true, + lanes: 2, + tuple_size: Some(VectorTupleSize::Three), + }) + ); + } + + #[test] + fn test_scalable_single_llvm() { + assert_eq!( + "svuint32_t" + .parse::() + .unwrap() + .llvm_machine_repr(), + "nxv4i32" + ); + } + + #[test] + fn test_scalable_tuple_llvm() { + assert_eq!( + "svint32x4_t" + .parse::() + .unwrap() + .llvm_machine_repr(), + "nxv16i32" + ); + } + + #[test] + fn test_vector_single_full() { + assert_eq!( + "uint32x4_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::UInt, 32), + is_scalable: false, + lanes: 4, + tuple_size: None, + }) + ); + } + + #[test] + fn test_vector_single_half() { + assert_eq!( + "uint32x2_t".parse::().unwrap(), + 
TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::UInt, 32), + is_scalable: false, + lanes: 2, + tuple_size: None, + }) + ); + } + + #[test] + fn test_vector_tuple() { + assert_eq!( + "uint64x2x4_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::UInt, 64), + is_scalable: false, + lanes: 2, + tuple_size: Some(VectorTupleSize::Four), + }) + ); + } + + #[test] + fn test_const_pointer() { + let p = "*u32".parse::().unwrap(); + assert_eq!( + p, + TypeKind::Pointer( + Box::new(TypeKind::Base(BaseType::Sized(BaseTypeKind::UInt, 32))), + AccessLevel::R + ) + ); + assert_eq!(p.to_token_stream().to_string(), "* const u32") + } + + #[test] + fn test_mut_pointer() { + let p = "*mut u32".parse::().unwrap(); + assert_eq!( + p, + TypeKind::Pointer( + Box::new(TypeKind::Base(BaseType::Sized(BaseTypeKind::UInt, 32))), + AccessLevel::RW + ) + ); + assert_eq!(p.to_token_stream().to_string(), "* mut u32") + } + + #[test] + #[should_panic] + fn test_invalid_vector_single() { + assert_eq!( + "uint32x8_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::UInt, 32), + is_scalable: false, + lanes: 8, + tuple_size: None, + }) + ); + } + + #[test] + #[should_panic] + fn test_invalid_vector_tuple() { + assert_eq!( + "uint32x4x5_t".parse::().unwrap(), + TypeKind::Vector(VectorType { + base_type: BaseType::Sized(BaseTypeKind::UInt, 32), + is_scalable: false, + lanes: 8, + tuple_size: None, // cannot represent + }) + ); + } + + #[test] + fn test_base() { + assert_eq!( + "u32".parse::().unwrap(), + TypeKind::Base(BaseType::Sized(BaseTypeKind::UInt, 32)), + ) + } + + #[test] + fn test_custom() { + assert_eq!( + "svpattern".parse::().unwrap(), + TypeKind::Custom("svpattern".to_string()), + ) + } + + #[test] + fn test_wildcard_type() { + assert_eq!( + "{type}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::Type(None)), + ) + } + + #[test] + fn test_wildcard_typeset() { + 
assert_eq!( + "{type[0]}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::Type(Some(0))), + ) + } + + #[test] + fn test_wildcard_sve_type() { + assert_eq!( + "{sve_type}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::SVEType(None, None)), + ) + } + + #[test] + fn test_wildcard_sve_typeset() { + assert_eq!( + "{sve_type[0]}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::SVEType(Some(0), None)), + ) + } + + #[test] + fn test_wildcard_sve_tuple_type() { + assert_eq!( + "{sve_type_x2}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::SVEType(None, Some(VectorTupleSize::Two))), + ) + } + + #[test] + fn test_wildcard_sve_tuple_typeset() { + assert_eq!( + "{sve_type_x2[0]}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::SVEType(Some(0), Some(VectorTupleSize::Two))), + ) + } + + #[test] + fn test_wildcard_predicate() { + assert_eq!( + "{predicate}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::Predicate(None)) + ) + } + + #[test] + fn test_wildcard_scale() { + assert_eq!( + "{sve_type as i8}".parse::().unwrap(), + TypeKind::Wildcard(Wildcard::Scale( + Box::new(Wildcard::SVEType(None, None)), + Box::new(TypeKind::Base(BaseType::Sized(BaseTypeKind::Int, 8))) + )) + ) + } + + #[test] + fn test_size_in_bytes_log2() { + assert_eq!("i8".parse::().unwrap().size_in_bytes_log2(), "0"); + assert_eq!("i16".parse::().unwrap().size_in_bytes_log2(), "1"); + assert_eq!("i32".parse::().unwrap().size_in_bytes_log2(), "2"); + assert_eq!("i64".parse::().unwrap().size_in_bytes_log2(), "3") + } + + #[test] + #[should_panic] + fn test_invalid_size_in_bytes_log2() { + "i9".parse::().unwrap().size_in_bytes_log2(); + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/wildcards.rs b/library/stdarch/crates/stdarch-gen-arm/src/wildcards.rs new file mode 100644 index 000000000000..25aa80348927 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/wildcards.rs @@ -0,0 +1,197 @@ +use lazy_static::lazy_static; +use regex::Regex; +use 
serde_with::{DeserializeFromStr, SerializeDisplay}; +use std::fmt; +use std::str::FromStr; + +use crate::{ + fn_suffix::SuffixKind, + predicate_forms::PredicationMask, + typekinds::{ToRepr, TypeKind, TypeKindOptions, VectorTupleSize}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, SerializeDisplay, DeserializeFromStr)] +pub enum Wildcard { + Type(Option), + /// NEON type derivated by a base type + NEONType(Option, Option, Option), + /// SVE type derivated by a base type + SVEType(Option, Option), + /// Integer representation of bitsize + Size(Option), + /// Integer representation of bitsize minus one + SizeMinusOne(Option), + /// Literal representation of the bitsize: b(yte), h(half), w(ord) or d(ouble) + SizeLiteral(Option), + /// Literal representation of the type kind: f(loat), s(igned), u(nsigned) + TypeKind(Option, Option), + /// Log2 of the size in bytes + SizeInBytesLog2(Option), + /// Predicate to be inferred from the specified type + Predicate(Option), + /// Predicate to be inferred from the greatest type + MaxPredicate, + + Scale(Box, Box), + + // Other wildcards + LLVMLink, + NVariant, + /// Predicate forms to use and placeholder for a predicate form function name modifier + PredicateForms(PredicationMask), + + /// User-set wildcard through `substitutions` + Custom(String), +} + +impl Wildcard { + pub fn is_nonpredicate_type(&self) -> bool { + matches!( + self, + Wildcard::Type(..) | Wildcard::NEONType(..) | Wildcard::SVEType(..) + ) + } + + pub fn get_typeset_index(&self) -> Option { + match self { + Wildcard::Type(idx) | Wildcard::NEONType(idx, ..) | Wildcard::SVEType(idx, ..) => { + Some(idx.unwrap_or(0)) + } + _ => None, + } + } +} + +impl FromStr for Wildcard { + type Err = String; + + fn from_str(s: &str) -> Result { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"^(?P\w+?)(?:_x(?P[2-4]))?(?:\[(?P\d+)\])?(?:\.(?P\w+))?(?:\s+as\s+(?P.*?))?$").unwrap(); + } + + if let Some(c) = RE.captures(s) { + let wildcard_name = &c["wildcard"]; + let inputset_index = c + .name("index") + .map(<&str>::from) + .map(usize::from_str) + .transpose() + .map_err(|_| format!("{:#?} is not a valid type index", &c["index"]))?; + let tuple_size = c + .name("tuple_size") + .map(<&str>::from) + .map(VectorTupleSize::from_str) + .transpose() + .map_err(|_| format!("{:#?} is not a valid tuple size", &c["tuple_size"]))?; + let modifiers = c.name("modifiers").map(<&str>::from); + + let wildcard = match (wildcard_name, inputset_index, tuple_size, modifiers) { + ("type", index, None, None) => Ok(Wildcard::Type(index)), + ("neon_type", index, tuple, modifier) => { + if let Some(str_suffix) = modifier { + let suffix_kind = SuffixKind::from_str(str_suffix); + return Ok(Wildcard::NEONType(index, tuple, Some(suffix_kind.unwrap()))); + } else { + Ok(Wildcard::NEONType(index, tuple, None)) + } + } + ("sve_type", index, tuple, None) => Ok(Wildcard::SVEType(index, tuple)), + ("size", index, None, None) => Ok(Wildcard::Size(index)), + ("size_minus_one", index, None, None) => Ok(Wildcard::SizeMinusOne(index)), + ("size_literal", index, None, None) => Ok(Wildcard::SizeLiteral(index)), + ("type_kind", index, None, modifiers) => Ok(Wildcard::TypeKind( + index, + modifiers.map(|modifiers| modifiers.parse()).transpose()?, + )), + ("size_in_bytes_log2", index, None, None) => Ok(Wildcard::SizeInBytesLog2(index)), + ("predicate", index, None, None) => Ok(Wildcard::Predicate(index)), + ("max_predicate", None, None, None) => Ok(Wildcard::MaxPredicate), + ("llvm_link", None, None, None) => Ok(Wildcard::LLVMLink), + ("_n", None, None, None) => Ok(Wildcard::NVariant), + (w, None, None, None) if w.starts_with('_') => { + // test for predicate forms + let pf_mask = PredicationMask::from_str(&w[1..]); + if let Ok(mask) = pf_mask { + if 
mask.has_merging() { + Ok(Wildcard::PredicateForms(mask)) + } else { + Err("cannot add predication without a Merging form".to_string()) + } + } else { + Err(format!("invalid wildcard `{s:#?}`")) + } + } + (cw, None, None, None) => Ok(Wildcard::Custom(cw.to_string())), + _ => Err(format!("invalid wildcard `{s:#?}`")), + }?; + + let scale_to = c + .name("scale_to") + .map(<&str>::from) + .map(TypeKind::from_str) + .transpose() + .map_err(|_| format!("{:#?} is not a valid type", &c["scale_to"]))?; + + if let Some(scale_to) = scale_to { + Ok(Wildcard::Scale(Box::new(wildcard), Box::new(scale_to))) + } else { + Ok(wildcard) + } + } else { + Err(format!("## invalid wildcard `{s:#?}`")) + } + } +} + +impl fmt::Display for Wildcard { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Type(None) => write!(f, "type"), + Self::Type(Some(index)) => write!(f, "type[{index}]"), + Self::NEONType(None, None, None) => write!(f, "neon_type"), + Self::NEONType(None, None, Some(suffix_kind)) => write!(f, "neon_type.{suffix_kind}"), + Self::NEONType(Some(index), None, None) => write!(f, "neon_type[{index}]"), + Self::NEONType(Some(index), None, Some(suffix_kind)) => { + write!(f, "neon_type[{index}].{suffix_kind}") + } + Self::NEONType(None, Some(tuple_size), Some(suffix_kind)) => { + write!(f, "neon_type_x{tuple_size}.{suffix_kind}") + } + Self::NEONType(None, Some(tuple_size), None) => write!(f, "neon_type_x{tuple_size}"), + Self::NEONType(Some(index), Some(tuple_size), None) => { + write!(f, "neon_type_x{tuple_size}[{index}]") + } + Self::NEONType(Some(index), Some(tuple_size), Some(suffix_kind)) => { + write!(f, "neon_type_x{tuple_size}[{index}].{suffix_kind}") + } + Self::SVEType(None, None) => write!(f, "sve_type"), + Self::SVEType(Some(index), None) => write!(f, "sve_type[{index}]"), + Self::SVEType(None, Some(tuple_size)) => write!(f, "sve_type_x{tuple_size}"), + Self::SVEType(Some(index), Some(tuple_size)) => { + write!(f, 
"sve_type_x{tuple_size}[{index}]") + } + Self::Size(None) => write!(f, "size"), + Self::Size(Some(index)) => write!(f, "size[{index}]"), + Self::SizeMinusOne(None) => write!(f, "size_minus_one"), + Self::SizeMinusOne(Some(index)) => write!(f, "size_minus_one[{index}]"), + Self::SizeLiteral(None) => write!(f, "size_literal"), + Self::SizeLiteral(Some(index)) => write!(f, "size_literal[{index}]"), + Self::TypeKind(None, None) => write!(f, "type_kind"), + Self::TypeKind(None, Some(opts)) => write!(f, "type_kind.{opts}"), + Self::TypeKind(Some(index), None) => write!(f, "type_kind[{index}]"), + Self::TypeKind(Some(index), Some(opts)) => write!(f, "type_kind[{index}].{opts}"), + Self::SizeInBytesLog2(None) => write!(f, "size_in_bytes_log2"), + Self::SizeInBytesLog2(Some(index)) => write!(f, "size_in_bytes_log2[{index}]"), + Self::Predicate(None) => write!(f, "predicate"), + Self::Predicate(Some(index)) => write!(f, "predicate[{index}]"), + Self::MaxPredicate => write!(f, "max_predicate"), + Self::LLVMLink => write!(f, "llvm_link"), + Self::NVariant => write!(f, "_n"), + Self::PredicateForms(mask) => write!(f, "_{mask}"), + + Self::Scale(wildcard, ty) => write!(f, "{wildcard} as {}", ty.rust_repr()), + Self::Custom(cw) => write!(f, "{cw}"), + } + } +} diff --git a/library/stdarch/crates/stdarch-gen-arm/src/wildstring.rs b/library/stdarch/crates/stdarch-gen-arm/src/wildstring.rs new file mode 100644 index 000000000000..4f8cc67f5e01 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-arm/src/wildstring.rs @@ -0,0 +1,399 @@ +use itertools::Itertools; +use proc_macro2::TokenStream; +use quote::{ToTokens, TokenStreamExt, quote}; +use serde_with::{DeserializeFromStr, SerializeDisplay}; +use std::str::pattern::Pattern; +use std::{fmt, str::FromStr}; + +use crate::context::LocalContext; +use crate::fn_suffix::make_neon_suffix; +use crate::typekinds::{ToRepr, TypeRepr}; +use crate::wildcards::Wildcard; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum WildStringPart { + 
String(String), + Wildcard(Wildcard), +} + +/// Wildcard-able string +#[derive(Debug, Clone, PartialEq, Eq, Default, SerializeDisplay, DeserializeFromStr)] +pub struct WildString(pub Vec); + +impl WildString { + pub fn has_wildcards(&self) -> bool { + for part in self.0.iter() { + if let WildStringPart::Wildcard(..) = part { + return true; + } + } + + false + } + + pub fn wildcards(&self) -> impl Iterator + '_ { + self.0.iter().filter_map(|part| match part { + WildStringPart::Wildcard(w) => Some(w), + _ => None, + }) + } + + pub fn iter(&self) -> impl Iterator + '_ { + self.0.iter() + } + + pub fn iter_mut(&mut self) -> impl Iterator + '_ { + self.0.iter_mut() + } + + pub fn starts_with(&self, s2: &str) -> bool { + self.to_string().starts_with(s2) + } + + pub fn prepend_str(&mut self, s: impl Into) { + self.0.insert(0, WildStringPart::String(s.into())) + } + + pub fn push_str(&mut self, s: impl Into) { + self.0.push(WildStringPart::String(s.into())) + } + + pub fn push_wildcard(&mut self, w: Wildcard) { + self.0.push(WildStringPart::Wildcard(w)) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn replace

(&self, from: P, to: &str) -> WildString + where + P: Pattern + Copy, + { + WildString( + self.0 + .iter() + .map(|part| match part { + WildStringPart::String(s) => WildStringPart::String(s.replace(from, to)), + part => part.clone(), + }) + .collect_vec(), + ) + } + + pub fn build_acle(&mut self, ctx: &LocalContext) -> Result<(), String> { + self.build(ctx, TypeRepr::ACLENotation) + } + + pub fn build_neon_intrinsic_signature(&mut self, ctx: &LocalContext) -> Result<(), String> { + let repr = TypeRepr::ACLENotation; + self.iter_mut().try_for_each(|wp| -> Result<(), String> { + if let WildStringPart::Wildcard(w) = wp { + match w { + &mut Wildcard::NEONType(_, _, ref maybe_suffix_kind) => { + if let Some(suffix_kind) = maybe_suffix_kind { + let x = ctx.provide_type_wildcard(w).unwrap(); + *wp = WildStringPart::String(make_neon_suffix(x, *suffix_kind)) + } else { + *wp = WildString::make_default_build(ctx, repr, w) + } + } + _ => *wp = WildString::make_default_build(ctx, repr, w), + } + } + Ok(()) + }) + } + + pub fn build(&mut self, ctx: &LocalContext, repr: TypeRepr) -> Result<(), String> { + match repr { + TypeRepr::ACLENotation | TypeRepr::LLVMMachine => { + self.iter_mut().try_for_each(|wp| -> Result<(), String> { + if let WildStringPart::Wildcard(w) = wp { + match w { + &mut Wildcard::NEONType(_, _, ref maybe_suffix_kind) => { + if let Some(suffix_kind) = maybe_suffix_kind { + let x = ctx.provide_type_wildcard(w).unwrap(); + *wp = WildStringPart::String(make_neon_suffix(x, *suffix_kind)) + } else { + *wp = WildString::make_default_build(ctx, repr, w) + } + } + _ => *wp = WildString::make_default_build(ctx, repr, w), + } + } + Ok(()) + }) + } + _ => self.iter_mut().try_for_each(|wp| -> Result<(), String> { + if let WildStringPart::Wildcard(w) = wp { + *wp = WildString::make_default_build(ctx, repr, w); + } + Ok(()) + }), + } + } + + fn make_default_build(ctx: &LocalContext, repr: TypeRepr, w: &mut Wildcard) -> WildStringPart { + WildStringPart::String( + 
ctx.provide_substitution_wildcard(w) + .or_else(|_| ctx.provide_type_wildcard(w).map(|ty| ty.repr(repr))) + .unwrap(), + ) + } +} + +impl From for WildString { + fn from(s: String) -> Self { + WildString(vec![WildStringPart::String(s)]) + } +} + +impl FromStr for WildString { + type Err = String; + + fn from_str(s: &str) -> Result { + enum State { + Normal { start: usize }, + Wildcard { start: usize, count: usize }, + EscapeTokenOpen { start: usize, at: usize }, + EscapeTokenClose { start: usize, at: usize }, + } + + let mut ws = WildString::default(); + match s + .char_indices() + .try_fold(State::Normal { start: 0 }, |state, (idx, ch)| { + match (state, ch) { + (State::Normal { start }, '{') => Ok(State::EscapeTokenOpen { start, at: idx }), + (State::Normal { start }, '}') => { + Ok(State::EscapeTokenClose { start, at: idx }) + } + (State::EscapeTokenOpen { start, at }, '{') + | (State::EscapeTokenClose { start, at }, '}') => { + if start < at { + ws.push_str(&s[start..at]) + } + + Ok(State::Normal { start: idx }) + } + (State::EscapeTokenOpen { at, .. }, '}') => Err(format!( + "empty wildcard given in string {s:?} at position {at}" + )), + (State::EscapeTokenOpen { start, at }, _) => { + if start < at { + ws.push_str(&s[start..at]) + } + + Ok(State::Wildcard { + start: idx, + count: 0, + }) + } + (State::EscapeTokenClose { at, .. }, _) => Err(format!( + "closing a non-wildcard/bad escape in string {s:?} at position {at}" + )), + // Nesting wildcards is only supported for `{foo as {bar}}`, wildcards cannot be + // nested at the start of a WildString. + (State::Wildcard { start, count }, '{') => Ok(State::Wildcard { + start, + count: count + 1, + }), + (State::Wildcard { start, count: 0 }, '}') => { + ws.push_wildcard(s[start..idx].parse()?); + Ok(State::Normal { start: idx + 1 }) + } + (State::Wildcard { start, count }, '}') => Ok(State::Wildcard { + start, + count: count - 1, + }), + (state @ State::Normal { .. }, _) | (state @ State::Wildcard { .. 
}, _) => { + Ok(state) + } + } + })? { + State::Normal { start } => { + if start < s.len() { + ws.push_str(&s[start..]); + } + + Ok(ws) + } + State::EscapeTokenOpen { at, .. } | State::Wildcard { start: at, .. } => Err(format!( + "unclosed wildcard in string {s:?} at position {at}" + )), + State::EscapeTokenClose { at, .. } => Err(format!( + "closing a non-wildcard/bad escape in string {s:?} at position {at}" + )), + } + } +} + +impl fmt::Display for WildString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + self.0 + .iter() + .map(|part| match part { + WildStringPart::String(s) => s.to_owned(), + WildStringPart::Wildcard(w) => format!("{{{w}}}"), + }) + .join("") + ) + } +} + +impl ToTokens for WildString { + fn to_tokens(&self, tokens: &mut TokenStream) { + assert!( + !self.has_wildcards(), + "cannot convert string with wildcards {self:?} to TokenStream" + ); + let str = self.to_string(); + tokens.append_all(quote! { #str }) + } +} + +#[cfg(test)] +mod tests { + use crate::typekinds::*; + use crate::wildstring::*; + + #[test] + fn test_empty_string() { + let ws: WildString = "".parse().unwrap(); + assert_eq!(ws.0.len(), 0); + } + + #[test] + fn test_plain_string() { + let ws: WildString = "plain string".parse().unwrap(); + assert_eq!(ws.0.len(), 1); + assert_eq!( + ws, + WildString(vec![WildStringPart::String("plain string".to_string())]) + ) + } + + #[test] + fn test_escaped_curly_brackets() { + let ws: WildString = "VALUE = {{value}}".parse().unwrap(); + assert_eq!(ws.to_string(), "VALUE = {value}"); + assert!(!ws.has_wildcards()); + } + + #[test] + fn test_escaped_curly_brackets_wildcard() { + let ws: WildString = "TYPE = {{{type}}}".parse().unwrap(); + assert_eq!(ws.to_string(), "TYPE = {{type}}"); + assert_eq!(ws.0.len(), 4); + assert!(ws.has_wildcards()); + } + + #[test] + fn test_wildcard_right_boundary() { + let s = "string test {type}"; + let ws: WildString = s.parse().unwrap(); + assert_eq!(&ws.to_string(), s); + 
assert!(ws.has_wildcards()); + } + + #[test] + fn test_wildcard_left_boundary() { + let s = "{type} string test"; + let ws: WildString = s.parse().unwrap(); + assert_eq!(&ws.to_string(), s); + assert!(ws.has_wildcards()); + } + + #[test] + fn test_recursive_wildcard() { + let s = "string test {type[0] as {type[1]}}"; + let ws: WildString = s.parse().unwrap(); + + assert_eq!(ws.0.len(), 2); + assert_eq!( + ws, + WildString(vec![ + WildStringPart::String("string test ".to_string()), + WildStringPart::Wildcard(Wildcard::Scale( + Box::new(Wildcard::Type(Some(0))), + Box::new(TypeKind::Wildcard(Wildcard::Type(Some(1)))), + )) + ]) + ); + } + + #[test] + fn test_scale_wildcard() { + let s = "string {type[0] as i8} test"; + let ws: WildString = s.parse().unwrap(); + + assert_eq!(ws.0.len(), 3); + assert_eq!( + ws, + WildString(vec![ + WildStringPart::String("string ".to_string()), + WildStringPart::Wildcard(Wildcard::Scale( + Box::new(Wildcard::Type(Some(0))), + Box::new(TypeKind::Base(BaseType::Sized(BaseTypeKind::Int, 8))), + )), + WildStringPart::String(" test".to_string()) + ]) + ); + } + + #[test] + fn test_solitaire_wildcard() { + let ws: WildString = "{type}".parse().unwrap(); + assert_eq!(ws.0.len(), 1); + assert_eq!( + ws, + WildString(vec![WildStringPart::Wildcard(Wildcard::Type(None))]) + ) + } + + #[test] + fn test_empty_wildcard() { + "string {}" + .parse::() + .expect_err("expected parse error"); + } + + #[test] + fn test_invalid_open_wildcard_right() { + "string {" + .parse::() + .expect_err("expected parse error"); + } + + #[test] + fn test_invalid_close_wildcard_right() { + "string }" + .parse::() + .expect_err("expected parse error"); + } + + #[test] + fn test_invalid_open_wildcard_left() { + "{string" + .parse::() + .expect_err("expected parse error"); + } + + #[test] + fn test_invalid_close_wildcard_left() { + "}string" + .parse::() + .expect_err("expected parse error"); + } + + #[test] + fn test_consecutive_wildcards() { + let s = 
"svprf{size_literal[1]}_gather_{type[0]}{index_or_offset}"; + let ws: WildString = s.parse().unwrap(); + assert_eq!(ws.to_string(), s) + } +} diff --git a/library/stdarch/crates/stdarch-gen-loongarch/Cargo.toml b/library/stdarch/crates/stdarch-gen-loongarch/Cargo.toml new file mode 100644 index 000000000000..d3ac607c5576 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "stdarch-gen-loongarch" +version = "0.1.0" +authors = ["ZHAI Xiang ", "WANG Rui "] +edition = "2024" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rand = "0.8.5" diff --git a/library/stdarch/crates/stdarch-gen-loongarch/README.md b/library/stdarch/crates/stdarch-gen-loongarch/README.md new file mode 100644 index 000000000000..1fc81483a12e --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/README.md @@ -0,0 +1,35 @@ +# LoongArch LSX/LASX intrinsic code generator + +A small tool that allows to quickly generate intrinsics for the LoongArch LSX/LASX architectures. + +The specification for the intrinsics can be found in `lsx.spec` or `lasx.spec`. + +To run and re-generate the code run the following from the root of the `stdarch` crate. 
+ +LSX: +``` +# Generate bindings +OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsxintrin.h +OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec +rustfmt crates/core_arch/src/loongarch64/lsx/generated.rs + +# Generate tests +OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec test +loongarch64-unknown-linux-gnu-gcc -static -o lsx crates/stdarch-gen-loongarch/lsx.c -mlasx -mfrecipe +qemu-loongarch64 ./lsx > crates/core_arch/src/loongarch64/lsx/tests.rs +rustfmt crates/core_arch/src/loongarch64/lsx/tests.rs +``` + +LASX: +``` +# Generate bindings +OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasxintrin.h +OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec +rustfmt crates/core_arch/src/loongarch64/lasx/generated.rs + +# Generate tests +OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec test +loongarch64-unknown-linux-gnu-gcc -static -o lasx crates/stdarch-gen-loongarch/lasx.c -mlasx -mfrecipe +qemu-loongarch64 ./lasx > crates/core_arch/src/loongarch64/lasx/tests.rs +rustfmt crates/core_arch/src/loongarch64/lasx/tests.rs +``` diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec new file mode 100644 index 000000000000..e3bdfcb5e9fa --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec @@ -0,0 +1,3705 @@ +// This code is automatically generated. DO NOT MODIFY. 
+// ``` +// OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasxintrin.h +// ``` + +/// lasx_xvsll_b +name = lasx_xvsll_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsll_h +name = lasx_xvsll_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsll_w +name = lasx_xvsll_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsll_d +name = lasx_xvsll_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvslli_b +name = lasx_xvslli_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvslli_h +name = lasx_xvslli_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvslli_w +name = lasx_xvslli_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvslli_d +name = lasx_xvslli_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvsra_b +name = lasx_xvsra_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsra_h +name = lasx_xvsra_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsra_w +name = lasx_xvsra_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsra_d +name = lasx_xvsra_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsrai_b +name = lasx_xvsrai_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvsrai_h +name = lasx_xvsrai_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvsrai_w +name = lasx_xvsrai_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvsrai_d +name = lasx_xvsrai_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvsrar_b +name = lasx_xvsrar_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsrar_h +name = lasx_xvsrar_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsrar_w +name = lasx_xvsrar_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + 
+/// lasx_xvsrar_d +name = lasx_xvsrar_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsrari_b +name = lasx_xvsrari_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvsrari_h +name = lasx_xvsrari_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvsrari_w +name = lasx_xvsrari_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvsrari_d +name = lasx_xvsrari_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvsrl_b +name = lasx_xvsrl_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsrl_h +name = lasx_xvsrl_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsrl_w +name = lasx_xvsrl_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsrl_d +name = lasx_xvsrl_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsrli_b +name = lasx_xvsrli_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvsrli_h +name = lasx_xvsrli_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvsrli_w +name = lasx_xvsrli_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvsrli_d +name = lasx_xvsrli_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvsrlr_b +name = lasx_xvsrlr_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsrlr_h +name = lasx_xvsrlr_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsrlr_w +name = lasx_xvsrlr_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsrlr_d +name = lasx_xvsrlr_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsrlri_b +name = lasx_xvsrlri_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvsrlri_h +name = lasx_xvsrlri_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvsrlri_w +name = lasx_xvsrlri_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvsrlri_d +name = 
lasx_xvsrlri_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvbitclr_b +name = lasx_xvbitclr_b +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvbitclr_h +name = lasx_xvbitclr_h +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvbitclr_w +name = lasx_xvbitclr_w +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvbitclr_d +name = lasx_xvbitclr_d +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvbitclri_b +name = lasx_xvbitclri_b +asm-fmts = xd, xj, ui3 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvbitclri_h +name = lasx_xvbitclri_h +asm-fmts = xd, xj, ui4 +data-types = UV16HI, UV16HI, UQI + +/// lasx_xvbitclri_w +name = lasx_xvbitclri_w +asm-fmts = xd, xj, ui5 +data-types = UV8SI, UV8SI, UQI + +/// lasx_xvbitclri_d +name = lasx_xvbitclri_d +asm-fmts = xd, xj, ui6 +data-types = UV4DI, UV4DI, UQI + +/// lasx_xvbitset_b +name = lasx_xvbitset_b +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvbitset_h +name = lasx_xvbitset_h +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvbitset_w +name = lasx_xvbitset_w +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvbitset_d +name = lasx_xvbitset_d +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvbitseti_b +name = lasx_xvbitseti_b +asm-fmts = xd, xj, ui3 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvbitseti_h +name = lasx_xvbitseti_h +asm-fmts = xd, xj, ui4 +data-types = UV16HI, UV16HI, UQI + +/// lasx_xvbitseti_w +name = lasx_xvbitseti_w +asm-fmts = xd, xj, ui5 +data-types = UV8SI, UV8SI, UQI + +/// lasx_xvbitseti_d +name = lasx_xvbitseti_d +asm-fmts = xd, xj, ui6 +data-types = UV4DI, UV4DI, UQI + +/// lasx_xvbitrev_b +name = lasx_xvbitrev_b +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvbitrev_h +name = lasx_xvbitrev_h +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvbitrev_w 
+name = lasx_xvbitrev_w +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvbitrev_d +name = lasx_xvbitrev_d +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvbitrevi_b +name = lasx_xvbitrevi_b +asm-fmts = xd, xj, ui3 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvbitrevi_h +name = lasx_xvbitrevi_h +asm-fmts = xd, xj, ui4 +data-types = UV16HI, UV16HI, UQI + +/// lasx_xvbitrevi_w +name = lasx_xvbitrevi_w +asm-fmts = xd, xj, ui5 +data-types = UV8SI, UV8SI, UQI + +/// lasx_xvbitrevi_d +name = lasx_xvbitrevi_d +asm-fmts = xd, xj, ui6 +data-types = UV4DI, UV4DI, UQI + +/// lasx_xvadd_b +name = lasx_xvadd_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvadd_h +name = lasx_xvadd_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvadd_w +name = lasx_xvadd_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvadd_d +name = lasx_xvadd_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvaddi_bu +name = lasx_xvaddi_bu +asm-fmts = xd, xj, ui5 +data-types = V32QI, V32QI, UQI + +/// lasx_xvaddi_hu +name = lasx_xvaddi_hu +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, UQI + +/// lasx_xvaddi_wu +name = lasx_xvaddi_wu +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvaddi_du +name = lasx_xvaddi_du +asm-fmts = xd, xj, ui5 +data-types = V4DI, V4DI, UQI + +/// lasx_xvsub_b +name = lasx_xvsub_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsub_h +name = lasx_xvsub_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsub_w +name = lasx_xvsub_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsub_d +name = lasx_xvsub_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsubi_bu +name = lasx_xvsubi_bu +asm-fmts = xd, xj, ui5 +data-types = V32QI, V32QI, UQI + +/// lasx_xvsubi_hu +name = lasx_xvsubi_hu +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, UQI + +/// 
lasx_xvsubi_wu +name = lasx_xvsubi_wu +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvsubi_du +name = lasx_xvsubi_du +asm-fmts = xd, xj, ui5 +data-types = V4DI, V4DI, UQI + +/// lasx_xvmax_b +name = lasx_xvmax_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvmax_h +name = lasx_xvmax_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvmax_w +name = lasx_xvmax_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvmax_d +name = lasx_xvmax_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmaxi_b +name = lasx_xvmaxi_b +asm-fmts = xd, xj, si5 +data-types = V32QI, V32QI, QI + +/// lasx_xvmaxi_h +name = lasx_xvmaxi_h +asm-fmts = xd, xj, si5 +data-types = V16HI, V16HI, QI + +/// lasx_xvmaxi_w +name = lasx_xvmaxi_w +asm-fmts = xd, xj, si5 +data-types = V8SI, V8SI, QI + +/// lasx_xvmaxi_d +name = lasx_xvmaxi_d +asm-fmts = xd, xj, si5 +data-types = V4DI, V4DI, QI + +/// lasx_xvmax_bu +name = lasx_xvmax_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvmax_hu +name = lasx_xvmax_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvmax_wu +name = lasx_xvmax_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvmax_du +name = lasx_xvmax_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvmaxi_bu +name = lasx_xvmaxi_bu +asm-fmts = xd, xj, ui5 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvmaxi_hu +name = lasx_xvmaxi_hu +asm-fmts = xd, xj, ui5 +data-types = UV16HI, UV16HI, UQI + +/// lasx_xvmaxi_wu +name = lasx_xvmaxi_wu +asm-fmts = xd, xj, ui5 +data-types = UV8SI, UV8SI, UQI + +/// lasx_xvmaxi_du +name = lasx_xvmaxi_du +asm-fmts = xd, xj, ui5 +data-types = UV4DI, UV4DI, UQI + +/// lasx_xvmin_b +name = lasx_xvmin_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvmin_h +name = lasx_xvmin_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvmin_w 
+name = lasx_xvmin_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvmin_d +name = lasx_xvmin_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmini_b +name = lasx_xvmini_b +asm-fmts = xd, xj, si5 +data-types = V32QI, V32QI, QI + +/// lasx_xvmini_h +name = lasx_xvmini_h +asm-fmts = xd, xj, si5 +data-types = V16HI, V16HI, QI + +/// lasx_xvmini_w +name = lasx_xvmini_w +asm-fmts = xd, xj, si5 +data-types = V8SI, V8SI, QI + +/// lasx_xvmini_d +name = lasx_xvmini_d +asm-fmts = xd, xj, si5 +data-types = V4DI, V4DI, QI + +/// lasx_xvmin_bu +name = lasx_xvmin_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvmin_hu +name = lasx_xvmin_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvmin_wu +name = lasx_xvmin_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvmin_du +name = lasx_xvmin_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvmini_bu +name = lasx_xvmini_bu +asm-fmts = xd, xj, ui5 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvmini_hu +name = lasx_xvmini_hu +asm-fmts = xd, xj, ui5 +data-types = UV16HI, UV16HI, UQI + +/// lasx_xvmini_wu +name = lasx_xvmini_wu +asm-fmts = xd, xj, ui5 +data-types = UV8SI, UV8SI, UQI + +/// lasx_xvmini_du +name = lasx_xvmini_du +asm-fmts = xd, xj, ui5 +data-types = UV4DI, UV4DI, UQI + +/// lasx_xvseq_b +name = lasx_xvseq_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvseq_h +name = lasx_xvseq_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvseq_w +name = lasx_xvseq_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvseq_d +name = lasx_xvseq_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvseqi_b +name = lasx_xvseqi_b +asm-fmts = xd, xj, si5 +data-types = V32QI, V32QI, QI + +/// lasx_xvseqi_h +name = lasx_xvseqi_h +asm-fmts = xd, xj, si5 +data-types = V16HI, V16HI, QI + +/// lasx_xvseqi_w +name = lasx_xvseqi_w 
+asm-fmts = xd, xj, si5 +data-types = V8SI, V8SI, QI + +/// lasx_xvseqi_d +name = lasx_xvseqi_d +asm-fmts = xd, xj, si5 +data-types = V4DI, V4DI, QI + +/// lasx_xvslt_b +name = lasx_xvslt_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvslt_h +name = lasx_xvslt_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvslt_w +name = lasx_xvslt_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvslt_d +name = lasx_xvslt_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvslti_b +name = lasx_xvslti_b +asm-fmts = xd, xj, si5 +data-types = V32QI, V32QI, QI + +/// lasx_xvslti_h +name = lasx_xvslti_h +asm-fmts = xd, xj, si5 +data-types = V16HI, V16HI, QI + +/// lasx_xvslti_w +name = lasx_xvslti_w +asm-fmts = xd, xj, si5 +data-types = V8SI, V8SI, QI + +/// lasx_xvslti_d +name = lasx_xvslti_d +asm-fmts = xd, xj, si5 +data-types = V4DI, V4DI, QI + +/// lasx_xvslt_bu +name = lasx_xvslt_bu +asm-fmts = xd, xj, xk +data-types = V32QI, UV32QI, UV32QI + +/// lasx_xvslt_hu +name = lasx_xvslt_hu +asm-fmts = xd, xj, xk +data-types = V16HI, UV16HI, UV16HI + +/// lasx_xvslt_wu +name = lasx_xvslt_wu +asm-fmts = xd, xj, xk +data-types = V8SI, UV8SI, UV8SI + +/// lasx_xvslt_du +name = lasx_xvslt_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvslti_bu +name = lasx_xvslti_bu +asm-fmts = xd, xj, ui5 +data-types = V32QI, UV32QI, UQI + +/// lasx_xvslti_hu +name = lasx_xvslti_hu +asm-fmts = xd, xj, ui5 +data-types = V16HI, UV16HI, UQI + +/// lasx_xvslti_wu +name = lasx_xvslti_wu +asm-fmts = xd, xj, ui5 +data-types = V8SI, UV8SI, UQI + +/// lasx_xvslti_du +name = lasx_xvslti_du +asm-fmts = xd, xj, ui5 +data-types = V4DI, UV4DI, UQI + +/// lasx_xvsle_b +name = lasx_xvsle_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsle_h +name = lasx_xvsle_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsle_w +name = lasx_xvsle_w +asm-fmts = xd, xj, xk 
+data-types = V8SI, V8SI, V8SI + +/// lasx_xvsle_d +name = lasx_xvsle_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvslei_b +name = lasx_xvslei_b +asm-fmts = xd, xj, si5 +data-types = V32QI, V32QI, QI + +/// lasx_xvslei_h +name = lasx_xvslei_h +asm-fmts = xd, xj, si5 +data-types = V16HI, V16HI, QI + +/// lasx_xvslei_w +name = lasx_xvslei_w +asm-fmts = xd, xj, si5 +data-types = V8SI, V8SI, QI + +/// lasx_xvslei_d +name = lasx_xvslei_d +asm-fmts = xd, xj, si5 +data-types = V4DI, V4DI, QI + +/// lasx_xvsle_bu +name = lasx_xvsle_bu +asm-fmts = xd, xj, xk +data-types = V32QI, UV32QI, UV32QI + +/// lasx_xvsle_hu +name = lasx_xvsle_hu +asm-fmts = xd, xj, xk +data-types = V16HI, UV16HI, UV16HI + +/// lasx_xvsle_wu +name = lasx_xvsle_wu +asm-fmts = xd, xj, xk +data-types = V8SI, UV8SI, UV8SI + +/// lasx_xvsle_du +name = lasx_xvsle_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvslei_bu +name = lasx_xvslei_bu +asm-fmts = xd, xj, ui5 +data-types = V32QI, UV32QI, UQI + +/// lasx_xvslei_hu +name = lasx_xvslei_hu +asm-fmts = xd, xj, ui5 +data-types = V16HI, UV16HI, UQI + +/// lasx_xvslei_wu +name = lasx_xvslei_wu +asm-fmts = xd, xj, ui5 +data-types = V8SI, UV8SI, UQI + +/// lasx_xvslei_du +name = lasx_xvslei_du +asm-fmts = xd, xj, ui5 +data-types = V4DI, UV4DI, UQI + +/// lasx_xvsat_b +name = lasx_xvsat_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvsat_h +name = lasx_xvsat_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvsat_w +name = lasx_xvsat_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvsat_d +name = lasx_xvsat_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvsat_bu +name = lasx_xvsat_bu +asm-fmts = xd, xj, ui3 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvsat_hu +name = lasx_xvsat_hu +asm-fmts = xd, xj, ui4 +data-types = UV16HI, UV16HI, UQI + +/// lasx_xvsat_wu +name = lasx_xvsat_wu +asm-fmts = xd, xj, ui5 +data-types = UV8SI, 
UV8SI, UQI + +/// lasx_xvsat_du +name = lasx_xvsat_du +asm-fmts = xd, xj, ui6 +data-types = UV4DI, UV4DI, UQI + +/// lasx_xvadda_b +name = lasx_xvadda_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvadda_h +name = lasx_xvadda_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvadda_w +name = lasx_xvadda_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvadda_d +name = lasx_xvadda_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsadd_b +name = lasx_xvsadd_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsadd_h +name = lasx_xvsadd_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsadd_w +name = lasx_xvsadd_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsadd_d +name = lasx_xvsadd_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsadd_bu +name = lasx_xvsadd_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvsadd_hu +name = lasx_xvsadd_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvsadd_wu +name = lasx_xvsadd_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvsadd_du +name = lasx_xvsadd_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvavg_b +name = lasx_xvavg_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvavg_h +name = lasx_xvavg_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvavg_w +name = lasx_xvavg_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvavg_d +name = lasx_xvavg_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvavg_bu +name = lasx_xvavg_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvavg_hu +name = lasx_xvavg_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvavg_wu +name = lasx_xvavg_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, 
UV8SI + +/// lasx_xvavg_du +name = lasx_xvavg_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvavgr_b +name = lasx_xvavgr_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvavgr_h +name = lasx_xvavgr_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvavgr_w +name = lasx_xvavgr_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvavgr_d +name = lasx_xvavgr_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvavgr_bu +name = lasx_xvavgr_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvavgr_hu +name = lasx_xvavgr_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvavgr_wu +name = lasx_xvavgr_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvavgr_du +name = lasx_xvavgr_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvssub_b +name = lasx_xvssub_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvssub_h +name = lasx_xvssub_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvssub_w +name = lasx_xvssub_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvssub_d +name = lasx_xvssub_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvssub_bu +name = lasx_xvssub_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvssub_hu +name = lasx_xvssub_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvssub_wu +name = lasx_xvssub_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvssub_du +name = lasx_xvssub_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvabsd_b +name = lasx_xvabsd_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvabsd_h +name = lasx_xvabsd_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvabsd_w +name = lasx_xvabsd_w +asm-fmts = xd, xj, xk +data-types 
= V8SI, V8SI, V8SI + +/// lasx_xvabsd_d +name = lasx_xvabsd_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvabsd_bu +name = lasx_xvabsd_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvabsd_hu +name = lasx_xvabsd_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvabsd_wu +name = lasx_xvabsd_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvabsd_du +name = lasx_xvabsd_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvmul_b +name = lasx_xvmul_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvmul_h +name = lasx_xvmul_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvmul_w +name = lasx_xvmul_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvmul_d +name = lasx_xvmul_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmadd_b +name = lasx_xvmadd_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI, V32QI + +/// lasx_xvmadd_h +name = lasx_xvmadd_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI, V16HI + +/// lasx_xvmadd_w +name = lasx_xvmadd_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI, V8SI + +/// lasx_xvmadd_d +name = lasx_xvmadd_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI, V4DI + +/// lasx_xvmsub_b +name = lasx_xvmsub_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI, V32QI + +/// lasx_xvmsub_h +name = lasx_xvmsub_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI, V16HI + +/// lasx_xvmsub_w +name = lasx_xvmsub_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI, V8SI + +/// lasx_xvmsub_d +name = lasx_xvmsub_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI, V4DI + +/// lasx_xvdiv_b +name = lasx_xvdiv_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvdiv_h +name = lasx_xvdiv_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvdiv_w +name = lasx_xvdiv_w 
+asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvdiv_d +name = lasx_xvdiv_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvdiv_bu +name = lasx_xvdiv_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvdiv_hu +name = lasx_xvdiv_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvdiv_wu +name = lasx_xvdiv_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvdiv_du +name = lasx_xvdiv_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvhaddw_h_b +name = lasx_xvhaddw_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvhaddw_w_h +name = lasx_xvhaddw_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvhaddw_d_w +name = lasx_xvhaddw_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvhaddw_hu_bu +name = lasx_xvhaddw_hu_bu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV32QI, UV32QI + +/// lasx_xvhaddw_wu_hu +name = lasx_xvhaddw_wu_hu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV16HI, UV16HI + +/// lasx_xvhaddw_du_wu +name = lasx_xvhaddw_du_wu +asm-fmts = xd, xj, xk +data-types = UV4DI, UV8SI, UV8SI + +/// lasx_xvhsubw_h_b +name = lasx_xvhsubw_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvhsubw_w_h +name = lasx_xvhsubw_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvhsubw_d_w +name = lasx_xvhsubw_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvhsubw_hu_bu +name = lasx_xvhsubw_hu_bu +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, UV32QI + +/// lasx_xvhsubw_wu_hu +name = lasx_xvhsubw_wu_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvhsubw_du_wu +name = lasx_xvhsubw_du_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvmod_b +name = lasx_xvmod_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvmod_h +name = lasx_xvmod_h +asm-fmts 
= xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvmod_w +name = lasx_xvmod_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvmod_d +name = lasx_xvmod_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmod_bu +name = lasx_xvmod_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvmod_hu +name = lasx_xvmod_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvmod_wu +name = lasx_xvmod_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvmod_du +name = lasx_xvmod_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvrepl128vei_b +name = lasx_xvrepl128vei_b +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, UQI + +/// lasx_xvrepl128vei_h +name = lasx_xvrepl128vei_h +asm-fmts = xd, xj, ui3 +data-types = V16HI, V16HI, UQI + +/// lasx_xvrepl128vei_w +name = lasx_xvrepl128vei_w +asm-fmts = xd, xj, ui2 +data-types = V8SI, V8SI, UQI + +/// lasx_xvrepl128vei_d +name = lasx_xvrepl128vei_d +asm-fmts = xd, xj, ui1 +data-types = V4DI, V4DI, UQI + +/// lasx_xvpickev_b +name = lasx_xvpickev_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvpickev_h +name = lasx_xvpickev_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvpickev_w +name = lasx_xvpickev_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvpickev_d +name = lasx_xvpickev_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvpickod_b +name = lasx_xvpickod_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvpickod_h +name = lasx_xvpickod_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvpickod_w +name = lasx_xvpickod_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvpickod_d +name = lasx_xvpickod_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvilvh_b +name = lasx_xvilvh_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, 
V32QI + +/// lasx_xvilvh_h +name = lasx_xvilvh_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvilvh_w +name = lasx_xvilvh_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvilvh_d +name = lasx_xvilvh_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvilvl_b +name = lasx_xvilvl_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvilvl_h +name = lasx_xvilvl_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvilvl_w +name = lasx_xvilvl_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvilvl_d +name = lasx_xvilvl_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvpackev_b +name = lasx_xvpackev_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvpackev_h +name = lasx_xvpackev_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvpackev_w +name = lasx_xvpackev_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvpackev_d +name = lasx_xvpackev_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvpackod_b +name = lasx_xvpackod_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvpackod_h +name = lasx_xvpackod_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvpackod_w +name = lasx_xvpackod_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvpackod_d +name = lasx_xvpackod_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvshuf_b +name = lasx_xvshuf_b +asm-fmts = xd, xj, xk, xa +data-types = V32QI, V32QI, V32QI, V32QI + +/// lasx_xvshuf_h +name = lasx_xvshuf_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI, V16HI + +/// lasx_xvshuf_w +name = lasx_xvshuf_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI, V8SI + +/// lasx_xvshuf_d +name = lasx_xvshuf_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI, V4DI + +/// lasx_xvand_v +name = lasx_xvand_v +asm-fmts = xd, xj, 
xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvandi_b +name = lasx_xvandi_b +asm-fmts = xd, xj, ui8 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvor_v +name = lasx_xvor_v +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvori_b +name = lasx_xvori_b +asm-fmts = xd, xj, ui8 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvnor_v +name = lasx_xvnor_v +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvnori_b +name = lasx_xvnori_b +asm-fmts = xd, xj, ui8 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvxor_v +name = lasx_xvxor_v +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvxori_b +name = lasx_xvxori_b +asm-fmts = xd, xj, ui8 +data-types = UV32QI, UV32QI, UQI + +/// lasx_xvbitsel_v +name = lasx_xvbitsel_v +asm-fmts = xd, xj, xk, xa +data-types = UV32QI, UV32QI, UV32QI, UV32QI + +/// lasx_xvbitseli_b +name = lasx_xvbitseli_b +asm-fmts = xd, xj, ui8 +data-types = UV32QI, UV32QI, UV32QI, USI + +/// lasx_xvshuf4i_b +name = lasx_xvshuf4i_b +asm-fmts = xd, xj, ui8 +data-types = V32QI, V32QI, USI + +/// lasx_xvshuf4i_h +name = lasx_xvshuf4i_h +asm-fmts = xd, xj, ui8 +data-types = V16HI, V16HI, USI + +/// lasx_xvshuf4i_w +name = lasx_xvshuf4i_w +asm-fmts = xd, xj, ui8 +data-types = V8SI, V8SI, USI + +/// lasx_xvreplgr2vr_b +name = lasx_xvreplgr2vr_b +asm-fmts = xd, rj +data-types = V32QI, SI + +/// lasx_xvreplgr2vr_h +name = lasx_xvreplgr2vr_h +asm-fmts = xd, rj +data-types = V16HI, SI + +/// lasx_xvreplgr2vr_w +name = lasx_xvreplgr2vr_w +asm-fmts = xd, rj +data-types = V8SI, SI + +/// lasx_xvreplgr2vr_d +name = lasx_xvreplgr2vr_d +asm-fmts = xd, rj +data-types = V4DI, DI + +/// lasx_xvpcnt_b +name = lasx_xvpcnt_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvpcnt_h +name = lasx_xvpcnt_h +asm-fmts = xd, xj +data-types = V16HI, V16HI + +/// lasx_xvpcnt_w +name = lasx_xvpcnt_w +asm-fmts = xd, xj +data-types = V8SI, V8SI + +/// lasx_xvpcnt_d +name = lasx_xvpcnt_d +asm-fmts = xd, xj 
+data-types = V4DI, V4DI + +/// lasx_xvclo_b +name = lasx_xvclo_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvclo_h +name = lasx_xvclo_h +asm-fmts = xd, xj +data-types = V16HI, V16HI + +/// lasx_xvclo_w +name = lasx_xvclo_w +asm-fmts = xd, xj +data-types = V8SI, V8SI + +/// lasx_xvclo_d +name = lasx_xvclo_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvclz_b +name = lasx_xvclz_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvclz_h +name = lasx_xvclz_h +asm-fmts = xd, xj +data-types = V16HI, V16HI + +/// lasx_xvclz_w +name = lasx_xvclz_w +asm-fmts = xd, xj +data-types = V8SI, V8SI + +/// lasx_xvclz_d +name = lasx_xvclz_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvfadd_s +name = lasx_xvfadd_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfadd_d +name = lasx_xvfadd_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfsub_s +name = lasx_xvfsub_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfsub_d +name = lasx_xvfsub_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfmul_s +name = lasx_xvfmul_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfmul_d +name = lasx_xvfmul_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfdiv_s +name = lasx_xvfdiv_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfdiv_d +name = lasx_xvfdiv_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfcvt_h_s +name = lasx_xvfcvt_h_s +asm-fmts = xd, xj, xk +data-types = V16HI, V8SF, V8SF + +/// lasx_xvfcvt_s_d +name = lasx_xvfcvt_s_d +asm-fmts = xd, xj, xk +data-types = V8SF, V4DF, V4DF + +/// lasx_xvfmin_s +name = lasx_xvfmin_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfmin_d +name = lasx_xvfmin_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfmina_s +name = lasx_xvfmina_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// 
lasx_xvfmina_d +name = lasx_xvfmina_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfmax_s +name = lasx_xvfmax_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfmax_d +name = lasx_xvfmax_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfmaxa_s +name = lasx_xvfmaxa_s +asm-fmts = xd, xj, xk +data-types = V8SF, V8SF, V8SF + +/// lasx_xvfmaxa_d +name = lasx_xvfmaxa_d +asm-fmts = xd, xj, xk +data-types = V4DF, V4DF, V4DF + +/// lasx_xvfclass_s +name = lasx_xvfclass_s +asm-fmts = xd, xj +data-types = V8SI, V8SF + +/// lasx_xvfclass_d +name = lasx_xvfclass_d +asm-fmts = xd, xj +data-types = V4DI, V4DF + +/// lasx_xvfsqrt_s +name = lasx_xvfsqrt_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfsqrt_d +name = lasx_xvfsqrt_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrecip_s +name = lasx_xvfrecip_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrecip_d +name = lasx_xvfrecip_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrecipe_s +name = lasx_xvfrecipe_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrecipe_d +name = lasx_xvfrecipe_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrsqrte_s +name = lasx_xvfrsqrte_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrsqrte_d +name = lasx_xvfrsqrte_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrint_s +name = lasx_xvfrint_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrint_d +name = lasx_xvfrint_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrsqrt_s +name = lasx_xvfrsqrt_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrsqrt_d +name = lasx_xvfrsqrt_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvflogb_s +name = lasx_xvflogb_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvflogb_d +name = lasx_xvflogb_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfcvth_s_h +name = lasx_xvfcvth_s_h +asm-fmts = 
xd, xj +data-types = V8SF, V16HI + +/// lasx_xvfcvth_d_s +name = lasx_xvfcvth_d_s +asm-fmts = xd, xj +data-types = V4DF, V8SF + +/// lasx_xvfcvtl_s_h +name = lasx_xvfcvtl_s_h +asm-fmts = xd, xj +data-types = V8SF, V16HI + +/// lasx_xvfcvtl_d_s +name = lasx_xvfcvtl_d_s +asm-fmts = xd, xj +data-types = V4DF, V8SF + +/// lasx_xvftint_w_s +name = lasx_xvftint_w_s +asm-fmts = xd, xj +data-types = V8SI, V8SF + +/// lasx_xvftint_l_d +name = lasx_xvftint_l_d +asm-fmts = xd, xj +data-types = V4DI, V4DF + +/// lasx_xvftint_wu_s +name = lasx_xvftint_wu_s +asm-fmts = xd, xj +data-types = UV8SI, V8SF + +/// lasx_xvftint_lu_d +name = lasx_xvftint_lu_d +asm-fmts = xd, xj +data-types = UV4DI, V4DF + +/// lasx_xvftintrz_w_s +name = lasx_xvftintrz_w_s +asm-fmts = xd, xj +data-types = V8SI, V8SF + +/// lasx_xvftintrz_l_d +name = lasx_xvftintrz_l_d +asm-fmts = xd, xj +data-types = V4DI, V4DF + +/// lasx_xvftintrz_wu_s +name = lasx_xvftintrz_wu_s +asm-fmts = xd, xj +data-types = UV8SI, V8SF + +/// lasx_xvftintrz_lu_d +name = lasx_xvftintrz_lu_d +asm-fmts = xd, xj +data-types = UV4DI, V4DF + +/// lasx_xvffint_s_w +name = lasx_xvffint_s_w +asm-fmts = xd, xj +data-types = V8SF, V8SI + +/// lasx_xvffint_d_l +name = lasx_xvffint_d_l +asm-fmts = xd, xj +data-types = V4DF, V4DI + +/// lasx_xvffint_s_wu +name = lasx_xvffint_s_wu +asm-fmts = xd, xj +data-types = V8SF, UV8SI + +/// lasx_xvffint_d_lu +name = lasx_xvffint_d_lu +asm-fmts = xd, xj +data-types = V4DF, UV4DI + +/// lasx_xvreplve_b +name = lasx_xvreplve_b +asm-fmts = xd, xj, rk +data-types = V32QI, V32QI, SI + +/// lasx_xvreplve_h +name = lasx_xvreplve_h +asm-fmts = xd, xj, rk +data-types = V16HI, V16HI, SI + +/// lasx_xvreplve_w +name = lasx_xvreplve_w +asm-fmts = xd, xj, rk +data-types = V8SI, V8SI, SI + +/// lasx_xvreplve_d +name = lasx_xvreplve_d +asm-fmts = xd, xj, rk +data-types = V4DI, V4DI, SI + +/// lasx_xvpermi_w +name = lasx_xvpermi_w +asm-fmts = xd, xj, ui8 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvandn_v +name = 
lasx_xvandn_v +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvneg_b +name = lasx_xvneg_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvneg_h +name = lasx_xvneg_h +asm-fmts = xd, xj +data-types = V16HI, V16HI + +/// lasx_xvneg_w +name = lasx_xvneg_w +asm-fmts = xd, xj +data-types = V8SI, V8SI + +/// lasx_xvneg_d +name = lasx_xvneg_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvmuh_b +name = lasx_xvmuh_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvmuh_h +name = lasx_xvmuh_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvmuh_w +name = lasx_xvmuh_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvmuh_d +name = lasx_xvmuh_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmuh_bu +name = lasx_xvmuh_bu +asm-fmts = xd, xj, xk +data-types = UV32QI, UV32QI, UV32QI + +/// lasx_xvmuh_hu +name = lasx_xvmuh_hu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV16HI + +/// lasx_xvmuh_wu +name = lasx_xvmuh_wu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV8SI + +/// lasx_xvmuh_du +name = lasx_xvmuh_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvsllwil_h_b +name = lasx_xvsllwil_h_b +asm-fmts = xd, xj, ui3 +data-types = V16HI, V32QI, UQI + +/// lasx_xvsllwil_w_h +name = lasx_xvsllwil_w_h +asm-fmts = xd, xj, ui4 +data-types = V8SI, V16HI, UQI + +/// lasx_xvsllwil_d_w +name = lasx_xvsllwil_d_w +asm-fmts = xd, xj, ui5 +data-types = V4DI, V8SI, UQI + +/// lasx_xvsllwil_hu_bu +name = lasx_xvsllwil_hu_bu +asm-fmts = xd, xj, ui3 +data-types = UV16HI, UV32QI, UQI + +/// lasx_xvsllwil_wu_hu +name = lasx_xvsllwil_wu_hu +asm-fmts = xd, xj, ui4 +data-types = UV8SI, UV16HI, UQI + +/// lasx_xvsllwil_du_wu +name = lasx_xvsllwil_du_wu +asm-fmts = xd, xj, ui5 +data-types = UV4DI, UV8SI, UQI + +/// lasx_xvsran_b_h +name = lasx_xvsran_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvsran_h_w +name = 
lasx_xvsran_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvsran_w_d +name = lasx_xvsran_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssran_b_h +name = lasx_xvssran_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvssran_h_w +name = lasx_xvssran_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvssran_w_d +name = lasx_xvssran_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssran_bu_h +name = lasx_xvssran_bu_h +asm-fmts = xd, xj, xk +data-types = UV32QI, UV16HI, UV16HI + +/// lasx_xvssran_hu_w +name = lasx_xvssran_hu_w +asm-fmts = xd, xj, xk +data-types = UV16HI, UV8SI, UV8SI + +/// lasx_xvssran_wu_d +name = lasx_xvssran_wu_d +asm-fmts = xd, xj, xk +data-types = UV8SI, UV4DI, UV4DI + +/// lasx_xvsrarn_b_h +name = lasx_xvsrarn_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvsrarn_h_w +name = lasx_xvsrarn_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvsrarn_w_d +name = lasx_xvsrarn_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssrarn_b_h +name = lasx_xvssrarn_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvssrarn_h_w +name = lasx_xvssrarn_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvssrarn_w_d +name = lasx_xvssrarn_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssrarn_bu_h +name = lasx_xvssrarn_bu_h +asm-fmts = xd, xj, xk +data-types = UV32QI, UV16HI, UV16HI + +/// lasx_xvssrarn_hu_w +name = lasx_xvssrarn_hu_w +asm-fmts = xd, xj, xk +data-types = UV16HI, UV8SI, UV8SI + +/// lasx_xvssrarn_wu_d +name = lasx_xvssrarn_wu_d +asm-fmts = xd, xj, xk +data-types = UV8SI, UV4DI, UV4DI + +/// lasx_xvsrln_b_h +name = lasx_xvsrln_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvsrln_h_w +name = lasx_xvsrln_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// 
lasx_xvsrln_w_d +name = lasx_xvsrln_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssrln_bu_h +name = lasx_xvssrln_bu_h +asm-fmts = xd, xj, xk +data-types = UV32QI, UV16HI, UV16HI + +/// lasx_xvssrln_hu_w +name = lasx_xvssrln_hu_w +asm-fmts = xd, xj, xk +data-types = UV16HI, UV8SI, UV8SI + +/// lasx_xvssrln_wu_d +name = lasx_xvssrln_wu_d +asm-fmts = xd, xj, xk +data-types = UV8SI, UV4DI, UV4DI + +/// lasx_xvsrlrn_b_h +name = lasx_xvsrlrn_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvsrlrn_h_w +name = lasx_xvsrlrn_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvsrlrn_w_d +name = lasx_xvsrlrn_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssrlrn_bu_h +name = lasx_xvssrlrn_bu_h +asm-fmts = xd, xj, xk +data-types = UV32QI, UV16HI, UV16HI + +/// lasx_xvssrlrn_hu_w +name = lasx_xvssrlrn_hu_w +asm-fmts = xd, xj, xk +data-types = UV16HI, UV8SI, UV8SI + +/// lasx_xvssrlrn_wu_d +name = lasx_xvssrlrn_wu_d +asm-fmts = xd, xj, xk +data-types = UV8SI, UV4DI, UV4DI + +/// lasx_xvfrstpi_b +name = lasx_xvfrstpi_b +asm-fmts = xd, xj, ui5 +data-types = V32QI, V32QI, V32QI, UQI + +/// lasx_xvfrstpi_h +name = lasx_xvfrstpi_h +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, UQI + +/// lasx_xvfrstp_b +name = lasx_xvfrstp_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI, V32QI + +/// lasx_xvfrstp_h +name = lasx_xvfrstp_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI, V16HI + +/// lasx_xvshuf4i_d +name = lasx_xvshuf4i_d +asm-fmts = xd, xj, ui8 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvbsrl_v +name = lasx_xvbsrl_v +asm-fmts = xd, xj, ui5 +data-types = V32QI, V32QI, UQI + +/// lasx_xvbsll_v +name = lasx_xvbsll_v +asm-fmts = xd, xj, ui5 +data-types = V32QI, V32QI, UQI + +/// lasx_xvextrins_b +name = lasx_xvextrins_b +asm-fmts = xd, xj, ui8 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvextrins_h +name = lasx_xvextrins_h +asm-fmts = xd, xj, ui8 
+data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvextrins_w +name = lasx_xvextrins_w +asm-fmts = xd, xj, ui8 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvextrins_d +name = lasx_xvextrins_d +asm-fmts = xd, xj, ui8 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvmskltz_b +name = lasx_xvmskltz_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvmskltz_h +name = lasx_xvmskltz_h +asm-fmts = xd, xj +data-types = V16HI, V16HI + +/// lasx_xvmskltz_w +name = lasx_xvmskltz_w +asm-fmts = xd, xj +data-types = V8SI, V8SI + +/// lasx_xvmskltz_d +name = lasx_xvmskltz_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvsigncov_b +name = lasx_xvsigncov_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvsigncov_h +name = lasx_xvsigncov_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvsigncov_w +name = lasx_xvsigncov_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvsigncov_d +name = lasx_xvsigncov_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvfmadd_s +name = lasx_xvfmadd_s +asm-fmts = xd, xj, xk, xa +data-types = V8SF, V8SF, V8SF, V8SF + +/// lasx_xvfmadd_d +name = lasx_xvfmadd_d +asm-fmts = xd, xj, xk, xa +data-types = V4DF, V4DF, V4DF, V4DF + +/// lasx_xvfmsub_s +name = lasx_xvfmsub_s +asm-fmts = xd, xj, xk, xa +data-types = V8SF, V8SF, V8SF, V8SF + +/// lasx_xvfmsub_d +name = lasx_xvfmsub_d +asm-fmts = xd, xj, xk, xa +data-types = V4DF, V4DF, V4DF, V4DF + +/// lasx_xvfnmadd_s +name = lasx_xvfnmadd_s +asm-fmts = xd, xj, xk, xa +data-types = V8SF, V8SF, V8SF, V8SF + +/// lasx_xvfnmadd_d +name = lasx_xvfnmadd_d +asm-fmts = xd, xj, xk, xa +data-types = V4DF, V4DF, V4DF, V4DF + +/// lasx_xvfnmsub_s +name = lasx_xvfnmsub_s +asm-fmts = xd, xj, xk, xa +data-types = V8SF, V8SF, V8SF, V8SF + +/// lasx_xvfnmsub_d +name = lasx_xvfnmsub_d +asm-fmts = xd, xj, xk, xa +data-types = V4DF, V4DF, V4DF, V4DF + +/// lasx_xvftintrne_w_s +name = lasx_xvftintrne_w_s +asm-fmts = xd, xj 
+data-types = V8SI, V8SF + +/// lasx_xvftintrne_l_d +name = lasx_xvftintrne_l_d +asm-fmts = xd, xj +data-types = V4DI, V4DF + +/// lasx_xvftintrp_w_s +name = lasx_xvftintrp_w_s +asm-fmts = xd, xj +data-types = V8SI, V8SF + +/// lasx_xvftintrp_l_d +name = lasx_xvftintrp_l_d +asm-fmts = xd, xj +data-types = V4DI, V4DF + +/// lasx_xvftintrm_w_s +name = lasx_xvftintrm_w_s +asm-fmts = xd, xj +data-types = V8SI, V8SF + +/// lasx_xvftintrm_l_d +name = lasx_xvftintrm_l_d +asm-fmts = xd, xj +data-types = V4DI, V4DF + +/// lasx_xvftint_w_d +name = lasx_xvftint_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DF, V4DF + +/// lasx_xvffint_s_l +name = lasx_xvffint_s_l +asm-fmts = xd, xj, xk +data-types = V8SF, V4DI, V4DI + +/// lasx_xvftintrz_w_d +name = lasx_xvftintrz_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DF, V4DF + +/// lasx_xvftintrp_w_d +name = lasx_xvftintrp_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DF, V4DF + +/// lasx_xvftintrm_w_d +name = lasx_xvftintrm_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DF, V4DF + +/// lasx_xvftintrne_w_d +name = lasx_xvftintrne_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DF, V4DF + +/// lasx_xvftinth_l_s +name = lasx_xvftinth_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintl_l_s +name = lasx_xvftintl_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvffinth_d_w +name = lasx_xvffinth_d_w +asm-fmts = xd, xj +data-types = V4DF, V8SI + +/// lasx_xvffintl_d_w +name = lasx_xvffintl_d_w +asm-fmts = xd, xj +data-types = V4DF, V8SI + +/// lasx_xvftintrzh_l_s +name = lasx_xvftintrzh_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintrzl_l_s +name = lasx_xvftintrzl_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintrph_l_s +name = lasx_xvftintrph_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintrpl_l_s +name = lasx_xvftintrpl_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintrmh_l_s +name = lasx_xvftintrmh_l_s +asm-fmts = xd, xj 
+data-types = V4DI, V8SF + +/// lasx_xvftintrml_l_s +name = lasx_xvftintrml_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintrneh_l_s +name = lasx_xvftintrneh_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvftintrnel_l_s +name = lasx_xvftintrnel_l_s +asm-fmts = xd, xj +data-types = V4DI, V8SF + +/// lasx_xvfrintrne_s +name = lasx_xvfrintrne_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrintrne_d +name = lasx_xvfrintrne_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrintrz_s +name = lasx_xvfrintrz_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrintrz_d +name = lasx_xvfrintrz_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrintrp_s +name = lasx_xvfrintrp_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrintrp_d +name = lasx_xvfrintrp_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvfrintrm_s +name = lasx_xvfrintrm_s +asm-fmts = xd, xj +data-types = V8SF, V8SF + +/// lasx_xvfrintrm_d +name = lasx_xvfrintrm_d +asm-fmts = xd, xj +data-types = V4DF, V4DF + +/// lasx_xvld +name = lasx_xvld +asm-fmts = xd, rj, si12 +data-types = V32QI, CVPOINTER, SI + +/// lasx_xvst +name = lasx_xvst +asm-fmts = xd, rj, si12 +data-types = VOID, V32QI, CVPOINTER, SI + +/// lasx_xvstelm_b +name = lasx_xvstelm_b +asm-fmts = xd, rj, si8, idx +data-types = VOID, V32QI, CVPOINTER, SI, UQI + +/// lasx_xvstelm_h +name = lasx_xvstelm_h +asm-fmts = xd, rj, si8, idx +data-types = VOID, V16HI, CVPOINTER, SI, UQI + +/// lasx_xvstelm_w +name = lasx_xvstelm_w +asm-fmts = xd, rj, si8, idx +data-types = VOID, V8SI, CVPOINTER, SI, UQI + +/// lasx_xvstelm_d +name = lasx_xvstelm_d +asm-fmts = xd, rj, si8, idx +data-types = VOID, V4DI, CVPOINTER, SI, UQI + +/// lasx_xvinsve0_w +name = lasx_xvinsve0_w +asm-fmts = xd, xj, ui3 +data-types = V8SI, V8SI, V8SI, UQI + +/// lasx_xvinsve0_d +name = lasx_xvinsve0_d +asm-fmts = xd, xj, ui2 +data-types = V4DI, V4DI, V4DI, UQI + +/// lasx_xvpickve_w +name = 
lasx_xvpickve_w +asm-fmts = xd, xj, ui3 +data-types = V8SI, V8SI, UQI + +/// lasx_xvpickve_d +name = lasx_xvpickve_d +asm-fmts = xd, xj, ui2 +data-types = V4DI, V4DI, UQI + +/// lasx_xvssrlrn_b_h +name = lasx_xvssrlrn_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvssrlrn_h_w +name = lasx_xvssrlrn_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvssrlrn_w_d +name = lasx_xvssrlrn_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvssrln_b_h +name = lasx_xvssrln_b_h +asm-fmts = xd, xj, xk +data-types = V32QI, V16HI, V16HI + +/// lasx_xvssrln_h_w +name = lasx_xvssrln_h_w +asm-fmts = xd, xj, xk +data-types = V16HI, V8SI, V8SI + +/// lasx_xvssrln_w_d +name = lasx_xvssrln_w_d +asm-fmts = xd, xj, xk +data-types = V8SI, V4DI, V4DI + +/// lasx_xvorn_v +name = lasx_xvorn_v +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvldi +name = lasx_xvldi +asm-fmts = xd, i13 +data-types = V4DI, HI + +/// lasx_xvldx +name = lasx_xvldx +asm-fmts = xd, rj, rk +data-types = V32QI, CVPOINTER, DI + +/// lasx_xvstx +name = lasx_xvstx +asm-fmts = xd, rj, rk +data-types = VOID, V32QI, CVPOINTER, DI + +/// lasx_xvextl_qu_du +name = lasx_xvextl_qu_du +asm-fmts = xd, xj +data-types = UV4DI, UV4DI + +/// lasx_xvinsgr2vr_w +name = lasx_xvinsgr2vr_w +asm-fmts = xd, rj, ui3 +data-types = V8SI, V8SI, SI, UQI + +/// lasx_xvinsgr2vr_d +name = lasx_xvinsgr2vr_d +asm-fmts = xd, rj, ui2 +data-types = V4DI, V4DI, DI, UQI + +/// lasx_xvreplve0_b +name = lasx_xvreplve0_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvreplve0_h +name = lasx_xvreplve0_h +asm-fmts = xd, xj +data-types = V16HI, V16HI + +/// lasx_xvreplve0_w +name = lasx_xvreplve0_w +asm-fmts = xd, xj +data-types = V8SI, V8SI + +/// lasx_xvreplve0_d +name = lasx_xvreplve0_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvreplve0_q +name = lasx_xvreplve0_q +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_vext2xv_h_b +name = 
lasx_vext2xv_h_b +asm-fmts = xd, xj +data-types = V16HI, V32QI + +/// lasx_vext2xv_w_h +name = lasx_vext2xv_w_h +asm-fmts = xd, xj +data-types = V8SI, V16HI + +/// lasx_vext2xv_d_w +name = lasx_vext2xv_d_w +asm-fmts = xd, xj +data-types = V4DI, V8SI + +/// lasx_vext2xv_w_b +name = lasx_vext2xv_w_b +asm-fmts = xd, xj +data-types = V8SI, V32QI + +/// lasx_vext2xv_d_h +name = lasx_vext2xv_d_h +asm-fmts = xd, xj +data-types = V4DI, V16HI + +/// lasx_vext2xv_d_b +name = lasx_vext2xv_d_b +asm-fmts = xd, xj +data-types = V4DI, V32QI + +/// lasx_vext2xv_hu_bu +name = lasx_vext2xv_hu_bu +asm-fmts = xd, xj +data-types = V16HI, V32QI + +/// lasx_vext2xv_wu_hu +name = lasx_vext2xv_wu_hu +asm-fmts = xd, xj +data-types = V8SI, V16HI + +/// lasx_vext2xv_du_wu +name = lasx_vext2xv_du_wu +asm-fmts = xd, xj +data-types = V4DI, V8SI + +/// lasx_vext2xv_wu_bu +name = lasx_vext2xv_wu_bu +asm-fmts = xd, xj +data-types = V8SI, V32QI + +/// lasx_vext2xv_du_hu +name = lasx_vext2xv_du_hu +asm-fmts = xd, xj +data-types = V4DI, V16HI + +/// lasx_vext2xv_du_bu +name = lasx_vext2xv_du_bu +asm-fmts = xd, xj +data-types = V4DI, V32QI + +/// lasx_xvpermi_q +name = lasx_xvpermi_q +asm-fmts = xd, xj, ui8 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvpermi_d +name = lasx_xvpermi_d +asm-fmts = xd, xj, ui8 +data-types = V4DI, V4DI, USI + +/// lasx_xvperm_w +name = lasx_xvperm_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvldrepl_b +name = lasx_xvldrepl_b +asm-fmts = xd, rj, si12 +data-types = V32QI, CVPOINTER, SI + +/// lasx_xvldrepl_h +name = lasx_xvldrepl_h +asm-fmts = xd, rj, si11 +data-types = V16HI, CVPOINTER, SI + +/// lasx_xvldrepl_w +name = lasx_xvldrepl_w +asm-fmts = xd, rj, si10 +data-types = V8SI, CVPOINTER, SI + +/// lasx_xvldrepl_d +name = lasx_xvldrepl_d +asm-fmts = xd, rj, si9 +data-types = V4DI, CVPOINTER, SI + +/// lasx_xvpickve2gr_w +name = lasx_xvpickve2gr_w +asm-fmts = rd, xj, ui3 +data-types = SI, V8SI, UQI + +/// lasx_xvpickve2gr_wu +name = 
lasx_xvpickve2gr_wu +asm-fmts = rd, xj, ui3 +data-types = USI, V8SI, UQI + +/// lasx_xvpickve2gr_d +name = lasx_xvpickve2gr_d +asm-fmts = rd, xj, ui2 +data-types = DI, V4DI, UQI + +/// lasx_xvpickve2gr_du +name = lasx_xvpickve2gr_du +asm-fmts = rd, xj, ui2 +data-types = UDI, V4DI, UQI + +/// lasx_xvaddwev_q_d +name = lasx_xvaddwev_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvaddwev_d_w +name = lasx_xvaddwev_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvaddwev_w_h +name = lasx_xvaddwev_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvaddwev_h_b +name = lasx_xvaddwev_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvaddwev_q_du +name = lasx_xvaddwev_q_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvaddwev_d_wu +name = lasx_xvaddwev_d_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvaddwev_w_hu +name = lasx_xvaddwev_w_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvaddwev_h_bu +name = lasx_xvaddwev_h_bu +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, UV32QI + +/// lasx_xvsubwev_q_d +name = lasx_xvsubwev_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsubwev_d_w +name = lasx_xvsubwev_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvsubwev_w_h +name = lasx_xvsubwev_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvsubwev_h_b +name = lasx_xvsubwev_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvsubwev_q_du +name = lasx_xvsubwev_q_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvsubwev_d_wu +name = lasx_xvsubwev_d_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvsubwev_w_hu +name = lasx_xvsubwev_w_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvsubwev_h_bu +name = lasx_xvsubwev_h_bu +asm-fmts = xd, xj, xk +data-types = 
V16HI, UV32QI, UV32QI + +/// lasx_xvmulwev_q_d +name = lasx_xvmulwev_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmulwev_d_w +name = lasx_xvmulwev_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvmulwev_w_h +name = lasx_xvmulwev_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvmulwev_h_b +name = lasx_xvmulwev_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvmulwev_q_du +name = lasx_xvmulwev_q_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvmulwev_d_wu +name = lasx_xvmulwev_d_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvmulwev_w_hu +name = lasx_xvmulwev_w_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvmulwev_h_bu +name = lasx_xvmulwev_h_bu +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, UV32QI + +/// lasx_xvaddwod_q_d +name = lasx_xvaddwod_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvaddwod_d_w +name = lasx_xvaddwod_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvaddwod_w_h +name = lasx_xvaddwod_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvaddwod_h_b +name = lasx_xvaddwod_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvaddwod_q_du +name = lasx_xvaddwod_q_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvaddwod_d_wu +name = lasx_xvaddwod_d_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvaddwod_w_hu +name = lasx_xvaddwod_w_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvaddwod_h_bu +name = lasx_xvaddwod_h_bu +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, UV32QI + +/// lasx_xvsubwod_q_d +name = lasx_xvsubwod_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsubwod_d_w +name = lasx_xvsubwod_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvsubwod_w_h +name = 
lasx_xvsubwod_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvsubwod_h_b +name = lasx_xvsubwod_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvsubwod_q_du +name = lasx_xvsubwod_q_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvsubwod_d_wu +name = lasx_xvsubwod_d_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvsubwod_w_hu +name = lasx_xvsubwod_w_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvsubwod_h_bu +name = lasx_xvsubwod_h_bu +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, UV32QI + +/// lasx_xvmulwod_q_d +name = lasx_xvmulwod_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvmulwod_d_w +name = lasx_xvmulwod_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V8SI, V8SI + +/// lasx_xvmulwod_w_h +name = lasx_xvmulwod_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V16HI, V16HI + +/// lasx_xvmulwod_h_b +name = lasx_xvmulwod_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V32QI, V32QI + +/// lasx_xvmulwod_q_du +name = lasx_xvmulwod_q_du +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, UV4DI + +/// lasx_xvmulwod_d_wu +name = lasx_xvmulwod_d_wu +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, UV8SI + +/// lasx_xvmulwod_w_hu +name = lasx_xvmulwod_w_hu +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, UV16HI + +/// lasx_xvmulwod_h_bu +name = lasx_xvmulwod_h_bu +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, UV32QI + +/// lasx_xvaddwev_d_wu_w +name = lasx_xvaddwev_d_wu_w +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, V8SI + +/// lasx_xvaddwev_w_hu_h +name = lasx_xvaddwev_w_hu_h +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, V16HI + +/// lasx_xvaddwev_h_bu_b +name = lasx_xvaddwev_h_bu_b +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, V32QI + +/// lasx_xvmulwev_d_wu_w +name = lasx_xvmulwev_d_wu_w +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, V8SI + +/// lasx_xvmulwev_w_hu_h +name = lasx_xvmulwev_w_hu_h 
+asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, V16HI + +/// lasx_xvmulwev_h_bu_b +name = lasx_xvmulwev_h_bu_b +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, V32QI + +/// lasx_xvaddwod_d_wu_w +name = lasx_xvaddwod_d_wu_w +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, V8SI + +/// lasx_xvaddwod_w_hu_h +name = lasx_xvaddwod_w_hu_h +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, V16HI + +/// lasx_xvaddwod_h_bu_b +name = lasx_xvaddwod_h_bu_b +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, V32QI + +/// lasx_xvmulwod_d_wu_w +name = lasx_xvmulwod_d_wu_w +asm-fmts = xd, xj, xk +data-types = V4DI, UV8SI, V8SI + +/// lasx_xvmulwod_w_hu_h +name = lasx_xvmulwod_w_hu_h +asm-fmts = xd, xj, xk +data-types = V8SI, UV16HI, V16HI + +/// lasx_xvmulwod_h_bu_b +name = lasx_xvmulwod_h_bu_b +asm-fmts = xd, xj, xk +data-types = V16HI, UV32QI, V32QI + +/// lasx_xvhaddw_q_d +name = lasx_xvhaddw_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvhaddw_qu_du +name = lasx_xvhaddw_qu_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvhsubw_q_d +name = lasx_xvhsubw_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvhsubw_qu_du +name = lasx_xvhsubw_qu_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI + +/// lasx_xvmaddwev_q_d +name = lasx_xvmaddwev_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI, V4DI + +/// lasx_xvmaddwev_d_w +name = lasx_xvmaddwev_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V8SI, V8SI + +/// lasx_xvmaddwev_w_h +name = lasx_xvmaddwev_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V16HI, V16HI + +/// lasx_xvmaddwev_h_b +name = lasx_xvmaddwev_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V32QI, V32QI + +/// lasx_xvmaddwev_q_du +name = lasx_xvmaddwev_q_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI, UV4DI + +/// lasx_xvmaddwev_d_wu +name = lasx_xvmaddwev_d_wu +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV8SI, UV8SI + +/// 
lasx_xvmaddwev_w_hu +name = lasx_xvmaddwev_w_hu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV16HI, UV16HI + +/// lasx_xvmaddwev_h_bu +name = lasx_xvmaddwev_h_bu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV32QI, UV32QI + +/// lasx_xvmaddwod_q_d +name = lasx_xvmaddwod_q_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI, V4DI + +/// lasx_xvmaddwod_d_w +name = lasx_xvmaddwod_d_w +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V8SI, V8SI + +/// lasx_xvmaddwod_w_h +name = lasx_xvmaddwod_w_h +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V16HI, V16HI + +/// lasx_xvmaddwod_h_b +name = lasx_xvmaddwod_h_b +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V32QI, V32QI + +/// lasx_xvmaddwod_q_du +name = lasx_xvmaddwod_q_du +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV4DI, UV4DI + +/// lasx_xvmaddwod_d_wu +name = lasx_xvmaddwod_d_wu +asm-fmts = xd, xj, xk +data-types = UV4DI, UV4DI, UV8SI, UV8SI + +/// lasx_xvmaddwod_w_hu +name = lasx_xvmaddwod_w_hu +asm-fmts = xd, xj, xk +data-types = UV8SI, UV8SI, UV16HI, UV16HI + +/// lasx_xvmaddwod_h_bu +name = lasx_xvmaddwod_h_bu +asm-fmts = xd, xj, xk +data-types = UV16HI, UV16HI, UV32QI, UV32QI + +/// lasx_xvmaddwev_q_du_d +name = lasx_xvmaddwev_q_du_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, UV4DI, V4DI + +/// lasx_xvmaddwev_d_wu_w +name = lasx_xvmaddwev_d_wu_w +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, UV8SI, V8SI + +/// lasx_xvmaddwev_w_hu_h +name = lasx_xvmaddwev_w_hu_h +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, UV16HI, V16HI + +/// lasx_xvmaddwev_h_bu_b +name = lasx_xvmaddwev_h_bu_b +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, UV32QI, V32QI + +/// lasx_xvmaddwod_q_du_d +name = lasx_xvmaddwod_q_du_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, UV4DI, V4DI + +/// lasx_xvmaddwod_d_wu_w +name = lasx_xvmaddwod_d_wu_w +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, UV8SI, V8SI + +/// lasx_xvmaddwod_w_hu_h +name = lasx_xvmaddwod_w_hu_h +asm-fmts = xd, xj, xk +data-types = 
V8SI, V8SI, UV16HI, V16HI + +/// lasx_xvmaddwod_h_bu_b +name = lasx_xvmaddwod_h_bu_b +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, UV32QI, V32QI + +/// lasx_xvrotr_b +name = lasx_xvrotr_b +asm-fmts = xd, xj, xk +data-types = V32QI, V32QI, V32QI + +/// lasx_xvrotr_h +name = lasx_xvrotr_h +asm-fmts = xd, xj, xk +data-types = V16HI, V16HI, V16HI + +/// lasx_xvrotr_w +name = lasx_xvrotr_w +asm-fmts = xd, xj, xk +data-types = V8SI, V8SI, V8SI + +/// lasx_xvrotr_d +name = lasx_xvrotr_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvadd_q +name = lasx_xvadd_q +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvsub_q +name = lasx_xvsub_q +asm-fmts = xd, xj, xk +data-types = V4DI, V4DI, V4DI + +/// lasx_xvaddwev_q_du_d +name = lasx_xvaddwev_q_du_d +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, V4DI + +/// lasx_xvaddwod_q_du_d +name = lasx_xvaddwod_q_du_d +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, V4DI + +/// lasx_xvmulwev_q_du_d +name = lasx_xvmulwev_q_du_d +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, V4DI + +/// lasx_xvmulwod_q_du_d +name = lasx_xvmulwod_q_du_d +asm-fmts = xd, xj, xk +data-types = V4DI, UV4DI, V4DI + +/// lasx_xvmskgez_b +name = lasx_xvmskgez_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvmsknz_b +name = lasx_xvmsknz_b +asm-fmts = xd, xj +data-types = V32QI, V32QI + +/// lasx_xvexth_h_b +name = lasx_xvexth_h_b +asm-fmts = xd, xj +data-types = V16HI, V32QI + +/// lasx_xvexth_w_h +name = lasx_xvexth_w_h +asm-fmts = xd, xj +data-types = V8SI, V16HI + +/// lasx_xvexth_d_w +name = lasx_xvexth_d_w +asm-fmts = xd, xj +data-types = V4DI, V8SI + +/// lasx_xvexth_q_d +name = lasx_xvexth_q_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvexth_hu_bu +name = lasx_xvexth_hu_bu +asm-fmts = xd, xj +data-types = UV16HI, UV32QI + +/// lasx_xvexth_wu_hu +name = lasx_xvexth_wu_hu +asm-fmts = xd, xj +data-types = UV8SI, UV16HI + +/// lasx_xvexth_du_wu +name = lasx_xvexth_du_wu +asm-fmts = xd, xj 
+data-types = UV4DI, UV8SI + +/// lasx_xvexth_qu_du +name = lasx_xvexth_qu_du +asm-fmts = xd, xj +data-types = UV4DI, UV4DI + +/// lasx_xvrotri_b +name = lasx_xvrotri_b +asm-fmts = xd, xj, ui3 +data-types = V32QI, V32QI, UQI + +/// lasx_xvrotri_h +name = lasx_xvrotri_h +asm-fmts = xd, xj, ui4 +data-types = V16HI, V16HI, UQI + +/// lasx_xvrotri_w +name = lasx_xvrotri_w +asm-fmts = xd, xj, ui5 +data-types = V8SI, V8SI, UQI + +/// lasx_xvrotri_d +name = lasx_xvrotri_d +asm-fmts = xd, xj, ui6 +data-types = V4DI, V4DI, UQI + +/// lasx_xvextl_q_d +name = lasx_xvextl_q_d +asm-fmts = xd, xj +data-types = V4DI, V4DI + +/// lasx_xvsrlni_b_h +name = lasx_xvsrlni_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvsrlni_h_w +name = lasx_xvsrlni_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvsrlni_w_d +name = lasx_xvsrlni_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvsrlni_d_q +name = lasx_xvsrlni_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvsrlrni_b_h +name = lasx_xvsrlrni_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvsrlrni_h_w +name = lasx_xvsrlrni_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvsrlrni_w_d +name = lasx_xvsrlrni_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvsrlrni_d_q +name = lasx_xvsrlrni_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvssrlni_b_h +name = lasx_xvssrlni_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvssrlni_h_w +name = lasx_xvssrlni_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvssrlni_w_d +name = lasx_xvssrlni_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvssrlni_d_q +name = lasx_xvssrlni_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvssrlni_bu_h +name = 
lasx_xvssrlni_bu_h +asm-fmts = xd, xj, ui4 +data-types = UV32QI, UV32QI, V32QI, USI + +/// lasx_xvssrlni_hu_w +name = lasx_xvssrlni_hu_w +asm-fmts = xd, xj, ui5 +data-types = UV16HI, UV16HI, V16HI, USI + +/// lasx_xvssrlni_wu_d +name = lasx_xvssrlni_wu_d +asm-fmts = xd, xj, ui6 +data-types = UV8SI, UV8SI, V8SI, USI + +/// lasx_xvssrlni_du_q +name = lasx_xvssrlni_du_q +asm-fmts = xd, xj, ui7 +data-types = UV4DI, UV4DI, V4DI, USI + +/// lasx_xvssrlrni_b_h +name = lasx_xvssrlrni_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvssrlrni_h_w +name = lasx_xvssrlrni_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvssrlrni_w_d +name = lasx_xvssrlrni_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvssrlrni_d_q +name = lasx_xvssrlrni_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvssrlrni_bu_h +name = lasx_xvssrlrni_bu_h +asm-fmts = xd, xj, ui4 +data-types = UV32QI, UV32QI, V32QI, USI + +/// lasx_xvssrlrni_hu_w +name = lasx_xvssrlrni_hu_w +asm-fmts = xd, xj, ui5 +data-types = UV16HI, UV16HI, V16HI, USI + +/// lasx_xvssrlrni_wu_d +name = lasx_xvssrlrni_wu_d +asm-fmts = xd, xj, ui6 +data-types = UV8SI, UV8SI, V8SI, USI + +/// lasx_xvssrlrni_du_q +name = lasx_xvssrlrni_du_q +asm-fmts = xd, xj, ui7 +data-types = UV4DI, UV4DI, V4DI, USI + +/// lasx_xvsrani_b_h +name = lasx_xvsrani_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvsrani_h_w +name = lasx_xvsrani_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvsrani_w_d +name = lasx_xvsrani_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvsrani_d_q +name = lasx_xvsrani_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvsrarni_b_h +name = lasx_xvsrarni_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvsrarni_h_w +name = lasx_xvsrarni_h_w +asm-fmts = xd, xj, ui5 
+data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvsrarni_w_d +name = lasx_xvsrarni_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvsrarni_d_q +name = lasx_xvsrarni_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvssrani_b_h +name = lasx_xvssrani_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvssrani_h_w +name = lasx_xvssrani_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvssrani_w_d +name = lasx_xvssrani_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvssrani_d_q +name = lasx_xvssrani_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvssrani_bu_h +name = lasx_xvssrani_bu_h +asm-fmts = xd, xj, ui4 +data-types = UV32QI, UV32QI, V32QI, USI + +/// lasx_xvssrani_hu_w +name = lasx_xvssrani_hu_w +asm-fmts = xd, xj, ui5 +data-types = UV16HI, UV16HI, V16HI, USI + +/// lasx_xvssrani_wu_d +name = lasx_xvssrani_wu_d +asm-fmts = xd, xj, ui6 +data-types = UV8SI, UV8SI, V8SI, USI + +/// lasx_xvssrani_du_q +name = lasx_xvssrani_du_q +asm-fmts = xd, xj, ui7 +data-types = UV4DI, UV4DI, V4DI, USI + +/// lasx_xvssrarni_b_h +name = lasx_xvssrarni_b_h +asm-fmts = xd, xj, ui4 +data-types = V32QI, V32QI, V32QI, USI + +/// lasx_xvssrarni_h_w +name = lasx_xvssrarni_h_w +asm-fmts = xd, xj, ui5 +data-types = V16HI, V16HI, V16HI, USI + +/// lasx_xvssrarni_w_d +name = lasx_xvssrarni_w_d +asm-fmts = xd, xj, ui6 +data-types = V8SI, V8SI, V8SI, USI + +/// lasx_xvssrarni_d_q +name = lasx_xvssrarni_d_q +asm-fmts = xd, xj, ui7 +data-types = V4DI, V4DI, V4DI, USI + +/// lasx_xvssrarni_bu_h +name = lasx_xvssrarni_bu_h +asm-fmts = xd, xj, ui4 +data-types = UV32QI, UV32QI, V32QI, USI + +/// lasx_xvssrarni_hu_w +name = lasx_xvssrarni_hu_w +asm-fmts = xd, xj, ui5 +data-types = UV16HI, UV16HI, V16HI, USI + +/// lasx_xvssrarni_wu_d +name = lasx_xvssrarni_wu_d +asm-fmts = xd, xj, ui6 +data-types = UV8SI, UV8SI, V8SI, USI + +/// 
lasx_xvssrarni_du_q +name = lasx_xvssrarni_du_q +asm-fmts = xd, xj, ui7 +data-types = UV4DI, UV4DI, V4DI, USI + +/// lasx_xbnz_b +name = lasx_xbnz_b +asm-fmts = cd, xj +data-types = SI, UV32QI + +/// lasx_xbnz_d +name = lasx_xbnz_d +asm-fmts = cd, xj +data-types = SI, UV4DI + +/// lasx_xbnz_h +name = lasx_xbnz_h +asm-fmts = cd, xj +data-types = SI, UV16HI + +/// lasx_xbnz_v +name = lasx_xbnz_v +asm-fmts = cd, xj +data-types = SI, UV32QI + +/// lasx_xbnz_w +name = lasx_xbnz_w +asm-fmts = cd, xj +data-types = SI, UV8SI + +/// lasx_xbz_b +name = lasx_xbz_b +asm-fmts = cd, xj +data-types = SI, UV32QI + +/// lasx_xbz_d +name = lasx_xbz_d +asm-fmts = cd, xj +data-types = SI, UV4DI + +/// lasx_xbz_h +name = lasx_xbz_h +asm-fmts = cd, xj +data-types = SI, UV16HI + +/// lasx_xbz_v +name = lasx_xbz_v +asm-fmts = cd, xj +data-types = SI, UV32QI + +/// lasx_xbz_w +name = lasx_xbz_w +asm-fmts = cd, xj +data-types = SI, UV8SI + +/// lasx_xvfcmp_caf_d +name = lasx_xvfcmp_caf_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_caf_s +name = lasx_xvfcmp_caf_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_ceq_d +name = lasx_xvfcmp_ceq_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_ceq_s +name = lasx_xvfcmp_ceq_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cle_d +name = lasx_xvfcmp_cle_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cle_s +name = lasx_xvfcmp_cle_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_clt_d +name = lasx_xvfcmp_clt_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_clt_s +name = lasx_xvfcmp_clt_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cne_d +name = lasx_xvfcmp_cne_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cne_s +name = lasx_xvfcmp_cne_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// 
lasx_xvfcmp_cor_d +name = lasx_xvfcmp_cor_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cor_s +name = lasx_xvfcmp_cor_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cueq_d +name = lasx_xvfcmp_cueq_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cueq_s +name = lasx_xvfcmp_cueq_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cule_d +name = lasx_xvfcmp_cule_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cule_s +name = lasx_xvfcmp_cule_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cult_d +name = lasx_xvfcmp_cult_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cult_s +name = lasx_xvfcmp_cult_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cun_d +name = lasx_xvfcmp_cun_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cune_d +name = lasx_xvfcmp_cune_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_cune_s +name = lasx_xvfcmp_cune_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_cun_s +name = lasx_xvfcmp_cun_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_saf_d +name = lasx_xvfcmp_saf_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_saf_s +name = lasx_xvfcmp_saf_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_seq_d +name = lasx_xvfcmp_seq_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_seq_s +name = lasx_xvfcmp_seq_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sle_d +name = lasx_xvfcmp_sle_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sle_s +name = lasx_xvfcmp_sle_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_slt_d +name = lasx_xvfcmp_slt_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, 
V4DF + +/// lasx_xvfcmp_slt_s +name = lasx_xvfcmp_slt_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sne_d +name = lasx_xvfcmp_sne_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sne_s +name = lasx_xvfcmp_sne_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sor_d +name = lasx_xvfcmp_sor_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sor_s +name = lasx_xvfcmp_sor_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sueq_d +name = lasx_xvfcmp_sueq_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sueq_s +name = lasx_xvfcmp_sueq_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sule_d +name = lasx_xvfcmp_sule_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sule_s +name = lasx_xvfcmp_sule_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sult_d +name = lasx_xvfcmp_sult_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sult_s +name = lasx_xvfcmp_sult_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sun_d +name = lasx_xvfcmp_sun_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sune_d +name = lasx_xvfcmp_sune_d +asm-fmts = xd, xj, xk +data-types = V4DI, V4DF, V4DF + +/// lasx_xvfcmp_sune_s +name = lasx_xvfcmp_sune_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvfcmp_sun_s +name = lasx_xvfcmp_sun_s +asm-fmts = xd, xj, xk +data-types = V8SI, V8SF, V8SF + +/// lasx_xvpickve_d_f +name = lasx_xvpickve_d_f +asm-fmts = xd, xj, ui2 +data-types = V4DF, V4DF, UQI + +/// lasx_xvpickve_w_f +name = lasx_xvpickve_w_f +asm-fmts = xd, xj, ui3 +data-types = V8SF, V8SF, UQI + +/// lasx_xvrepli_b +name = lasx_xvrepli_b +asm-fmts = xd, si10 +data-types = V32QI, HI + +/// lasx_xvrepli_d +name = lasx_xvrepli_d +asm-fmts = xd, si10 +data-types = V4DI, HI + +/// 
lasx_xvrepli_h +name = lasx_xvrepli_h +asm-fmts = xd, si10 +data-types = V16HI, HI + +/// lasx_xvrepli_w +name = lasx_xvrepli_w +asm-fmts = xd, si10 +data-types = V8SI, HI + diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasxintrin.h b/library/stdarch/crates/stdarch-gen-loongarch/lasxintrin.h new file mode 100644 index 000000000000..c525b6106b89 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/lasxintrin.h @@ -0,0 +1,5376 @@ +/* + * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lasxintrin.h;hb=61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75 + */ + +/* LARCH Loongson ASX intrinsics include file. + + Copyright (C) 2018-2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _GCC_LOONGSON_ASXINTRIN_H +#define _GCC_LOONGSON_ASXINTRIN_H 1 + +#if defined(__loongarch_asx) + +typedef signed char v32i8 __attribute__ ((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__ ((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__ ((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__ ((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__ ((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__ ((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__ ((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__ ((vector_size(32), aligned(2))); +typedef int v8i32 __attribute__ ((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__ ((vector_size(32), aligned(4))); +typedef unsigned int v8u32 __attribute__ ((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__ ((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__ ((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__ ((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__ ((vector_size(32), aligned(8))); +typedef float v8f32 __attribute__ ((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__ ((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__ ((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__ ((vector_size(32), aligned(8))); +typedef float __m256 __attribute__ ((__vector_size__ (32), + __may_alias__)); +typedef long long __m256i __attribute__ ((__vector_size__ (32), + __may_alias__)); +typedef double __m256d __attribute__ ((__vector_size__ (32), + __may_alias__)); + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsll_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsll_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsll_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsll_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsll_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsll_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsll_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsll_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. 
*/ +#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsra_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsra_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsra_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsra_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsra_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsra_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsra_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsra_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. 
*/ +#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrar_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrar_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrar_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrar_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrar_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrar_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrar_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrar_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. 
*/ +#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrl_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrl_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrl_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrl_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrl_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrl_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrl_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrl_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. 
*/ +#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlr_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlr_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlr_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlr_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlr_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlr_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlr_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlr_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitclr_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitclr_b ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitclr_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitclr_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitclr_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitclr_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitclr_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitclr_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitset_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitset_b ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitset_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitset_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitset_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitset_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitset_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitset_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitrev_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitrev_b ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitrev_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitrev_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitrev_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitrev_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitrev_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvbitrev_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. 
*/ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadd_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadd_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadd_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadd_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadd_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadd_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadd_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadd_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_hu ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. 
*/ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsub_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsub_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsub_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsub_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsub_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsub_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsub_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsub_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. 
*/ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. 
*/ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmax_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmax_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmin_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmin_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvseq_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvseq_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvseq_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvseq_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvseq_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvseq_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvseq_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvseq_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. 
*/ +#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. 
*/ +#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvslt_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvslt_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, UV32QI, UQI. */ +#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, UV16HI, UQI. 
*/ +#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, UV8SI, UQI. */ +#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, UV4DI, UQI. */ +#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. 
*/ +#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsle_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsle_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, UV32QI, UQI. */ +#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, UV16HI, UQI. */ +#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, UV8SI, UQI. */ +#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, UV4DI, UQI. */ +#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. 
*/ +#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du ((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadda_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadda_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadda_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadda_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadda_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadda_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadda_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadda_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsadd_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsadd_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavg_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavg_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvavgr_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvavgr_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssub_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssub_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvabsd_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvabsd_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmul_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmul_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmul_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmul_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmul_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmul_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmul_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmul_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmadd_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmadd_b ((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmadd_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmadd_h ((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmadd_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmadd_w ((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmadd_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmadd_d ((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmsub_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmsub_b ((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmsub_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmsub_h ((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmsub_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmsub_w ((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmsub_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmsub_d ((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvdiv_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvdiv_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_hu_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_hu_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_wu_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_wu_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_du_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_du_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_hu_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_hu_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_wu_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_wu_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_du_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_du_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmod_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmod_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. 
*/ +#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui1. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickev_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickev_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickev_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickev_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickev_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickev_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickev_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickev_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickod_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickod_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickod_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickod_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickod_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickod_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpickod_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpickod_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvh_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvh_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvh_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvh_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvh_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvh_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvh_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvh_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvl_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvl_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvl_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvl_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvl_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvl_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvilvl_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvilvl_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackev_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackev_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackev_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackev_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackev_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackev_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackev_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackev_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackod_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackod_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackod_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackod_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackod_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackod_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpackod_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvpackod_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvshuf_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvshuf_b ((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvshuf_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvshuf_h ((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvshuf_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvshuf_w ((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvshuf_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvshuf_d ((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvand_v (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvand_v ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvor_v (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvor_v ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvnor_v (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvnor_v ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvxor_v (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvxor_v ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. 
*/ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvbitsel_v (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvbitsel_v ((v32u8)_1, (v32u8)_2, (v32u8)_3); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, USI. */ +#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b ((v32u8)(_1), (v32u8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V32QI, V32QI, USI. */ +#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V16HI, V16HI, USI. */ +#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V8SI, V8SI, USI. */ +#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V32QI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplgr2vr_b (int _1) +{ + return (__m256i)__builtin_lasx_xvreplgr2vr_b ((int)_1); +} + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V16HI, SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplgr2vr_h (int _1) +{ + return (__m256i)__builtin_lasx_xvreplgr2vr_h ((int)_1); +} + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V8SI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplgr2vr_w (int _1) +{ + return (__m256i)__builtin_lasx_xvreplgr2vr_w ((int)_1); +} + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V4DI, DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplgr2vr_d (long int _1) +{ + return (__m256i)__builtin_lasx_xvreplgr2vr_d ((long int)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpcnt_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvpcnt_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpcnt_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvpcnt_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpcnt_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvpcnt_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvpcnt_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvpcnt_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclo_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclo_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclo_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclo_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclo_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclo_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclo_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclo_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclz_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclz_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclz_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclz_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclz_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclz_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvclz_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvclz_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfadd_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfadd_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfadd_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfadd_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfsub_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfsub_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfsub_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfsub_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmul_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfmul_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmul_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfmul_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfdiv_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfdiv_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfdiv_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfdiv_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcvt_h_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcvt_h_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfcvt_s_d (__m256d _1, __m256d _2) +{ + return (__m256)__builtin_lasx_xvfcvt_s_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmin_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfmin_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmin_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfmin_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmina_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfmina_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmina_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfmina_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmax_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfmax_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmax_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfmax_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmaxa_s (__m256 _1, __m256 _2) +{ + return (__m256)__builtin_lasx_xvfmaxa_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmaxa_d (__m256d _1, __m256d _2) +{ + return (__m256d)__builtin_lasx_xvfmaxa_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfclass_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvfclass_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfclass_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvfclass_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfsqrt_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfsqrt_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfsqrt_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfsqrt_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrecip_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrecip_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrecip_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); +} + +#if defined(__loongarch_frecipe) +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrecipe_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrecipe_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrsqrte_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrsqrte_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); +} +#endif + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrint_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrint_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrint_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrint_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrsqrt_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrsqrt_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrsqrt_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrsqrt_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvflogb_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvflogb_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvflogb_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvflogb_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfcvth_s_h (__m256i _1) +{ + return (__m256)__builtin_lasx_xvfcvth_s_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfcvth_d_s (__m256 _1) +{ + return (__m256d)__builtin_lasx_xvfcvth_d_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V8SF, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfcvtl_s_h (__m256i _1) +{ + return (__m256)__builtin_lasx_xvfcvtl_s_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfcvtl_d_s (__m256 _1) +{ + return (__m256d)__builtin_lasx_xvfcvtl_d_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftint_w_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftint_w_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftint_l_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftint_l_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV8SI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftint_wu_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftint_wu_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftint_lu_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftint_lu_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrz_w_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrz_w_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrz_l_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftintrz_l_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV8SI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrz_wu_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrz_wu_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrz_lu_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftintrz_lu_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvffint_s_w (__m256i _1) +{ + return (__m256)__builtin_lasx_xvffint_s_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvffint_d_l (__m256i _1) +{ + return (__m256d)__builtin_lasx_xvffint_d_l ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvffint_s_wu (__m256i _1) +{ + return (__m256)__builtin_lasx_xvffint_s_wu ((v8u32)_1); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V4DF, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvffint_d_lu (__m256i _1) +{ + return (__m256d)__builtin_lasx_xvffint_d_lu ((v4u64)_1); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V32QI, V32QI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve_b (__m256i _1, int _2) +{ + return (__m256i)__builtin_lasx_xvreplve_b ((v32i8)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V16HI, V16HI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve_h (__m256i _1, int _2) +{ + return (__m256i)__builtin_lasx_xvreplve_h ((v16i16)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V8SI, V8SI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve_w (__m256i _1, int _2) +{ + return (__m256i)__builtin_lasx_xvreplve_w ((v8i32)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V4DI, V4DI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve_d (__m256i _1, int _2) +{ + return (__m256i)__builtin_lasx_xvreplve_d ((v4i64)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvandn_v (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvandn_v ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvneg_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvneg_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvneg_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvneg_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvneg_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvneg_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvneg_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvneg_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmuh_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmuh_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V16HI, V32QI, UQI. */ +#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V8SI, V16HI, UQI. */ +#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, V8SI, UQI. */ +#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV16HI, UV32QI, UQI. */ +#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu ((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV8SI, UV16HI, UQI. */ +#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu ((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV4DI, UV8SI, UQI. */ +#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu ((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsran_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsran_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsran_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsran_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsran_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsran_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssran_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssran_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssran_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssran_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssran_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssran_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssran_bu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssran_bu_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssran_hu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssran_hu_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssran_wu_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssran_wu_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrarn_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrarn_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrarn_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrarn_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrarn_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrarn_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrarn_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrarn_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrarn_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrarn_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrarn_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrarn_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrarn_bu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrarn_bu_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrarn_hu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrarn_hu_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrarn_wu_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrarn_wu_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrln_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrln_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrln_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrln_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrln_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrln_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrln_bu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrln_bu_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrln_hu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrln_hu_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrln_wu_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrln_wu_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlrn_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlrn_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlrn_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlrn_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsrlrn_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsrlrn_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrlrn_bu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrlrn_bu_h ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrlrn_hu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrlrn_hu_w ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrlrn_wu_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrlrn_wu_d ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI. 
*/ +#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI. */ +#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfrstp_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvfrstp_b ((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfrstp_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvfrstp_h ((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ +#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmskltz_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvmskltz_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmskltz_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvmskltz_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmskltz_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvmskltz_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmskltz_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvmskltz_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsigncov_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsigncov_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsigncov_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsigncov_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsigncov_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsigncov_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsigncov_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsigncov_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmadd_s (__m256 _1, __m256 _2, __m256 _3) +{ + return (__m256)__builtin_lasx_xvfmadd_s ((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmadd_d (__m256d _1, __m256d _2, __m256d _3) +{ + return (__m256d)__builtin_lasx_xvfmadd_d ((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfmsub_s (__m256 _1, __m256 _2, __m256 _3) +{ + return (__m256)__builtin_lasx_xvfmsub_s ((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfmsub_d (__m256d _1, __m256d _2, __m256d _3) +{ + return (__m256d)__builtin_lasx_xvfmsub_d ((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfnmadd_s (__m256 _1, __m256 _2, __m256 _3) +{ + return (__m256)__builtin_lasx_xvfnmadd_s ((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfnmadd_d (__m256d _1, __m256d _2, __m256d _3) +{ + return (__m256d)__builtin_lasx_xvfnmadd_d ((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfnmsub_s (__m256 _1, __m256 _2, __m256 _3) +{ + return (__m256)__builtin_lasx_xvfnmsub_s ((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfnmsub_d (__m256d _1, __m256d _2, __m256d _3) +{ + return (__m256d)__builtin_lasx_xvfnmsub_d ((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrne_w_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrne_w_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrne_l_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftintrne_l_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrp_w_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrp_w_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrp_l_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftintrp_l_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrm_w_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrm_w_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrm_l_d (__m256d _1) +{ + return (__m256i)__builtin_lasx_xvftintrm_l_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftint_w_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvftint_w_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvffint_s_l (__m256i _1, __m256i _2) +{ + return (__m256)__builtin_lasx_xvffint_s_l ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrz_w_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvftintrz_w_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrp_w_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvftintrp_w_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrm_w_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvftintrm_w_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrne_w_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvftintrne_w_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftinth_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftinth_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintl_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintl_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvffinth_d_w (__m256i _1) +{ + return (__m256d)__builtin_lasx_xvffinth_d_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvffintl_d_w (__m256i _1) +{ + return (__m256d)__builtin_lasx_xvffintl_d_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrzh_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrzh_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrzl_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrzl_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrph_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrph_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrpl_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrpl_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrmh_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrmh_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrml_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrml_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrneh_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrneh_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvftintrnel_l_s (__m256 _1) +{ + return (__m256i)__builtin_lasx_xvftintrnel_l_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrintrne_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrintrne_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrintrne_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrintrne_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrintrz_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrintrz_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrintrz_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrintrz_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrintrp_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrintrp_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrintrp_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrintrp_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrintrm_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrintrm_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrintrm_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrintrm_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, rj, si12. */ +/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld ((void *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si12. */ +/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ +#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst ((v32i8)(_1), (void *)(_2), (_3))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b ((v32i8)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h ((v16i16)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, rj, si8, idx. 
*/ +/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w ((v8i32)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d ((v4i64)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI. */ +#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI. */ +#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrlrn_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrlrn_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrlrn_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrlrn_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrlrn_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrlrn_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrln_b_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrln_b_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrln_h_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrln_h_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvssrln_w_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvssrln_w_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvorn_v (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvorn_v ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, i13. */ +/* Data types in instruction templates: V4DI, HI. 
*/ +#define __lasx_xvldi(/*i13*/ _1) \ + ((__m256i)__builtin_lasx_xvldi ((_1))) + +/* Assembly instruction format: xd, rj, rk. */ +/* Data types in instruction templates: V32QI, CVPOINTER, DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvldx (void * _1, long int _2) +{ + return (__m256i)__builtin_lasx_xvldx ((void *)_1, (long int)_2); +} + +/* Assembly instruction format: xd, rj, rk. */ +/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __lasx_xvstx (__m256i _1, void * _2, long int _3) +{ + return (void)__builtin_lasx_xvstx ((v32i8)_1, (void *)_2, (long int)_3); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvextl_qu_du (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvextl_qu_du ((v4u64)_1); +} + +/* Assembly instruction format: xd, rj, ui3. */ +/* Data types in instruction templates: V8SI, V8SI, SI, UQI. */ +#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w ((v8i32)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: xd, rj, ui2. */ +/* Data types in instruction templates: V4DI, V4DI, DI, UQI. */ +#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d ((v4i64)(_1), (long int)(_2), (_3))) + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve0_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvreplve0_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve0_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvreplve0_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve0_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvreplve0_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve0_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvreplve0_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvreplve0_q (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvreplve0_q ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_h_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_h_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_w_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_w_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_d_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_d_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V8SI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_w_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_w_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_d_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_d_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_d_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_d_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_hu_bu (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_hu_bu ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_wu_hu (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_wu_hu ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_du_wu (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_du_wu ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_wu_bu (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_wu_bu ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_du_hu (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_du_hu ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_vext2xv_du_bu (__m256i _1) +{ + return (__m256i)__builtin_lasx_vext2xv_du_bu ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V4DI, V4DI, USI. */ +#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvperm_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvperm_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, rj, si12. */ +/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b ((void *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si11. */ +/* Data types in instruction templates: V16HI, CVPOINTER, SI. 
*/ +#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h ((void *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si10. */ +/* Data types in instruction templates: V8SI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w ((void *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si9. */ +/* Data types in instruction templates: V4DI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d ((void *)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui3. */ +/* Data types in instruction templates: SI, V8SI, UQI. */ +#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui3. */ +/* Data types in instruction templates: USI, V8SI, UQI. */ +#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu ((v8i32)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui2. */ +/* Data types in instruction templates: DI, V4DI, UQI. */ +#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui2. */ +/* Data types in instruction templates: UDI, V4DI, UQI. */ +#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_q_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_q_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_d_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_d_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_w_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_w_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_h_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_h_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_q_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_q_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_d_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_d_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_w_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_w_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwev_h_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwev_h_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_q_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_q_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_d_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_d_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_w_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_w_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_h_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_h_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_q_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_q_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_d_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_d_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_w_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_w_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_h_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_h_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_q_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_q_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_d_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_d_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_w_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_w_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsubwod_h_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsubwod_h_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_d_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_d_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_w_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_w_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_h_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_h_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_q_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_q_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_d_wu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_d_wu ((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_w_hu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_w_hu ((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_h_bu (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_h_bu ((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_d_wu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w ((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_w_hu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h ((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_h_bu_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b ((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_d_wu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w ((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_w_hu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_w_hu_h ((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_h_bu_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b ((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_d_wu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w ((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_w_hu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h ((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_h_bu_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b ((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_d_wu_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w ((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_w_hu_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h ((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_h_bu_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b ((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhaddw_qu_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhaddw_qu_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_q_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_q_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvhsubw_qu_du (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvhsubw_qu_du ((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_q_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_q_d ((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_d_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_d_w ((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_w_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_w_h ((v8i32)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_h_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_h_b ((v16i16)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_q_du (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_q_du ((v4u64)_1, (v4u64)_2, (v4u64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_d_wu (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_d_wu ((v4u64)_1, (v8u32)_2, (v8u32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_w_hu (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_w_hu ((v8u32)_1, (v16u16)_2, (v16u16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_h_bu (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_h_bu ((v16u16)_1, (v32u8)_2, (v32u8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_q_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_q_d ((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_d_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_d_w ((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_w_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_w_h ((v8i32)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_h_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_h_b ((v16i16)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_q_du (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_q_du ((v4u64)_1, (v4u64)_2, (v4u64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_d_wu (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_d_wu ((v4u64)_1, (v8u32)_2, (v8u32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_w_hu (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_w_hu ((v8u32)_1, (v16u16)_2, (v16u16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_h_bu (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_h_bu ((v16u16)_1, (v32u8)_2, (v32u8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_q_du_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_q_du_d ((v4i64)_1, (v4u64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_d_wu_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w ((v4i64)_1, (v8u32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_w_hu_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h ((v8i32)_1, (v16u16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwev_h_bu_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b ((v16i16)_1, (v32u8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_q_du_d (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_q_du_d ((v4i64)_1, (v4u64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_d_wu_w (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w ((v4i64)_1, (v8u32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_w_hu_h (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h ((v8i32)_1, (v16u16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmaddwod_h_bu_b (__m256i _1, __m256i _2, __m256i _3) +{ + return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b ((v16i16)_1, (v32u8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvrotr_b (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvrotr_b ((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvrotr_h (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvrotr_h ((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvrotr_w (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvrotr_w ((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvrotr_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvrotr_d ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvadd_q (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvadd_q ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvsub_q (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvsub_q ((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwev_q_du_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwev_q_du_d ((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvaddwod_q_du_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvaddwod_q_du_d ((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwev_q_du_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwev_q_du_d ((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmulwod_q_du_d (__m256i _1, __m256i _2) +{ + return (__m256i)__builtin_lasx_xvmulwod_q_du_d ((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj. 
*/ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmskgez_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvmskgez_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvmsknz_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvmsknz_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V32QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_h_b (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_h_b ((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_w_h (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_w_h ((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_d_w (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_d_w ((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_q_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_q_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV16HI, UV32QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_hu_bu (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_hu_bu ((v32u8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV8SI, UV16HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_wu_hu (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_wu_hu ((v16u16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, UV8SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_du_wu (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_du_wu ((v8u32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, UV4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvexth_qu_du (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvexth_qu_du ((v4u64)_1); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_b ((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_h ((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_w ((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. 
*/ +#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_d ((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvextl_q_d (__m256i _1) +{ + return (__m256i)__builtin_lasx_xvextl_q_d ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. 
*/ +#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. 
*/ +#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_bu_h ((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_hu_w ((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_wu_d ((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_du_q ((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. 
*/ +#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_bu_h ((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_hu_w ((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_wu_d ((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_du_q ((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. 
*/ +#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. 
*/ +#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_bu_h ((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_hu_w ((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_wu_d ((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_du_q ((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ +#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_b_h ((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_h_w ((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_w_d ((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_d_q ((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_bu_h ((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_hu_w ((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_wu_d ((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. 
*/ +#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_du_q ((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbnz_b(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbnz_b ((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV4DI. */ +#define __lasx_xbnz_d(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbnz_d ((v4u64)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV16HI. */ +#define __lasx_xbnz_h(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbnz_h ((v16u16)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbnz_v(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbnz_v ((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV8SI. */ +#define __lasx_xbnz_w(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbnz_w ((v8u32)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbz_b(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbz_b ((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV4DI. */ +#define __lasx_xbz_d(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbz_d ((v4u64)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV16HI. */ +#define __lasx_xbz_h(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbz_h ((v16u16)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbz_v(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbz_v ((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV8SI. 
*/ +#define __lasx_xbz_w(/*__m256i*/ _1) \ + ((int)__builtin_lasx_xbz_w ((v8u32)(_1))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_caf_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_caf_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_caf_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_caf_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_ceq_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_ceq_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_ceq_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_ceq_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cle_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cle_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cle_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cle_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_clt_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_clt_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_clt_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_clt_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cne_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cne_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cne_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cne_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cor_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cor_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cor_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cor_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cueq_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cueq_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cueq_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cueq_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cule_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cule_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cule_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cule_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cult_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cult_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cult_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cult_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cun_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cun_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cune_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cune_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cune_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cune_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_cun_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_cun_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_saf_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_saf_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_saf_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_saf_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_seq_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_seq_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_seq_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_seq_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sle_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sle_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sle_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sle_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_slt_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_slt_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_slt_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_slt_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sne_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sne_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sne_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sne_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sor_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sor_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sor_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sor_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sueq_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sueq_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sueq_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sueq_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sule_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sule_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sule_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sule_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sult_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sult_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sult_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sult_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sun_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sun_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sune_d (__m256d _1, __m256d _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sune_d ((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sune_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sune_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_xvfcmp_sun_s (__m256 _1, __m256 _2) +{ + return (__m256i)__builtin_lasx_xvfcmp_sun_s ((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V4DF, V4DF, UQI. */ +#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ + ((__m256d)__builtin_lasx_xvpickve_d_f ((v4f64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V8SF, V8SF, UQI. */ +#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ + ((__m256)__builtin_lasx_xvpickve_w_f ((v8f32)(_1), (_2))) + +/* Assembly instruction format: xd, si10. */ +/* Data types in instruction templates: V32QI, HI. */ +#define __lasx_xvrepli_b(/*si10*/ _1) \ + ((__m256i)__builtin_lasx_xvrepli_b ((_1))) + +/* Assembly instruction format: xd, si10. */ +/* Data types in instruction templates: V4DI, HI. */ +#define __lasx_xvrepli_d(/*si10*/ _1) \ + ((__m256i)__builtin_lasx_xvrepli_d ((_1))) + +/* Assembly instruction format: xd, si10. */ +/* Data types in instruction templates: V16HI, HI. */ +#define __lasx_xvrepli_h(/*si10*/ _1) \ + ((__m256i)__builtin_lasx_xvrepli_h ((_1))) + +/* Assembly instruction format: xd, si10. */ +/* Data types in instruction templates: V8SI, HI. 
*/ +#define __lasx_xvrepli_w(/*si10*/ _1) \ + ((__m256i)__builtin_lasx_xvrepli_w ((_1))) + +#endif /* defined(__loongarch_asx). */ +#endif /* _GCC_LOONGSON_ASXINTRIN_H. */ diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec new file mode 100644 index 000000000000..dc835770d566 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec @@ -0,0 +1,3605 @@ +// This code is automatically generated. DO NOT MODIFY. +// ``` +// OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsxintrin.h +// ``` + +/// lsx_vsll_b +name = lsx_vsll_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsll_h +name = lsx_vsll_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsll_w +name = lsx_vsll_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsll_d +name = lsx_vsll_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vslli_b +name = lsx_vslli_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vslli_h +name = lsx_vslli_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vslli_w +name = lsx_vslli_w +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vslli_d +name = lsx_vslli_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vsra_b +name = lsx_vsra_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsra_h +name = lsx_vsra_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsra_w +name = lsx_vsra_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsra_d +name = lsx_vsra_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsrai_b +name = lsx_vsrai_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vsrai_h +name = lsx_vsrai_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vsrai_w +name = lsx_vsrai_w +asm-fmts = vd, vj, ui5 
+data-types = V4SI, V4SI, UQI + +/// lsx_vsrai_d +name = lsx_vsrai_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vsrar_b +name = lsx_vsrar_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsrar_h +name = lsx_vsrar_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsrar_w +name = lsx_vsrar_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsrar_d +name = lsx_vsrar_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsrari_b +name = lsx_vsrari_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vsrari_h +name = lsx_vsrari_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vsrari_w +name = lsx_vsrari_w +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vsrari_d +name = lsx_vsrari_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vsrl_b +name = lsx_vsrl_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsrl_h +name = lsx_vsrl_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsrl_w +name = lsx_vsrl_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsrl_d +name = lsx_vsrl_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsrli_b +name = lsx_vsrli_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vsrli_h +name = lsx_vsrli_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vsrli_w +name = lsx_vsrli_w +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vsrli_d +name = lsx_vsrli_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vsrlr_b +name = lsx_vsrlr_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsrlr_h +name = lsx_vsrlr_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsrlr_w +name = lsx_vsrlr_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsrlr_d +name = lsx_vsrlr_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + 
+/// lsx_vsrlri_b +name = lsx_vsrlri_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vsrlri_h +name = lsx_vsrlri_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vsrlri_w +name = lsx_vsrlri_w +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vsrlri_d +name = lsx_vsrlri_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vbitclr_b +name = lsx_vbitclr_b +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vbitclr_h +name = lsx_vbitclr_h +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vbitclr_w +name = lsx_vbitclr_w +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vbitclr_d +name = lsx_vbitclr_d +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vbitclri_b +name = lsx_vbitclri_b +asm-fmts = vd, vj, ui3 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vbitclri_h +name = lsx_vbitclri_h +asm-fmts = vd, vj, ui4 +data-types = UV8HI, UV8HI, UQI + +/// lsx_vbitclri_w +name = lsx_vbitclri_w +asm-fmts = vd, vj, ui5 +data-types = UV4SI, UV4SI, UQI + +/// lsx_vbitclri_d +name = lsx_vbitclri_d +asm-fmts = vd, vj, ui6 +data-types = UV2DI, UV2DI, UQI + +/// lsx_vbitset_b +name = lsx_vbitset_b +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vbitset_h +name = lsx_vbitset_h +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vbitset_w +name = lsx_vbitset_w +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vbitset_d +name = lsx_vbitset_d +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vbitseti_b +name = lsx_vbitseti_b +asm-fmts = vd, vj, ui3 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vbitseti_h +name = lsx_vbitseti_h +asm-fmts = vd, vj, ui4 +data-types = UV8HI, UV8HI, UQI + +/// lsx_vbitseti_w +name = lsx_vbitseti_w +asm-fmts = vd, vj, ui5 +data-types = UV4SI, UV4SI, UQI + +/// lsx_vbitseti_d +name = lsx_vbitseti_d +asm-fmts = vd, vj, ui6 +data-types = UV2DI, UV2DI, UQI + 
+/// lsx_vbitrev_b +name = lsx_vbitrev_b +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vbitrev_h +name = lsx_vbitrev_h +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vbitrev_w +name = lsx_vbitrev_w +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vbitrev_d +name = lsx_vbitrev_d +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vbitrevi_b +name = lsx_vbitrevi_b +asm-fmts = vd, vj, ui3 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vbitrevi_h +name = lsx_vbitrevi_h +asm-fmts = vd, vj, ui4 +data-types = UV8HI, UV8HI, UQI + +/// lsx_vbitrevi_w +name = lsx_vbitrevi_w +asm-fmts = vd, vj, ui5 +data-types = UV4SI, UV4SI, UQI + +/// lsx_vbitrevi_d +name = lsx_vbitrevi_d +asm-fmts = vd, vj, ui6 +data-types = UV2DI, UV2DI, UQI + +/// lsx_vadd_b +name = lsx_vadd_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vadd_h +name = lsx_vadd_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vadd_w +name = lsx_vadd_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vadd_d +name = lsx_vadd_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vaddi_bu +name = lsx_vaddi_bu +asm-fmts = vd, vj, ui5 +data-types = V16QI, V16QI, UQI + +/// lsx_vaddi_hu +name = lsx_vaddi_hu +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, UQI + +/// lsx_vaddi_wu +name = lsx_vaddi_wu +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vaddi_du +name = lsx_vaddi_du +asm-fmts = vd, vj, ui5 +data-types = V2DI, V2DI, UQI + +/// lsx_vsub_b +name = lsx_vsub_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsub_h +name = lsx_vsub_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsub_w +name = lsx_vsub_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsub_d +name = lsx_vsub_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsubi_bu +name = lsx_vsubi_bu +asm-fmts = vd, vj, ui5 +data-types 
= V16QI, V16QI, UQI + +/// lsx_vsubi_hu +name = lsx_vsubi_hu +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, UQI + +/// lsx_vsubi_wu +name = lsx_vsubi_wu +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vsubi_du +name = lsx_vsubi_du +asm-fmts = vd, vj, ui5 +data-types = V2DI, V2DI, UQI + +/// lsx_vmax_b +name = lsx_vmax_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vmax_h +name = lsx_vmax_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vmax_w +name = lsx_vmax_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vmax_d +name = lsx_vmax_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmaxi_b +name = lsx_vmaxi_b +asm-fmts = vd, vj, si5 +data-types = V16QI, V16QI, QI + +/// lsx_vmaxi_h +name = lsx_vmaxi_h +asm-fmts = vd, vj, si5 +data-types = V8HI, V8HI, QI + +/// lsx_vmaxi_w +name = lsx_vmaxi_w +asm-fmts = vd, vj, si5 +data-types = V4SI, V4SI, QI + +/// lsx_vmaxi_d +name = lsx_vmaxi_d +asm-fmts = vd, vj, si5 +data-types = V2DI, V2DI, QI + +/// lsx_vmax_bu +name = lsx_vmax_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vmax_hu +name = lsx_vmax_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vmax_wu +name = lsx_vmax_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vmax_du +name = lsx_vmax_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vmaxi_bu +name = lsx_vmaxi_bu +asm-fmts = vd, vj, ui5 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vmaxi_hu +name = lsx_vmaxi_hu +asm-fmts = vd, vj, ui5 +data-types = UV8HI, UV8HI, UQI + +/// lsx_vmaxi_wu +name = lsx_vmaxi_wu +asm-fmts = vd, vj, ui5 +data-types = UV4SI, UV4SI, UQI + +/// lsx_vmaxi_du +name = lsx_vmaxi_du +asm-fmts = vd, vj, ui5 +data-types = UV2DI, UV2DI, UQI + +/// lsx_vmin_b +name = lsx_vmin_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vmin_h +name = lsx_vmin_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, 
V8HI + +/// lsx_vmin_w +name = lsx_vmin_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vmin_d +name = lsx_vmin_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmini_b +name = lsx_vmini_b +asm-fmts = vd, vj, si5 +data-types = V16QI, V16QI, QI + +/// lsx_vmini_h +name = lsx_vmini_h +asm-fmts = vd, vj, si5 +data-types = V8HI, V8HI, QI + +/// lsx_vmini_w +name = lsx_vmini_w +asm-fmts = vd, vj, si5 +data-types = V4SI, V4SI, QI + +/// lsx_vmini_d +name = lsx_vmini_d +asm-fmts = vd, vj, si5 +data-types = V2DI, V2DI, QI + +/// lsx_vmin_bu +name = lsx_vmin_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vmin_hu +name = lsx_vmin_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vmin_wu +name = lsx_vmin_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vmin_du +name = lsx_vmin_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vmini_bu +name = lsx_vmini_bu +asm-fmts = vd, vj, ui5 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vmini_hu +name = lsx_vmini_hu +asm-fmts = vd, vj, ui5 +data-types = UV8HI, UV8HI, UQI + +/// lsx_vmini_wu +name = lsx_vmini_wu +asm-fmts = vd, vj, ui5 +data-types = UV4SI, UV4SI, UQI + +/// lsx_vmini_du +name = lsx_vmini_du +asm-fmts = vd, vj, ui5 +data-types = UV2DI, UV2DI, UQI + +/// lsx_vseq_b +name = lsx_vseq_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vseq_h +name = lsx_vseq_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vseq_w +name = lsx_vseq_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vseq_d +name = lsx_vseq_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vseqi_b +name = lsx_vseqi_b +asm-fmts = vd, vj, si5 +data-types = V16QI, V16QI, QI + +/// lsx_vseqi_h +name = lsx_vseqi_h +asm-fmts = vd, vj, si5 +data-types = V8HI, V8HI, QI + +/// lsx_vseqi_w +name = lsx_vseqi_w +asm-fmts = vd, vj, si5 +data-types = V4SI, V4SI, QI + +/// lsx_vseqi_d 
+name = lsx_vseqi_d +asm-fmts = vd, vj, si5 +data-types = V2DI, V2DI, QI + +/// lsx_vslti_b +name = lsx_vslti_b +asm-fmts = vd, vj, si5 +data-types = V16QI, V16QI, QI + +/// lsx_vslt_b +name = lsx_vslt_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vslt_h +name = lsx_vslt_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vslt_w +name = lsx_vslt_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vslt_d +name = lsx_vslt_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vslti_h +name = lsx_vslti_h +asm-fmts = vd, vj, si5 +data-types = V8HI, V8HI, QI + +/// lsx_vslti_w +name = lsx_vslti_w +asm-fmts = vd, vj, si5 +data-types = V4SI, V4SI, QI + +/// lsx_vslti_d +name = lsx_vslti_d +asm-fmts = vd, vj, si5 +data-types = V2DI, V2DI, QI + +/// lsx_vslt_bu +name = lsx_vslt_bu +asm-fmts = vd, vj, vk +data-types = V16QI, UV16QI, UV16QI + +/// lsx_vslt_hu +name = lsx_vslt_hu +asm-fmts = vd, vj, vk +data-types = V8HI, UV8HI, UV8HI + +/// lsx_vslt_wu +name = lsx_vslt_wu +asm-fmts = vd, vj, vk +data-types = V4SI, UV4SI, UV4SI + +/// lsx_vslt_du +name = lsx_vslt_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vslti_bu +name = lsx_vslti_bu +asm-fmts = vd, vj, ui5 +data-types = V16QI, UV16QI, UQI + +/// lsx_vslti_hu +name = lsx_vslti_hu +asm-fmts = vd, vj, ui5 +data-types = V8HI, UV8HI, UQI + +/// lsx_vslti_wu +name = lsx_vslti_wu +asm-fmts = vd, vj, ui5 +data-types = V4SI, UV4SI, UQI + +/// lsx_vslti_du +name = lsx_vslti_du +asm-fmts = vd, vj, ui5 +data-types = V2DI, UV2DI, UQI + +/// lsx_vsle_b +name = lsx_vsle_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsle_h +name = lsx_vsle_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsle_w +name = lsx_vsle_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsle_d +name = lsx_vsle_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vslei_b +name = lsx_vslei_b +asm-fmts = 
vd, vj, si5 +data-types = V16QI, V16QI, QI + +/// lsx_vslei_h +name = lsx_vslei_h +asm-fmts = vd, vj, si5 +data-types = V8HI, V8HI, QI + +/// lsx_vslei_w +name = lsx_vslei_w +asm-fmts = vd, vj, si5 +data-types = V4SI, V4SI, QI + +/// lsx_vslei_d +name = lsx_vslei_d +asm-fmts = vd, vj, si5 +data-types = V2DI, V2DI, QI + +/// lsx_vsle_bu +name = lsx_vsle_bu +asm-fmts = vd, vj, vk +data-types = V16QI, UV16QI, UV16QI + +/// lsx_vsle_hu +name = lsx_vsle_hu +asm-fmts = vd, vj, vk +data-types = V8HI, UV8HI, UV8HI + +/// lsx_vsle_wu +name = lsx_vsle_wu +asm-fmts = vd, vj, vk +data-types = V4SI, UV4SI, UV4SI + +/// lsx_vsle_du +name = lsx_vsle_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vslei_bu +name = lsx_vslei_bu +asm-fmts = vd, vj, ui5 +data-types = V16QI, UV16QI, UQI + +/// lsx_vslei_hu +name = lsx_vslei_hu +asm-fmts = vd, vj, ui5 +data-types = V8HI, UV8HI, UQI + +/// lsx_vslei_wu +name = lsx_vslei_wu +asm-fmts = vd, vj, ui5 +data-types = V4SI, UV4SI, UQI + +/// lsx_vslei_du +name = lsx_vslei_du +asm-fmts = vd, vj, ui5 +data-types = V2DI, UV2DI, UQI + +/// lsx_vsat_b +name = lsx_vsat_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vsat_h +name = lsx_vsat_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vsat_w +name = lsx_vsat_w +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vsat_d +name = lsx_vsat_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vsat_bu +name = lsx_vsat_bu +asm-fmts = vd, vj, ui3 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vsat_hu +name = lsx_vsat_hu +asm-fmts = vd, vj, ui4 +data-types = UV8HI, UV8HI, UQI + +/// lsx_vsat_wu +name = lsx_vsat_wu +asm-fmts = vd, vj, ui5 +data-types = UV4SI, UV4SI, UQI + +/// lsx_vsat_du +name = lsx_vsat_du +asm-fmts = vd, vj, ui6 +data-types = UV2DI, UV2DI, UQI + +/// lsx_vadda_b +name = lsx_vadda_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vadda_h +name = lsx_vadda_h +asm-fmts = vd, vj, vk 
+data-types = V8HI, V8HI, V8HI + +/// lsx_vadda_w +name = lsx_vadda_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vadda_d +name = lsx_vadda_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsadd_b +name = lsx_vsadd_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsadd_h +name = lsx_vsadd_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsadd_w +name = lsx_vsadd_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsadd_d +name = lsx_vsadd_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsadd_bu +name = lsx_vsadd_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vsadd_hu +name = lsx_vsadd_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vsadd_wu +name = lsx_vsadd_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vsadd_du +name = lsx_vsadd_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vavg_b +name = lsx_vavg_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vavg_h +name = lsx_vavg_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vavg_w +name = lsx_vavg_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vavg_d +name = lsx_vavg_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vavg_bu +name = lsx_vavg_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vavg_hu +name = lsx_vavg_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vavg_wu +name = lsx_vavg_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vavg_du +name = lsx_vavg_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vavgr_b +name = lsx_vavgr_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vavgr_h +name = lsx_vavgr_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vavgr_w +name = lsx_vavgr_w +asm-fmts = vd, vj, vk 
+data-types = V4SI, V4SI, V4SI + +/// lsx_vavgr_d +name = lsx_vavgr_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vavgr_bu +name = lsx_vavgr_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vavgr_hu +name = lsx_vavgr_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vavgr_wu +name = lsx_vavgr_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vavgr_du +name = lsx_vavgr_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vssub_b +name = lsx_vssub_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vssub_h +name = lsx_vssub_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vssub_w +name = lsx_vssub_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vssub_d +name = lsx_vssub_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vssub_bu +name = lsx_vssub_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vssub_hu +name = lsx_vssub_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vssub_wu +name = lsx_vssub_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vssub_du +name = lsx_vssub_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vabsd_b +name = lsx_vabsd_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vabsd_h +name = lsx_vabsd_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vabsd_w +name = lsx_vabsd_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vabsd_d +name = lsx_vabsd_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vabsd_bu +name = lsx_vabsd_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vabsd_hu +name = lsx_vabsd_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vabsd_wu +name = lsx_vabsd_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vabsd_du +name = 
lsx_vabsd_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vmul_b +name = lsx_vmul_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vmul_h +name = lsx_vmul_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vmul_w +name = lsx_vmul_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vmul_d +name = lsx_vmul_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmadd_b +name = lsx_vmadd_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI, V16QI + +/// lsx_vmadd_h +name = lsx_vmadd_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI, V8HI + +/// lsx_vmadd_w +name = lsx_vmadd_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI, V4SI + +/// lsx_vmadd_d +name = lsx_vmadd_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI, V2DI + +/// lsx_vmsub_b +name = lsx_vmsub_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI, V16QI + +/// lsx_vmsub_h +name = lsx_vmsub_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI, V8HI + +/// lsx_vmsub_w +name = lsx_vmsub_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI, V4SI + +/// lsx_vmsub_d +name = lsx_vmsub_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI, V2DI + +/// lsx_vdiv_b +name = lsx_vdiv_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vdiv_h +name = lsx_vdiv_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vdiv_w +name = lsx_vdiv_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vdiv_d +name = lsx_vdiv_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vdiv_bu +name = lsx_vdiv_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vdiv_hu +name = lsx_vdiv_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vdiv_wu +name = lsx_vdiv_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vdiv_du +name = lsx_vdiv_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// 
lsx_vhaddw_h_b +name = lsx_vhaddw_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vhaddw_w_h +name = lsx_vhaddw_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vhaddw_d_w +name = lsx_vhaddw_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vhaddw_hu_bu +name = lsx_vhaddw_hu_bu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV16QI, UV16QI + +/// lsx_vhaddw_wu_hu +name = lsx_vhaddw_wu_hu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV8HI, UV8HI + +/// lsx_vhaddw_du_wu +name = lsx_vhaddw_du_wu +asm-fmts = vd, vj, vk +data-types = UV2DI, UV4SI, UV4SI + +/// lsx_vhsubw_h_b +name = lsx_vhsubw_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vhsubw_w_h +name = lsx_vhsubw_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vhsubw_d_w +name = lsx_vhsubw_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vhsubw_hu_bu +name = lsx_vhsubw_hu_bu +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vhsubw_wu_hu +name = lsx_vhsubw_wu_hu +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vhsubw_du_wu +name = lsx_vhsubw_du_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vmod_b +name = lsx_vmod_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vmod_h +name = lsx_vmod_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vmod_w +name = lsx_vmod_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vmod_d +name = lsx_vmod_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmod_bu +name = lsx_vmod_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vmod_hu +name = lsx_vmod_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vmod_wu +name = lsx_vmod_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vmod_du +name = lsx_vmod_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// 
lsx_vreplve_b +name = lsx_vreplve_b +asm-fmts = vd, vj, rk +data-types = V16QI, V16QI, SI + +/// lsx_vreplve_h +name = lsx_vreplve_h +asm-fmts = vd, vj, rk +data-types = V8HI, V8HI, SI + +/// lsx_vreplve_w +name = lsx_vreplve_w +asm-fmts = vd, vj, rk +data-types = V4SI, V4SI, SI + +/// lsx_vreplve_d +name = lsx_vreplve_d +asm-fmts = vd, vj, rk +data-types = V2DI, V2DI, SI + +/// lsx_vreplvei_b +name = lsx_vreplvei_b +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, UQI + +/// lsx_vreplvei_h +name = lsx_vreplvei_h +asm-fmts = vd, vj, ui3 +data-types = V8HI, V8HI, UQI + +/// lsx_vreplvei_w +name = lsx_vreplvei_w +asm-fmts = vd, vj, ui2 +data-types = V4SI, V4SI, UQI + +/// lsx_vreplvei_d +name = lsx_vreplvei_d +asm-fmts = vd, vj, ui1 +data-types = V2DI, V2DI, UQI + +/// lsx_vpickev_b +name = lsx_vpickev_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vpickev_h +name = lsx_vpickev_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vpickev_w +name = lsx_vpickev_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vpickev_d +name = lsx_vpickev_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vpickod_b +name = lsx_vpickod_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vpickod_h +name = lsx_vpickod_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vpickod_w +name = lsx_vpickod_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vpickod_d +name = lsx_vpickod_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vilvh_b +name = lsx_vilvh_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vilvh_h +name = lsx_vilvh_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vilvh_w +name = lsx_vilvh_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vilvh_d +name = lsx_vilvh_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vilvl_b +name = lsx_vilvl_b +asm-fmts = vd, vj, vk 
+data-types = V16QI, V16QI, V16QI + +/// lsx_vilvl_h +name = lsx_vilvl_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vilvl_w +name = lsx_vilvl_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vilvl_d +name = lsx_vilvl_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vpackev_b +name = lsx_vpackev_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vpackev_h +name = lsx_vpackev_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vpackev_w +name = lsx_vpackev_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vpackev_d +name = lsx_vpackev_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vpackod_b +name = lsx_vpackod_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vpackod_h +name = lsx_vpackod_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vpackod_w +name = lsx_vpackod_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vpackod_d +name = lsx_vpackod_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vshuf_h +name = lsx_vshuf_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI, V8HI + +/// lsx_vshuf_w +name = lsx_vshuf_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI, V4SI + +/// lsx_vshuf_d +name = lsx_vshuf_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI, V2DI + +/// lsx_vand_v +name = lsx_vand_v +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vandi_b +name = lsx_vandi_b +asm-fmts = vd, vj, ui8 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vor_v +name = lsx_vor_v +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vori_b +name = lsx_vori_b +asm-fmts = vd, vj, ui8 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vnor_v +name = lsx_vnor_v +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vnori_b +name = lsx_vnori_b +asm-fmts = vd, vj, ui8 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vxor_v +name = 
lsx_vxor_v +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vxori_b +name = lsx_vxori_b +asm-fmts = vd, vj, ui8 +data-types = UV16QI, UV16QI, UQI + +/// lsx_vbitsel_v +name = lsx_vbitsel_v +asm-fmts = vd, vj, vk, va +data-types = UV16QI, UV16QI, UV16QI, UV16QI + +/// lsx_vbitseli_b +name = lsx_vbitseli_b +asm-fmts = vd, vj, ui8 +data-types = UV16QI, UV16QI, UV16QI, USI + +/// lsx_vshuf4i_b +name = lsx_vshuf4i_b +asm-fmts = vd, vj, ui8 +data-types = V16QI, V16QI, USI + +/// lsx_vshuf4i_h +name = lsx_vshuf4i_h +asm-fmts = vd, vj, ui8 +data-types = V8HI, V8HI, USI + +/// lsx_vshuf4i_w +name = lsx_vshuf4i_w +asm-fmts = vd, vj, ui8 +data-types = V4SI, V4SI, USI + +/// lsx_vreplgr2vr_b +name = lsx_vreplgr2vr_b +asm-fmts = vd, rj +data-types = V16QI, SI + +/// lsx_vreplgr2vr_h +name = lsx_vreplgr2vr_h +asm-fmts = vd, rj +data-types = V8HI, SI + +/// lsx_vreplgr2vr_w +name = lsx_vreplgr2vr_w +asm-fmts = vd, rj +data-types = V4SI, SI + +/// lsx_vreplgr2vr_d +name = lsx_vreplgr2vr_d +asm-fmts = vd, rj +data-types = V2DI, DI + +/// lsx_vpcnt_b +name = lsx_vpcnt_b +asm-fmts = vd, vj +data-types = V16QI, V16QI + +/// lsx_vpcnt_h +name = lsx_vpcnt_h +asm-fmts = vd, vj +data-types = V8HI, V8HI + +/// lsx_vpcnt_w +name = lsx_vpcnt_w +asm-fmts = vd, vj +data-types = V4SI, V4SI + +/// lsx_vpcnt_d +name = lsx_vpcnt_d +asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vclo_b +name = lsx_vclo_b +asm-fmts = vd, vj +data-types = V16QI, V16QI + +/// lsx_vclo_h +name = lsx_vclo_h +asm-fmts = vd, vj +data-types = V8HI, V8HI + +/// lsx_vclo_w +name = lsx_vclo_w +asm-fmts = vd, vj +data-types = V4SI, V4SI + +/// lsx_vclo_d +name = lsx_vclo_d +asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vclz_b +name = lsx_vclz_b +asm-fmts = vd, vj +data-types = V16QI, V16QI + +/// lsx_vclz_h +name = lsx_vclz_h +asm-fmts = vd, vj +data-types = V8HI, V8HI + +/// lsx_vclz_w +name = lsx_vclz_w +asm-fmts = vd, vj +data-types = V4SI, V4SI + +/// lsx_vclz_d +name = lsx_vclz_d 
+asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vpickve2gr_b +name = lsx_vpickve2gr_b +asm-fmts = rd, vj, ui4 +data-types = SI, V16QI, UQI + +/// lsx_vpickve2gr_h +name = lsx_vpickve2gr_h +asm-fmts = rd, vj, ui3 +data-types = SI, V8HI, UQI + +/// lsx_vpickve2gr_w +name = lsx_vpickve2gr_w +asm-fmts = rd, vj, ui2 +data-types = SI, V4SI, UQI + +/// lsx_vpickve2gr_d +name = lsx_vpickve2gr_d +asm-fmts = rd, vj, ui1 +data-types = DI, V2DI, UQI + +/// lsx_vpickve2gr_bu +name = lsx_vpickve2gr_bu +asm-fmts = rd, vj, ui4 +data-types = USI, V16QI, UQI + +/// lsx_vpickve2gr_hu +name = lsx_vpickve2gr_hu +asm-fmts = rd, vj, ui3 +data-types = USI, V8HI, UQI + +/// lsx_vpickve2gr_wu +name = lsx_vpickve2gr_wu +asm-fmts = rd, vj, ui2 +data-types = USI, V4SI, UQI + +/// lsx_vpickve2gr_du +name = lsx_vpickve2gr_du +asm-fmts = rd, vj, ui1 +data-types = UDI, V2DI, UQI + +/// lsx_vinsgr2vr_b +name = lsx_vinsgr2vr_b +asm-fmts = vd, rj, ui4 +data-types = V16QI, V16QI, SI, UQI + +/// lsx_vinsgr2vr_h +name = lsx_vinsgr2vr_h +asm-fmts = vd, rj, ui3 +data-types = V8HI, V8HI, SI, UQI + +/// lsx_vinsgr2vr_w +name = lsx_vinsgr2vr_w +asm-fmts = vd, rj, ui2 +data-types = V4SI, V4SI, SI, UQI + +/// lsx_vinsgr2vr_d +name = lsx_vinsgr2vr_d +asm-fmts = vd, rj, ui1 +data-types = V2DI, V2DI, DI, UQI + +/// lsx_vfadd_s +name = lsx_vfadd_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfadd_d +name = lsx_vfadd_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfsub_s +name = lsx_vfsub_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfsub_d +name = lsx_vfsub_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfmul_s +name = lsx_vfmul_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfmul_d +name = lsx_vfmul_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfdiv_s +name = lsx_vfdiv_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfdiv_d +name = lsx_vfdiv_d +asm-fmts = vd, vj, 
vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfcvt_h_s +name = lsx_vfcvt_h_s +asm-fmts = vd, vj, vk +data-types = V8HI, V4SF, V4SF + +/// lsx_vfcvt_s_d +name = lsx_vfcvt_s_d +asm-fmts = vd, vj, vk +data-types = V4SF, V2DF, V2DF + +/// lsx_vfmin_s +name = lsx_vfmin_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfmin_d +name = lsx_vfmin_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfmina_s +name = lsx_vfmina_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfmina_d +name = lsx_vfmina_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfmax_s +name = lsx_vfmax_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfmax_d +name = lsx_vfmax_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfmaxa_s +name = lsx_vfmaxa_s +asm-fmts = vd, vj, vk +data-types = V4SF, V4SF, V4SF + +/// lsx_vfmaxa_d +name = lsx_vfmaxa_d +asm-fmts = vd, vj, vk +data-types = V2DF, V2DF, V2DF + +/// lsx_vfclass_s +name = lsx_vfclass_s +asm-fmts = vd, vj +data-types = V4SI, V4SF + +/// lsx_vfclass_d +name = lsx_vfclass_d +asm-fmts = vd, vj +data-types = V2DI, V2DF + +/// lsx_vfsqrt_s +name = lsx_vfsqrt_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfsqrt_d +name = lsx_vfsqrt_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrecip_s +name = lsx_vfrecip_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrecip_d +name = lsx_vfrecip_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrecipe_s +name = lsx_vfrecipe_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrecipe_d +name = lsx_vfrecipe_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrsqrte_s +name = lsx_vfrsqrte_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrsqrte_d +name = lsx_vfrsqrte_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrint_s +name = lsx_vfrint_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrint_d +name = lsx_vfrint_d +asm-fmts = vd, vj 
+data-types = V2DF, V2DF + +/// lsx_vfrsqrt_s +name = lsx_vfrsqrt_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrsqrt_d +name = lsx_vfrsqrt_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vflogb_s +name = lsx_vflogb_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vflogb_d +name = lsx_vflogb_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfcvth_s_h +name = lsx_vfcvth_s_h +asm-fmts = vd, vj +data-types = V4SF, V8HI + +/// lsx_vfcvth_d_s +name = lsx_vfcvth_d_s +asm-fmts = vd, vj +data-types = V2DF, V4SF + +/// lsx_vfcvtl_s_h +name = lsx_vfcvtl_s_h +asm-fmts = vd, vj +data-types = V4SF, V8HI + +/// lsx_vfcvtl_d_s +name = lsx_vfcvtl_d_s +asm-fmts = vd, vj +data-types = V2DF, V4SF + +/// lsx_vftint_w_s +name = lsx_vftint_w_s +asm-fmts = vd, vj +data-types = V4SI, V4SF + +/// lsx_vftint_l_d +name = lsx_vftint_l_d +asm-fmts = vd, vj +data-types = V2DI, V2DF + +/// lsx_vftint_wu_s +name = lsx_vftint_wu_s +asm-fmts = vd, vj +data-types = UV4SI, V4SF + +/// lsx_vftint_lu_d +name = lsx_vftint_lu_d +asm-fmts = vd, vj +data-types = UV2DI, V2DF + +/// lsx_vftintrz_w_s +name = lsx_vftintrz_w_s +asm-fmts = vd, vj +data-types = V4SI, V4SF + +/// lsx_vftintrz_l_d +name = lsx_vftintrz_l_d +asm-fmts = vd, vj +data-types = V2DI, V2DF + +/// lsx_vftintrz_wu_s +name = lsx_vftintrz_wu_s +asm-fmts = vd, vj +data-types = UV4SI, V4SF + +/// lsx_vftintrz_lu_d +name = lsx_vftintrz_lu_d +asm-fmts = vd, vj +data-types = UV2DI, V2DF + +/// lsx_vffint_s_w +name = lsx_vffint_s_w +asm-fmts = vd, vj +data-types = V4SF, V4SI + +/// lsx_vffint_d_l +name = lsx_vffint_d_l +asm-fmts = vd, vj +data-types = V2DF, V2DI + +/// lsx_vffint_s_wu +name = lsx_vffint_s_wu +asm-fmts = vd, vj +data-types = V4SF, UV4SI + +/// lsx_vffint_d_lu +name = lsx_vffint_d_lu +asm-fmts = vd, vj +data-types = V2DF, UV2DI + +/// lsx_vandn_v +name = lsx_vandn_v +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vneg_b +name = lsx_vneg_b +asm-fmts = vd, vj +data-types = 
V16QI, V16QI + +/// lsx_vneg_h +name = lsx_vneg_h +asm-fmts = vd, vj +data-types = V8HI, V8HI + +/// lsx_vneg_w +name = lsx_vneg_w +asm-fmts = vd, vj +data-types = V4SI, V4SI + +/// lsx_vneg_d +name = lsx_vneg_d +asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vmuh_b +name = lsx_vmuh_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vmuh_h +name = lsx_vmuh_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vmuh_w +name = lsx_vmuh_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vmuh_d +name = lsx_vmuh_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmuh_bu +name = lsx_vmuh_bu +asm-fmts = vd, vj, vk +data-types = UV16QI, UV16QI, UV16QI + +/// lsx_vmuh_hu +name = lsx_vmuh_hu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV8HI + +/// lsx_vmuh_wu +name = lsx_vmuh_wu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV4SI + +/// lsx_vmuh_du +name = lsx_vmuh_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vsllwil_h_b +name = lsx_vsllwil_h_b +asm-fmts = vd, vj, ui3 +data-types = V8HI, V16QI, UQI + +/// lsx_vsllwil_w_h +name = lsx_vsllwil_w_h +asm-fmts = vd, vj, ui4 +data-types = V4SI, V8HI, UQI + +/// lsx_vsllwil_d_w +name = lsx_vsllwil_d_w +asm-fmts = vd, vj, ui5 +data-types = V2DI, V4SI, UQI + +/// lsx_vsllwil_hu_bu +name = lsx_vsllwil_hu_bu +asm-fmts = vd, vj, ui3 +data-types = UV8HI, UV16QI, UQI + +/// lsx_vsllwil_wu_hu +name = lsx_vsllwil_wu_hu +asm-fmts = vd, vj, ui4 +data-types = UV4SI, UV8HI, UQI + +/// lsx_vsllwil_du_wu +name = lsx_vsllwil_du_wu +asm-fmts = vd, vj, ui5 +data-types = UV2DI, UV4SI, UQI + +/// lsx_vsran_b_h +name = lsx_vsran_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vsran_h_w +name = lsx_vsran_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vsran_w_d +name = lsx_vsran_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssran_b_h +name = lsx_vssran_b_h +asm-fmts = vd, vj, vk 
+data-types = V16QI, V8HI, V8HI + +/// lsx_vssran_h_w +name = lsx_vssran_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vssran_w_d +name = lsx_vssran_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssran_bu_h +name = lsx_vssran_bu_h +asm-fmts = vd, vj, vk +data-types = UV16QI, UV8HI, UV8HI + +/// lsx_vssran_hu_w +name = lsx_vssran_hu_w +asm-fmts = vd, vj, vk +data-types = UV8HI, UV4SI, UV4SI + +/// lsx_vssran_wu_d +name = lsx_vssran_wu_d +asm-fmts = vd, vj, vk +data-types = UV4SI, UV2DI, UV2DI + +/// lsx_vsrarn_b_h +name = lsx_vsrarn_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vsrarn_h_w +name = lsx_vsrarn_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vsrarn_w_d +name = lsx_vsrarn_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssrarn_b_h +name = lsx_vssrarn_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vssrarn_h_w +name = lsx_vssrarn_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vssrarn_w_d +name = lsx_vssrarn_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssrarn_bu_h +name = lsx_vssrarn_bu_h +asm-fmts = vd, vj, vk +data-types = UV16QI, UV8HI, UV8HI + +/// lsx_vssrarn_hu_w +name = lsx_vssrarn_hu_w +asm-fmts = vd, vj, vk +data-types = UV8HI, UV4SI, UV4SI + +/// lsx_vssrarn_wu_d +name = lsx_vssrarn_wu_d +asm-fmts = vd, vj, vk +data-types = UV4SI, UV2DI, UV2DI + +/// lsx_vsrln_b_h +name = lsx_vsrln_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vsrln_h_w +name = lsx_vsrln_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vsrln_w_d +name = lsx_vsrln_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssrln_bu_h +name = lsx_vssrln_bu_h +asm-fmts = vd, vj, vk +data-types = UV16QI, UV8HI, UV8HI + +/// lsx_vssrln_hu_w +name = lsx_vssrln_hu_w +asm-fmts = vd, vj, vk +data-types = UV8HI, UV4SI, UV4SI + +/// lsx_vssrln_wu_d +name = 
lsx_vssrln_wu_d +asm-fmts = vd, vj, vk +data-types = UV4SI, UV2DI, UV2DI + +/// lsx_vsrlrn_b_h +name = lsx_vsrlrn_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vsrlrn_h_w +name = lsx_vsrlrn_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vsrlrn_w_d +name = lsx_vsrlrn_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssrlrn_bu_h +name = lsx_vssrlrn_bu_h +asm-fmts = vd, vj, vk +data-types = UV16QI, UV8HI, UV8HI + +/// lsx_vssrlrn_hu_w +name = lsx_vssrlrn_hu_w +asm-fmts = vd, vj, vk +data-types = UV8HI, UV4SI, UV4SI + +/// lsx_vssrlrn_wu_d +name = lsx_vssrlrn_wu_d +asm-fmts = vd, vj, vk +data-types = UV4SI, UV2DI, UV2DI + +/// lsx_vfrstpi_b +name = lsx_vfrstpi_b +asm-fmts = vd, vj, ui5 +data-types = V16QI, V16QI, V16QI, UQI + +/// lsx_vfrstpi_h +name = lsx_vfrstpi_h +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, UQI + +/// lsx_vfrstp_b +name = lsx_vfrstp_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI, V16QI + +/// lsx_vfrstp_h +name = lsx_vfrstp_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI, V8HI + +/// lsx_vshuf4i_d +name = lsx_vshuf4i_d +asm-fmts = vd, vj, ui8 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vbsrl_v +name = lsx_vbsrl_v +asm-fmts = vd, vj, ui5 +data-types = V16QI, V16QI, UQI + +/// lsx_vbsll_v +name = lsx_vbsll_v +asm-fmts = vd, vj, ui5 +data-types = V16QI, V16QI, UQI + +/// lsx_vextrins_b +name = lsx_vextrins_b +asm-fmts = vd, vj, ui8 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vextrins_h +name = lsx_vextrins_h +asm-fmts = vd, vj, ui8 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vextrins_w +name = lsx_vextrins_w +asm-fmts = vd, vj, ui8 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vextrins_d +name = lsx_vextrins_d +asm-fmts = vd, vj, ui8 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vmskltz_b +name = lsx_vmskltz_b +asm-fmts = vd, vj +data-types = V16QI, V16QI + +/// lsx_vmskltz_h +name = lsx_vmskltz_h +asm-fmts = vd, vj +data-types = V8HI, V8HI + 
+/// lsx_vmskltz_w +name = lsx_vmskltz_w +asm-fmts = vd, vj +data-types = V4SI, V4SI + +/// lsx_vmskltz_d +name = lsx_vmskltz_d +asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vsigncov_b +name = lsx_vsigncov_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vsigncov_h +name = lsx_vsigncov_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vsigncov_w +name = lsx_vsigncov_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vsigncov_d +name = lsx_vsigncov_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vfmadd_s +name = lsx_vfmadd_s +asm-fmts = vd, vj, vk, va +data-types = V4SF, V4SF, V4SF, V4SF + +/// lsx_vfmadd_d +name = lsx_vfmadd_d +asm-fmts = vd, vj, vk, va +data-types = V2DF, V2DF, V2DF, V2DF + +/// lsx_vfmsub_s +name = lsx_vfmsub_s +asm-fmts = vd, vj, vk, va +data-types = V4SF, V4SF, V4SF, V4SF + +/// lsx_vfmsub_d +name = lsx_vfmsub_d +asm-fmts = vd, vj, vk, va +data-types = V2DF, V2DF, V2DF, V2DF + +/// lsx_vfnmadd_s +name = lsx_vfnmadd_s +asm-fmts = vd, vj, vk, va +data-types = V4SF, V4SF, V4SF, V4SF + +/// lsx_vfnmadd_d +name = lsx_vfnmadd_d +asm-fmts = vd, vj, vk, va +data-types = V2DF, V2DF, V2DF, V2DF + +/// lsx_vfnmsub_s +name = lsx_vfnmsub_s +asm-fmts = vd, vj, vk, va +data-types = V4SF, V4SF, V4SF, V4SF + +/// lsx_vfnmsub_d +name = lsx_vfnmsub_d +asm-fmts = vd, vj, vk, va +data-types = V2DF, V2DF, V2DF, V2DF + +/// lsx_vftintrne_w_s +name = lsx_vftintrne_w_s +asm-fmts = vd, vj +data-types = V4SI, V4SF + +/// lsx_vftintrne_l_d +name = lsx_vftintrne_l_d +asm-fmts = vd, vj +data-types = V2DI, V2DF + +/// lsx_vftintrp_w_s +name = lsx_vftintrp_w_s +asm-fmts = vd, vj +data-types = V4SI, V4SF + +/// lsx_vftintrp_l_d +name = lsx_vftintrp_l_d +asm-fmts = vd, vj +data-types = V2DI, V2DF + +/// lsx_vftintrm_w_s +name = lsx_vftintrm_w_s +asm-fmts = vd, vj +data-types = V4SI, V4SF + +/// lsx_vftintrm_l_d +name = lsx_vftintrm_l_d +asm-fmts = vd, vj +data-types = V2DI, V2DF + +/// 
lsx_vftint_w_d +name = lsx_vftint_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DF, V2DF + +/// lsx_vffint_s_l +name = lsx_vffint_s_l +asm-fmts = vd, vj, vk +data-types = V4SF, V2DI, V2DI + +/// lsx_vftintrz_w_d +name = lsx_vftintrz_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DF, V2DF + +/// lsx_vftintrp_w_d +name = lsx_vftintrp_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DF, V2DF + +/// lsx_vftintrm_w_d +name = lsx_vftintrm_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DF, V2DF + +/// lsx_vftintrne_w_d +name = lsx_vftintrne_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DF, V2DF + +/// lsx_vftintl_l_s +name = lsx_vftintl_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftinth_l_s +name = lsx_vftinth_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vffinth_d_w +name = lsx_vffinth_d_w +asm-fmts = vd, vj +data-types = V2DF, V4SI + +/// lsx_vffintl_d_w +name = lsx_vffintl_d_w +asm-fmts = vd, vj +data-types = V2DF, V4SI + +/// lsx_vftintrzl_l_s +name = lsx_vftintrzl_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrzh_l_s +name = lsx_vftintrzh_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrpl_l_s +name = lsx_vftintrpl_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrph_l_s +name = lsx_vftintrph_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrml_l_s +name = lsx_vftintrml_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrmh_l_s +name = lsx_vftintrmh_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrnel_l_s +name = lsx_vftintrnel_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vftintrneh_l_s +name = lsx_vftintrneh_l_s +asm-fmts = vd, vj +data-types = V2DI, V4SF + +/// lsx_vfrintrne_s +name = lsx_vfrintrne_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrintrne_d +name = lsx_vfrintrne_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrintrz_s +name = lsx_vfrintrz_s +asm-fmts = vd, vj +data-types = V4SF, 
V4SF + +/// lsx_vfrintrz_d +name = lsx_vfrintrz_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrintrp_s +name = lsx_vfrintrp_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrintrp_d +name = lsx_vfrintrp_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vfrintrm_s +name = lsx_vfrintrm_s +asm-fmts = vd, vj +data-types = V4SF, V4SF + +/// lsx_vfrintrm_d +name = lsx_vfrintrm_d +asm-fmts = vd, vj +data-types = V2DF, V2DF + +/// lsx_vstelm_b +name = lsx_vstelm_b +asm-fmts = vd, rj, si8, idx +data-types = VOID, V16QI, CVPOINTER, SI, UQI + +/// lsx_vstelm_h +name = lsx_vstelm_h +asm-fmts = vd, rj, si8, idx +data-types = VOID, V8HI, CVPOINTER, SI, UQI + +/// lsx_vstelm_w +name = lsx_vstelm_w +asm-fmts = vd, rj, si8, idx +data-types = VOID, V4SI, CVPOINTER, SI, UQI + +/// lsx_vstelm_d +name = lsx_vstelm_d +asm-fmts = vd, rj, si8, idx +data-types = VOID, V2DI, CVPOINTER, SI, UQI + +/// lsx_vaddwev_d_w +name = lsx_vaddwev_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vaddwev_w_h +name = lsx_vaddwev_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vaddwev_h_b +name = lsx_vaddwev_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vaddwod_d_w +name = lsx_vaddwod_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vaddwod_w_h +name = lsx_vaddwod_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vaddwod_h_b +name = lsx_vaddwod_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vaddwev_d_wu +name = lsx_vaddwev_d_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vaddwev_w_hu +name = lsx_vaddwev_w_hu +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vaddwev_h_bu +name = lsx_vaddwev_h_bu +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vaddwod_d_wu +name = lsx_vaddwod_d_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vaddwod_w_hu +name = lsx_vaddwod_w_hu 
+asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vaddwod_h_bu +name = lsx_vaddwod_h_bu +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vaddwev_d_wu_w +name = lsx_vaddwev_d_wu_w +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, V4SI + +/// lsx_vaddwev_w_hu_h +name = lsx_vaddwev_w_hu_h +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, V8HI + +/// lsx_vaddwev_h_bu_b +name = lsx_vaddwev_h_bu_b +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, V16QI + +/// lsx_vaddwod_d_wu_w +name = lsx_vaddwod_d_wu_w +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, V4SI + +/// lsx_vaddwod_w_hu_h +name = lsx_vaddwod_w_hu_h +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, V8HI + +/// lsx_vaddwod_h_bu_b +name = lsx_vaddwod_h_bu_b +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, V16QI + +/// lsx_vsubwev_d_w +name = lsx_vsubwev_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vsubwev_w_h +name = lsx_vsubwev_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vsubwev_h_b +name = lsx_vsubwev_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vsubwod_d_w +name = lsx_vsubwod_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vsubwod_w_h +name = lsx_vsubwod_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vsubwod_h_b +name = lsx_vsubwod_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vsubwev_d_wu +name = lsx_vsubwev_d_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vsubwev_w_hu +name = lsx_vsubwev_w_hu +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vsubwev_h_bu +name = lsx_vsubwev_h_bu +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vsubwod_d_wu +name = lsx_vsubwod_d_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vsubwod_w_hu +name = lsx_vsubwod_w_hu +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vsubwod_h_bu +name = 
lsx_vsubwod_h_bu +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vaddwev_q_d +name = lsx_vaddwev_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vaddwod_q_d +name = lsx_vaddwod_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vaddwev_q_du +name = lsx_vaddwev_q_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vaddwod_q_du +name = lsx_vaddwod_q_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vsubwev_q_d +name = lsx_vsubwev_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsubwod_q_d +name = lsx_vsubwod_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsubwev_q_du +name = lsx_vsubwev_q_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vsubwod_q_du +name = lsx_vsubwod_q_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vaddwev_q_du_d +name = lsx_vaddwev_q_du_d +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, V2DI + +/// lsx_vaddwod_q_du_d +name = lsx_vaddwod_q_du_d +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, V2DI + +/// lsx_vmulwev_d_w +name = lsx_vmulwev_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vmulwev_w_h +name = lsx_vmulwev_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vmulwev_h_b +name = lsx_vmulwev_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vmulwod_d_w +name = lsx_vmulwod_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V4SI, V4SI + +/// lsx_vmulwod_w_h +name = lsx_vmulwod_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V8HI, V8HI + +/// lsx_vmulwod_h_b +name = lsx_vmulwod_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V16QI, V16QI + +/// lsx_vmulwev_d_wu +name = lsx_vmulwev_d_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vmulwev_w_hu +name = lsx_vmulwev_w_hu +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vmulwev_h_bu +name = lsx_vmulwev_h_bu +asm-fmts = vd, 
vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vmulwod_d_wu +name = lsx_vmulwod_d_wu +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, UV4SI + +/// lsx_vmulwod_w_hu +name = lsx_vmulwod_w_hu +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, UV8HI + +/// lsx_vmulwod_h_bu +name = lsx_vmulwod_h_bu +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, UV16QI + +/// lsx_vmulwev_d_wu_w +name = lsx_vmulwev_d_wu_w +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, V4SI + +/// lsx_vmulwev_w_hu_h +name = lsx_vmulwev_w_hu_h +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, V8HI + +/// lsx_vmulwev_h_bu_b +name = lsx_vmulwev_h_bu_b +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, V16QI + +/// lsx_vmulwod_d_wu_w +name = lsx_vmulwod_d_wu_w +asm-fmts = vd, vj, vk +data-types = V2DI, UV4SI, V4SI + +/// lsx_vmulwod_w_hu_h +name = lsx_vmulwod_w_hu_h +asm-fmts = vd, vj, vk +data-types = V4SI, UV8HI, V8HI + +/// lsx_vmulwod_h_bu_b +name = lsx_vmulwod_h_bu_b +asm-fmts = vd, vj, vk +data-types = V8HI, UV16QI, V16QI + +/// lsx_vmulwev_q_d +name = lsx_vmulwev_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmulwod_q_d +name = lsx_vmulwod_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vmulwev_q_du +name = lsx_vmulwev_q_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vmulwod_q_du +name = lsx_vmulwod_q_du +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, UV2DI + +/// lsx_vmulwev_q_du_d +name = lsx_vmulwev_q_du_d +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, V2DI + +/// lsx_vmulwod_q_du_d +name = lsx_vmulwod_q_du_d +asm-fmts = vd, vj, vk +data-types = V2DI, UV2DI, V2DI + +/// lsx_vhaddw_q_d +name = lsx_vhaddw_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vhaddw_qu_du +name = lsx_vhaddw_qu_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vhsubw_q_d +name = lsx_vhsubw_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vhsubw_qu_du +name = lsx_vhsubw_qu_du 
+asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI + +/// lsx_vmaddwev_d_w +name = lsx_vmaddwev_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V4SI, V4SI + +/// lsx_vmaddwev_w_h +name = lsx_vmaddwev_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V8HI, V8HI + +/// lsx_vmaddwev_h_b +name = lsx_vmaddwev_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V16QI, V16QI + +/// lsx_vmaddwev_d_wu +name = lsx_vmaddwev_d_wu +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV4SI, UV4SI + +/// lsx_vmaddwev_w_hu +name = lsx_vmaddwev_w_hu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV8HI, UV8HI + +/// lsx_vmaddwev_h_bu +name = lsx_vmaddwev_h_bu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV16QI, UV16QI + +/// lsx_vmaddwod_d_w +name = lsx_vmaddwod_d_w +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V4SI, V4SI + +/// lsx_vmaddwod_w_h +name = lsx_vmaddwod_w_h +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V8HI, V8HI + +/// lsx_vmaddwod_h_b +name = lsx_vmaddwod_h_b +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V16QI, V16QI + +/// lsx_vmaddwod_d_wu +name = lsx_vmaddwod_d_wu +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV4SI, UV4SI + +/// lsx_vmaddwod_w_hu +name = lsx_vmaddwod_w_hu +asm-fmts = vd, vj, vk +data-types = UV4SI, UV4SI, UV8HI, UV8HI + +/// lsx_vmaddwod_h_bu +name = lsx_vmaddwod_h_bu +asm-fmts = vd, vj, vk +data-types = UV8HI, UV8HI, UV16QI, UV16QI + +/// lsx_vmaddwev_d_wu_w +name = lsx_vmaddwev_d_wu_w +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, UV4SI, V4SI + +/// lsx_vmaddwev_w_hu_h +name = lsx_vmaddwev_w_hu_h +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, UV8HI, V8HI + +/// lsx_vmaddwev_h_bu_b +name = lsx_vmaddwev_h_bu_b +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, UV16QI, V16QI + +/// lsx_vmaddwod_d_wu_w +name = lsx_vmaddwod_d_wu_w +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, UV4SI, V4SI + +/// lsx_vmaddwod_w_hu_h +name = lsx_vmaddwod_w_hu_h +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, UV8HI, V8HI + +/// 
lsx_vmaddwod_h_bu_b +name = lsx_vmaddwod_h_bu_b +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, UV16QI, V16QI + +/// lsx_vmaddwev_q_d +name = lsx_vmaddwev_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI, V2DI + +/// lsx_vmaddwod_q_d +name = lsx_vmaddwod_q_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI, V2DI + +/// lsx_vmaddwev_q_du +name = lsx_vmaddwev_q_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI, UV2DI + +/// lsx_vmaddwod_q_du +name = lsx_vmaddwod_q_du +asm-fmts = vd, vj, vk +data-types = UV2DI, UV2DI, UV2DI, UV2DI + +/// lsx_vmaddwev_q_du_d +name = lsx_vmaddwev_q_du_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, UV2DI, V2DI + +/// lsx_vmaddwod_q_du_d +name = lsx_vmaddwod_q_du_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, UV2DI, V2DI + +/// lsx_vrotr_b +name = lsx_vrotr_b +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vrotr_h +name = lsx_vrotr_h +asm-fmts = vd, vj, vk +data-types = V8HI, V8HI, V8HI + +/// lsx_vrotr_w +name = lsx_vrotr_w +asm-fmts = vd, vj, vk +data-types = V4SI, V4SI, V4SI + +/// lsx_vrotr_d +name = lsx_vrotr_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vadd_q +name = lsx_vadd_q +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vsub_q +name = lsx_vsub_q +asm-fmts = vd, vj, vk +data-types = V2DI, V2DI, V2DI + +/// lsx_vldrepl_b +name = lsx_vldrepl_b +asm-fmts = vd, rj, si12 +data-types = V16QI, CVPOINTER, SI + +/// lsx_vldrepl_h +name = lsx_vldrepl_h +asm-fmts = vd, rj, si11 +data-types = V8HI, CVPOINTER, SI + +/// lsx_vldrepl_w +name = lsx_vldrepl_w +asm-fmts = vd, rj, si10 +data-types = V4SI, CVPOINTER, SI + +/// lsx_vldrepl_d +name = lsx_vldrepl_d +asm-fmts = vd, rj, si9 +data-types = V2DI, CVPOINTER, SI + +/// lsx_vmskgez_b +name = lsx_vmskgez_b +asm-fmts = vd, vj +data-types = V16QI, V16QI + +/// lsx_vmsknz_b +name = lsx_vmsknz_b +asm-fmts = vd, vj +data-types = V16QI, V16QI + +/// lsx_vexth_h_b +name = lsx_vexth_h_b +asm-fmts = vd, vj 
+data-types = V8HI, V16QI + +/// lsx_vexth_w_h +name = lsx_vexth_w_h +asm-fmts = vd, vj +data-types = V4SI, V8HI + +/// lsx_vexth_d_w +name = lsx_vexth_d_w +asm-fmts = vd, vj +data-types = V2DI, V4SI + +/// lsx_vexth_q_d +name = lsx_vexth_q_d +asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vexth_hu_bu +name = lsx_vexth_hu_bu +asm-fmts = vd, vj +data-types = UV8HI, UV16QI + +/// lsx_vexth_wu_hu +name = lsx_vexth_wu_hu +asm-fmts = vd, vj +data-types = UV4SI, UV8HI + +/// lsx_vexth_du_wu +name = lsx_vexth_du_wu +asm-fmts = vd, vj +data-types = UV2DI, UV4SI + +/// lsx_vexth_qu_du +name = lsx_vexth_qu_du +asm-fmts = vd, vj +data-types = UV2DI, UV2DI + +/// lsx_vrotri_b +name = lsx_vrotri_b +asm-fmts = vd, vj, ui3 +data-types = V16QI, V16QI, UQI + +/// lsx_vrotri_h +name = lsx_vrotri_h +asm-fmts = vd, vj, ui4 +data-types = V8HI, V8HI, UQI + +/// lsx_vrotri_w +name = lsx_vrotri_w +asm-fmts = vd, vj, ui5 +data-types = V4SI, V4SI, UQI + +/// lsx_vrotri_d +name = lsx_vrotri_d +asm-fmts = vd, vj, ui6 +data-types = V2DI, V2DI, UQI + +/// lsx_vextl_q_d +name = lsx_vextl_q_d +asm-fmts = vd, vj +data-types = V2DI, V2DI + +/// lsx_vsrlni_b_h +name = lsx_vsrlni_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vsrlni_h_w +name = lsx_vsrlni_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vsrlni_w_d +name = lsx_vsrlni_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vsrlni_d_q +name = lsx_vsrlni_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vsrlrni_b_h +name = lsx_vsrlrni_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vsrlrni_h_w +name = lsx_vsrlrni_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vsrlrni_w_d +name = lsx_vsrlrni_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vsrlrni_d_q +name = lsx_vsrlrni_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// 
lsx_vssrlni_b_h +name = lsx_vssrlni_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vssrlni_h_w +name = lsx_vssrlni_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vssrlni_w_d +name = lsx_vssrlni_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vssrlni_d_q +name = lsx_vssrlni_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vssrlni_bu_h +name = lsx_vssrlni_bu_h +asm-fmts = vd, vj, ui4 +data-types = UV16QI, UV16QI, V16QI, USI + +/// lsx_vssrlni_hu_w +name = lsx_vssrlni_hu_w +asm-fmts = vd, vj, ui5 +data-types = UV8HI, UV8HI, V8HI, USI + +/// lsx_vssrlni_wu_d +name = lsx_vssrlni_wu_d +asm-fmts = vd, vj, ui6 +data-types = UV4SI, UV4SI, V4SI, USI + +/// lsx_vssrlni_du_q +name = lsx_vssrlni_du_q +asm-fmts = vd, vj, ui7 +data-types = UV2DI, UV2DI, V2DI, USI + +/// lsx_vssrlrni_b_h +name = lsx_vssrlrni_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vssrlrni_h_w +name = lsx_vssrlrni_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vssrlrni_w_d +name = lsx_vssrlrni_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vssrlrni_d_q +name = lsx_vssrlrni_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vssrlrni_bu_h +name = lsx_vssrlrni_bu_h +asm-fmts = vd, vj, ui4 +data-types = UV16QI, UV16QI, V16QI, USI + +/// lsx_vssrlrni_hu_w +name = lsx_vssrlrni_hu_w +asm-fmts = vd, vj, ui5 +data-types = UV8HI, UV8HI, V8HI, USI + +/// lsx_vssrlrni_wu_d +name = lsx_vssrlrni_wu_d +asm-fmts = vd, vj, ui6 +data-types = UV4SI, UV4SI, V4SI, USI + +/// lsx_vssrlrni_du_q +name = lsx_vssrlrni_du_q +asm-fmts = vd, vj, ui7 +data-types = UV2DI, UV2DI, V2DI, USI + +/// lsx_vsrani_b_h +name = lsx_vsrani_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vsrani_h_w +name = lsx_vsrani_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vsrani_w_d 
+name = lsx_vsrani_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vsrani_d_q +name = lsx_vsrani_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vsrarni_b_h +name = lsx_vsrarni_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vsrarni_h_w +name = lsx_vsrarni_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vsrarni_w_d +name = lsx_vsrarni_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vsrarni_d_q +name = lsx_vsrarni_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vssrani_b_h +name = lsx_vssrani_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vssrani_h_w +name = lsx_vssrani_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vssrani_w_d +name = lsx_vssrani_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vssrani_d_q +name = lsx_vssrani_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vssrani_bu_h +name = lsx_vssrani_bu_h +asm-fmts = vd, vj, ui4 +data-types = UV16QI, UV16QI, V16QI, USI + +/// lsx_vssrani_hu_w +name = lsx_vssrani_hu_w +asm-fmts = vd, vj, ui5 +data-types = UV8HI, UV8HI, V8HI, USI + +/// lsx_vssrani_wu_d +name = lsx_vssrani_wu_d +asm-fmts = vd, vj, ui6 +data-types = UV4SI, UV4SI, V4SI, USI + +/// lsx_vssrani_du_q +name = lsx_vssrani_du_q +asm-fmts = vd, vj, ui7 +data-types = UV2DI, UV2DI, V2DI, USI + +/// lsx_vssrarni_b_h +name = lsx_vssrarni_b_h +asm-fmts = vd, vj, ui4 +data-types = V16QI, V16QI, V16QI, USI + +/// lsx_vssrarni_h_w +name = lsx_vssrarni_h_w +asm-fmts = vd, vj, ui5 +data-types = V8HI, V8HI, V8HI, USI + +/// lsx_vssrarni_w_d +name = lsx_vssrarni_w_d +asm-fmts = vd, vj, ui6 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vssrarni_d_q +name = lsx_vssrarni_d_q +asm-fmts = vd, vj, ui7 +data-types = V2DI, V2DI, V2DI, USI + +/// lsx_vssrarni_bu_h +name = lsx_vssrarni_bu_h +asm-fmts = 
vd, vj, ui4 +data-types = UV16QI, UV16QI, V16QI, USI + +/// lsx_vssrarni_hu_w +name = lsx_vssrarni_hu_w +asm-fmts = vd, vj, ui5 +data-types = UV8HI, UV8HI, V8HI, USI + +/// lsx_vssrarni_wu_d +name = lsx_vssrarni_wu_d +asm-fmts = vd, vj, ui6 +data-types = UV4SI, UV4SI, V4SI, USI + +/// lsx_vssrarni_du_q +name = lsx_vssrarni_du_q +asm-fmts = vd, vj, ui7 +data-types = UV2DI, UV2DI, V2DI, USI + +/// lsx_vpermi_w +name = lsx_vpermi_w +asm-fmts = vd, vj, ui8 +data-types = V4SI, V4SI, V4SI, USI + +/// lsx_vld +name = lsx_vld +asm-fmts = vd, rj, si12 +data-types = V16QI, CVPOINTER, SI + +/// lsx_vst +name = lsx_vst +asm-fmts = vd, rj, si12 +data-types = VOID, V16QI, CVPOINTER, SI + +/// lsx_vssrlrn_b_h +name = lsx_vssrlrn_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vssrlrn_h_w +name = lsx_vssrlrn_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vssrlrn_w_d +name = lsx_vssrlrn_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vssrln_b_h +name = lsx_vssrln_b_h +asm-fmts = vd, vj, vk +data-types = V16QI, V8HI, V8HI + +/// lsx_vssrln_h_w +name = lsx_vssrln_h_w +asm-fmts = vd, vj, vk +data-types = V8HI, V4SI, V4SI + +/// lsx_vssrln_w_d +name = lsx_vssrln_w_d +asm-fmts = vd, vj, vk +data-types = V4SI, V2DI, V2DI + +/// lsx_vorn_v +name = lsx_vorn_v +asm-fmts = vd, vj, vk +data-types = V16QI, V16QI, V16QI + +/// lsx_vldi +name = lsx_vldi +asm-fmts = vd, i13 +data-types = V2DI, HI + +/// lsx_vshuf_b +name = lsx_vshuf_b +asm-fmts = vd, vj, vk, va +data-types = V16QI, V16QI, V16QI, V16QI + +/// lsx_vldx +name = lsx_vldx +asm-fmts = vd, rj, rk +data-types = V16QI, CVPOINTER, DI + +/// lsx_vstx +name = lsx_vstx +asm-fmts = vd, rj, rk +data-types = VOID, V16QI, CVPOINTER, DI + +/// lsx_vextl_qu_du +name = lsx_vextl_qu_du +asm-fmts = vd, vj +data-types = UV2DI, UV2DI + +/// lsx_bnz_b +name = lsx_bnz_b +asm-fmts = cd, vj +data-types = SI, UV16QI + +/// lsx_bnz_d +name = lsx_bnz_d +asm-fmts = cd, vj +data-types = SI, UV2DI + 
+/// lsx_bnz_h +name = lsx_bnz_h +asm-fmts = cd, vj +data-types = SI, UV8HI + +/// lsx_bnz_v +name = lsx_bnz_v +asm-fmts = cd, vj +data-types = SI, UV16QI + +/// lsx_bnz_w +name = lsx_bnz_w +asm-fmts = cd, vj +data-types = SI, UV4SI + +/// lsx_bz_b +name = lsx_bz_b +asm-fmts = cd, vj +data-types = SI, UV16QI + +/// lsx_bz_d +name = lsx_bz_d +asm-fmts = cd, vj +data-types = SI, UV2DI + +/// lsx_bz_h +name = lsx_bz_h +asm-fmts = cd, vj +data-types = SI, UV8HI + +/// lsx_bz_v +name = lsx_bz_v +asm-fmts = cd, vj +data-types = SI, UV16QI + +/// lsx_bz_w +name = lsx_bz_w +asm-fmts = cd, vj +data-types = SI, UV4SI + +/// lsx_vfcmp_caf_d +name = lsx_vfcmp_caf_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_caf_s +name = lsx_vfcmp_caf_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_ceq_d +name = lsx_vfcmp_ceq_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_ceq_s +name = lsx_vfcmp_ceq_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cle_d +name = lsx_vfcmp_cle_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cle_s +name = lsx_vfcmp_cle_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_clt_d +name = lsx_vfcmp_clt_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_clt_s +name = lsx_vfcmp_clt_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cne_d +name = lsx_vfcmp_cne_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cne_s +name = lsx_vfcmp_cne_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cor_d +name = lsx_vfcmp_cor_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cor_s +name = lsx_vfcmp_cor_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cueq_d +name = lsx_vfcmp_cueq_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cueq_s +name = lsx_vfcmp_cueq_s +asm-fmts = vd, vj, 
vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cule_d +name = lsx_vfcmp_cule_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cule_s +name = lsx_vfcmp_cule_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cult_d +name = lsx_vfcmp_cult_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cult_s +name = lsx_vfcmp_cult_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cun_d +name = lsx_vfcmp_cun_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cune_d +name = lsx_vfcmp_cune_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_cune_s +name = lsx_vfcmp_cune_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_cun_s +name = lsx_vfcmp_cun_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_saf_d +name = lsx_vfcmp_saf_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_saf_s +name = lsx_vfcmp_saf_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_seq_d +name = lsx_vfcmp_seq_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_seq_s +name = lsx_vfcmp_seq_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sle_d +name = lsx_vfcmp_sle_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sle_s +name = lsx_vfcmp_sle_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_slt_d +name = lsx_vfcmp_slt_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_slt_s +name = lsx_vfcmp_slt_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sne_d +name = lsx_vfcmp_sne_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sne_s +name = lsx_vfcmp_sne_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sor_d +name = lsx_vfcmp_sor_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sor_s +name = 
lsx_vfcmp_sor_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sueq_d +name = lsx_vfcmp_sueq_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sueq_s +name = lsx_vfcmp_sueq_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sule_d +name = lsx_vfcmp_sule_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sule_s +name = lsx_vfcmp_sule_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sult_d +name = lsx_vfcmp_sult_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sult_s +name = lsx_vfcmp_sult_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sun_d +name = lsx_vfcmp_sun_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sune_d +name = lsx_vfcmp_sune_d +asm-fmts = vd, vj, vk +data-types = V2DI, V2DF, V2DF + +/// lsx_vfcmp_sune_s +name = lsx_vfcmp_sune_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vfcmp_sun_s +name = lsx_vfcmp_sun_s +asm-fmts = vd, vj, vk +data-types = V4SI, V4SF, V4SF + +/// lsx_vrepli_b +name = lsx_vrepli_b +asm-fmts = vd, si10 +data-types = V16QI, HI + +/// lsx_vrepli_d +name = lsx_vrepli_d +asm-fmts = vd, si10 +data-types = V2DI, HI + +/// lsx_vrepli_h +name = lsx_vrepli_h +asm-fmts = vd, si10 +data-types = V8HI, HI + +/// lsx_vrepli_w +name = lsx_vrepli_w +asm-fmts = vd, si10 +data-types = V4SI, HI + diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsxintrin.h b/library/stdarch/crates/stdarch-gen-loongarch/lsxintrin.h new file mode 100644 index 000000000000..943f2df913e4 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/lsxintrin.h @@ -0,0 +1,5219 @@ +/* + * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lsxintrin.h;hb=61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75 + */ + +/* LARCH Loongson SX intrinsics include file. + + Copyright (C) 2018-2024 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_LOONGSON_SXINTRIN_H +#define _GCC_LOONGSON_SXINTRIN_H 1 + +#if defined(__loongarch_sx) +typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__ ((vector_size(16), 
aligned(8))); +typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsll_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsll_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsll_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsll_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsll_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsll_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsll_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsll_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. 
*/ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vslli_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vslli_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslli_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vslli_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsra_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsra_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsra_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsra_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsra_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsra_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsra_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsra_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrar_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrar_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrar_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrar_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrar_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrar_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrar_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrar_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrl_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrl_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrl_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrl_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrl_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrl_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrl_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrl_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlr_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlr_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlr_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlr_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlr_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlr_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlr_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlr_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. 
*/ +#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitclr_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitclr_b ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitclr_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitclr_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitclr_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitclr_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitclr_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitclr_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_h ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. 
*/ +#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_w ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_d ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitset_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitset_b ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitset_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitset_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitset_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitset_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitset_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitset_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. 
*/ +#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_h ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_w ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_d ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitrev_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitrev_b ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitrev_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitrev_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitrev_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitrev_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitrev_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vbitrev_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. 
*/ +#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_h ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_w ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_d ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadd_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadd_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadd_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadd_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadd_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadd_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadd_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadd_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_bu ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_hu ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_wu ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_du ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsub_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsub_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsub_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsub_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsub_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsub_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsub_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsub_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_bu ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_hu ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_wu ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_du ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmax_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmax_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_bu ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_hu ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_wu ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. 
*/ +#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_du ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. 
*/ +#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmin_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmin_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_bu ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. 
*/ +#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_hu ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_wu ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_du ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vseq_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vseq_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vseq_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vseq_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vseq_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vseq_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vseq_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vseq_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. 
*/ +#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vslt_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vslt_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, UV16QI, UQI. */ +#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_bu ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, UV8HI, UQI. */ +#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_hu ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, UV4SI, UQI. */ +#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_wu ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, UV2DI, UQI. */ +#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_du ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV8HI, UV8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsle_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsle_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, UV16QI, UQI. */ +#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_bu ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, UV8HI, UQI. */ +#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_hu ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, UV4SI, UQI. */ +#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_wu ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, UV2DI, UQI. */ +#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_du ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. 
*/ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_bu ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_hu ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_wu ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_du ((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadda_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadda_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadda_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadda_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadda_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadda_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadda_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadda_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsadd_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsadd_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavg_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavg_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vavgr_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vavgr_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssub_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssub_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vabsd_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vabsd_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmul_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmul_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmul_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmul_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmul_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmul_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmul_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmul_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmadd_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmadd_b ((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmadd_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmadd_h ((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmadd_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmadd_w ((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmadd_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmadd_d ((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmsub_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmsub_b ((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmsub_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmsub_h ((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmsub_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmsub_w ((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmsub_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmsub_d ((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vdiv_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vdiv_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV16QI, UV16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_hu_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_hu_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_wu_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_wu_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_du_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_du_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_hu_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_hu_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_wu_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_wu_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_du_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_du_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmod_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmod_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V16QI, V16QI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplve_b (__m128i _1, int _2) +{ + return (__m128i)__builtin_lsx_vreplve_b ((v16i8)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V8HI, V8HI, SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplve_h (__m128i _1, int _2) +{ + return (__m128i)__builtin_lsx_vreplve_h ((v8i16)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V4SI, V4SI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplve_w (__m128i _1, int _2) +{ + return (__m128i)__builtin_lsx_vreplve_w ((v4i32)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V2DI, V2DI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplve_d (__m128i _1, int _2) +{ + return (__m128i)__builtin_lsx_vreplve_d ((v2i64)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui2. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui1. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickev_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickev_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickev_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickev_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickev_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickev_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickev_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickev_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickod_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickod_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickod_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickod_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickod_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickod_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpickod_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpickod_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvh_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvh_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvh_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvh_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvh_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvh_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvh_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvh_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvl_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvl_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvl_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvl_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvl_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvl_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vilvl_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vilvl_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackev_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackev_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackev_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackev_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackev_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackev_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackev_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackev_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackod_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackod_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackod_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackod_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackod_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackod_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpackod_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vpackod_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vshuf_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vshuf_h ((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vshuf_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vshuf_w ((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vshuf_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vshuf_d ((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vand_v (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vand_v ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vandi_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vor_v (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vor_v ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. 
*/ +#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vori_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vnor_v (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vnor_v ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vnori_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vxor_v (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vxor_v ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vxori_b ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vbitsel_v (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vbitsel_v ((v16u8)_1, (v16u8)_2, (v16u8)_3); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, USI. */ +#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vbitseli_b ((v16u8)(_1), (v16u8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V16QI, V16QI, USI. 
*/ +#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V8HI, V8HI, USI. */ +#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V4SI, V4SI, USI. */ +#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V16QI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplgr2vr_b (int _1) +{ + return (__m128i)__builtin_lsx_vreplgr2vr_b ((int)_1); +} + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V8HI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplgr2vr_h (int _1) +{ + return (__m128i)__builtin_lsx_vreplgr2vr_h ((int)_1); +} + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V4SI, SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplgr2vr_w (int _1) +{ + return (__m128i)__builtin_lsx_vreplgr2vr_w ((int)_1); +} + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V2DI, DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vreplgr2vr_d (long int _1) +{ + return (__m128i)__builtin_lsx_vreplgr2vr_d ((long int)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpcnt_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vpcnt_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpcnt_h (__m128i _1) +{ + return (__m128i)__builtin_lsx_vpcnt_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpcnt_w (__m128i _1) +{ + return (__m128i)__builtin_lsx_vpcnt_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vpcnt_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vpcnt_d ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclo_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclo_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclo_h (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclo_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclo_w (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclo_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclo_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclo_d ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclz_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclz_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclz_h (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclz_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclz_w (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclz_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vclz_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vclz_d ((v2i64)_1); +} + +/* Assembly instruction format: rd, vj, ui4. */ +/* Data types in instruction templates: SI, V16QI, UQI. */ +#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui3. */ +/* Data types in instruction templates: SI, V8HI, UQI. */ +#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui2. */ +/* Data types in instruction templates: SI, V4SI, UQI. 
*/ +#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui1. */ +/* Data types in instruction templates: DI, V2DI, UQI. */ +#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((long int)__builtin_lsx_vpickve2gr_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui4. */ +/* Data types in instruction templates: USI, V16QI, UQI. */ +#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_bu ((v16i8)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui3. */ +/* Data types in instruction templates: USI, V8HI, UQI. */ +#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_hu ((v8i16)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui2. */ +/* Data types in instruction templates: USI, V4SI, UQI. */ +#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_wu ((v4i32)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui1. */ +/* Data types in instruction templates: UDI, V2DI, UQI. */ +#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ + ((unsigned long int)__builtin_lsx_vpickve2gr_du ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, rj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, SI, UQI. */ +#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_b ((v16i8)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: vd, rj, ui3. */ +/* Data types in instruction templates: V8HI, V8HI, SI, UQI. */ +#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_h ((v8i16)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: vd, rj, ui2. */ +/* Data types in instruction templates: V4SI, V4SI, SI, UQI. 
*/ +#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_w ((v4i32)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: vd, rj, ui1. */ +/* Data types in instruction templates: V2DI, V2DI, DI, UQI. */ +#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_d ((v2i64)(_1), (long int)(_2), (_3))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfadd_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfadd_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfadd_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfadd_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfsub_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfsub_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfsub_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfsub_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmul_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfmul_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmul_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfmul_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfdiv_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfdiv_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfdiv_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfdiv_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcvt_h_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcvt_h_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfcvt_s_d (__m128d _1, __m128d _2) +{ + return (__m128)__builtin_lsx_vfcvt_s_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmin_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfmin_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmin_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfmin_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmina_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfmina_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmina_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfmina_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmax_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfmax_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmax_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfmax_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmaxa_s (__m128 _1, __m128 _2) +{ + return (__m128)__builtin_lsx_vfmaxa_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmaxa_d (__m128d _1, __m128d _2) +{ + return (__m128d)__builtin_lsx_vfmaxa_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfclass_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vfclass_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfclass_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vfclass_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfsqrt_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfsqrt_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfsqrt_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfsqrt_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrecip_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrecip_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrecip_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); +} + +#if defined(__loongarch_frecipe) +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrecipe_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrecipe_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrsqrte_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrsqrte_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1); +} +#endif + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrint_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrint_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrint_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrint_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrsqrt_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrsqrt_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrsqrt_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrsqrt_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vflogb_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vflogb_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vflogb_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vflogb_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfcvth_s_h (__m128i _1) +{ + return (__m128)__builtin_lsx_vfcvth_s_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfcvth_d_s (__m128 _1) +{ + return (__m128d)__builtin_lsx_vfcvth_d_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfcvtl_s_h (__m128i _1) +{ + return (__m128)__builtin_lsx_vfcvtl_s_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfcvtl_d_s (__m128 _1) +{ + return (__m128d)__builtin_lsx_vfcvtl_d_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftint_w_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftint_w_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftint_l_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftint_l_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftint_wu_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftint_wu_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftint_lu_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftint_lu_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrz_w_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrz_w_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrz_l_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftintrz_l_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrz_wu_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrz_wu_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrz_lu_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftintrz_lu_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vffint_s_w (__m128i _1) +{ + return (__m128)__builtin_lsx_vffint_s_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vffint_d_l (__m128i _1) +{ + return (__m128d)__builtin_lsx_vffint_d_l ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vffint_s_wu (__m128i _1) +{ + return (__m128)__builtin_lsx_vffint_s_wu ((v4u32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vffint_d_lu (__m128i _1) +{ + return (__m128d)__builtin_lsx_vffint_d_lu ((v2u64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vandn_v (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vandn_v ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vneg_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vneg_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vneg_h (__m128i _1) +{ + return (__m128i)__builtin_lsx_vneg_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vneg_w (__m128i _1) +{ + return (__m128i)__builtin_lsx_vneg_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vneg_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vneg_d ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmuh_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmuh_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V8HI, V16QI, UQI. */ +#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_h_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V4SI, V8HI, UQI. */ +#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_w_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, V4SI, UQI. */ +#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_d_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV8HI, UV16QI, UQI. */ +#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_hu_bu ((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV4SI, UV8HI, UQI. */ +#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_wu_hu ((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV2DI, UV4SI, UQI. */ +#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_du_wu ((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsran_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsran_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsran_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsran_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsran_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsran_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssran_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssran_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssran_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssran_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssran_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssran_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssran_bu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssran_bu_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssran_hu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssran_hu_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssran_wu_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssran_wu_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrarn_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrarn_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrarn_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrarn_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrarn_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrarn_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrarn_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrarn_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrarn_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrarn_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrarn_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrarn_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrarn_bu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrarn_bu_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrarn_hu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrarn_hu_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrarn_wu_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrarn_wu_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrln_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrln_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrln_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrln_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrln_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrln_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrln_bu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrln_bu_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrln_hu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrln_hu_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrln_wu_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrln_wu_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlrn_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlrn_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlrn_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlrn_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsrlrn_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsrlrn_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrlrn_bu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrlrn_bu_h ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrlrn_hu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrlrn_hu_w ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrlrn_wu_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrlrn_wu_d ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI. */ +#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_b ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI. 
*/ +#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_h ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfrstp_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vfrstp_b ((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfrstp_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vfrstp_h ((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vshuf4i_d ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsrl_v ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsll_v ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_b ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. 
*/ +#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_h ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_w ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_d ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmskltz_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vmskltz_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmskltz_h (__m128i _1) +{ + return (__m128i)__builtin_lsx_vmskltz_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmskltz_w (__m128i _1) +{ + return (__m128i)__builtin_lsx_vmskltz_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmskltz_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vmskltz_d ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsigncov_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsigncov_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsigncov_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsigncov_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsigncov_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsigncov_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsigncov_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsigncov_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmadd_s (__m128 _1, __m128 _2, __m128 _3) +{ + return (__m128)__builtin_lsx_vfmadd_s ((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmadd_d (__m128d _1, __m128d _2, __m128d _3) +{ + return (__m128d)__builtin_lsx_vfmadd_d ((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfmsub_s (__m128 _1, __m128 _2, __m128 _3) +{ + return (__m128)__builtin_lsx_vfmsub_s ((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfmsub_d (__m128d _1, __m128d _2, __m128d _3) +{ + return (__m128d)__builtin_lsx_vfmsub_d ((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfnmadd_s (__m128 _1, __m128 _2, __m128 _3) +{ + return (__m128)__builtin_lsx_vfnmadd_s ((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfnmadd_d (__m128d _1, __m128d _2, __m128d _3) +{ + return (__m128d)__builtin_lsx_vfnmadd_d ((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfnmsub_s (__m128 _1, __m128 _2, __m128 _3) +{ + return (__m128)__builtin_lsx_vfnmsub_s ((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfnmsub_d (__m128d _1, __m128d _2, __m128d _3) +{ + return (__m128d)__builtin_lsx_vfnmsub_d ((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrne_w_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrne_w_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrne_l_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftintrne_l_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrp_w_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrp_w_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrp_l_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftintrp_l_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrm_w_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrm_w_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrm_l_d (__m128d _1) +{ + return (__m128i)__builtin_lsx_vftintrm_l_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftint_w_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vftint_w_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vffint_s_l (__m128i _1, __m128i _2) +{ + return (__m128)__builtin_lsx_vffint_s_l ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrz_w_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vftintrz_w_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrp_w_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vftintrp_w_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrm_w_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vftintrm_w_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrne_w_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vftintrne_w_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintl_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintl_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftinth_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftinth_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vffinth_d_w (__m128i _1) +{ + return (__m128d)__builtin_lsx_vffinth_d_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vffintl_d_w (__m128i _1) +{ + return (__m128d)__builtin_lsx_vffintl_d_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrzl_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrzl_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrzh_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrzh_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrpl_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrpl_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrph_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrph_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrml_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrml_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrmh_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrmh_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrnel_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrnel_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vftintrneh_l_s (__m128 _1) +{ + return (__m128i)__builtin_lsx_vftintrneh_l_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrintrne_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrintrne_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrintrne_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrintrne_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrintrz_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrintrz_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrintrz_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrintrz_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrintrp_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrintrp_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrintrp_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrintrp_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrintrm_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrintrm_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrintrm_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrintrm_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_b ((v16i8)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_h ((v8i16)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_w ((v4i32)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_d ((v2i64)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_d_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_d_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_w_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_w_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_h_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_h_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_d_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_d_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_w_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_w_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_h_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_h_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_d_wu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_d_wu_w ((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_w_hu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_w_hu_h ((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_h_bu_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_h_bu_b ((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_d_wu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_d_wu_w ((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_w_hu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_w_hu_h ((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_h_bu_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_h_bu_b ((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_d_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_d_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_w_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_w_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_h_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_h_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_d_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_d_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_w_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_w_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_h_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_h_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_q_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_q_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_q_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_q_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwev_q_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwev_q_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsubwod_q_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsubwod_q_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwev_q_du_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwev_q_du_d ((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vaddwod_q_du_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vaddwod_q_du_d ((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_d_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_d_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_w_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_w_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_h_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_h_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_d_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_d_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_w_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_w_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_h_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_h_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_d_wu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_d_wu ((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_w_hu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_w_hu ((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_h_bu (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_h_bu ((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_d_wu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_d_wu_w ((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_w_hu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_w_hu_h ((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_h_bu_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_h_bu_b ((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_d_wu_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_d_wu_w ((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_w_hu_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_w_hu_h ((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_h_bu_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_h_bu_b ((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_q_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_q_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_q_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_q_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwev_q_du_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwev_q_du_d ((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmulwod_q_du_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vmulwod_q_du_d ((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhaddw_qu_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhaddw_qu_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_q_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_q_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vhsubw_qu_du (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vhsubw_qu_du ((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_d_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_d_w ((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_w_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_w_h ((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_h_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_h_b ((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_d_wu (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_d_wu ((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_w_hu (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_w_hu ((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_h_bu (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_h_bu ((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_d_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_d_w ((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_w_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_w_h ((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_h_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_h_b ((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_d_wu (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_d_wu ((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_w_hu (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_w_hu ((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_h_bu (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_h_bu ((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_d_wu_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_d_wu_w ((v2i64)_1, (v4u32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_w_hu_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_w_hu_h ((v4i32)_1, (v8u16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_h_bu_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_h_bu_b ((v8i16)_1, (v16u8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_d_wu_w (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_d_wu_w ((v2i64)_1, (v4u32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_w_hu_h (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_w_hu_h ((v4i32)_1, (v8u16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_h_bu_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_h_bu_b ((v8i16)_1, (v16u8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_q_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_q_d ((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_q_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_q_d ((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_q_du (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_q_du ((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_q_du (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_q_du ((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwev_q_du_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwev_q_du_d ((v2i64)_1, (v2u64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmaddwod_q_du_d (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vmaddwod_q_du_d ((v2i64)_1, (v2u64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vrotr_b (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vrotr_b ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vrotr_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vrotr_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vrotr_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vrotr_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vrotr_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vrotr_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vadd_q (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vadd_q ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vsub_q (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vsub_q ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, rj, si12. */ +/* Data types in instruction templates: V16QI, CVPOINTER, SI. 
*/ +#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_b ((void *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si11. */ +/* Data types in instruction templates: V8HI, CVPOINTER, SI. */ +#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_h ((void *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si10. */ +/* Data types in instruction templates: V4SI, CVPOINTER, SI. */ +#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_w ((void *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si9. */ +/* Data types in instruction templates: V2DI, CVPOINTER, SI. */ +#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_d ((void *)(_1), (_2))) + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmskgez_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vmskgez_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vmsknz_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vmsknz_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_h_b (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_h_b ((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_w_h (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_w_h ((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V2DI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_d_w (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_d_w ((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_q_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_q_d ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV8HI, UV16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_hu_bu (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_hu_bu ((v16u8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV4SI, UV8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_wu_hu (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_wu_hu ((v8u16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, UV4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_du_wu (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_du_wu ((v4u32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vexth_qu_du (__m128i _1) +{ + return (__m128i)__builtin_lsx_vexth_qu_du ((v2u64)_1); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b ((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. 
*/ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h ((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w ((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d ((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vextl_q_d (__m128i _1) +{ + return (__m128i)__builtin_lsx_vextl_q_d ((v2i64)_1); +} + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. 
*/ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. 
*/ +#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h ((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w ((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ +#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d ((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q ((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. 
*/ +#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h ((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w ((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ +#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d ((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q ((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. 
*/ +#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. 
*/ +#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h ((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w ((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ +#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d ((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. 
*/ +#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q ((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h ((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w ((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_d_q ((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h ((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w ((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. 
*/ +#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d ((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q ((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w ((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, rj, si12. */ +/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld ((void *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si12. */ +/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ +#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst ((v16i8)(_1), (void *)(_2), (_3))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrlrn_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrlrn_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrlrn_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrlrn_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrlrn_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrlrn_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrln_b_h (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrln_b_h ((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrln_h_w (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrln_h_w ((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vssrln_w_d (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vssrln_w_d ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vorn_v (__m128i _1, __m128i _2) +{ + return (__m128i)__builtin_lsx_vorn_v ((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, i13. */ +/* Data types in instruction templates: V2DI, HI. */ +#define __lsx_vldi(/*i13*/ _1) \ + ((__m128i)__builtin_lsx_vldi ((_1))) + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vshuf_b (__m128i _1, __m128i _2, __m128i _3) +{ + return (__m128i)__builtin_lsx_vshuf_b ((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, rj, rk. */ +/* Data types in instruction templates: V16QI, CVPOINTER, DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vldx (void * _1, long int _2) +{ + return (__m128i)__builtin_lsx_vldx ((void *)_1, (long int)_2); +} + +/* Assembly instruction format: vd, rj, rk. */ +/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __lsx_vstx (__m128i _1, void * _2, long int _3) +{ + return (void)__builtin_lsx_vstx ((v16i8)_1, (void *)_2, (long int)_3); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, UV2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vextl_qu_du (__m128i _1) +{ + return (__m128i)__builtin_lsx_vextl_qu_du ((v2u64)_1); +} + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bnz_b(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bnz_b ((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV2DI. */ +#define __lsx_bnz_d(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bnz_d ((v2u64)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV8HI. */ +#define __lsx_bnz_h(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bnz_h ((v8u16)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bnz_v(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bnz_v ((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. 
*/ +/* Data types in instruction templates: SI, UV4SI. */ +#define __lsx_bnz_w(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bnz_w ((v4u32)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bz_b(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bz_b ((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV2DI. */ +#define __lsx_bz_d(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bz_d ((v2u64)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV8HI. */ +#define __lsx_bz_h(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bz_h ((v8u16)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bz_v(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bz_v ((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV4SI. */ +#define __lsx_bz_w(/*__m128i*/ _1) \ + ((int)__builtin_lsx_bz_w ((v4u32)(_1))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_caf_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_caf_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_caf_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_caf_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_ceq_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_ceq_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_ceq_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_ceq_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cle_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cle_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cle_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cle_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_clt_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_clt_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_clt_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_clt_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cne_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cne_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cne_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cne_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cor_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cor_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cor_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cor_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cueq_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cueq_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cueq_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cueq_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cule_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cule_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cule_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cule_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cult_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cult_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cult_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cult_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cun_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cun_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cune_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cune_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cune_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cune_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_cun_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_cun_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_saf_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_saf_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_saf_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_saf_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_seq_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_seq_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_seq_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_seq_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sle_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sle_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sle_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sle_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_slt_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_slt_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_slt_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_slt_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sne_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sne_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sne_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sne_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sor_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sor_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sor_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sor_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sueq_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sueq_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sueq_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sueq_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sule_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sule_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sule_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sule_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sult_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sult_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sult_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sult_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sun_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sun_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sune_d (__m128d _1, __m128d _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sune_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sune_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sune_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lsx_vfcmp_sun_s (__m128 _1, __m128 _2) +{ + return (__m128i)__builtin_lsx_vfcmp_sun_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V16QI, HI. 
*/ +#define __lsx_vrepli_b(/*si10*/ _1) \ + ((__m128i)__builtin_lsx_vrepli_b ((_1))) + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V2DI, HI. */ +#define __lsx_vrepli_d(/*si10*/ _1) \ + ((__m128i)__builtin_lsx_vrepli_d ((_1))) + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V8HI, HI. */ +#define __lsx_vrepli_h(/*si10*/ _1) \ + ((__m128i)__builtin_lsx_vrepli_h ((_1))) + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V4SI, HI. */ +#define __lsx_vrepli_w(/*si10*/ _1) \ + ((__m128i)__builtin_lsx_vrepli_w ((_1))) + +#endif /* defined(__loongarch_sx) */ +#endif /* _GCC_LOONGSON_SXINTRIN_H */ diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/main.rs b/library/stdarch/crates/stdarch-gen-loongarch/src/main.rs new file mode 100644 index 000000000000..aa9990b6ccd1 --- /dev/null +++ b/library/stdarch/crates/stdarch-gen-loongarch/src/main.rs @@ -0,0 +1,1551 @@ +use std::env; +use std::fmt; +use std::fs::File; +use std::io::prelude::*; +use std::io::{self, BufReader}; +use std::path::PathBuf; + +/// Complete lines of generated source. +/// +/// This enables common generation tasks to be factored out without precluding basic +/// context-specific formatting. +/// +/// The convention in this generator is to prefix (not suffix) lines with a newline, so the +/// implementation of `std::fmt::Display` behaves in the same way. 
+struct Lines { + indent: usize, + lines: Vec, +} + +impl Lines { + fn single(line: String) -> Self { + Self::from(vec![line]) + } +} + +impl From> for Lines { + fn from(lines: Vec) -> Self { + Self { indent: 0, lines } + } +} + +impl std::fmt::Display for Lines { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result { + for line in self.lines.iter() { + write!(f, "\n{:width$}{line}", "", width = self.indent)?; + } + Ok(()) + } +} + +#[derive(Clone, Copy, PartialEq)] +enum TargetFeature { + Lsx, + Lasx, +} + +impl TargetFeature { + fn new(ext: &str) -> TargetFeature { + match ext { + "lasx" => Self::Lasx, + _ => Self::Lsx, + } + } + + /// A string for use with `#[target_feature(...)]`. + fn as_target_feature_arg(&self, ins: &str) -> String { + let vec = match *self { + // Features included with LoongArch64 LSX and LASX. + Self::Lsx => "lsx", + Self::Lasx => "lasx", + }; + let frecipe = match ins { + "lsx_vfrecipe_s" | "lsx_vfrecipe_d" | "lsx_vfrsqrte_s" | "lsx_vfrsqrte_d" + | "lasx_xvfrecipe_s" | "lasx_xvfrecipe_d" | "lasx_xvfrsqrte_s" | "lasx_xvfrsqrte_d" => { + ",frecipe" + } + _ => "", + }; + format!("{vec}{frecipe}") + } + + fn attr(name: &str, value: impl fmt::Display) -> String { + format!(r#"#[{name}(enable = "{value}")]"#) + } + + /// Generate a target_feature attribute + fn to_target_feature_attr(self, ins: &str) -> Lines { + Lines::single(Self::attr( + "target_feature", + self.as_target_feature_arg(ins), + )) + } + + fn bytes(&self) -> u8 { + match *self { + // Features included with LoongArch64 LSX and LASX. + Self::Lsx => 16, + Self::Lasx => 32, + } + } +} + +fn gen_spec(in_file: String, ext_name: &str) -> io::Result<()> { + let f = File::open(in_file.clone()).unwrap_or_else(|_| panic!("Failed to open {in_file}")); + let f = BufReader::new(f); + let mut out = format!( + r#"// This code is automatically generated. DO NOT MODIFY. 
+// ``` +// OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- {in_file} +// ``` +"# + ); + out.push('\n'); + + let mut asm_fmts = String::new(); + let mut data_types = String::new(); + let fn_pat = format!("__{ext_name}_"); + for line in f.lines() { + let line = line.unwrap(); + if line.is_empty() { + continue; + } + + if let Some(s) = line.find("/* Assembly instruction format:") { + let e = line.find('.').unwrap(); + asm_fmts = line.get(s + 31..e).unwrap().trim().to_string(); + } else if let Some(s) = line.find("/* Data types in instruction templates:") { + let e = line.find('.').unwrap(); + data_types = line.get(s + 39..e).unwrap().trim().to_string(); + } else if let Some(s) = line.find(fn_pat.as_str()) { + let e = line.find('(').unwrap(); + let name = line.get(s + 2..e).unwrap().trim().to_string(); + out.push_str(&format!("/// {name}\n")); + out.push_str(&format!("name = {name}\n")); + out.push_str(&format!("asm-fmts = {asm_fmts}\n")); + out.push_str(&format!("data-types = {data_types}\n")); + out.push('\n'); + } + } + + let out_dir_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()); + std::fs::create_dir_all(&out_dir_path)?; + let mut f = File::create(out_dir_path.join(format!("{ext_name}.spec")))?; + f.write_all(out.as_bytes())?; + Ok(()) +} + +fn gen_bind(in_file: String, ext_name: &str) -> io::Result<()> { + let f = File::open(in_file.clone()).unwrap_or_else(|_| panic!("Failed to open {in_file}")); + let f = BufReader::new(f); + + let target: TargetFeature = TargetFeature::new(ext_name); + let mut para_num; + let mut current_name: Option = None; + let mut asm_fmts: Vec = Vec::new(); + let mut link_function_str = String::new(); + let mut function_str = String::new(); + let mut out = String::new(); + + out.push_str(&format!( + r#"// This code is automatically generated. DO NOT MODIFY. 
+// +// Instead, modify `{in_file}` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- {in_file} +// ``` + +use super::types::*; +"# + )); + + out.push_str( + r#" +#[allow(improper_ctypes)] +unsafe extern "unadjusted" { +"#, + ); + + for line in f.lines() { + let line = line.unwrap(); + if line.is_empty() { + continue; + } + if let Some(name) = line.strip_prefix("name = ") { + current_name = Some(String::from(name)); + } else if line.starts_with("asm-fmts = ") { + asm_fmts = line[10..] + .split(',') + .map(|v| v.trim().to_string()) + .collect(); + } else if line.starts_with("data-types = ") { + let current_name = current_name.clone().unwrap(); + let data_types: Vec<&str> = line + .get(12..) + .unwrap() + .split(',') + .map(|e| e.trim()) + .collect(); + let in_t; + let out_t; + if data_types.len() == 2 { + in_t = [data_types[1], "NULL", "NULL", "NULL"]; + out_t = data_types[0]; + para_num = 1; + } else if data_types.len() == 3 { + in_t = [data_types[1], data_types[2], "NULL", "NULL"]; + out_t = data_types[0]; + para_num = 2; + } else if data_types.len() == 4 { + in_t = [data_types[1], data_types[2], data_types[3], "NULL"]; + out_t = data_types[0]; + para_num = 3; + } else if data_types.len() == 5 { + in_t = [data_types[1], data_types[2], data_types[3], data_types[4]]; + out_t = data_types[0]; + para_num = 4; + } else { + panic!("DEBUG: line: {0} len: {1}", line, data_types.len()); + } + + let (link_function, function) = + gen_bind_body(¤t_name, &asm_fmts, &in_t, out_t, para_num, target); + link_function_str.push_str(&link_function); + function_str.push_str(&function); + } + } + out.push_str(&link_function_str); + out.push_str("}\n"); + out.push_str(&function_str); + + let out_path: PathBuf = + PathBuf::from(env::var("OUT_DIR").unwrap_or("crates/core_arch".to_string())) + .join("src") + .join("loongarch64") + .join(ext_name); + std::fs::create_dir_all(&out_path)?; + + let mut 
file = File::create(out_path.join("generated.rs"))?; + file.write_all(out.as_bytes())?; + Ok(()) +} + +fn gen_bind_body( + current_name: &str, + asm_fmts: &[String], + in_t: &[&str; 4], + out_t: &str, + para_num: i32, + target: TargetFeature, +) -> (String, String) { + let type_to_rst = |t: &str, s: bool| -> &str { + match (t, s) { + ("V16QI", _) => "v16i8", + ("V32QI", _) => "v32i8", + ("V8HI", _) => "v8i16", + ("V16HI", _) => "v16i16", + ("V4SI", _) => "v4i32", + ("V8SI", _) => "v8i32", + ("V2DI", _) => "v2i64", + ("V4DI", _) => "v4i64", + ("UV16QI", _) => "v16u8", + ("UV32QI", _) => "v32u8", + ("UV8HI", _) => "v8u16", + ("UV16HI", _) => "v16u16", + ("UV4SI", _) => "v4u32", + ("UV8SI", _) => "v8u32", + ("UV2DI", _) => "v2u64", + ("UV4DI", _) => "v4u64", + ("SI", _) => "i32", + ("DI", _) => "i64", + ("USI", _) => "u32", + ("UDI", _) => "u64", + ("V4SF", _) => "v4f32", + ("V8SF", _) => "v8f32", + ("V2DF", _) => "v2f64", + ("V4DF", _) => "v4f64", + ("UQI", _) => "u32", + ("QI", _) => "i32", + ("CVPOINTER", false) => "*const i8", + ("CVPOINTER", true) => "*mut i8", + ("HI", _) => "i32", + (_, _) => panic!("unknown type: {t}"), + } + }; + + let is_store = current_name.to_string().contains("vst"); + let link_function = { + let fn_decl = { + let fn_output = if out_t.to_lowercase() == "void" { + String::new() + } else { + format!("-> {}", type_to_rst(out_t, is_store)) + }; + let fn_inputs = match para_num { + 1 => format!("(a: {})", type_to_rst(in_t[0], is_store)), + 2 => format!( + "(a: {}, b: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store) + ), + 3 => format!( + "(a: {}, b: {}, c: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store) + ), + 4 => format!( + "(a: {}, b: {}, c: {}, d: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store), + type_to_rst(in_t[3], is_store) + ), + _ => panic!("unsupported parameter number"), + }; + format!("fn 
__{current_name}{fn_inputs} {fn_output};") + }; + let function = format!( + r#" #[link_name = "llvm.loongarch.{}"] + {fn_decl} +"#, + current_name.replace('_', ".") + ); + function + }; + + let type_to_imm = |t| -> i8 { + match t { + 'b' => 4, + 'h' => 3, + 'w' => 2, + 'd' => 1, + _ => panic!("unsupported type"), + } + }; + let mut rustc_legacy_const_generics = ""; + let fn_decl = { + let fn_output = if out_t.to_lowercase() == "void" { + String::new() + } else { + format!("-> {} ", type_to_rst(out_t, is_store)) + }; + let mut fn_inputs = match para_num { + 1 => format!("(a: {})", type_to_rst(in_t[0], is_store)), + 2 => format!( + "(a: {}, b: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store) + ), + 3 => format!( + "(a: {}, b: {}, c: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store) + ), + 4 => format!( + "(a: {}, b: {}, c: {}, d: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store), + type_to_rst(in_t[3], is_store) + ), + _ => panic!("unsupported parameter number"), + }; + if para_num == 1 && in_t[0] == "HI" { + fn_inputs = match asm_fmts[1].as_str() { + "si13" | "i13" => format!("()", type_to_rst(in_t[0], is_store)), + "si10" => format!("()", type_to_rst(in_t[0], is_store)), + _ => panic!("unsupported assembly format: {}", asm_fmts[1]), + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(0)"; + } else if para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") { + fn_inputs = if asm_fmts[2].starts_with("ui") { + format!( + "(a: {0})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(1)"; + } else if para_num == 2 && in_t[1] == "QI" { + fn_inputs = if asm_fmts[2].starts_with("si") { + format!( + "(a: {0})", + type_to_rst(in_t[0], 
is_store), + type_to_rst(in_t[1], is_store), + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(1)"; + } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" { + fn_inputs = if asm_fmts[2].starts_with("si") { + format!( + "(mem_addr: {0})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(1)"; + } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "DI" { + fn_inputs = match asm_fmts[2].as_str() { + "rk" => format!( + "(mem_addr: {}, b: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store) + ), + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + } else if para_num == 3 && (in_t[2] == "USI" || in_t[2] == "UQI") { + fn_inputs = if asm_fmts[2].starts_with("ui") { + format!( + "(a: {0}, b: {1})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store), + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]) + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(2)"; + } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" { + fn_inputs = match asm_fmts[2].as_str() { + "si12" => format!( + "(a: {0}, mem_addr: {1})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store) + ), + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(2)"; + } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" { + fn_inputs = match asm_fmts[2].as_str() { + "rk" => format!( + "(a: {}, mem_addr: {}, b: {})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + 
type_to_rst(in_t[2], is_store) + ), + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + } else if para_num == 4 { + fn_inputs = match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) { + ("si8", t) => format!( + "(a: {0}, mem_addr: {1})", + type_to_rst(in_t[0], is_store), + type_to_rst(in_t[1], is_store), + type_to_rst(in_t[2], is_store), + type_to_rst(in_t[3], is_store), + type_to_imm(t), + ), + (_, _) => panic!( + "unsupported assembly format: {} for {}", + asm_fmts[2], current_name + ), + }; + rustc_legacy_const_generics = "rustc_legacy_const_generics(2, 3)"; + } + format!("pub unsafe fn {current_name}{fn_inputs} {fn_output}") + }; + let mut call_params = { + match para_num { + 1 => format!("__{current_name}(a)"), + 2 => format!("__{current_name}(a, b)"), + 3 => format!("__{current_name}(a, b, c)"), + 4 => format!("__{current_name}(a, b, c, d)"), + _ => panic!("unsupported parameter number"), + } + }; + if para_num == 1 && in_t[0] == "HI" { + call_params = match asm_fmts[1].as_str() { + "si10" => { + format!("static_assert_simm_bits!(IMM_S10, 10);\n __{current_name}(IMM_S10)") + } + "i13" => { + format!("static_assert_simm_bits!(IMM_S13, 13);\n __{current_name}(IMM_S13)") + } + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + } + } else if para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") { + call_params = if asm_fmts[2].starts_with("ui") { + format!( + "static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, IMM{0})", + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]) + }; + } else if para_num == 2 && in_t[1] == "QI" { + call_params = match asm_fmts[2].as_str() { + "si5" => { + format!("static_assert_simm_bits!(IMM_S5, 5);\n __{current_name}(a, IMM_S5)") + } + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" { + call_params = if asm_fmts[2].starts_with("si") { + 
format!( + "static_assert_simm_bits!(IMM_S{0}, {0});\n __{current_name}(mem_addr, IMM_S{0})", + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]) + } + } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "DI" { + call_params = match asm_fmts[2].as_str() { + "rk" => format!("__{current_name}(mem_addr, b)"), + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + } else if para_num == 3 && (in_t[2] == "USI" || in_t[2] == "UQI") { + call_params = if asm_fmts[2].starts_with("ui") { + format!( + "static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, b, IMM{0})", + asm_fmts[2].get(2..).unwrap() + ) + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]) + } + } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" { + call_params = match asm_fmts[2].as_str() { + "si12" => format!( + "static_assert_simm_bits!(IMM_S12, 12);\n __{current_name}(a, mem_addr, IMM_S12)" + ), + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" { + call_params = match asm_fmts[2].as_str() { + "rk" => format!("__{current_name}(a, mem_addr, b)"), + _ => panic!("unsupported assembly format: {}", asm_fmts[2]), + }; + } else if para_num == 4 { + call_params = match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) { + ("si8", t) => format!( + "static_assert_simm_bits!(IMM_S8, 8);\n static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, mem_addr, IMM_S8, IMM{0})", + type_to_imm(t) + ), + (_, _) => panic!( + "unsupported assembly format: {} for {}", + asm_fmts[2], current_name + ), + } + } + let function = if !rustc_legacy_const_generics.is_empty() { + format!( + r#" +#[inline]{target_feature} +#[{rustc_legacy_const_generics}] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +{fn_decl}{{ + {call_params} +}} +"#, + target_feature = target.to_target_feature_attr(current_name) + ) + 
} else { + format!( + r#" +#[inline]{target_feature} +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +{fn_decl}{{ + {call_params} +}} +"#, + target_feature = target.to_target_feature_attr(current_name) + ) + }; + (link_function, function) +} + +fn gen_test(in_file: String, ext_name: &str) -> io::Result<()> { + let f = File::open(in_file.clone()).unwrap_or_else(|_| panic!("Failed to open {in_file}")); + let f = BufReader::new(f); + + let target: TargetFeature = TargetFeature::new(ext_name); + let mut para_num; + let mut current_name: Option = None; + let mut asm_fmts: Vec = Vec::new(); + let mut impl_function_str = String::new(); + let mut call_function_str = String::new(); + let mut out = String::new(); + + out.push_str(&format!( + r#"/* + * This code is automatically generated. DO NOT MODIFY. + * + * Instead, modify `{in_file}` and run the following command to re-generate this file: + * + * ``` + * OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- {in_file} test + * ``` + */ + +#include +#include +#include +#include + +union v16qi +{{ + __m128i v; + int64_t i64[2]; + int8_t i8[16]; +}}; + +union v32qi +{{ + __m256i v; + int64_t i64[4]; + int8_t i8[32]; +}}; + +union v8hi +{{ + __m128i v; + int64_t i64[2]; + int16_t i16[8]; +}}; + +union v16hi +{{ + __m256i v; + int64_t i64[4]; + int16_t i16[16]; +}}; + +union v4si +{{ + __m128i v; + int64_t i64[2]; + int32_t i32[4]; +}}; + +union v8si +{{ + __m256i v; + int64_t i64[4]; + int32_t i32[8]; +}}; + +union v2di +{{ + __m128i v; + int64_t i64[2]; +}}; + +union v4di +{{ + __m256i v; + int64_t i64[4]; +}}; + +union uv16qi +{{ + __m128i v; + uint64_t i64[2]; + uint8_t i8[16]; +}}; + +union uv32qi +{{ + __m256i v; + uint64_t i64[4]; + uint8_t i8[32]; +}}; + +union uv8hi +{{ + __m128i v; + uint64_t i64[2]; + uint16_t i16[8]; +}}; + +union uv16hi +{{ + __m256i v; + uint64_t i64[4]; + uint16_t i16[16]; +}}; + +union uv4si +{{ + __m128i v; + uint64_t i64[2]; + uint32_t i32[4]; +}}; + 
+union uv8si +{{ + __m256i v; + uint64_t i64[4]; + uint32_t i32[8]; +}}; + +union uv2di +{{ + __m128i v; + uint64_t i64[2]; +}}; + +union uv4di +{{ + __m256i v; + uint64_t i64[4]; +}}; + +union v4sf +{{ + __m128 v; + int64_t i64[2]; + uint32_t i32[2]; + float f32[4]; +}}; + +union v8sf +{{ + __m256 v; + int64_t i64[4]; + uint32_t i32[4]; + float f32[8]; +}}; + +union v2df +{{ + __m128d v; + uint64_t i64[2]; + double f64[2]; +}}; + +union v4df +{{ + __m256d v; + uint64_t i64[4]; + double f64[4]; +}}; +"# + )); + + for line in f.lines() { + let line = line.unwrap(); + if line.is_empty() { + continue; + } + if let Some(name) = line.strip_prefix("name = ") { + current_name = Some(String::from(name)); + } else if line.starts_with("asm-fmts = ") { + asm_fmts = line[10..] + .split(',') + .map(|v| v.trim().to_string()) + .collect(); + } else if line.starts_with("data-types = ") { + let current_name = current_name.clone().unwrap(); + let data_types: Vec<&str> = line + .get(12..) + .unwrap() + .split(',') + .map(|e| e.trim()) + .collect(); + let in_t; + let out_t; + if data_types.len() == 2 { + in_t = [data_types[1], "NULL", "NULL", "NULL"]; + out_t = data_types[0]; + para_num = 1; + } else if data_types.len() == 3 { + in_t = [data_types[1], data_types[2], "NULL", "NULL"]; + out_t = data_types[0]; + para_num = 2; + } else if data_types.len() == 4 { + in_t = [data_types[1], data_types[2], data_types[3], "NULL"]; + out_t = data_types[0]; + para_num = 3; + } else if data_types.len() == 5 { + in_t = [data_types[1], data_types[2], data_types[3], data_types[4]]; + out_t = data_types[0]; + para_num = 4; + } else { + panic!("DEBUG: line: {0} len: {1}", line, data_types.len()); + } + + let (link_function, function) = + gen_test_body(¤t_name, &asm_fmts, &in_t, out_t, para_num, target); + impl_function_str.push_str(&link_function); + call_function_str.push_str(&function); + } + } + out.push_str(&impl_function_str); + out.push('\n'); + out.push_str("int main(int argc, char *argv[])\n"); 
+ out.push_str("{\n"); + out.push_str(" printf(\"// This code is automatically generated. DO NOT MODIFY.\\n\");\n"); + out.push_str(" printf(\"// See crates/stdarch-gen-loongarch/README.md\\n\\n\");\n"); + out.push_str(" printf(\"use crate::{\\n\");\n"); + out.push_str(" printf(\" core_arch::{loongarch64::*, simd::*},\\n\");\n"); + out.push_str(" printf(\" mem::transmute,\\n\");\n"); + out.push_str(" printf(\"};\\n\");\n"); + out.push_str(" printf(\"use stdarch_test::simd_test;\\n\");\n"); + out.push_str(&call_function_str); + out.push_str(" return 0;\n"); + out.push('}'); + + let out_dir_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()); + std::fs::create_dir_all(&out_dir_path)?; + let mut f = File::create(out_dir_path.join(format!("{ext_name}.c")))?; + f.write_all(out.as_bytes())?; + Ok(()) +} + +fn gen_test_body( + current_name: &str, + asm_fmts: &[String], + in_t: &[&str; 4], + out_t: &str, + para_num: i32, + target: TargetFeature, +) -> (String, String) { + let rand_i32 = |bits: u8| -> i32 { + let val = rand::random::(); + let bits = 32 - bits; + (val << bits) >> bits + }; + let rand_u32 = |bits: u8| -> u32 { + let val = rand::random::(); + let bits = 32 - bits; + (val << bits) >> bits + }; + let rand_i64 = || -> i64 { rand::random::() }; + let rand_u64 = || -> u64 { rand::random::() }; + let rand_f32 = || -> f32 { rand::random::() }; + let rand_f64 = || -> f64 { rand::random::() }; + let type_to_ct = |t: &str| -> &str { + match t { + "V16QI" => "union v16qi", + "V32QI" => "union v32qi", + "V8HI" => "union v8hi", + "V16HI" => "union v16hi", + "V4SI" => "union v4si", + "V8SI" => "union v8si", + "V2DI" => "union v2di", + "V4DI" => "union v4di", + "UV16QI" => "union uv16qi", + "UV32QI" => "union uv32qi", + "UV8HI" => "union uv8hi", + "UV16HI" => "union uv16hi", + "UV4SI" => "union uv4si", + "UV8SI" => "union uv8si", + "UV2DI" => "union uv2di", + "UV4DI" => "union uv4di", + "SI" => "int32_t", + "DI" => "int64_t", + "USI" => "uint32_t", + "UDI" => 
"uint64_t", + "V4SF" => "union v4sf", + "V8SF" => "union v8sf", + "V2DF" => "union v2df", + "V4DF" => "union v4df", + "UQI" => "uint32_t", + "QI" => "int32_t", + "CVPOINTER" => "void*", + "HI" => "int32_t", + _ => panic!("unknown type: {t}"), + } + }; + let type_to_va = |v: &str, t: &str| -> String { + let n = if v.starts_with('_') { + v.get(1..).unwrap() + } else { + v + }; + let mut out = String::new(); + match t { + "A16QI" => { + for i in 0..16 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_i32(8))); + } + out.push_str(&format!(" printf(\" let {n}: [i8; 16] = [%d")); + for _ in 1..16 { + out.push_str(", %d"); + } + out.push_str(&format!("];\\n\",\n {v}.i8[0]")); + for i in 1..16 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "AM16QI" => { + for i in 0..16 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_i32(8))); + } + out.push_str(&format!(" printf(\" let mut {n}: [i8; 16] = [%d")); + for _ in 1..16 { + out.push_str(", %d"); + } + out.push_str(&format!("];\\n\",\n {v}.i8[0]")); + for i in 1..16 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "V16QI" => { + for i in 0..16 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_i32(8))); + } + out.push_str(&format!(" printf(\" let {n} = i8x16::new(%d")); + for _ in 1..16 { + out.push_str(", %d"); + } + out.push_str(&format!(");\\n\",\n {v}.i8[0]")); + for i in 1..16 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "V32QI" => { + for i in 0..32 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_i32(8))); + } + out.push_str(&format!(" printf(\" let {n} = i8x32::new(%d")); + for _ in 1..32 { + out.push_str(", %d"); + } + out.push_str(&format!(");\\n\",\n {v}.i8[0]")); + for i in 1..32 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "A32QI" => { + for i in 0..32 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_i32(8))); + } + out.push_str(&format!(" printf(\" let {n}: [i8; 32] = [%d")); + for _ in 1..32 { + out.push_str(", %d"); + } + 
out.push_str(&format!("];\\n\",\n {v}.i8[0]")); + for i in 1..32 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "AM32QI" => { + for i in 0..32 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_i32(8))); + } + out.push_str(&format!(" printf(\" let mut {n}: [i8; 32] = [%d")); + for _ in 1..32 { + out.push_str(", %d"); + } + out.push_str(&format!("];\\n\",\n {v}.i8[0]")); + for i in 1..32 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "V8HI" => { + for i in 0..8 { + out.push_str(&format!(" {v}.i16[{i}] = {};\n", rand_i32(16))); + } + out.push_str(&format!(" printf(\" let {n} = i16x8::new(%d")); + for _ in 1..8 { + out.push_str(", %d"); + } + out.push_str(&format!(");\\n\",\n {v}.i16[0]")); + for i in 1..8 { + out.push_str(&format!(", {v}.i16[{i}]")); + } + } + "V16HI" => { + for i in 0..16 { + out.push_str(&format!(" {v}.i16[{i}] = {};\n", rand_i32(16))); + } + out.push_str(&format!(" printf(\" let {n} = i16x16::new(%d")); + for _ in 1..16 { + out.push_str(", %d"); + } + out.push_str(&format!(");\\n\",\n {v}.i16[0]")); + for i in 1..16 { + out.push_str(&format!(", {v}.i16[{i}]")); + } + } + "V4SI" => { + for i in 0..4 { + out.push_str(&format!(" {v}.i32[{i}] = {};\n", rand_i32(32))); + } + out.push_str(&format!(" printf(\" let {n} = i32x4::new(%d")); + for _ in 1..4 { + out.push_str(", %d"); + } + out.push_str(&format!(");\\n\",\n {v}.i32[0]")); + for i in 1..4 { + out.push_str(&format!(", {v}.i32[{i}]")); + } + } + "V8SI" => { + for i in 0..8 { + out.push_str(&format!(" {v}.i32[{i}] = {};\n", rand_i32(32))); + } + out.push_str(&format!(" printf(\" let {n} = i32x8::new(%d")); + for _ in 1..8 { + out.push_str(", %d"); + } + out.push_str(&format!(");\\n\",\n {v}.i32[0]")); + for i in 1..8 { + out.push_str(&format!(", {v}.i32[{i}]")); + } + } + "V2DI" => { + for i in 0..2 { + out.push_str(&format!(" {v}.i64[{i}] = {}L;\n", rand_i64())); + } + out.push_str(&format!(" printf(\" let {n} = i64x2::new(%ld")); + for _ in 1..2 { + out.push_str(", %ld"); + } 
+ out.push_str(&format!(");\\n\",\n {v}.i64[0]")); + for i in 1..2 { + out.push_str(&format!(", {v}.i64[{i}]")); + } + } + "V4DI" => { + for i in 0..4 { + out.push_str(&format!(" {v}.i64[{i}] = {}L;\n", rand_i64())); + } + out.push_str(&format!(" printf(\" let {n} = i64x4::new(%ld")); + for _ in 1..4 { + out.push_str(", %ld"); + } + out.push_str(&format!(");\\n\",\n {v}.i64[0]")); + for i in 1..4 { + out.push_str(&format!(", {v}.i64[{i}]")); + } + } + "UV16QI" => { + for i in 0..16 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_u32(8))); + } + out.push_str(&format!(" printf(\" let {n} = u8x16::new(%u")); + for _ in 1..16 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i8[0]")); + for i in 1..16 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "UV32QI" => { + for i in 0..32 { + out.push_str(&format!(" {v}.i8[{i}] = {};\n", rand_u32(8))); + } + out.push_str(&format!(" printf(\" let {n} = u8x32::new(%u")); + for _ in 1..32 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i8[0]")); + for i in 1..32 { + out.push_str(&format!(", {v}.i8[{i}]")); + } + } + "UV8HI" => { + for i in 0..8 { + out.push_str(&format!(" {v}.i16[{i}] = {};\n", rand_u32(16))); + } + out.push_str(&format!(" printf(\" let {n} = u16x8::new(%u")); + for _ in 1..8 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i16[0]")); + for i in 1..8 { + out.push_str(&format!(", {v}.i16[{i}]")); + } + } + "UV16HI" => { + for i in 0..16 { + out.push_str(&format!(" {v}.i16[{i}] = {};\n", rand_u32(16))); + } + out.push_str(&format!(" printf(\" let {n} = u16x16::new(%u")); + for _ in 1..16 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i16[0]")); + for i in 1..16 { + out.push_str(&format!(", {v}.i16[{i}]")); + } + } + "UV4SI" => { + for i in 0..4 { + out.push_str(&format!(" {v}.i32[{i}] = {};\n", rand_u32(32))); + } + out.push_str(&format!(" printf(\" let {n} = u32x4::new(%u")); + for _ in 1..4 { + out.push_str(", %u"); 
+ } + out.push_str(&format!(");\\n\",\n {v}.i32[0]")); + for i in 1..4 { + out.push_str(&format!(", {v}.i32[{i}]")); + } + } + "UV8SI" => { + for i in 0..8 { + out.push_str(&format!(" {v}.i32[{i}] = {};\n", rand_u32(32))); + } + out.push_str(&format!(" printf(\" let {n} = u32x8::new(%u")); + for _ in 1..8 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i32[0]")); + for i in 1..8 { + out.push_str(&format!(", {v}.i32[{i}]")); + } + } + "UV2DI" => { + for i in 0..2 { + out.push_str(&format!(" {v}.i64[{i}] = {}UL;\n", rand_u64())); + } + out.push_str(&format!(" printf(\" let {n} = u64x2::new(%lu")); + for _ in 1..2 { + out.push_str(", %lu"); + } + out.push_str(&format!(");\\n\",\n {v}.i64[0]")); + for i in 1..2 { + out.push_str(&format!(", {v}.i64[{i}]")); + } + } + "UV4DI" => { + for i in 0..4 { + out.push_str(&format!(" {v}.i64[{i}] = {}UL;\n", rand_u64())); + } + out.push_str(&format!(" printf(\" let {n} = u64x4::new(%lu")); + for _ in 1..4 { + out.push_str(", %lu"); + } + out.push_str(&format!(");\\n\",\n {v}.i64[0]")); + for i in 1..4 { + out.push_str(&format!(", {v}.i64[{i}]")); + } + } + "V4SF" => { + for i in 0..4 { + out.push_str(&format!(" {v}.f32[{i}] = {};\n", rand_f32())); + } + out.push_str(&format!(" printf(\" let {n} = u32x4::new(%u")); + for _ in 1..4 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i32[0]")); + for i in 1..4 { + out.push_str(&format!(", {v}.i32[{i}]")); + } + } + "V8SF" => { + for i in 0..8 { + out.push_str(&format!(" {v}.f32[{i}] = {};\n", rand_f32())); + } + out.push_str(&format!(" printf(\" let {n} = u32x8::new(%u")); + for _ in 1..8 { + out.push_str(", %u"); + } + out.push_str(&format!(");\\n\",\n {v}.i32[0]")); + for i in 1..8 { + out.push_str(&format!(", {v}.i32[{i}]")); + } + } + "V2DF" => { + for i in 0..2 { + out.push_str(&format!(" {v}.f64[{i}] = {};\n", rand_f64())); + } + out.push_str(&format!(" printf(\" let {n} = u64x2::new(%lu")); + for _ in 1..2 { + out.push_str(", %lu"); + } 
+ out.push_str(&format!(");\\n\",\n {v}.i64[0]")); + for i in 1..2 { + out.push_str(&format!(", {v}.i64[{i}]")); + } + } + "V4DF" => { + for i in 0..4 { + out.push_str(&format!(" {v}.f64[{i}] = {};\n", rand_f64())); + } + out.push_str(&format!(" printf(\" let {n} = u64x4::new(%lu")); + for _ in 1..4 { + out.push_str(", %lu"); + } + out.push_str(&format!(");\\n\",\n {v}.i64[0]")); + for i in 1..4 { + out.push_str(&format!(", {v}.i64[{i}]")); + } + } + "SI" | "DI" | "USI" | "UDI" | "UQI" | "QI" | "CVPOINTER" | "HI" => (), + _ => panic!("unknown type: {t}"), + } + if !out.is_empty() { + out.push_str(");"); + } + out + }; + let type_to_rp = |t: &str| -> &str { + match t { + "SI" => " printf(\" let r: i32 = %d;\\n\", o);", + "DI" => " printf(\" let r: i64 = %ld;\\n\", o);", + "USI" => " printf(\" let r: u32 = %u;\\n\", o);", + "UDI" => " printf(\" let r: u64 = %lu;\\n\", o);", + "UQI" => " printf(\" let r: u32 = %u;\\n\", o);", + "QI" => " printf(\" let r: i32 = %d;\\n\", o);", + "HI" => " printf(\" let r: i32 = %d;\\n\", o);", + "V32QI" | "V16HI" | "V8SI" | "V4DI" | "UV32QI" | "UV16HI" | "UV8SI" | "UV4DI" + | "V8SF" | "V4DF" => { + " printf(\" let r = i64x4::new(%ld, %ld, %ld, %ld);\\n\", o.i64[0], o.i64[1], o.i64[2], o.i64[3]);" + } + _ => " printf(\" let r = i64x2::new(%ld, %ld);\\n\", o.i64[0], o.i64[1]);", + } + }; + let type_to_rx = |t: &str| -> &str { + match t { + "SI" | "DI" | "USI" | "UDI" | "UQI" | "QI" | "HI" => "o", + _ => "o.v", + } + }; + let type_to_imm = |t| -> i8 { + match t { + 'b' => 4, + 'h' => 3, + 'w' => 2, + 'd' => 1, + _ => panic!("unsupported type"), + } + }; + + let impl_function = { + let fn_output = if out_t.to_lowercase() == "void" { + String::new() + } else { + format!(" {} o;", type_to_ct(out_t)) + }; + let mut fn_inputs = match para_num { + 1 => format!( + " {} a;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]) + ), + 2 => format!( + " {} a;\n{}\n {} b;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + 
type_to_ct(in_t[1]), + type_to_va("b", in_t[1]) + ), + 3 => format!( + " {} a;\n{}\n {} b;\n{}\n {} c;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + type_to_ct(in_t[1]), + type_to_va("b", in_t[1]), + type_to_ct(in_t[2]), + type_to_va("c", in_t[2]) + ), + 4 => format!( + " {} a;\n{}\n {} b;\n{}\n {} c;\n{}\n {} d;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + type_to_ct(in_t[1]), + type_to_va("b", in_t[1]), + type_to_ct(in_t[2]), + type_to_va("c", in_t[2]), + type_to_ct(in_t[3]), + type_to_va("d", in_t[3]) + ), + _ => panic!("unsupported parameter number"), + }; + let mut fn_params = match para_num { + 1 => "(a.v)".to_string(), + 2 => "(a.v, b.v)".to_string(), + 3 => "(a.v, b.v, c.v)".to_string(), + 4 => "(a.v, b.v, c.v, d.v)".to_string(), + _ => "unsupported parameter number".to_string(), + }; + let mut as_params = match para_num { + 1 => "(transmute(a))".to_string(), + 2 => "(transmute(a), transmute(b))".to_string(), + 3 => "(transmute(a), transmute(b), transmute(c))".to_string(), + 4 => "(transmute(a), transmute(b), transmute(c), transmute(d))".to_string(), + _ => panic!("unsupported parameter number"), + }; + let mut as_args = String::new(); + if para_num == 1 && in_t[0] == "HI" { + fn_inputs = "".to_string(); + match asm_fmts[1].as_str() { + "si13" => { + let val = rand_i32(13); + fn_params = format!("({val})"); + as_params = format!("::<{val}>()"); + } + "i13" => { + let val = rand_u32(12); + fn_params = format!("({val})"); + as_params = format!("::<{val}>()"); + } + "si10" => { + let val = rand_i32(10); + fn_params = format!("({val})"); + as_params = format!("::<{val}>()"); + } + _ => panic!("unsupported assembly format: {}", asm_fmts[1]), + } + } else if para_num == 1 + && (in_t[0] == "SI" || in_t[0] == "DI") + && asm_fmts[1].starts_with("rj") + { + fn_params = "(a)".to_string(); + if in_t[0] == "SI" { + as_params = "(%d)".to_string(); + } else { + as_params = "(%ld)".to_string(); + } + as_args = ", a".to_string(); + } else if 
para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") { + if asm_fmts[2].starts_with("ui") { + fn_inputs = format!( + " {} a;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]) + ); + let val = rand_u32(asm_fmts[2].get(2..).unwrap().parse::().unwrap()); + fn_params = format!("(a.v, {val})"); + as_params = format!("::<{val}>(transmute(a))"); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 2 && in_t[1] == "QI" { + if asm_fmts[2].starts_with("si") { + fn_inputs = format!( + " {} a;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]) + ); + let val = rand_i32(asm_fmts[2].get(2..).unwrap().parse::().unwrap()); + fn_params = format!("(a.v, {val})"); + as_params = format!("::<{val}>(transmute(a))"); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 2 && in_t[1] == "SI" && asm_fmts[2].starts_with("rk") { + fn_params = "(a.v, b)".to_string(); + as_params = "(transmute(a), %d)".to_string(); + as_args = ", b".to_string(); + } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" { + if asm_fmts[2].starts_with("si") { + fn_inputs = format!( + " union v{}qi _a;\n{}\n {} a = &_a;", + target.bytes(), + type_to_va( + "_a", + if target == TargetFeature::Lsx { + "A16QI" + } else { + "A32QI" + } + ), + type_to_ct(in_t[0]) + ); + fn_params = "(a, 0)".to_string(); + as_params = "::<0>(a.as_ptr())".to_string(); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "DI" { + if asm_fmts[2].as_str() == "rk" { + fn_inputs = format!( + " union v{}qi _a;\n{}\n {} a = &_a;", + target.bytes(), + type_to_va( + "_a", + if target == TargetFeature::Lsx { + "A16QI" + } else { + "A32QI" + } + ), + type_to_ct(in_t[0]) + ); + fn_params = "(a, 0)".to_string(); + as_params = "(a.as_ptr(), 0)".to_string(); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 3 
&& in_t[2] == "UQI" && asm_fmts[1].starts_with("rj") { + if asm_fmts[2].starts_with("ui") { + fn_inputs = format!( + " {} a;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]) + ); + let ival = rand_i32(32); + let uval = rand_u32(asm_fmts[2].get(2..).unwrap().parse::().unwrap()); + fn_params = format!("(a.v, {ival}, {uval})"); + as_params = format!("::<{uval}>(transmute(a), {ival})"); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 3 && (in_t[2] == "USI" || in_t[2] == "UQI") { + if asm_fmts[2].starts_with("ui") { + fn_inputs = format!( + " {} a;\n{}\n {} b;\n{}", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + type_to_ct(in_t[1]), + type_to_va("b", in_t[1]), + ); + let val = rand_u32(asm_fmts[2].get(2..).unwrap().parse::().unwrap()); + fn_params = format!("(a.v, b.v, {val})"); + as_params = format!("::<{val}>(transmute(a), transmute(b))"); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" { + if asm_fmts[2].as_str() == "si12" { + fn_inputs = format!( + " {} a;\n{}\n union v{}qi o;\n{}\n {} b = &o;", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + target.bytes(), + type_to_va( + "o", + if target == TargetFeature::Lsx { + "AM16QI" + } else { + "AM32QI" + } + ), + type_to_ct(in_t[1]) + ); + fn_params = "(a.v, b, 0)".to_string(); + as_params = "::<0>(transmute(a), o.as_mut_ptr())".to_string(); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" { + if asm_fmts[2].as_str() == "rk" { + fn_inputs = format!( + " {} a;\n{}\n union v{}qi o;\n{}\n {} b = &o;", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + target.bytes(), + type_to_va( + "o", + if target == TargetFeature::Lsx { + "AM16QI" + } else { + "AM32QI" + } + ), + type_to_ct(in_t[1]) + ); + fn_params = "(a.v, b, 0)".to_string(); + as_params = "(transmute(a), 
o.as_mut_ptr(), 0)".to_string(); + } else { + panic!("unsupported assembly format: {}", asm_fmts[2]); + } + } else if para_num == 4 { + match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) { + ("si8", t) => { + fn_inputs = format!( + " {} a;\n{}\n union v{}qi o;\n{}\n {} b = &o;", + type_to_ct(in_t[0]), + type_to_va("a", in_t[0]), + target.bytes(), + type_to_va( + "o", + if target == TargetFeature::Lsx { + "AM16QI" + } else { + "AM32QI" + } + ), + type_to_ct(in_t[1]) + ); + let val = rand_u32(type_to_imm(t).try_into().unwrap()); + fn_params = format!("(a.v, b, 0, {val})"); + as_params = format!("::<0, {val}>(transmute(a), o.as_mut_ptr())"); + } + (_, _) => panic!( + "unsupported assembly format: {} for {}", + asm_fmts[2], current_name + ), + }; + } + let fn_docall = if out_t.to_lowercase() == "void" { + format!(" __{current_name}{fn_params};") + } else { + format!(" {} = __{current_name}{fn_params};", type_to_rx(out_t)) + }; + let fn_result = if out_t.to_lowercase() == "void" { + if target == TargetFeature::Lsx { + type_to_rp("V16QI") + } else { + type_to_rp("V32QI") + } + } else { + type_to_rp(out_t) + }; + let fn_assert = { + if out_t.to_lowercase() == "void" { + format!( + " printf(\"\\n {current_name}{as_params};\\n assert_eq!(r, transmute(o));\\n\"{as_args});" + ) + } else { + format!( + " printf(\"\\n assert_eq!(r, transmute({current_name}{as_params}));\\n\"{as_args});" + ) + } + }; + format!( + r#" +static void {current_name}(void) +{{ + printf("\n#[simd_test(enable = \"{}\")]\n"); + printf("unsafe fn test_{current_name}() {{\n"); +{fn_inputs} +{fn_output} +{fn_docall} +{fn_result} +{fn_assert} + printf("}}\n"); +}} +"#, + target.as_target_feature_arg(current_name) + ) + }; + let call_function = format!(" {current_name}();\n"); + (impl_function, call_function) +} + +pub fn main() -> io::Result<()> { + let args: Vec = env::args().collect(); + let in_file = args.get(1).cloned().expect("Input file missing!"); + let in_file_path = 
PathBuf::from(&in_file); + let in_file_name = in_file_path + .file_name() + .unwrap() + .to_os_string() + .into_string() + .unwrap(); + + let ext_name = if in_file_name.starts_with("lasx") { + "lasx" + } else { + "lsx" + }; + + if in_file_name.ends_with(".h") { + gen_spec(in_file, ext_name) + } else if args.get(2).is_some() { + gen_test(in_file, ext_name) + } else { + gen_bind(in_file, ext_name) + } +} diff --git a/library/stdarch/crates/stdarch-test/Cargo.toml b/library/stdarch/crates/stdarch-test/Cargo.toml new file mode 100644 index 000000000000..e4791e4ec525 --- /dev/null +++ b/library/stdarch/crates/stdarch-test/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "stdarch-test" +version = "0.1.0" +authors = ["Alex Crichton "] +edition = "2024" + +[dependencies] +assert-instr-macro = { path = "../assert-instr-macro" } +simd-test-macro = { path = "../simd-test-macro" } +lazy_static = "1.0" +rustc-demangle = "0.1.8" +cfg-if = "1.0" + +[target.'cfg(windows)'.dependencies] +cc = "1.0" + +# We use a crates.io dependency to disassemble wasm binaries to look for +# instructions for `#[assert_instr]`. Note that we use an `=` dependency here +# instead of a floating dependency because the text format for wasm changes over +# time, and we want to make updates to this explicit rather than automatically +# picking up updates which might break CI with new instruction names. +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasmprinter = "=0.2.67" + +[features] +default = [] diff --git a/library/stdarch/crates/stdarch-test/src/disassembly.rs b/library/stdarch/crates/stdarch-test/src/disassembly.rs new file mode 100644 index 000000000000..f5167ea8d8ef --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/disassembly.rs @@ -0,0 +1,208 @@ +//! Disassembly calling function for most targets. + +use crate::Function; +use std::{collections::HashSet, env, str}; + +// Extracts the "shim" name from the `symbol`. 
+fn normalize(mut symbol: &str) -> String { + // Remove trailing colon: + if symbol.ends_with(':') { + symbol = &symbol[..symbol.len() - 1]; + } + if symbol.ends_with('>') { + symbol = &symbol[..symbol.len() - 1]; + } + if let Some(idx) = symbol.find('<') { + symbol = &symbol[idx + 1..]; + } + + let mut symbol = rustc_demangle::demangle(symbol).to_string(); + symbol = match symbol.rfind("::h") { + Some(i) => symbol[..i].to_string(), + None => symbol.to_string(), + }; + + // Remove Rust paths + if let Some(last_colon) = symbol.rfind(':') { + symbol = symbol[last_colon + 1..].to_string(); + } + + // Normalize to no leading underscore to handle platforms that may + // inject extra ones in symbol names. + while symbol.starts_with('_') || symbol.starts_with('.') { + symbol.remove(0); + } + // Windows/x86 has a suffix such as @@4. + if let Some(idx) = symbol.find("@@") { + symbol = symbol[..idx].to_string(); + } + symbol +} + +#[cfg(target_env = "msvc")] +pub(crate) fn disassemble_myself() -> HashSet { + let me = env::current_exe().expect("failed to get current exe"); + + let target = if cfg!(target_arch = "x86_64") { + "x86_64-pc-windows-msvc" + } else if cfg!(target_arch = "x86") { + "i686-pc-windows-msvc" + } else if cfg!(target_arch = "aarch64") { + "aarch64-pc-windows-msvc" + } else { + panic!("disassembly unimplemented") + }; + let mut cmd = + cc::windows_registry::find(target, "dumpbin.exe").expect("failed to find `dumpbin` tool"); + let output = cmd + .arg("/DISASM:NOBYTES") + .arg(&me) + .output() + .expect("failed to execute dumpbin"); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + // Windows does not return valid UTF-8 output: + parse(&String::from_utf8_lossy(Vec::leak(output.stdout))) +} + +#[cfg(not(target_env = "msvc"))] +pub(crate) fn disassemble_myself() -> HashSet { + let me = env::current_exe().expect("failed to get current exe"); + + let objdump = 
env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string()); + let add_args = if cfg!(target_vendor = "apple") && cfg!(target_arch = "aarch64") { + // Target features need to be enabled for LLVM objdump on Darwin ARM64 + vec!["--mattr=+v8.6a,+crypto,+tme"] + } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + vec!["--mattr=+zk,+zks,+zbc,+zbb"] + } else { + vec![] + }; + let output = std::process::Command::new(objdump.clone()) + .arg("--disassemble") + .arg("--no-show-raw-insn") + .args(add_args) + .arg(&me) + .output() + .unwrap_or_else(|_| panic!("failed to execute objdump. OBJDUMP={objdump}")); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + + let disassembly = String::from_utf8_lossy(Vec::leak(output.stdout)); + + parse(&disassembly) +} + +fn parse(output: &str) -> HashSet { + let mut lines = output.lines(); + + println!( + "First 100 lines of the disassembly input containing {} lines:", + lines.clone().count() + ); + for line in output.lines().take(100) { + println!("{line}"); + } + + let mut functions = HashSet::new(); + let mut cached_header = None; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { + if !header.ends_with(':') || !header.contains("stdarch_test_shim") { + continue; + } + eprintln!("header: {header}"); + let symbol = normalize(header); + eprintln!("normalized symbol: {symbol}"); + let mut instructions = Vec::new(); + for instruction in lines.by_ref() { + if instruction.ends_with(':') { + cached_header = Some(instruction); + break; + } + if instruction.is_empty() { + cached_header = None; + break; + } + let mut parts = if cfg!(target_env = "msvc") { + // Each line looks like: + // + // > $addr: $instr.. 
+ instruction + .split(&[' ', ',']) + .filter(|&x| !x.is_empty()) + .skip(1) + .map(str::to_lowercase) + .skip_while(|s| matches!(&**s, "lock" | "vex")) // skip x86-specific prefix + .collect::>() + } else { + // objdump with --no-show-raw-insn + // Each line of instructions should look like: + // + // $rel_offset: $instruction... + instruction + .split_whitespace() + .skip(1) + .skip_while(|s| matches!(*s, "lock" | "{evex}" | "{vex}")) // skip x86-specific prefix + .map(ToString::to_string) + .collect::>() + }; + + if cfg!(any(target_arch = "aarch64", target_arch = "arm64ec")) { + // Normalize [us]shll.* ..., #0 instructions to the preferred form: [us]xtl.* ... + // as neither LLVM objdump nor dumpbin does that. + // See https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/UXTL--UXTL2--Unsigned-extend-Long--an-alias-of-USHLL--USHLL2- + // and https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/SXTL--SXTL2--Signed-extend-Long--an-alias-of-SSHLL--SSHLL2- + // for details. 
+ fn is_shll(instr: &str) -> bool { + if cfg!(target_env = "msvc") { + instr.starts_with("ushll") || instr.starts_with("sshll") + } else { + instr.starts_with("ushll.") || instr.starts_with("sshll.") + } + } + match (parts.first(), parts.last()) { + (Some(instr), Some(last_arg)) if is_shll(instr) && last_arg == "#0" => { + assert_eq!(parts.len(), 4); + let mut new_parts = Vec::with_capacity(3); + let new_instr = format!("{}{}{}", &instr[..1], "xtl", &instr[5..]); + new_parts.push(new_instr); + new_parts.push(parts[1].clone()); + new_parts.push(parts[2][0..parts[2].len() - 1].to_owned()); // strip trailing comma + parts = new_parts; + } + // dumpbin uses "ins" instead of "mov" + (Some(instr), _) if cfg!(target_env = "msvc") && instr == "ins" => { + parts[0] = "mov".to_string() + } + _ => {} + }; + } + + instructions.push(parts.join(" ")); + if matches!(&**instructions.last().unwrap(), "ret" | "retq") { + cached_header = None; + break; + } + } + let function = Function { + name: symbol, + instrs: instructions, + }; + assert!(functions.insert(function)); + } + + eprintln!("all found functions dump:"); + for k in &functions { + eprintln!(" f: {}", k.name); + } + + functions +} diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs new file mode 100644 index 000000000000..f6614f6d51c9 --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -0,0 +1,218 @@ +//! Runtime support needed for testing the stdarch crate. +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. 
+#![deny(rust_2018_idioms)] +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] + +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate cfg_if; + +pub use assert_instr_macro::*; +pub use simd_test_macro::*; +use std::{cmp, collections::HashSet, env, hash, hint::black_box, str}; + +cfg_if! { + if #[cfg(target_arch = "wasm32")] { + pub mod wasm; + use wasm::disassemble_myself; + } else { + mod disassembly; + use crate::disassembly::disassemble_myself; + } +} + +lazy_static! { + static ref DISASSEMBLY: HashSet = disassemble_myself(); +} + +#[derive(Debug)] +struct Function { + name: String, + instrs: Vec, +} +impl Function { + fn new(n: &str) -> Self { + Self { + name: n.to_string(), + instrs: Vec::new(), + } + } +} + +impl cmp::PartialEq for Function { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +impl cmp::Eq for Function {} + +impl hash::Hash for Function { + fn hash(&self, state: &mut H) { + self.name.hash(state) + } +} + +/// Main entry point for this crate, called by the `#[assert_instr]` macro. +/// +/// This asserts that the function at `fnptr` contains the instruction +/// `expected` provided. +pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { + // Make sure that the shim is not removed + black_box(shim_addr); + + //eprintln!("shim name: {fnname}"); + let function = &DISASSEMBLY + .get(&Function::new(fnname)) + .unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly")); + //eprintln!(" function: {:?}", function); + + let mut instrs = &function.instrs[..]; + while instrs.last().is_some_and(|s| s == "nop" || s == "int3") { + instrs = &instrs[..instrs.len() - 1]; + } + + // Look for `expected` as the first part of any instruction in this + // function, e.g., tzcntl in tzcntl %rax,%rax. + // + // There are two cases when the expected instruction is nop: + // 1. 
The expected intrinsic is compiled away so we can't + // check for it - aka the intrinsic is not generating any code. + // 2. It is a mark, indicating that the instruction will be + // compiled into other instructions - mainly because of llvm + // optimization. + let expected = if expected == "unknown" { + "" // Workaround for rust-lang/stdarch#1674, todo: remove when the issue is fixed + } else { + expected + }; + let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected)); + + // Look for subroutine call instructions in the disassembly to detect whether + // inlining failed: all intrinsics are `#[inline(always)]`, so calling one + // intrinsic from another should not generate subroutine call instructions. + let inlining_failed = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "wasm32") { + instrs.iter().any(|s| s.starts_with("call ")) + } else if cfg!(target_arch = "x86") { + instrs.windows(2).any(|s| { + // On 32-bit x86 position independent code will call itself and be + // immediately followed by a `pop` to learn about the current address. + // Let's not take that into account when considering whether a function + // failed inlining something. + s[0].starts_with("call ") && s[1].starts_with("pop") // FIXME: original logic but does not match comment + }) + } else if cfg!(any( + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + instrs.iter().any(|s| s.starts_with("bl ")) + } else { + // FIXME: Add detection for other archs + false + }; + + let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT") + .ok() + .map_or_else( + || match expected { + // `cpuid` returns a pretty big aggregate structure, so exempt + // it from the slightly more restrictive 22 instructions below. 
+ "cpuid" => 30, + + // These require 8 loads and stores, so it _just_ overflows the limit + "aesencwide128kl" | "aesencwide256kl" | "aesdecwide128kl" | "aesdecwide256kl" => 24, + + // Apparently, on Windows, LLVM generates a bunch of + // saves/restores of xmm registers around these instructions, + // which exceeds the limit of 20 below. As it seems dictated by + // Windows's ABI (I believe?), we probably can't do much + // about it. + "vzeroall" | "vzeroupper" if cfg!(windows) => 30, + + // Intrinsics using `cvtpi2ps` are typically "composites" and + // in some cases exceed the limit. + "cvtpi2ps" => 25, + // core_arch/src/arm_shared/simd32 + // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit) + "usad8" | "vfma" | "vfms" => 27, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, + // core_arch/src/arm_shared/simd32 + // vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit) + "vld3" => 28, + // core_arch/src/arm_shared/simd32 + // vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit) + "vld4" => 37, + // core_arch/src/arm_shared/simd32 + // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) + "vst1" => 41, + // core_arch/src/arm_shared/simd32 + // vst3q_u32_vst3 : #instructions = 25 >= 22 (limit) + "vst3" => 26, + // core_arch/src/arm_shared/simd32 + // vst4q_u32_vst4 : #instructions = 33 >= 22 (limit) + "vst4" => 34, + + // core_arch/src/arm_shared/simd32 + // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit) + "nop" if fnname.contains("vst1q_p64") => 34, + + // Original limit was 20 instructions, but ARM DSP Intrinsics + // are exactly 20 instructions long. So, bump the limit to 22 + // instead of adding here a long list of exceptions. + _ => { + // aarch64_be may add reverse instructions which increases + // the number of instructions generated. 
+ if cfg!(all(target_endian = "big", target_arch = "aarch64")) { + 32 + } else { + 22 + } + } + }, + |v| v.parse().unwrap(), + ); + let probably_only_one_instruction = instrs.len() < instruction_limit; + + if found && probably_only_one_instruction && !inlining_failed { + return; + } + + // Help debug by printing out the found disassembly, and then panic as we + // didn't find the instruction. + println!("disassembly for {fnname}: ",); + for (i, instr) in instrs.iter().enumerate() { + println!("\t{i:2}: {instr}"); + } + + if !found { + panic!("failed to find instruction `{expected}` in the disassembly"); + } else if !probably_only_one_instruction { + panic!( + "instruction found, but the disassembly contains too many \ + instructions: #instructions = {} >= {} (limit)", + instrs.len(), + instruction_limit + ); + } else if inlining_failed { + panic!( + "instruction found, but the disassembly contains subroutine \ + call instructions, which hint that inlining failed" + ); + } +} + +pub fn assert_skip_test_ok(name: &str, missing_features: &[&str]) { + println!("Skipping test `{name}` due to missing target features:"); + for feature in missing_features { + println!(" - {feature}"); + } + match env::var("STDARCH_TEST_EVERYTHING") { + Ok(_) => panic!("skipped test `{name}` when it shouldn't be skipped"), + Err(_) => println!("Set STDARCH_TEST_EVERYTHING to make this an error."), + } +} diff --git a/library/stdarch/crates/stdarch-test/src/wasm.rs b/library/stdarch/crates/stdarch-test/src/wasm.rs new file mode 100644 index 000000000000..bf411c12148e --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/wasm.rs @@ -0,0 +1,55 @@ +//! Disassembly calling function for `wasm32` targets. + +use crate::Function; +use std::collections::HashSet; + +pub(crate) fn disassemble_myself() -> HashSet { + // Use `std::env::args` to find the path to our executable. Assume the + // environment is configured such that we can read that file. 
Read it and + // use the `wasmprinter` crate to transform the binary to text, then search + // the text for appropriately named functions. + let me = std::env::args() + .next() + .expect("failed to find current wasm file"); + let output = wasmprinter::print_file(&me).unwrap(); + + let mut ret: HashSet = HashSet::new(); + let mut lines = output.lines().map(|s| s.trim()); + while let Some(line) = lines.next() { + // If this isn't a function, we don't care about it. + if !line.starts_with("(func ") { + continue; + } + + let mut function = Function { + name: String::new(), + instrs: Vec::new(), + }; + + // Empty functions will end in `))` so there's nothing to do, otherwise + // we'll have a bunch of following lines which are instructions. + // + // Lines that have an imbalanced `)` mark the end of a function. + if !line.ends_with("))") { + while let Some(line) = lines.next() { + function.instrs.push(line.to_string()); + if !line.starts_with("(") && line.ends_with(")") { + break; + } + } + } + // The second element here split on whitespace should be the name of + // the function, skipping the type/params/results + function.name = line.split_whitespace().nth(1).unwrap().to_string(); + if function.name.starts_with("$") { + function.name = function.name[1..].to_string() + } + + if !function.name.contains("stdarch_test_shim") { + continue; + } + + assert!(ret.insert(function)); + } + return ret; +} diff --git a/library/stdarch/crates/stdarch-verify/.gitattributes b/library/stdarch/crates/stdarch-verify/.gitattributes new file mode 100644 index 000000000000..621fdea6f7d6 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/.gitattributes @@ -0,0 +1 @@ +*.xml binary diff --git a/library/stdarch/crates/stdarch-verify/Cargo.toml b/library/stdarch/crates/stdarch-verify/Cargo.toml new file mode 100644 index 000000000000..c82a1262d04f --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "stdarch-verify" +version = "0.1.0" 
+authors = ["Alex Crichton "] +edition = "2024" + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +syn = { version = "2.0", features = ["full"] } + +[lib] +proc-macro = true +test = false + +[dev-dependencies] +serde = { version = "1.0", features = ['derive'] } +serde_json = "1.0.96" +quick-xml = { version = "0.33.0", features = ["serialize", "overlapped-lists"] } diff --git a/library/stdarch/crates/stdarch-verify/build.rs b/library/stdarch/crates/stdarch-verify/build.rs new file mode 100644 index 000000000000..c0dc81b6a613 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/build.rs @@ -0,0 +1,28 @@ +use std::path::Path; + +fn main() { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let root = dir.parent().unwrap(); + eprintln!("root: {}", root.display()); + walk(&root.join("core_arch/src/x86")); + walk(&root.join("core_arch/src/x86_64")); + walk(&root.join("core_arch/src/arm")); + walk(&root.join("core_arch/src/aarch64")); +} + +fn walk(root: &Path) { + for file in root.read_dir().unwrap() { + eprintln!("root: {}", root.display()); + let file = file.unwrap(); + if file.file_type().unwrap().is_dir() { + walk(&file.path()); + continue; + } + let path = file.path(); + if path.extension().and_then(|s| s.to_str()) != Some("rs") { + continue; + } + + println!("cargo:rerun-if-changed={}", path.display()); + } +} diff --git a/library/stdarch/crates/stdarch-verify/mips-msa.h b/library/stdarch/crates/stdarch-verify/mips-msa.h new file mode 100644 index 000000000000..881f1918f6bd --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/mips-msa.h @@ -0,0 +1,707 @@ +v16i8 __builtin_msa_add_a_b (v16i8, v16i8); +v8i16 __builtin_msa_add_a_h (v8i16, v8i16); +v4i32 __builtin_msa_add_a_w (v4i32, v4i32); +v2i64 __builtin_msa_add_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_a_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_a_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_a_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_s_b 
(v16i8, v16i8); +v8i16 __builtin_msa_adds_s_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_s_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_adds_u_b (v16u8, v16u8); +v8u16 __builtin_msa_adds_u_h (v8u16, v8u16); +v4u32 __builtin_msa_adds_u_w (v4u32, v4u32); +v2u64 __builtin_msa_adds_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_addv_b (v16i8, v16i8); +v8i16 __builtin_msa_addv_h (v8i16, v8i16); +v4i32 __builtin_msa_addv_w (v4i32, v4i32); +v2i64 __builtin_msa_addv_d (v2i64, v2i64); + +v16i8 __builtin_msa_addvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_addvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_addvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_addvi_d (v2i64, imm0_31); + +v16u8 __builtin_msa_and_v (v16u8, v16u8); + +v16u8 __builtin_msa_andi_b (v16u8, imm0_255); + +v16i8 __builtin_msa_asub_s_b (v16i8, v16i8); +v8i16 __builtin_msa_asub_s_h (v8i16, v8i16); +v4i32 __builtin_msa_asub_s_w (v4i32, v4i32); +v2i64 __builtin_msa_asub_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_asub_u_b (v16u8, v16u8); +v8u16 __builtin_msa_asub_u_h (v8u16, v8u16); +v4u32 __builtin_msa_asub_u_w (v4u32, v4u32); +v2u64 __builtin_msa_asub_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_ave_s_b (v16i8, v16i8); +v8i16 __builtin_msa_ave_s_h (v8i16, v8i16); +v4i32 __builtin_msa_ave_s_w (v4i32, v4i32); +v2i64 __builtin_msa_ave_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_ave_u_b (v16u8, v16u8); +v8u16 __builtin_msa_ave_u_h (v8u16, v8u16); +v4u32 __builtin_msa_ave_u_w (v4u32, v4u32); +v2u64 __builtin_msa_ave_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_aver_s_b (v16i8, v16i8); +v8i16 __builtin_msa_aver_s_h (v8i16, v8i16); +v4i32 __builtin_msa_aver_s_w (v4i32, v4i32); +v2i64 __builtin_msa_aver_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_aver_u_b (v16u8, v16u8); +v8u16 __builtin_msa_aver_u_h (v8u16, v8u16); +v4u32 __builtin_msa_aver_u_w (v4u32, v4u32); +v2u64 __builtin_msa_aver_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclr_b (v16u8, v16u8); +v8u16 __builtin_msa_bclr_h (v8u16, v8u16); 
+v4u32 __builtin_msa_bclr_w (v4u32, v4u32); +v2u64 __builtin_msa_bclr_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclri_b (v16u8, imm0_7); +v8u16 __builtin_msa_bclri_h (v8u16, imm0_15); +v4u32 __builtin_msa_bclri_w (v4u32, imm0_31); +v2u64 __builtin_msa_bclri_d (v2u64, imm0_63); + +v16u8 __builtin_msa_binsl_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsl_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsl_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsl_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsli_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsli_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsli_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsli_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_binsr_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsr_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsr_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsr_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsri_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsri_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsri_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsri_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_bmnz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmnzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bmz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bneg_b (v16u8, v16u8); +v8u16 __builtin_msa_bneg_h (v8u16, v8u16); +v4u32 __builtin_msa_bneg_w (v4u32, v4u32); +v2u64 __builtin_msa_bneg_d (v2u64, v2u64); + +v16u8 __builtin_msa_bnegi_b (v16u8, imm0_7); +v8u16 __builtin_msa_bnegi_h (v8u16, imm0_15); +v4u32 __builtin_msa_bnegi_w (v4u32, imm0_31); +v2u64 __builtin_msa_bnegi_d (v2u64, imm0_63); + +i32 __builtin_msa_bnz_b (v16u8); +i32 __builtin_msa_bnz_h (v8u16); +i32 __builtin_msa_bnz_w (v4u32); +i32 __builtin_msa_bnz_d (v2u64); + +i32 __builtin_msa_bnz_v (v16u8); + +v16u8 __builtin_msa_bsel_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bseli_b (v16u8, v16u8, 
imm0_255); + +v16u8 __builtin_msa_bset_b (v16u8, v16u8); +v8u16 __builtin_msa_bset_h (v8u16, v8u16); +v4u32 __builtin_msa_bset_w (v4u32, v4u32); +v2u64 __builtin_msa_bset_d (v2u64, v2u64); + +v16u8 __builtin_msa_bseti_b (v16u8, imm0_7); +v8u16 __builtin_msa_bseti_h (v8u16, imm0_15); +v4u32 __builtin_msa_bseti_w (v4u32, imm0_31); +v2u64 __builtin_msa_bseti_d (v2u64, imm0_63); + +i32 __builtin_msa_bz_b (v16u8); +i32 __builtin_msa_bz_h (v8u16); +i32 __builtin_msa_bz_w (v4u32); +i32 __builtin_msa_bz_d (v2u64); + +i32 __builtin_msa_bz_v (v16u8); + +v16i8 __builtin_msa_ceq_b (v16i8, v16i8); +v8i16 __builtin_msa_ceq_h (v8i16, v8i16); +v4i32 __builtin_msa_ceq_w (v4i32, v4i32); +v2i64 __builtin_msa_ceq_d (v2i64, v2i64); + +v16i8 __builtin_msa_ceqi_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_ceqi_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_ceqi_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_ceqi_d (v2i64, imm_n16_15); + +i32 __builtin_msa_cfcmsa (imm0_31); + +v16i8 __builtin_msa_cle_s_b (v16i8, v16i8); +v8i16 __builtin_msa_cle_s_h (v8i16, v8i16); +v4i32 __builtin_msa_cle_s_w (v4i32, v4i32); +v2i64 __builtin_msa_cle_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_cle_u_b (v16u8, v16u8); +v8i16 __builtin_msa_cle_u_h (v8u16, v8u16); +v4i32 __builtin_msa_cle_u_w (v4u32, v4u32); +v2i64 __builtin_msa_cle_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clei_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_clei_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clei_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clei_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clei_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clei_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clei_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clei_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_clt_s_b (v16i8, v16i8); +v8i16 __builtin_msa_clt_s_h (v8i16, v8i16); +v4i32 __builtin_msa_clt_s_w (v4i32, v4i32); +v2i64 __builtin_msa_clt_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_clt_u_b (v16u8, v16u8); +v8i16 __builtin_msa_clt_u_h (v8u16, v8u16); 
+v4i32 __builtin_msa_clt_u_w (v4u32, v4u32); +v2i64 __builtin_msa_clt_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clti_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_clti_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clti_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clti_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clti_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clti_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clti_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clti_u_d (v2u64, imm0_31); + +i32 __builtin_msa_copy_s_b (v16i8, imm0_15); +i32 __builtin_msa_copy_s_h (v8i16, imm0_7); +i32 __builtin_msa_copy_s_w (v4i32, imm0_3); +i64 __builtin_msa_copy_s_d (v2i64, imm0_1); + +u32 __builtin_msa_copy_u_b (v16i8, imm0_15); +u32 __builtin_msa_copy_u_h (v8i16, imm0_7); +u32 __builtin_msa_copy_u_w (v4i32, imm0_3); +u64 __builtin_msa_copy_u_d (v2i64, imm0_1); + +void __builtin_msa_ctcmsa (imm0_31, i32); + +v16i8 __builtin_msa_div_s_b (v16i8, v16i8); +v8i16 __builtin_msa_div_s_h (v8i16, v8i16); +v4i32 __builtin_msa_div_s_w (v4i32, v4i32); +v2i64 __builtin_msa_div_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_div_u_b (v16u8, v16u8); +v8u16 __builtin_msa_div_u_h (v8u16, v8u16); +v4u32 __builtin_msa_div_u_w (v4u32, v4u32); +v2u64 __builtin_msa_div_u_d (v2u64, v2u64); + +v8i16 __builtin_msa_dotp_s_h (v16i8, v16i8); +v4i32 __builtin_msa_dotp_s_w (v8i16, v8i16); +v2i64 __builtin_msa_dotp_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_dotp_u_h (v16u8, v16u8); +v4u32 __builtin_msa_dotp_u_w (v8u16, v8u16); +v2u64 __builtin_msa_dotp_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_dpadd_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpadd_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpadd_s_d (v2i64, v4i32, v4i32); + +v8u16 __builtin_msa_dpadd_u_h (v8u16, v16u8, v16u8); +v4u32 __builtin_msa_dpadd_u_w (v4u32, v8u16, v8u16); +v2u64 __builtin_msa_dpadd_u_d (v2u64, v4u32, v4u32); + +v8i16 __builtin_msa_dpsub_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpsub_s_w (v4i32, v8i16, v8i16); +v2i64 
__builtin_msa_dpsub_s_d (v2i64, v4i32, v4i32); + +v8i16 __builtin_msa_dpsub_u_h (v8i16, v16u8, v16u8); +v4i32 __builtin_msa_dpsub_u_w (v4i32, v8u16, v8u16); +v2i64 __builtin_msa_dpsub_u_d (v2i64, v4u32, v4u32); + +v4f32 __builtin_msa_fadd_w (v4f32, v4f32); +v2f64 __builtin_msa_fadd_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fcaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fceq_w (v4f32, v4f32); +v2i64 __builtin_msa_fceq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclass_w (v4f32); +v2i64 __builtin_msa_fclass_d (v2f64); + +v4i32 __builtin_msa_fcle_w (v4f32, v4f32); +v2i64 __builtin_msa_fcle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclt_w (v4f32, v4f32); +v2i64 __builtin_msa_fclt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcne_w (v4f32, v4f32); +v2i64 __builtin_msa_fcne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcor_w (v4f32, v4f32); +v2i64 __builtin_msa_fcor_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fcueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcule_w (v4f32, v4f32); +v2i64 __builtin_msa_fcule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcult_w (v4f32, v4f32); +v2i64 __builtin_msa_fcult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcun_w (v4f32, v4f32); +v2i64 __builtin_msa_fcun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcune_w (v4f32, v4f32); +v2i64 __builtin_msa_fcune_d (v2f64, v2f64); + +v4f32 __builtin_msa_fdiv_w (v4f32, v4f32); +v2f64 __builtin_msa_fdiv_d (v2f64, v2f64); + +v8i16 __builtin_msa_fexdo_h (v4f32, v4f32); +v4f32 __builtin_msa_fexdo_w (v2f64, v2f64); + +v4f32 __builtin_msa_fexp2_w (v4f32, v4i32); +v2f64 __builtin_msa_fexp2_d (v2f64, v2i64); + +v4f32 __builtin_msa_fexupl_w (v8i16); +v2f64 __builtin_msa_fexupl_d (v4f32); + +v4f32 __builtin_msa_fexupr_w (v8i16); +v2f64 __builtin_msa_fexupr_d (v4f32); + +v4f32 __builtin_msa_ffint_s_w (v4i32); +v2f64 __builtin_msa_ffint_s_d (v2i64); + +v4f32 __builtin_msa_ffint_u_w (v4u32); +v2f64 __builtin_msa_ffint_u_d (v2u64); + +v4f32 
__builtin_msa_ffql_w (v8i16); +v2f64 __builtin_msa_ffql_d (v4i32); + +v4f32 __builtin_msa_ffqr_w (v8i16); +v2f64 __builtin_msa_ffqr_d (v4i32); + +v16i8 __builtin_msa_fill_b (i32); +v8i16 __builtin_msa_fill_h (i32); +v4i32 __builtin_msa_fill_w (i32); +v2i64 __builtin_msa_fill_d (i64); + +v4f32 __builtin_msa_flog2_w (v4f32); +v2f64 __builtin_msa_flog2_d (v2f64); + +v4f32 __builtin_msa_fmadd_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmadd_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmax_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmax_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmsub_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmsub_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmul_w (v4f32, v4f32); +v2f64 __builtin_msa_fmul_d (v2f64, v2f64); + +v4f32 __builtin_msa_frint_w (v4f32); +v2f64 __builtin_msa_frint_d (v2f64); + +v4f32 __builtin_msa_frcp_w (v4f32); +v2f64 __builtin_msa_frcp_d (v2f64); + +v4f32 __builtin_msa_frsqrt_w (v4f32); +v2f64 __builtin_msa_frsqrt_d (v2f64); + +v4i32 __builtin_msa_fsaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fsaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fseq_w (v4f32, v4f32); +v2i64 __builtin_msa_fseq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsle_w (v4f32, v4f32); +v2i64 __builtin_msa_fsle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fslt_w (v4f32, v4f32); +v2i64 __builtin_msa_fslt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsne_w (v4f32, v4f32); +v2i64 __builtin_msa_fsne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsor_w (v4f32, v4f32); +v2i64 __builtin_msa_fsor_d (v2f64, v2f64); + +v4f32 __builtin_msa_fsqrt_w (v4f32); +v2f64 __builtin_msa_fsqrt_d (v2f64); + +v4f32 __builtin_msa_fsub_w (v4f32, v4f32); +v2f64 __builtin_msa_fsub_d (v2f64, v2f64); + +v4i32 
__builtin_msa_fsueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fsueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsule_w (v4f32, v4f32); +v2i64 __builtin_msa_fsule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsult_w (v4f32, v4f32); +v2i64 __builtin_msa_fsult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsun_w (v4f32, v4f32); +v2i64 __builtin_msa_fsun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsune_w (v4f32, v4f32); +v2i64 __builtin_msa_fsune_d (v2f64, v2f64); + +v4i32 __builtin_msa_ftint_s_w (v4f32); +v2i64 __builtin_msa_ftint_s_d (v2f64); + +v4u32 __builtin_msa_ftint_u_w (v4f32); +v2u64 __builtin_msa_ftint_u_d (v2f64); + +v8i16 __builtin_msa_ftq_h (v4f32, v4f32); +v4i32 __builtin_msa_ftq_w (v2f64, v2f64); + +v4i32 __builtin_msa_ftrunc_s_w (v4f32); +v2i64 __builtin_msa_ftrunc_s_d (v2f64); + +v4u32 __builtin_msa_ftrunc_u_w (v4f32); +v2u64 __builtin_msa_ftrunc_u_d (v2f64); + +v8i16 __builtin_msa_hadd_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hadd_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hadd_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_hadd_u_h (v16u8, v16u8); +v4u32 __builtin_msa_hadd_u_w (v8u16, v8u16); +v2u64 __builtin_msa_hadd_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_hsub_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hsub_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hsub_s_d (v4i32, v4i32); + +v8i16 __builtin_msa_hsub_u_h (v16u8, v16u8); +v4i32 __builtin_msa_hsub_u_w (v8u16, v8u16); +v2i64 __builtin_msa_hsub_u_d (v4u32, v4u32); + +v16i8 __builtin_msa_ilvev_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvev_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvev_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvev_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvl_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvl_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvl_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvl_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvod_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvod_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvod_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvod_d (v2i64, v2i64); + +v16i8 
__builtin_msa_ilvr_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvr_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvr_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvr_d (v2i64, v2i64); + +v16i8 __builtin_msa_insert_b (v16i8, imm0_15, i32); +v8i16 __builtin_msa_insert_h (v8i16, imm0_7, i32); +v4i32 __builtin_msa_insert_w (v4i32, imm0_3, i32); +v2i64 __builtin_msa_insert_d (v2i64, imm0_1, i64); + +v16i8 __builtin_msa_insve_b (v16i8, imm0_15, v16i8); +v8i16 __builtin_msa_insve_h (v8i16, imm0_7, v8i16); +v4i32 __builtin_msa_insve_w (v4i32, imm0_3, v4i32); +v2i64 __builtin_msa_insve_d (v2i64, imm0_1, v2i64); + +v16i8 __builtin_msa_ld_b (void *, imm_n512_511); +v8i16 __builtin_msa_ld_h (void *, imm_n1024_1022); +v4i32 __builtin_msa_ld_w (void *, imm_n2048_2044); +v2i64 __builtin_msa_ld_d (void *, imm_n4096_4088); + +v16i8 __builtin_msa_ldi_b (imm_n512_511); +v8i16 __builtin_msa_ldi_h (imm_n512_511); +v4i32 __builtin_msa_ldi_w (imm_n512_511); +v2i64 __builtin_msa_ldi_d (imm_n512_511); + +v8i16 __builtin_msa_madd_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_madd_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_maddr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_maddv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_maddv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_maddv_d (v2i64, v2i64, v2i64); + +v16i8 __builtin_msa_max_a_b (v16i8, v16i8); +v8i16 __builtin_msa_max_a_h (v8i16, v8i16); +v4i32 __builtin_msa_max_a_w (v4i32, v4i32); +v2i64 __builtin_msa_max_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_max_s_b (v16i8, v16i8); +v8i16 __builtin_msa_max_s_h (v8i16, v8i16); +v4i32 __builtin_msa_max_s_w (v4i32, v4i32); +v2i64 __builtin_msa_max_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_max_u_b (v16u8, v16u8); +v8u16 __builtin_msa_max_u_h (v8u16, v8u16); +v4u32 __builtin_msa_max_u_w (v4u32, v4u32); +v2u64 __builtin_msa_max_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_maxi_s_b (v16i8, 
imm_n16_15); +v8i16 __builtin_msa_maxi_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_maxi_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_maxi_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_maxi_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_maxi_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_maxi_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_maxi_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_min_a_b (v16i8, v16i8); +v8i16 __builtin_msa_min_a_h (v8i16, v8i16); +v4i32 __builtin_msa_min_a_w (v4i32, v4i32); +v2i64 __builtin_msa_min_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_min_s_b (v16i8, v16i8); +v8i16 __builtin_msa_min_s_h (v8i16, v8i16); +v4i32 __builtin_msa_min_s_w (v4i32, v4i32); +v2i64 __builtin_msa_min_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_min_u_b (v16u8, v16u8); +v8u16 __builtin_msa_min_u_h (v8u16, v8u16); +v4u32 __builtin_msa_min_u_w (v4u32, v4u32); +v2u64 __builtin_msa_min_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_mini_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_mini_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_mini_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_mini_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_mini_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_mini_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_mini_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_mini_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_mod_s_b (v16i8, v16i8); +v8i16 __builtin_msa_mod_s_h (v8i16, v8i16); +v4i32 __builtin_msa_mod_s_w (v4i32, v4i32); +v2i64 __builtin_msa_mod_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_mod_u_b (v16u8, v16u8); +v8u16 __builtin_msa_mod_u_h (v8u16, v8u16); +v4u32 __builtin_msa_mod_u_w (v4u32, v4u32); +v2u64 __builtin_msa_mod_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_move_v (v16i8); + +v8i16 __builtin_msa_msub_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msub_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_msubr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_msubv_b (v16i8, v16i8, v16i8); +v8i16 
__builtin_msa_msubv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_msubv_d (v2i64, v2i64, v2i64); + +v8i16 __builtin_msa_mul_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mul_q_w (v4i32, v4i32); + +v8i16 __builtin_msa_mulr_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mulr_q_w (v4i32, v4i32); + +v16i8 __builtin_msa_mulv_b (v16i8, v16i8); +v8i16 __builtin_msa_mulv_h (v8i16, v8i16); +v4i32 __builtin_msa_mulv_w (v4i32, v4i32); +v2i64 __builtin_msa_mulv_d (v2i64, v2i64); + +v16i8 __builtin_msa_nloc_b (v16i8); +v8i16 __builtin_msa_nloc_h (v8i16); +v4i32 __builtin_msa_nloc_w (v4i32); +v2i64 __builtin_msa_nloc_d (v2i64); + +v16i8 __builtin_msa_nlzc_b (v16i8); +v8i16 __builtin_msa_nlzc_h (v8i16); +v4i32 __builtin_msa_nlzc_w (v4i32); +v2i64 __builtin_msa_nlzc_d (v2i64); + +v16u8 __builtin_msa_nor_v (v16u8, v16u8); + +v16u8 __builtin_msa_nori_b (v16u8, imm0_255); + +v16u8 __builtin_msa_or_v (v16u8, v16u8); + +v16u8 __builtin_msa_ori_b (v16u8, imm0_255); + +v16i8 __builtin_msa_pckev_b (v16i8, v16i8); +v8i16 __builtin_msa_pckev_h (v8i16, v8i16); +v4i32 __builtin_msa_pckev_w (v4i32, v4i32); +v2i64 __builtin_msa_pckev_d (v2i64, v2i64); + +v16i8 __builtin_msa_pckod_b (v16i8, v16i8); +v8i16 __builtin_msa_pckod_h (v8i16, v8i16); +v4i32 __builtin_msa_pckod_w (v4i32, v4i32); +v2i64 __builtin_msa_pckod_d (v2i64, v2i64); + +v16i8 __builtin_msa_pcnt_b (v16i8); +v8i16 __builtin_msa_pcnt_h (v8i16); +v4i32 __builtin_msa_pcnt_w (v4i32); +v2i64 __builtin_msa_pcnt_d (v2i64); + +v16i8 __builtin_msa_sat_s_b (v16i8, imm0_7); +v8i16 __builtin_msa_sat_s_h (v8i16, imm0_15); +v4i32 __builtin_msa_sat_s_w (v4i32, imm0_31); +v2i64 __builtin_msa_sat_s_d (v2i64, imm0_63); + +v16u8 __builtin_msa_sat_u_b (v16u8, imm0_7); +v8u16 __builtin_msa_sat_u_h (v8u16, imm0_15); +v4u32 __builtin_msa_sat_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_sat_u_d (v2u64, imm0_63); + +v16i8 __builtin_msa_shf_b (v16i8, imm0_255); +v8i16 __builtin_msa_shf_h (v8i16, imm0_255); +v4i32 
__builtin_msa_shf_w (v4i32, imm0_255); + +v16i8 __builtin_msa_sld_b (v16i8, v16i8, i32); +v8i16 __builtin_msa_sld_h (v8i16, v8i16, i32); +v4i32 __builtin_msa_sld_w (v4i32, v4i32, i32); +v2i64 __builtin_msa_sld_d (v2i64, v2i64, i32); + +v16i8 __builtin_msa_sldi_b (v16i8, v16i8, imm0_15); +v8i16 __builtin_msa_sldi_h (v8i16, v8i16, imm0_7); +v4i32 __builtin_msa_sldi_w (v4i32, v4i32, imm0_3); +v2i64 __builtin_msa_sldi_d (v2i64, v2i64, imm0_1); + +v16i8 __builtin_msa_sll_b (v16i8, v16i8); +v8i16 __builtin_msa_sll_h (v8i16, v8i16); +v4i32 __builtin_msa_sll_w (v4i32, v4i32); +v2i64 __builtin_msa_sll_d (v2i64, v2i64); + +v16i8 __builtin_msa_slli_b (v16i8, imm0_7); +v8i16 __builtin_msa_slli_h (v8i16, imm0_15); +v4i32 __builtin_msa_slli_w (v4i32, imm0_31); +v2i64 __builtin_msa_slli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_splat_b (v16i8, i32); +v8i16 __builtin_msa_splat_h (v8i16, i32); +v4i32 __builtin_msa_splat_w (v4i32, i32); +v2i64 __builtin_msa_splat_d (v2i64, i32); + +v16i8 __builtin_msa_splati_b (v16i8, imm0_15); +v8i16 __builtin_msa_splati_h (v8i16, imm0_7); +v4i32 __builtin_msa_splati_w (v4i32, imm0_3); +v2i64 __builtin_msa_splati_d (v2i64, imm0_1); + +v16i8 __builtin_msa_sra_b (v16i8, v16i8); +v8i16 __builtin_msa_sra_h (v8i16, v8i16); +v4i32 __builtin_msa_sra_w (v4i32, v4i32); +v2i64 __builtin_msa_sra_d (v2i64, v2i64); + +v16i8 __builtin_msa_srai_b (v16i8, imm0_7); +v8i16 __builtin_msa_srai_h (v8i16, imm0_15); +v4i32 __builtin_msa_srai_w (v4i32, imm0_31); +v2i64 __builtin_msa_srai_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srar_b (v16i8, v16i8); +v8i16 __builtin_msa_srar_h (v8i16, v8i16); +v4i32 __builtin_msa_srar_w (v4i32, v4i32); +v2i64 __builtin_msa_srar_d (v2i64, v2i64); + +v16i8 __builtin_msa_srari_b (v16i8, imm0_7); +v8i16 __builtin_msa_srari_h (v8i16, imm0_15); +v4i32 __builtin_msa_srari_w (v4i32, imm0_31); +v2i64 __builtin_msa_srari_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srl_b (v16i8, v16i8); +v8i16 __builtin_msa_srl_h (v8i16, v8i16); +v4i32 
__builtin_msa_srl_w (v4i32, v4i32); +v2i64 __builtin_msa_srl_d (v2i64, v2i64); + +v16i8 __builtin_msa_srli_b (v16i8, imm0_7); +v8i16 __builtin_msa_srli_h (v8i16, imm0_15); +v4i32 __builtin_msa_srli_w (v4i32, imm0_31); +v2i64 __builtin_msa_srli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srlr_b (v16i8, v16i8); +v8i16 __builtin_msa_srlr_h (v8i16, v8i16); +v4i32 __builtin_msa_srlr_w (v4i32, v4i32); +v2i64 __builtin_msa_srlr_d (v2i64, v2i64); + +v16i8 __builtin_msa_srlri_b (v16i8, imm0_7); +v8i16 __builtin_msa_srlri_h (v8i16, imm0_15); +v4i32 __builtin_msa_srlri_w (v4i32, imm0_31); +v2i64 __builtin_msa_srlri_d (v2i64, imm0_63); + +void __builtin_msa_st_b (v16i8, void *, imm_n512_511); +void __builtin_msa_st_h (v8i16, void *, imm_n1024_1022); +void __builtin_msa_st_w (v4i32, void *, imm_n2048_2044); +void __builtin_msa_st_d (v2i64, void *, imm_n4096_4088); + +v16i8 __builtin_msa_subs_s_b (v16i8, v16i8); +v8i16 __builtin_msa_subs_s_h (v8i16, v8i16); +v4i32 __builtin_msa_subs_s_w (v4i32, v4i32); +v2i64 __builtin_msa_subs_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_subs_u_b (v16u8, v16u8); +v8u16 __builtin_msa_subs_u_h (v8u16, v8u16); +v4u32 __builtin_msa_subs_u_w (v4u32, v4u32); +v2u64 __builtin_msa_subs_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_subsus_u_b (v16u8, v16i8); +v8u16 __builtin_msa_subsus_u_h (v8u16, v8i16); +v4u32 __builtin_msa_subsus_u_w (v4u32, v4i32); +v2u64 __builtin_msa_subsus_u_d (v2u64, v2i64); + +v16i8 __builtin_msa_subsuu_s_b (v16u8, v16u8); +v8i16 __builtin_msa_subsuu_s_h (v8u16, v8u16); +v4i32 __builtin_msa_subsuu_s_w (v4u32, v4u32); +v2i64 __builtin_msa_subsuu_s_d (v2u64, v2u64); + +v16i8 __builtin_msa_subv_b (v16i8, v16i8); +v8i16 __builtin_msa_subv_h (v8i16, v8i16); +v4i32 __builtin_msa_subv_w (v4i32, v4i32); +v2i64 __builtin_msa_subv_d (v2i64, v2i64); + +v16i8 __builtin_msa_subvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_subvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_subvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_subvi_d (v2i64, imm0_31); + +v16i8 
__builtin_msa_vshf_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_vshf_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_vshf_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_vshf_d (v2i64, v2i64, v2i64); + +v16u8 __builtin_msa_xor_v (v16u8, v16u8); + +v16u8 __builtin_msa_xori_b (v16u8, imm0_255); diff --git a/library/stdarch/crates/stdarch-verify/src/lib.rs b/library/stdarch/crates/stdarch-verify/src/lib.rs new file mode 100644 index 000000000000..c81f5f45bcce --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/src/lib.rs @@ -0,0 +1,583 @@ +#![deny(rust_2018_idioms)] +#[macro_use] +extern crate quote; +#[macro_use] +extern crate syn; + +use proc_macro::TokenStream; +use std::{fs::File, io::Read, path::Path}; +use syn::ext::IdentExt; +use syn::parse::Parser as _; + +#[proc_macro] +pub fn x86_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/x86", "core_arch/src/x86_64"]) +} + +#[proc_macro] +pub fn arm_functions(input: TokenStream) -> TokenStream { + functions( + input, + &[ + "core_arch/src/arm", + "core_arch/src/aarch64", + "core_arch/src/arm_shared/neon", + ], + ) +} + +#[proc_macro] +pub fn mips_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/mips"]) +} + +fn functions(input: TokenStream, dirs: &[&str]) -> TokenStream { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let root = dir.parent().expect("root-dir not found"); + + let mut files = Vec::new(); + for dir in dirs { + walk(&root.join(dir), &mut files); + } + assert!(!files.is_empty()); + + let mut functions = Vec::new(); + for &mut (ref mut file, ref path) in &mut files { + for mut item in file.items.drain(..) { + match item { + syn::Item::Fn(f) => functions.push((f, path)), + syn::Item::Mod(ref mut m) => { + if let Some(ref mut m) = m.content { + for i in m.1.drain(..) 
{ + if let syn::Item::Fn(f) = i { + functions.push((f, path)) + } + } + } + } + _ => (), + } + } + } + assert!(!functions.is_empty()); + + let mut tests = std::collections::HashSet::::new(); + for f in &functions { + let id = format!("{}", f.0.sig.ident); + if id.starts_with("test_") { + tests.insert(id); + } + } + assert!(!tests.is_empty()); + + functions.retain(|(f, _)| matches!(f.vis, syn::Visibility::Public(_))); + assert!(!functions.is_empty()); + + let input = proc_macro2::TokenStream::from(input); + + let functions = functions + .iter() + .map(|&(ref f, path)| { + let name = &f.sig.ident; + // println!("{name}"); + let mut arguments = Vec::new(); + let mut const_arguments = Vec::new(); + for input in f.sig.inputs.iter() { + let ty = match *input { + syn::FnArg::Typed(ref c) => &c.ty, + _ => panic!("invalid argument on {name}"), + }; + arguments.push(to_type(ty)); + } + for generic in f.sig.generics.params.iter() { + match *generic { + syn::GenericParam::Const(ref c) => const_arguments.push(to_type(&c.ty)), + syn::GenericParam::Type(ref _t) => (), + _ => panic!("invalid generic argument on {name}"), + }; + } + let ret = match f.sig.output { + syn::ReturnType::Default => quote! { None }, + syn::ReturnType::Type(_, ref t) => { + let ty = to_type(t); + quote! { Some(#ty) } + } + }; + let instrs = find_instrs(&f.attrs); + let target_feature = if let Some(i) = find_target_feature(&f.attrs) { + quote! { Some(#i) } + } else { + quote! 
{ None } + }; + + let required_const = find_required_const("rustc_args_required_const", &f.attrs); + let mut legacy_const_generics = + find_required_const("rustc_legacy_const_generics", &f.attrs); + if !required_const.is_empty() && !legacy_const_generics.is_empty() { + panic!( + "Can't have both #[rustc_args_required_const] and \ + #[rustc_legacy_const_generics]" + ); + } + + // The list of required consts, used to verify the arguments, comes from either the + // `rustc_args_required_const` or the `rustc_legacy_const_generics` attribute. + let required_const = if required_const.is_empty() { + legacy_const_generics.clone() + } else { + required_const + }; + + legacy_const_generics.sort(); + for (idx, ty) in legacy_const_generics + .into_iter() + .zip(const_arguments.into_iter()) + { + arguments.insert(idx, ty); + } + + // strip leading underscore from fn name when building a test + // _mm_foo -> mm_foo such that the test name is test_mm_foo. + let test_name_string = format!("{name}"); + let mut test_name_id = test_name_string.as_str(); + while test_name_id.starts_with('_') { + test_name_id = &test_name_id[1..]; + } + let has_test = tests.contains(&format!("test_{test_name_id}")); + + let doc = find_doc(&f.attrs); + + quote! { + Function { + name: stringify!(#name), + arguments: &[#(#arguments),*], + ret: #ret, + target_feature: #target_feature, + instrs: &[#(#instrs),*], + file: stringify!(#path), + required_const: &[#(#required_const),*], + has_test: #has_test, + doc: #doc + } + } + }) + .collect::>(); + + let ret = quote! { #input: &[Function] = &[#(#functions),*]; }; + // println!("{ret}"); + ret.into() +} + +fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { + match *t { + syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() { + // x86 ... + "__m128" => quote! { &M128 }, + "__m128bh" => quote! { &M128BH }, + "__m128d" => quote! { &M128D }, + "__m128h" => quote! { &M128H }, + "__m128i" => quote! { &M128I }, + "__m256" => quote! 
{ &M256 }, + "__m256bh" => quote! { &M256BH }, + "__m256d" => quote! { &M256D }, + "__m256h" => quote! { &M256H }, + "__m256i" => quote! { &M256I }, + "__m512" => quote! { &M512 }, + "__m512bh" => quote! { &M512BH }, + "__m512d" => quote! { &M512D }, + "__m512h" => quote! { &M512H }, + "__m512i" => quote! { &M512I }, + "__mmask8" => quote! { &MMASK8 }, + "__mmask16" => quote! { &MMASK16 }, + "__mmask32" => quote! { &MMASK32 }, + "__mmask64" => quote! { &MMASK64 }, + "_MM_CMPINT_ENUM" => quote! { &MM_CMPINT_ENUM }, + "_MM_MANTISSA_NORM_ENUM" => quote! { &MM_MANTISSA_NORM_ENUM }, + "_MM_MANTISSA_SIGN_ENUM" => quote! { &MM_MANTISSA_SIGN_ENUM }, + "_MM_PERM_ENUM" => quote! { &MM_PERM_ENUM }, + "bool" => quote! { &BOOL }, + "bf16" => quote! { &BF16 }, + "f16" => quote! { &F16 }, + "f32" => quote! { &F32 }, + "f64" => quote! { &F64 }, + "i16" => quote! { &I16 }, + "i32" => quote! { &I32 }, + "i64" => quote! { &I64 }, + "i8" => quote! { &I8 }, + "u16" => quote! { &U16 }, + "u32" => quote! { &U32 }, + "u64" => quote! { &U64 }, + "u128" => quote! { &U128 }, + "usize" => quote! { &USIZE }, + "u8" => quote! { &U8 }, + "p8" => quote! { &P8 }, + "p16" => quote! { &P16 }, + "Ordering" => quote! { &ORDERING }, + "CpuidResult" => quote! { &CPUID }, + + // arm ... + "int8x4_t" => quote! { &I8X4 }, + "int8x8_t" => quote! { &I8X8 }, + "int8x8x2_t" => quote! { &I8X8X2 }, + "int8x8x3_t" => quote! { &I8X8X3 }, + "int8x8x4_t" => quote! { &I8X8X4 }, + "int8x16x2_t" => quote! { &I8X16X2 }, + "int8x16x3_t" => quote! { &I8X16X3 }, + "int8x16x4_t" => quote! { &I8X16X4 }, + "int8x16_t" => quote! { &I8X16 }, + "int16x2_t" => quote! { &I16X2 }, + "int16x4_t" => quote! { &I16X4 }, + "int16x4x2_t" => quote! { &I16X4X2 }, + "int16x4x3_t" => quote! { &I16X4X3 }, + "int16x4x4_t" => quote! { &I16X4X4 }, + "int16x8_t" => quote! { &I16X8 }, + "int16x8x2_t" => quote! { &I16X8X2 }, + "int16x8x3_t" => quote! { &I16X8X3 }, + "int16x8x4_t" => quote! { &I16X8X4 }, + "int32x2_t" => quote! 
{ &I32X2 }, + "int32x2x2_t" => quote! { &I32X2X2 }, + "int32x2x3_t" => quote! { &I32X2X3 }, + "int32x2x4_t" => quote! { &I32X2X4 }, + "int32x4_t" => quote! { &I32X4 }, + "int32x4x2_t" => quote! { &I32X4X2 }, + "int32x4x3_t" => quote! { &I32X4X3 }, + "int32x4x4_t" => quote! { &I32X4X4 }, + "int64x1_t" => quote! { &I64X1 }, + "int64x1x2_t" => quote! { &I64X1X2 }, + "int64x1x3_t" => quote! { &I64X1X3 }, + "int64x1x4_t" => quote! { &I64X1X4 }, + "int64x2_t" => quote! { &I64X2 }, + "int64x2x2_t" => quote! { &I64X2X2 }, + "int64x2x3_t" => quote! { &I64X2X3 }, + "int64x2x4_t" => quote! { &I64X2X4 }, + "uint8x8_t" => quote! { &U8X8 }, + "uint8x4_t" => quote! { &U8X4 }, + "uint8x8x2_t" => quote! { &U8X8X2 }, + "uint8x16x2_t" => quote! { &U8X16X2 }, + "uint8x16x3_t" => quote! { &U8X16X3 }, + "uint8x16x4_t" => quote! { &U8X16X4 }, + "uint8x8x3_t" => quote! { &U8X8X3 }, + "uint8x8x4_t" => quote! { &U8X8X4 }, + "uint8x16_t" => quote! { &U8X16 }, + "uint16x4_t" => quote! { &U16X4 }, + "uint16x4x2_t" => quote! { &U16X4X2 }, + "uint16x4x3_t" => quote! { &U16X4X3 }, + "uint16x4x4_t" => quote! { &U16X4X4 }, + "uint16x8_t" => quote! { &U16X8 }, + "uint16x8x2_t" => quote! { &U16X8X2 }, + "uint16x8x3_t" => quote! { &U16X8X3 }, + "uint16x8x4_t" => quote! { &U16X8X4 }, + "uint32x2_t" => quote! { &U32X2 }, + "uint32x2x2_t" => quote! { &U32X2X2 }, + "uint32x2x3_t" => quote! { &U32X2X3 }, + "uint32x2x4_t" => quote! { &U32X2X4 }, + "uint32x4_t" => quote! { &U32X4 }, + "uint32x4x2_t" => quote! { &U32X4X2 }, + "uint32x4x3_t" => quote! { &U32X4X3 }, + "uint32x4x4_t" => quote! { &U32X4X4 }, + "uint64x1_t" => quote! { &U64X1 }, + "uint64x1x2_t" => quote! { &U64X1X2 }, + "uint64x1x3_t" => quote! { &U64X1X3 }, + "uint64x1x4_t" => quote! { &U64X1X4 }, + "uint64x2_t" => quote! { &U64X2 }, + "uint64x2x2_t" => quote! { &U64X2X2 }, + "uint64x2x3_t" => quote! { &U64X2X3 }, + "uint64x2x4_t" => quote! { &U64X2X4 }, + "float16x2_t" => quote! { &F16X2 }, + "float16x4_t" => quote! 
{ &F16X4 }, + "float16x4x2_t" => quote! { &F16X4X2 }, + "float16x4x3_t" => quote! { &F16X4X3 }, + "float16x4x4_t" => quote! { &F16X4X4 }, + "float16x8_t" => quote! { &F16X8 }, + "float16x8x2_t" => quote! { &F16X8X2 }, + "float16x8x3_t" => quote! { &F16X8X3 }, + "float16x8x4_t" => quote! { &F16X8X4 }, + "float32x2_t" => quote! { &F32X2 }, + "float32x2x2_t" => quote! { &F32X2X2 }, + "float32x2x3_t" => quote! { &F32X2X3 }, + "float32x2x4_t" => quote! { &F32X2X4 }, + "float32x4_t" => quote! { &F32X4 }, + "float32x4x2_t" => quote! { &F32X4X2 }, + "float32x4x3_t" => quote! { &F32X4X3 }, + "float32x4x4_t" => quote! { &F32X4X4 }, + "float64x1_t" => quote! { &F64X1 }, + "float64x1x2_t" => quote! { &F64X1X2 }, + "float64x1x3_t" => quote! { &F64X1X3 }, + "float64x1x4_t" => quote! { &F64X1X4 }, + "float64x2_t" => quote! { &F64X2 }, + "float64x2x2_t" => quote! { &F64X2X2 }, + "float64x2x3_t" => quote! { &F64X2X3 }, + "float64x2x4_t" => quote! { &F64X2X4 }, + "poly8x8_t" => quote! { &POLY8X8 }, + "poly8x8x2_t" => quote! { &POLY8X8X2 }, + "poly8x8x3_t" => quote! { &POLY8X8X3 }, + "poly8x8x4_t" => quote! { &POLY8X8X4 }, + "poly8x16x2_t" => quote! { &POLY8X16X2 }, + "poly8x16x3_t" => quote! { &POLY8X16X3 }, + "poly8x16x4_t" => quote! { &POLY8X16X4 }, + "p64" => quote! { &P64 }, + "poly64x1_t" => quote! { &POLY64X1 }, + "poly64x2_t" => quote! { &POLY64X2 }, + "poly8x16_t" => quote! { &POLY8X16 }, + "poly16x4_t" => quote! { &POLY16X4 }, + "poly16x4x2_t" => quote! { &P16X4X2 }, + "poly16x4x3_t" => quote! { &P16X4X3 }, + "poly16x4x4_t" => quote! { &P16X4X4 }, + "poly16x8_t" => quote! { &POLY16X8 }, + "poly16x8x2_t" => quote! { &P16X8X2 }, + "poly16x8x3_t" => quote! { &P16X8X3 }, + "poly16x8x4_t" => quote! { &P16X8X4 }, + "poly64x1x2_t" => quote! { &P64X1X2 }, + "poly64x1x3_t" => quote! { &P64X1X3 }, + "poly64x1x4_t" => quote! { &P64X1X4 }, + "poly64x2x2_t" => quote! { &P64X2X2 }, + "poly64x2x3_t" => quote! { &P64X2X3 }, + "poly64x2x4_t" => quote! { &P64X2X4 }, + "p128" => quote! 
{ &P128 }, + + "v16i8" => quote! { &v16i8 }, + "v8i16" => quote! { &v8i16 }, + "v4i32" => quote! { &v4i32 }, + "v2i64" => quote! { &v2i64 }, + "v16u8" => quote! { &v16u8 }, + "v8u16" => quote! { &v8u16 }, + "v4u32" => quote! { &v4u32 }, + "v2u64" => quote! { &v2u64 }, + "v8f16" => quote! { &v8f16 }, + "v4f32" => quote! { &v4f32 }, + "v2f64" => quote! { &v2f64 }, + + // Generic types + "T" => quote! { &GENERICT }, + "U" => quote! { &GENERICU }, + + s => panic!("unsupported type: \"{s}\""), + }, + syn::Type::Ptr(syn::TypePtr { + ref elem, + ref mutability, + .. + }) + | syn::Type::Reference(syn::TypeReference { + ref elem, + ref mutability, + .. + }) => { + // Both pointers and references can have a mut token (*mut and &mut) + if mutability.is_some() { + let tokens = to_type(elem); + quote! { &Type::MutPtr(#tokens) } + } else { + // If they don't (*const or &) then they are "const" + let tokens = to_type(elem); + quote! { &Type::ConstPtr(#tokens) } + } + } + + syn::Type::Slice(_) => panic!("unsupported slice"), + syn::Type::Array(_) => panic!("unsupported array"), + syn::Type::Tuple(_) => quote! { &TUPLE }, + syn::Type::Never(_) => quote! 
{ &NEVER }, + _ => panic!("unsupported type"), + } +} + +fn extract_path_ident(path: &syn::Path) -> syn::Ident { + if path.leading_colon.is_some() { + panic!("unsupported leading colon in path") + } + if path.segments.len() != 1 { + panic!("unsupported path that needs name resolution") + } + match path.segments.first().expect("segment not found").arguments { + syn::PathArguments::None => {} + _ => panic!("unsupported path that has path arguments"), + } + path.segments + .first() + .expect("segment not found") + .ident + .clone() +} + +fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) { + for file in root.read_dir().unwrap() { + let file = file.unwrap(); + if file.file_type().unwrap().is_dir() { + walk(&file.path(), files); + continue; + } + let path = file.path(); + if path.extension().and_then(std::ffi::OsStr::to_str) != Some("rs") { + continue; + } + + if path.file_name().and_then(std::ffi::OsStr::to_str) == Some("test.rs") { + continue; + } + + let mut contents = String::new(); + File::open(&path) + .unwrap_or_else(|_| panic!("can't open file at path: {}", path.display())) + .read_to_string(&mut contents) + .expect("failed to read file to string"); + + files.push(( + syn::parse_str::(&contents).expect("failed to parse"), + path.display().to_string(), + )); + } +} + +fn find_instrs(attrs: &[syn::Attribute]) -> Vec { + struct AssertInstr { + instr: Option, + } + + // A small custom parser to parse out the instruction in `assert_instr`. + // + // TODO: should probably just reuse `Invoc` from the `assert-instr-macro` + // crate. 
+ impl syn::parse::Parse for AssertInstr { + fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result { + let _ = input.parse::().unwrap(); + let _ = input.parse::().unwrap(); + + match input.parse::() { + Ok(ident) if ident == "assert_instr" => {} + _ => { + while !input.is_empty() { + // consume everything + drop(input.parse::()); + } + return Ok(Self { instr: None }); + } + } + + let instrs; + parenthesized!(instrs in input); + + let mut instr = String::new(); + while !instrs.is_empty() { + if let Ok(lit) = instrs.parse::() { + instr.push_str(&lit.value()); + } else if let Ok(ident) = instrs.call(syn::Ident::parse_any) { + instr.push_str(&ident.to_string()); + } else if instrs.parse::().is_ok() { + instr.push('.'); + } else if instrs.parse::().is_ok() { + // consume everything remaining + drop(instrs.parse::()); + break; + } else { + return Err(input.error("failed to parse instruction")); + } + } + Ok(Self { instr: Some(instr) }) + } + } + + attrs + .iter() + .filter_map(|a| { + if let syn::Meta::List(ref l) = a.meta { + if l.path.is_ident("cfg_attr") { + Some(l) + } else { + None + } + } else { + None + } + }) + .filter_map(|l| syn::parse2::(l.tokens.clone()).unwrap().instr) + .collect() +} + +fn find_target_feature(attrs: &[syn::Attribute]) -> Option { + attrs + .iter() + .flat_map(|a| { + #[allow(clippy::collapsible_if)] + if let syn::Meta::List(ref l) = a.meta { + if l.path.is_ident("target_feature") { + if let Ok(l) = + syn::punctuated::Punctuated::::parse_terminated + .parse2(l.tokens.clone()) + { + return l; + } + } + } + syn::punctuated::Punctuated::new() + }) + .find_map(|m| match m { + syn::Meta::NameValue(i) if i.path.is_ident("enable") => { + if let syn::Expr::Lit(lit) = i.value { + Some(lit.lit) + } else { + None + } + } + _ => None, + }) +} + +fn find_doc(attrs: &[syn::Attribute]) -> String { + attrs + .iter() + .filter_map(|a| { + #[allow(clippy::collapsible_if)] + if let syn::Meta::NameValue(ref l) = a.meta { + if l.path.is_ident("doc") { + if 
let syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Str(ref s), + .. + }) = l.value + { + return Some(s.value()); + } + } + } + None + }) + .collect() +} + +fn find_required_const(name: &str, attrs: &[syn::Attribute]) -> Vec { + attrs + .iter() + .filter_map(|a| { + if let syn::Meta::List(ref l) = a.meta { + Some(l) + } else { + None + } + }) + .flat_map(|l| { + if l.path.segments[0].ident == name { + syn::parse2::(l.tokens.clone()) + .unwrap() + .args + } else { + Vec::new() + } + }) + .collect() +} + +struct RustcArgsRequiredConst { + args: Vec, +} + +impl syn::parse::Parse for RustcArgsRequiredConst { + fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result { + let list = syn::punctuated::Punctuated::::parse_terminated(input)?; + Ok(Self { + args: list + .into_iter() + .map(|a| a.base10_parse::()) + .collect::>()?, + }) + } +} diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs new file mode 100644 index 000000000000..a35b8175fb22 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs @@ -0,0 +1,745 @@ +#![allow(unused)] + +use std::collections::HashMap; + +use serde::Deserialize; + +struct Function { + name: &'static str, + arguments: &'static [&'static Type], + ret: Option<&'static Type>, + target_feature: Option<&'static str>, + instrs: &'static [&'static str], + file: &'static str, + required_const: &'static [usize], + has_test: bool, + doc: &'static str, +} + +static F16: Type = Type::PrimFloat(16); +static F32: Type = Type::PrimFloat(32); +static F64: Type = Type::PrimFloat(64); +static I16: Type = Type::PrimSigned(16); +static I32: Type = Type::PrimSigned(32); +static I64: Type = Type::PrimSigned(64); +static I8: Type = Type::PrimSigned(8); +static U16: Type = Type::PrimUnsigned(16); +static U32: Type = Type::PrimUnsigned(32); +static U64: Type = Type::PrimUnsigned(64); +static U8: Type = Type::PrimUnsigned(8); +static NEVER: Type = Type::Never; +static GENERICT: Type = 
Type::GenericParam("T"); +static GENERICU: Type = Type::GenericParam("U"); + +static F16X4: Type = Type::F(16, 4, 1); +static F16X4X2: Type = Type::F(16, 4, 2); +static F16X4X3: Type = Type::F(16, 4, 3); +static F16X4X4: Type = Type::F(16, 4, 4); +static F16X8: Type = Type::F(16, 8, 1); +static F16X8X2: Type = Type::F(16, 8, 2); +static F16X8X3: Type = Type::F(16, 8, 3); +static F16X8X4: Type = Type::F(16, 8, 4); +static F32X2: Type = Type::F(32, 2, 1); +static F32X2X2: Type = Type::F(32, 2, 2); +static F32X2X3: Type = Type::F(32, 2, 3); +static F32X2X4: Type = Type::F(32, 2, 4); +static F32X4: Type = Type::F(32, 4, 1); +static F32X4X2: Type = Type::F(32, 4, 2); +static F32X4X3: Type = Type::F(32, 4, 3); +static F32X4X4: Type = Type::F(32, 4, 4); +static F64X1: Type = Type::F(64, 1, 1); +static F64X1X2: Type = Type::F(64, 1, 2); +static F64X1X3: Type = Type::F(64, 1, 3); +static F64X1X4: Type = Type::F(64, 1, 4); +static F64X2: Type = Type::F(64, 2, 1); +static F64X2X2: Type = Type::F(64, 2, 2); +static F64X2X3: Type = Type::F(64, 2, 3); +static F64X2X4: Type = Type::F(64, 2, 4); +static I16X2: Type = Type::I(16, 2, 1); +static I16X4: Type = Type::I(16, 4, 1); +static I16X4X2: Type = Type::I(16, 4, 2); +static I16X4X3: Type = Type::I(16, 4, 3); +static I16X4X4: Type = Type::I(16, 4, 4); +static I16X8: Type = Type::I(16, 8, 1); +static I16X8X2: Type = Type::I(16, 8, 2); +static I16X8X3: Type = Type::I(16, 8, 3); +static I16X8X4: Type = Type::I(16, 8, 4); +static I32X2: Type = Type::I(32, 2, 1); +static I32X2X2: Type = Type::I(32, 2, 2); +static I32X2X3: Type = Type::I(32, 2, 3); +static I32X2X4: Type = Type::I(32, 2, 4); +static I32X4: Type = Type::I(32, 4, 1); +static I32X4X2: Type = Type::I(32, 4, 2); +static I32X4X3: Type = Type::I(32, 4, 3); +static I32X4X4: Type = Type::I(32, 4, 4); +static I64X1: Type = Type::I(64, 1, 1); +static I64X1X2: Type = Type::I(64, 1, 2); +static I64X1X3: Type = Type::I(64, 1, 3); +static I64X1X4: Type = Type::I(64, 1, 4); +static 
I64X2: Type = Type::I(64, 2, 1); +static I64X2X2: Type = Type::I(64, 2, 2); +static I64X2X3: Type = Type::I(64, 2, 3); +static I64X2X4: Type = Type::I(64, 2, 4); +static I8X16: Type = Type::I(8, 16, 1); +static I8X16X2: Type = Type::I(8, 16, 2); +static I8X16X3: Type = Type::I(8, 16, 3); +static I8X16X4: Type = Type::I(8, 16, 4); +static I8X4: Type = Type::I(8, 4, 1); +static I8X8: Type = Type::I(8, 8, 1); +static I8X8X2: Type = Type::I(8, 8, 2); +static I8X8X3: Type = Type::I(8, 8, 3); +static I8X8X4: Type = Type::I(8, 8, 4); +static P128: Type = Type::PrimPoly(128); +static P16: Type = Type::PrimPoly(16); +static P16X4X2: Type = Type::P(16, 4, 2); +static P16X4X3: Type = Type::P(16, 4, 3); +static P16X4X4: Type = Type::P(16, 4, 4); +static P16X8X2: Type = Type::P(16, 8, 2); +static P16X8X3: Type = Type::P(16, 8, 3); +static P16X8X4: Type = Type::P(16, 8, 4); +static P64: Type = Type::PrimPoly(64); +static P64X1X2: Type = Type::P(64, 1, 2); +static P64X1X3: Type = Type::P(64, 1, 3); +static P64X1X4: Type = Type::P(64, 1, 4); +static P64X2X2: Type = Type::P(64, 2, 2); +static P64X2X3: Type = Type::P(64, 2, 3); +static P64X2X4: Type = Type::P(64, 2, 4); +static P8: Type = Type::PrimPoly(8); +static POLY16X4: Type = Type::P(16, 4, 1); +static POLY16X8: Type = Type::P(16, 8, 1); +static POLY64X1: Type = Type::P(64, 1, 1); +static POLY64X2: Type = Type::P(64, 2, 1); +static POLY8X16: Type = Type::P(8, 16, 1); +static POLY8X16X2: Type = Type::P(8, 16, 2); +static POLY8X16X3: Type = Type::P(8, 16, 3); +static POLY8X16X4: Type = Type::P(8, 16, 4); +static POLY8X8: Type = Type::P(8, 8, 1); +static POLY8X8X2: Type = Type::P(8, 8, 2); +static POLY8X8X3: Type = Type::P(8, 8, 3); +static POLY8X8X4: Type = Type::P(8, 8, 4); +static U16X4: Type = Type::U(16, 4, 1); +static U16X4X2: Type = Type::U(16, 4, 2); +static U16X4X3: Type = Type::U(16, 4, 3); +static U16X4X4: Type = Type::U(16, 4, 4); +static U16X8: Type = Type::U(16, 8, 1); +static U16X8X2: Type = Type::U(16, 8, 2); 
+static U16X8X3: Type = Type::U(16, 8, 3); +static U16X8X4: Type = Type::U(16, 8, 4); +static U32X2: Type = Type::U(32, 2, 1); +static U32X2X2: Type = Type::U(32, 2, 2); +static U32X2X3: Type = Type::U(32, 2, 3); +static U32X2X4: Type = Type::U(32, 2, 4); +static U32X4: Type = Type::U(32, 4, 1); +static U32X4X2: Type = Type::U(32, 4, 2); +static U32X4X3: Type = Type::U(32, 4, 3); +static U32X4X4: Type = Type::U(32, 4, 4); +static U64X1: Type = Type::U(64, 1, 1); +static U64X1X2: Type = Type::U(64, 1, 2); +static U64X1X3: Type = Type::U(64, 1, 3); +static U64X1X4: Type = Type::U(64, 1, 4); +static U64X2: Type = Type::U(64, 2, 1); +static U64X2X2: Type = Type::U(64, 2, 2); +static U64X2X3: Type = Type::U(64, 2, 3); +static U64X2X4: Type = Type::U(64, 2, 4); +static U8X16: Type = Type::U(8, 16, 1); +static U8X16X2: Type = Type::U(8, 16, 2); +static U8X16X3: Type = Type::U(8, 16, 3); +static U8X16X4: Type = Type::U(8, 16, 4); +static U8X8: Type = Type::U(8, 8, 1); +static U8X4: Type = Type::U(8, 4, 1); +static U8X8X2: Type = Type::U(8, 8, 2); +static U8X8X3: Type = Type::U(8, 8, 3); +static U8X8X4: Type = Type::U(8, 8, 4); + +#[derive(Debug, Copy, Clone, PartialEq)] +enum Type { + PrimFloat(u8), + PrimSigned(u8), + PrimUnsigned(u8), + PrimPoly(u8), + MutPtr(&'static Type), + ConstPtr(&'static Type), + GenericParam(&'static str), + I(u8, u8, u8), + U(u8, u8, u8), + P(u8, u8, u8), + F(u8, u8, u8), + Never, +} + +stdarch_verify::arm_functions!(static FUNCTIONS); + +macro_rules! 
bail { + ($($t:tt)*) => (return Err(format!($($t)*))) +} + +#[test] +fn verify_all_signatures() { + // Reference: https://developer.arm.com/architectures/instruction-sets/intrinsics + let json = include_bytes!("../../../intrinsics_data/arm_intrinsics.json"); + let intrinsics: Vec = serde_json::from_slice(json).unwrap(); + let map = parse_intrinsics(intrinsics); + + let mut all_valid = true; + for rust in FUNCTIONS { + if !rust.has_test { + let skip = [ + "vaddq_s64", + "vaddq_u64", + "vrsqrte_f32", + "vtbl1_s8", + "vtbl1_u8", + "vtbl1_p8", + "vtbl2_s8", + "vtbl2_u8", + "vtbl2_p8", + "vtbl3_s8", + "vtbl3_u8", + "vtbl3_p8", + "vtbl4_s8", + "vtbl4_u8", + "vtbl4_p8", + "vtbx1_s8", + "vtbx1_u8", + "vtbx1_p8", + "vtbx2_s8", + "vtbx2_u8", + "vtbx2_p8", + "vtbx3_s8", + "vtbx3_u8", + "vtbx3_p8", + "vtbx4_s8", + "vtbx4_u8", + "vtbx4_p8", + "udf", + "_clz_u8", + "_clz_u16", + "_clz_u32", + "_rbit_u32", + "_rev_u16", + "_rev_u32", + "__breakpoint", + "vpminq_f32", + "vpminq_f64", + "vpmaxq_f32", + "vpmaxq_f64", + "vcombine_s8", + "vcombine_s16", + "vcombine_s32", + "vcombine_s64", + "vcombine_u8", + "vcombine_u16", + "vcombine_u32", + "vcombine_u64", + "vcombine_p64", + "vcombine_f32", + "vcombine_p8", + "vcombine_p16", + "vcombine_f64", + "vtbl1_s8", + "vtbl1_u8", + "vtbl1_p8", + "vtbl2_s8", + "vtbl2_u8", + "vtbl2_p8", + "vtbl3_s8", + "vtbl3_u8", + "vtbl3_p8", + "vtbl4_s8", + "vtbl4_u8", + "vtbl4_p8", + "vtbx1_s8", + "vtbx1_u8", + "vtbx1_p8", + "vtbx2_s8", + "vtbx2_u8", + "vtbx2_p8", + "vtbx3_s8", + "vtbx3_u8", + "vtbx3_p8", + "vtbx4_s8", + "vtbx4_u8", + "vtbx4_p8", + "vqtbl1_s8", + "vqtbl1q_s8", + "vqtbl1_u8", + "vqtbl1q_u8", + "vqtbl1_p8", + "vqtbl1q_p8", + "vqtbx1_s8", + "vqtbx1q_s8", + "vqtbx1_u8", + "vqtbx1q_u8", + "vqtbx1_p8", + "vqtbx1q_p8", + "vqtbl2_s8", + "vqtbl2q_s8", + "vqtbl2_u8", + "vqtbl2q_u8", + "vqtbl2_p8", + "vqtbl2q_p8", + "vqtbx2_s8", + "vqtbx2q_s8", + "vqtbx2_u8", + "vqtbx2q_u8", + "vqtbx2_p8", + "vqtbx2q_p8", + "vqtbl3_s8", + "vqtbl3q_s8", + "vqtbl3_u8", 
+ "vqtbl3q_u8", + "vqtbl3_p8", + "vqtbl3q_p8", + "vqtbx3_s8", + "vqtbx3q_s8", + "vqtbx3_u8", + "vqtbx3q_u8", + "vqtbx3_p8", + "vqtbx3q_p8", + "vqtbl4_s8", + "vqtbl4q_s8", + "vqtbl4_u8", + "vqtbl4q_u8", + "vqtbl4_p8", + "vqtbl4q_p8", + "vqtbx4_s8", + "vqtbx4q_s8", + "vqtbx4_u8", + "vqtbx4q_u8", + "vqtbx4_p8", + "vqtbx4q_p8", + "brk", + "_rev_u64", + "_clz_u64", + "_rbit_u64", + "_cls_u32", + "_cls_u64", + "_prefetch", + "vsli_n_s8", + "vsliq_n_s8", + "vsli_n_s16", + "vsliq_n_s16", + "vsli_n_s32", + "vsliq_n_s32", + "vsli_n_s64", + "vsliq_n_s64", + "vsli_n_u8", + "vsliq_n_u8", + "vsli_n_u16", + "vsliq_n_u16", + "vsli_n_u32", + "vsliq_n_u32", + "vsli_n_u64", + "vsliq_n_u64", + "vsli_n_p8", + "vsliq_n_p8", + "vsli_n_p16", + "vsliq_n_p16", + "vsli_n_p64", + "vsliq_n_p64", + "vsri_n_s8", + "vsriq_n_s8", + "vsri_n_s16", + "vsriq_n_s16", + "vsri_n_s32", + "vsriq_n_s32", + "vsri_n_s64", + "vsriq_n_s64", + "vsri_n_u8", + "vsriq_n_u8", + "vsri_n_u16", + "vsriq_n_u16", + "vsri_n_u32", + "vsriq_n_u32", + "vsri_n_u64", + "vsriq_n_u64", + "vsri_n_p8", + "vsriq_n_p8", + "vsri_n_p16", + "vsriq_n_p16", + "vsri_n_p64", + "vsriq_n_p64", + "__smulbb", + "__smultb", + "__smulbt", + "__smultt", + "__smulwb", + "__smulwt", + "__qadd", + "__qsub", + "__qdbl", + "__smlabb", + "__smlabt", + "__smlatb", + "__smlatt", + "__smlawb", + "__smlawt", + "__qadd8", + "__qsub8", + "__qsub16", + "__qadd16", + "__qasx", + "__qsax", + "__sadd16", + "__sadd8", + "__smlad", + "__smlsd", + "__sasx", + "__sel", + "__shadd8", + "__shadd16", + "__shsub8", + "__usub8", + "__ssub8", + "__shsub16", + "__smuad", + "__smuadx", + "__smusd", + "__smusdx", + "__usad8", + "__usada8", + "__ldrex", + "__strex", + "__ldrexb", + "__strexb", + "__ldrexh", + "__strexh", + "__clrex", + "__dbg", + ]; + } + + // Skip some intrinsics that aren't NEON and are located in different + // places than the whitelists below. 
+ match rust.name { + "brk" | "__breakpoint" | "udf" | "_prefetch" => continue, + _ => {} + } + // Skip some intrinsics that are present in GCC and Clang but + // are missing from the official documentation. + let skip_intrinsic_verify = [ + "vmov_n_p64", + "vmovq_n_p64", + "vreinterpret_p64_s64", + "vreinterpret_f32_p64", + "vreinterpretq_f32_p64", + "vreinterpretq_p64_p128", + "vreinterpretq_p128_p64", + "vreinterpretq_f32_p128", + "vtst_p16", + "vtstq_p16", + "__dbg", + ]; + let arm = match map.get(rust.name) { + Some(i) => i, + None => { + // Skip all these intrinsics as they're not listed in NEON + // descriptions online. + // + // TODO: we still need to verify these intrinsics or find a + // reference for them, need to figure out where though! + if !rust.file.ends_with("dsp.rs\"") + && !rust.file.ends_with("sat.rs\"") + && !rust.file.ends_with("simd32.rs\"") + && !rust.file.ends_with("v6.rs\"") + && !rust.file.ends_with("v7.rs\"") + && !rust.file.ends_with("v8.rs\"") + && !rust.file.ends_with("tme.rs\"") + && !rust.file.ends_with("mte.rs\"") + && !rust.file.ends_with("ex.rs\"") + && !skip_intrinsic_verify.contains(&rust.name) + { + println!( + "missing arm definition for {:?} in {}", + rust.name, rust.file + ); + all_valid = false; + } + continue; + } + }; + + if let Err(e) = matches(rust, arm) { + println!("failed to verify `{}`", rust.name); + println!(" * {e}"); + all_valid = false; + } + } + assert!(all_valid); +} + +fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> { + if rust.ret != arm.ret.as_ref() { + bail!("mismatched return value") + } + if rust.arguments.len() != arm.arguments.len() { + bail!("mismatched argument lengths"); + } + + let mut nconst = 0; + let iter = rust.arguments.iter().zip(&arm.arguments).enumerate(); + for (i, (rust_ty, (arm, arm_const))) in iter { + if *rust_ty != arm { + bail!("mismatched arguments: {rust_ty:?} != {arm:?}") + } + if *arm_const { + nconst += 1; + if !rust.required_const.contains(&i) { + 
bail!("argument const mismatch"); + } + } + } + if nconst != rust.required_const.len() { + bail!("wrong number of const arguments"); + } + + if rust.instrs.is_empty() { + bail!( + "instruction not listed for `{}`, but arm lists {:?}", + rust.name, + arm.instruction + ); + } else if false + // TODO: This instruction checking logic needs work to handle multiple instructions and to only + // look at aarch64 insructions. + // The ACLE's listed instructions are a guideline only and compilers have the freedom to use + // different instructions in dfferent cases which makes this an unreliable testing method. It + // is of questionable value given the intrinsic test tool. + { + for instr in rust.instrs { + if arm.instruction.starts_with(instr) { + continue; + } + // sometimes arm says `foo` and disassemblers say `vfoo`, or + // sometimes disassemblers say `vfoo` and arm says `sfoo` or `ffoo` + if instr.starts_with('v') + && (arm.instruction.starts_with(&instr[1..]) + || arm.instruction[1..].starts_with(&instr[1..])) + { + continue; + } + bail!( + "arm failed to list `{}` as an instruction for `{}` in {:?}", + instr, + rust.name, + arm.instruction, + ); + } + } + + // TODO: verify `target_feature`. 
+ + Ok(()) +} + +#[derive(PartialEq)] +struct Intrinsic { + name: String, + ret: Option, + arguments: Vec<(Type, bool)>, + instruction: String, +} + +// These structures are similar to those in json_parser.rs in intrinsics-test +#[derive(Deserialize, Debug)] +struct JsonIntrinsic { + name: String, + arguments: Vec, + return_type: ReturnType, + #[serde(default)] + instructions: Vec>, +} + +#[derive(Deserialize, Debug)] +struct ReturnType { + value: String, +} + +fn parse_intrinsics(intrinsics: Vec) -> HashMap { + let mut ret = HashMap::new(); + for intr in intrinsics.into_iter() { + let f = parse_intrinsic(intr); + ret.insert(f.name.clone(), f); + } + ret +} + +fn parse_intrinsic(mut intr: JsonIntrinsic) -> Intrinsic { + let name = intr.name; + let ret = if intr.return_type.value == "void" { + None + } else { + Some(parse_ty(&intr.return_type.value)) + }; + + // This ignores multiple instructions and different optional sequences for now to mimic + // the old HTML scraping behaviour + let instruction = intr.instructions.swap_remove(0).swap_remove(0); + + let arguments = intr + .arguments + .iter() + .map(|s| { + let (ty, konst) = match s.strip_prefix("const") { + Some(stripped) => (stripped.trim_start(), true), + None => (s.as_str(), false), + }; + let ty = ty.rsplit_once(' ').unwrap().0; + (parse_ty(ty), konst) + }) + .collect::>(); + + Intrinsic { + name, + ret, + instruction, + arguments, + } +} + +fn parse_ty(s: &str) -> Type { + let suffix = " const *"; + if let Some(base) = s.strip_suffix(suffix) { + Type::ConstPtr(parse_ty_base(base)) + } else if let Some(base) = s.strip_suffix(" *") { + Type::MutPtr(parse_ty_base(base)) + } else { + *parse_ty_base(s) + } +} + +fn parse_ty_base(s: &str) -> &'static Type { + match s { + "float16_t" => &F16, + "float16x4_t" => &F16X4, + "float16x4x2_t" => &F16X4X2, + "float16x4x3_t" => &F16X4X3, + "float16x4x4_t" => &F16X4X4, + "float16x8_t" => &F16X8, + "float16x8x2_t" => &F16X8X2, + "float16x8x3_t" => &F16X8X3, + 
"float16x8x4_t" => &F16X8X4, + "float32_t" => &F32, + "float32x2_t" => &F32X2, + "float32x2x2_t" => &F32X2X2, + "float32x2x3_t" => &F32X2X3, + "float32x2x4_t" => &F32X2X4, + "float32x4_t" => &F32X4, + "float32x4x2_t" => &F32X4X2, + "float32x4x3_t" => &F32X4X3, + "float32x4x4_t" => &F32X4X4, + "float64_t" => &F64, + "float64x1_t" => &F64X1, + "float64x1x2_t" => &F64X1X2, + "float64x1x3_t" => &F64X1X3, + "float64x1x4_t" => &F64X1X4, + "float64x2_t" => &F64X2, + "float64x2x2_t" => &F64X2X2, + "float64x2x3_t" => &F64X2X3, + "float64x2x4_t" => &F64X2X4, + "int16_t" => &I16, + "int16x2_t" => &I16X2, + "int16x4_t" => &I16X4, + "int16x4x2_t" => &I16X4X2, + "int16x4x3_t" => &I16X4X3, + "int16x4x4_t" => &I16X4X4, + "int16x8_t" => &I16X8, + "int16x8x2_t" => &I16X8X2, + "int16x8x3_t" => &I16X8X3, + "int16x8x4_t" => &I16X8X4, + "int32_t" | "int" => &I32, + "int32x2_t" => &I32X2, + "int32x2x2_t" => &I32X2X2, + "int32x2x3_t" => &I32X2X3, + "int32x2x4_t" => &I32X2X4, + "int32x4_t" => &I32X4, + "int32x4x2_t" => &I32X4X2, + "int32x4x3_t" => &I32X4X3, + "int32x4x4_t" => &I32X4X4, + "int64_t" => &I64, + "int64x1_t" => &I64X1, + "int64x1x2_t" => &I64X1X2, + "int64x1x3_t" => &I64X1X3, + "int64x1x4_t" => &I64X1X4, + "int64x2_t" => &I64X2, + "int64x2x2_t" => &I64X2X2, + "int64x2x3_t" => &I64X2X3, + "int64x2x4_t" => &I64X2X4, + "int8_t" => &I8, + "int8x16_t" => &I8X16, + "int8x16x2_t" => &I8X16X2, + "int8x16x3_t" => &I8X16X3, + "int8x16x4_t" => &I8X16X4, + "int8x4_t" => &I8X4, + "int8x8_t" => &I8X8, + "int8x8x2_t" => &I8X8X2, + "int8x8x3_t" => &I8X8X3, + "int8x8x4_t" => &I8X8X4, + "poly128_t" => &P128, + "poly16_t" => &P16, + "poly16x4_t" => &POLY16X4, + "poly16x4x2_t" => &P16X4X2, + "poly16x4x3_t" => &P16X4X3, + "poly16x4x4_t" => &P16X4X4, + "poly16x8_t" => &POLY16X8, + "poly16x8x2_t" => &P16X8X2, + "poly16x8x3_t" => &P16X8X3, + "poly16x8x4_t" => &P16X8X4, + "poly64_t" => &P64, + "poly64x1_t" => &POLY64X1, + "poly64x1x2_t" => &P64X1X2, + "poly64x1x3_t" => &P64X1X3, + "poly64x1x4_t" => 
&P64X1X4, + "poly64x2_t" => &POLY64X2, + "poly64x2x2_t" => &P64X2X2, + "poly64x2x3_t" => &P64X2X3, + "poly64x2x4_t" => &P64X2X4, + "poly8_t" => &P8, + "poly8x16_t" => &POLY8X16, + "poly8x16x2_t" => &POLY8X16X2, + "poly8x16x3_t" => &POLY8X16X3, + "poly8x16x4_t" => &POLY8X16X4, + "poly8x8_t" => &POLY8X8, + "poly8x8x2_t" => &POLY8X8X2, + "poly8x8x3_t" => &POLY8X8X3, + "poly8x8x4_t" => &POLY8X8X4, + "uint16_t" => &U16, + "uint16x4_t" => &U16X4, + "uint16x4x2_t" => &U16X4X2, + "uint16x4x3_t" => &U16X4X3, + "uint16x4x4_t" => &U16X4X4, + "uint16x8_t" => &U16X8, + "uint16x8x2_t" => &U16X8X2, + "uint16x8x3_t" => &U16X8X3, + "uint16x8x4_t" => &U16X8X4, + "uint32_t" => &U32, + "uint32x2_t" => &U32X2, + "uint32x2x2_t" => &U32X2X2, + "uint32x2x3_t" => &U32X2X3, + "uint32x2x4_t" => &U32X2X4, + "uint32x4_t" => &U32X4, + "uint32x4x2_t" => &U32X4X2, + "uint32x4x3_t" => &U32X4X3, + "uint32x4x4_t" => &U32X4X4, + "uint64_t" => &U64, + "uint64x1_t" => &U64X1, + "uint64x1x2_t" => &U64X1X2, + "uint64x1x3_t" => &U64X1X3, + "uint64x1x4_t" => &U64X1X4, + "uint64x2_t" => &U64X2, + "uint64x2x2_t" => &U64X2X2, + "uint64x2x3_t" => &U64X2X3, + "uint64x2x4_t" => &U64X2X4, + "uint8_t" => &U8, + "uint8x16_t" => &U8X16, + "uint8x16x2_t" => &U8X16X2, + "uint8x16x3_t" => &U8X16X3, + "uint8x16x4_t" => &U8X16X4, + "uint8x8_t" => &U8X8, + "uint8x8x2_t" => &U8X8X2, + "uint8x8x3_t" => &U8X8X3, + "uint8x8x4_t" => &U8X8X4, + + _ => panic!("failed to parse json type {s:?}"), + } +} diff --git a/library/stdarch/crates/stdarch-verify/tests/mips.rs b/library/stdarch/crates/stdarch-verify/tests/mips.rs new file mode 100644 index 000000000000..ba639c3f92f7 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/tests/mips.rs @@ -0,0 +1,367 @@ +//! 
Verification of MIPS MSA intrinsics +#![allow(unused, non_upper_case_globals, clippy::single_match)] + +// This file is obtained from +// https://gcc.gnu.org/onlinedocs//gcc/MIPS-SIMD-Architecture-Built-in-Functions.html +static HEADER: &str = include_str!("../mips-msa.h"); + +stdarch_verify::mips_functions!(static FUNCTIONS); + +struct Function { + name: &'static str, + arguments: &'static [&'static Type], + ret: Option<&'static Type>, + target_feature: Option<&'static str>, + instrs: &'static [&'static str], + file: &'static str, + required_const: &'static [usize], + has_test: bool, + doc: &'static str, +} + +static F16: Type = Type::PrimFloat(16); +static F32: Type = Type::PrimFloat(32); +static F64: Type = Type::PrimFloat(64); +static I8: Type = Type::PrimSigned(8); +static I16: Type = Type::PrimSigned(16); +static I32: Type = Type::PrimSigned(32); +static I64: Type = Type::PrimSigned(64); +static U8: Type = Type::PrimUnsigned(8); +static U16: Type = Type::PrimUnsigned(16); +static U32: Type = Type::PrimUnsigned(32); +static U64: Type = Type::PrimUnsigned(64); +static NEVER: Type = Type::Never; +static TUPLE: Type = Type::Tuple; +static v16i8: Type = Type::I(8, 16, 1); +static v8i16: Type = Type::I(16, 8, 1); +static v4i32: Type = Type::I(32, 4, 1); +static v2i64: Type = Type::I(64, 2, 1); +static v16u8: Type = Type::U(8, 16, 1); +static v8u16: Type = Type::U(16, 8, 1); +static v4u32: Type = Type::U(32, 4, 1); +static v2u64: Type = Type::U(64, 2, 1); +static v8f16: Type = Type::F(16, 8, 1); +static v4f32: Type = Type::F(32, 4, 1); +static v2f64: Type = Type::F(64, 2, 1); + +#[derive(Debug, Copy, Clone, PartialEq)] +enum Type { + PrimFloat(u8), + PrimSigned(u8), + PrimUnsigned(u8), + PrimPoly(u8), + MutPtr(&'static Type), + ConstPtr(&'static Type), + Tuple, + I(u8, u8, u8), + U(u8, u8, u8), + P(u8, u8, u8), + F(u8, u8, u8), + Never, +} + +#[derive(Copy, Clone, Debug, PartialEq)] +#[allow(non_camel_case_types)] +enum MsaTy { + v16i8, + v8i16, + v4i32, + v2i64, + 
v16u8, + v8u16, + v4u32, + v2u64, + v8f16, + v4f32, + v2f64, + imm0_1, + imm0_3, + imm0_7, + imm0_15, + imm0_31, + imm0_63, + imm0_255, + imm_n16_15, + imm_n512_511, + imm_n1024_1022, + imm_n2048_2044, + imm_n4096_4088, + i32, + u32, + i64, + u64, + Void, + MutVoidPtr, +} + +impl<'a> From<&'a str> for MsaTy { + fn from(s: &'a str) -> MsaTy { + match s { + "v16i8" => MsaTy::v16i8, + "v8i16" => MsaTy::v8i16, + "v4i32" => MsaTy::v4i32, + "v2i64" => MsaTy::v2i64, + "v16u8" => MsaTy::v16u8, + "v8u16" => MsaTy::v8u16, + "v4u32" => MsaTy::v4u32, + "v2u64" => MsaTy::v2u64, + "v8f16" => MsaTy::v8f16, + "v4f32" => MsaTy::v4f32, + "v2f64" => MsaTy::v2f64, + "imm0_1" => MsaTy::imm0_1, + "imm0_3" => MsaTy::imm0_3, + "imm0_7" => MsaTy::imm0_7, + "imm0_15" => MsaTy::imm0_15, + "imm0_31" => MsaTy::imm0_31, + "imm0_63" => MsaTy::imm0_63, + "imm0_255" => MsaTy::imm0_255, + "imm_n16_15" => MsaTy::imm_n16_15, + "imm_n512_511" => MsaTy::imm_n512_511, + "imm_n1024_1022" => MsaTy::imm_n1024_1022, + "imm_n2048_2044" => MsaTy::imm_n2048_2044, + "imm_n4096_4088" => MsaTy::imm_n4096_4088, + "i32" => MsaTy::i32, + "u32" => MsaTy::u32, + "i64" => MsaTy::i64, + "u64" => MsaTy::u64, + "void" => MsaTy::Void, + "void *" => MsaTy::MutVoidPtr, + v => panic!("unknown ty: \"{v}\""), + } + } +} + +#[derive(Debug, Clone)] +struct MsaIntrinsic { + id: String, + arg_tys: Vec, + ret_ty: MsaTy, + instruction: String, +} + +struct NoneError; + +impl std::convert::TryFrom<&'static str> for MsaIntrinsic { + // The intrinsics are just C function declarations of the form: + // $ret_ty __builtin_${fn_id}($($arg_ty),*); + type Error = NoneError; + fn try_from(line: &'static str) -> Result { + return inner(line).ok_or(NoneError); + + fn inner(line: &'static str) -> Option { + let first_whitespace = line.find(char::is_whitespace)?; + let ret_ty = &line[0..first_whitespace]; + let ret_ty = MsaTy::from(ret_ty); + + let first_parentheses = line.find('(')?; + assert!(first_parentheses > first_whitespace); + let id = 
&line[first_whitespace + 1..first_parentheses].trim(); + assert!(id.starts_with("__builtin")); + let mut id_str = "_".to_string(); + id_str += &id[9..]; + let id = id_str; + + let mut arg_tys = Vec::new(); + + let last_parentheses = line.find(')')?; + for arg in line[first_parentheses + 1..last_parentheses].split(',') { + let arg = arg.trim(); + arg_tys.push(MsaTy::from(arg)); + } + + // The instruction is the intrinsic name without the __msa_ prefix. + let instruction = &id[6..]; + let mut instruction = instruction.to_string(); + // With all underscores but the first one replaced with a `.` + if let Some(first_underscore) = instruction.find('_') { + let postfix = instruction[first_underscore + 1..].replace('_', "."); + instruction = instruction[0..=first_underscore].to_string(); + instruction += &postfix; + } + + Some(MsaIntrinsic { + id, + ret_ty, + arg_tys, + instruction, + }) + } + } +} + +#[test] +fn verify_all_signatures() { + // Parse the C intrinsic header file: + let mut intrinsics = std::collections::HashMap::::new(); + for line in HEADER.lines() { + if line.is_empty() { + continue; + } + + use std::convert::TryFrom; + let intrinsic: MsaIntrinsic = + TryFrom::try_from(line).unwrap_or_else(|_| panic!("failed to parse line: \"{line}\"")); + assert!(!intrinsics.contains_key(&intrinsic.id)); + intrinsics.insert(intrinsic.id.clone(), intrinsic); + } + + let mut all_valid = true; + for rust in FUNCTIONS { + if !rust.has_test { + let skip = [ + "__msa_ceqi_d", + "__msa_cfcmsa", + "__msa_clei_s_d", + "__msa_clti_s_d", + "__msa_ctcmsa", + "__msa_ldi_d", + "__msa_maxi_s_d", + "__msa_mini_s_d", + "break_", + ]; + if !skip.contains(&rust.name) { + println!( + "missing run-time test named `test_{}` for `{}`", + { + let mut id = rust.name; + while id.starts_with('_') { + id = &id[1..]; + } + id + }, + rust.name + ); + all_valid = false; + } + } + + // Skip some intrinsics that aren't part of MSA + match rust.name { + "break_" => continue, + _ => {} + } + let mips = 
match intrinsics.get(rust.name) { + Some(i) => i, + None => { + eprintln!( + "missing mips definition for {:?} in {}", + rust.name, rust.file + ); + all_valid = false; + continue; + } + }; + + if let Err(e) = matches(rust, mips) { + println!("failed to verify `{}`", rust.name); + println!(" * {e}"); + all_valid = false; + } + } + assert!(all_valid); +} + +fn matches(rust: &Function, mips: &MsaIntrinsic) -> Result<(), String> { + macro_rules! bail { + ($($t:tt)*) => (return Err(format!($($t)*))) + } + + if rust.ret.is_none() && mips.ret_ty != MsaTy::Void { + bail!("mismatched return value") + } + + if rust.arguments.len() != mips.arg_tys.len() { + bail!("mismatched argument lengths"); + } + + let mut nconst = 0; + for (i, (rust_arg, mips_arg)) in rust.arguments.iter().zip(mips.arg_tys.iter()).enumerate() { + match mips_arg { + MsaTy::v16i8 if **rust_arg == v16i8 => (), + MsaTy::v8i16 if **rust_arg == v8i16 => (), + MsaTy::v4i32 if **rust_arg == v4i32 => (), + MsaTy::v2i64 if **rust_arg == v2i64 => (), + MsaTy::v16u8 if **rust_arg == v16u8 => (), + MsaTy::v8u16 if **rust_arg == v8u16 => (), + MsaTy::v4u32 if **rust_arg == v4u32 => (), + MsaTy::v2u64 if **rust_arg == v2u64 => (), + MsaTy::v4f32 if **rust_arg == v4f32 => (), + MsaTy::v2f64 if **rust_arg == v2f64 => (), + MsaTy::imm0_1 + | MsaTy::imm0_3 + | MsaTy::imm0_7 + | MsaTy::imm0_15 + | MsaTy::imm0_31 + | MsaTy::imm0_63 + | MsaTy::imm0_255 + | MsaTy::imm_n16_15 + | MsaTy::imm_n512_511 + | MsaTy::imm_n1024_1022 + | MsaTy::imm_n2048_2044 + | MsaTy::imm_n4096_4088 + if **rust_arg == I32 => {} + MsaTy::i32 if **rust_arg == I32 => (), + MsaTy::i64 if **rust_arg == I64 => (), + MsaTy::u32 if **rust_arg == U32 => (), + MsaTy::u64 if **rust_arg == U64 => (), + MsaTy::MutVoidPtr if **rust_arg == Type::MutPtr(&U8) => (), + m => bail!( + "mismatched argument \"{}\"= \"{:?}\" != \"{:?}\"", + i, + m, + *rust_arg + ), + } + + let is_const = matches!( + mips_arg, + MsaTy::imm0_1 + | MsaTy::imm0_3 + | MsaTy::imm0_7 + | 
MsaTy::imm0_15 + | MsaTy::imm0_31 + | MsaTy::imm0_63 + | MsaTy::imm0_255 + | MsaTy::imm_n16_15 + | MsaTy::imm_n512_511 + | MsaTy::imm_n1024_1022 + | MsaTy::imm_n2048_2044 + | MsaTy::imm_n4096_4088 + ); + if is_const { + nconst += 1; + if !rust.required_const.contains(&i) { + bail!("argument const mismatch"); + } + } + } + + if nconst != rust.required_const.len() { + bail!("wrong number of const arguments"); + } + + if rust.target_feature != Some("msa") { + bail!("wrong target_feature"); + } + + if !rust.instrs.is_empty() { + // Normalize slightly to get rid of assembler differences + let actual = rust.instrs[0].replace('.', "_"); + let expected = mips.instruction.replace('.', "_"); + if actual != expected { + bail!( + "wrong instruction: \"{}\" != \"{}\"", + rust.instrs[0], + mips.instruction + ); + } + } else { + bail!( + "missing assert_instr for \"{}\" (should be \"{}\")", + mips.id, + mips.instruction + ); + } + + Ok(()) +} diff --git a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs new file mode 100644 index 000000000000..02b6bdc76840 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs @@ -0,0 +1,884 @@ +#![allow(unused, non_camel_case_types)] + +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fs::File; +use std::io; +use std::io::{BufWriter, Write}; + +use serde::Deserialize; + +const PRINT_INSTRUCTION_VIOLATIONS: bool = false; +const GENERATE_MISSING_X86_MD: bool = false; +const SS: u8 = (8 * size_of::()) as u8; + +struct Function { + name: &'static str, + arguments: &'static [&'static Type], + ret: Option<&'static Type>, + target_feature: Option<&'static str>, + instrs: &'static [&'static str], + file: &'static str, + required_const: &'static [usize], + has_test: bool, + doc: &'static str, +} + +static BF16: Type = Type::BFloat16; +static F16: Type = Type::PrimFloat(16); +static F32: Type = Type::PrimFloat(32); +static F64: Type = Type::PrimFloat(64); 
+static I8: Type = Type::PrimSigned(8); +static I16: Type = Type::PrimSigned(16); +static I32: Type = Type::PrimSigned(32); +static I64: Type = Type::PrimSigned(64); +static U8: Type = Type::PrimUnsigned(8); +static U16: Type = Type::PrimUnsigned(16); +static U32: Type = Type::PrimUnsigned(32); +static U64: Type = Type::PrimUnsigned(64); +static U128: Type = Type::PrimUnsigned(128); +static USIZE: Type = Type::PrimUnsigned(SS); +static ORDERING: Type = Type::Ordering; + +static M128: Type = Type::M128; +static M128BH: Type = Type::M128BH; +static M128I: Type = Type::M128I; +static M128D: Type = Type::M128D; +static M128H: Type = Type::M128H; +static M256: Type = Type::M256; +static M256BH: Type = Type::M256BH; +static M256I: Type = Type::M256I; +static M256D: Type = Type::M256D; +static M256H: Type = Type::M256H; +static M512: Type = Type::M512; +static M512BH: Type = Type::M512BH; +static M512I: Type = Type::M512I; +static M512D: Type = Type::M512D; +static M512H: Type = Type::M512H; +static MMASK8: Type = Type::MMASK8; +static MMASK16: Type = Type::MMASK16; +static MMASK32: Type = Type::MMASK32; +static MMASK64: Type = Type::MMASK64; +static MM_CMPINT_ENUM: Type = Type::MM_CMPINT_ENUM; +static MM_MANTISSA_NORM_ENUM: Type = Type::MM_MANTISSA_NORM_ENUM; +static MM_MANTISSA_SIGN_ENUM: Type = Type::MM_MANTISSA_SIGN_ENUM; +static MM_PERM_ENUM: Type = Type::MM_PERM_ENUM; + +static TUPLE: Type = Type::Tuple; +static CPUID: Type = Type::CpuidResult; +static NEVER: Type = Type::Never; + +#[derive(Debug, PartialEq, Copy, Clone)] +enum Type { + PrimFloat(u8), + PrimSigned(u8), + PrimUnsigned(u8), + BFloat16, + MutPtr(&'static Type), + ConstPtr(&'static Type), + M128, + M128BH, + M128D, + M128H, + M128I, + M256, + M256BH, + M256D, + M256H, + M256I, + M512, + M512BH, + M512D, + M512H, + M512I, + MMASK8, + MMASK16, + MMASK32, + MMASK64, + MM_CMPINT_ENUM, + MM_MANTISSA_NORM_ENUM, + MM_MANTISSA_SIGN_ENUM, + MM_PERM_ENUM, + Tuple, + CpuidResult, + Never, + Ordering, +} + 
+stdarch_verify::x86_functions!(static FUNCTIONS); + +#[derive(Deserialize)] +struct Data { + #[serde(rename = "intrinsic", default)] + intrinsics: Vec, +} + +#[derive(Deserialize)] +struct Intrinsic { + #[serde(rename = "return")] + return_: Return, + #[serde(rename = "@name")] + name: String, + #[serde(rename = "@tech")] + tech: String, + #[serde(rename = "CPUID", default)] + cpuid: Vec, + #[serde(rename = "parameter", default)] + parameters: Vec, + #[serde(rename = "@sequence", default)] + generates_sequence: bool, + #[serde(default)] + instruction: Vec, +} + +#[derive(Deserialize)] +struct Parameter { + #[serde(rename = "@type")] + type_: String, + #[serde(rename = "@etype", default)] + etype: String, +} + +#[derive(Deserialize)] +struct Return { + #[serde(rename = "@type", default)] + type_: String, +} + +#[derive(Deserialize, Debug)] +struct Instruction { + #[serde(rename = "@name")] + name: String, +} + +macro_rules! bail { + ($($t:tt)*) => { return Err(format!($($t)*)) } +} + +#[test] +fn verify_all_signatures() { + // This XML document was downloaded from Intel's site. To update this you + // can visit intel's intrinsics guide online documentation: + // + // https://software.intel.com/sites/landingpage/IntrinsicsGuide/# + // + // Open up the network console and you'll see an xml file was downloaded + // (currently called data-3.6.9.xml). That's the file we downloaded + // here. 
+ let xml = include_bytes!("../x86-intel.xml"); + + let xml = &xml[..]; + let data: Data = quick_xml::de::from_reader(xml).expect("failed to deserialize xml"); + let mut map = HashMap::new(); + for intrinsic in &data.intrinsics { + map.entry(&intrinsic.name[..]) + .or_insert_with(Vec::new) + .push(intrinsic); + } + + let mut all_valid = true; + 'outer: for rust in FUNCTIONS { + if !rust.has_test { + // FIXME: this list should be almost empty + let skip = [ + // MXCSR - deprecated, immediate UB + "_mm_getcsr", + "_mm_setcsr", + "_MM_GET_EXCEPTION_MASK", + "_MM_GET_EXCEPTION_STATE", + "_MM_GET_FLUSH_ZERO_MODE", + "_MM_GET_ROUNDING_MODE", + "_MM_SET_EXCEPTION_MASK", + "_MM_SET_EXCEPTION_STATE", + "_MM_SET_FLUSH_ZERO_MODE", + "_MM_SET_ROUNDING_MODE", + // CPUID + "__cpuid_count", + "__cpuid", + "__get_cpuid_max", + // Privileged, see https://github.com/rust-lang/stdarch/issues/209 + "_xsetbv", + "_xsaves", + "_xrstors", + "_xsaves64", + "_xrstors64", + "_mm_loadiwkey", + // RDRAND + "_rdrand16_step", + "_rdrand32_step", + "_rdrand64_step", + "_rdseed16_step", + "_rdseed32_step", + "_rdseed64_step", + // Prefetch + "_mm_prefetch", + // CMPXCHG + "cmpxchg16b", + // Undefined + "_mm_undefined_ps", + "_mm_undefined_pd", + "_mm_undefined_si128", + "_mm_undefined_ph", + "_mm256_undefined_ps", + "_mm256_undefined_pd", + "_mm256_undefined_si256", + "_mm256_undefined_ph", + "_mm512_undefined_ps", + "_mm512_undefined_pd", + "_mm512_undefined_epi32", + "_mm512_undefined", + "_mm512_undefined_ph", + // Has doc-tests instead + "_mm256_shuffle_epi32", + "_mm256_unpackhi_epi8", + "_mm256_unpacklo_epi8", + "_mm256_unpackhi_epi16", + "_mm256_unpacklo_epi16", + "_mm256_unpackhi_epi32", + "_mm256_unpacklo_epi32", + "_mm256_unpackhi_epi64", + "_mm256_unpacklo_epi64", + // Has tests with some other intrinsic + "__writeeflags", + "_xrstor", + "_xrstor64", + "_fxrstor", + "_fxrstor64", + "_xend", + "_xabort_code", + // Aliases + "_mm_comige_ss", + "_mm_cvt_ss2si", + "_mm_cvtt_ss2si", + 
"_mm_cvt_si2ss", + "_mm_set_ps1", + "_mm_load_ps1", + "_mm_store_ps1", + "_mm_bslli_si128", + "_mm_bsrli_si128", + "_bextr2_u32", + "_mm_tzcnt_32", + "_mm256_bslli_epi128", + "_mm256_bsrli_epi128", + "_mm_cvtsi64x_si128", + "_mm_cvtsi128_si64x", + "_mm_cvtsi64x_sd", + "_bextr2_u64", + "_mm_tzcnt_64", + ]; + if !skip.contains(&rust.name) { + println!( + "missing run-time test named `test_{}` for `{}`", + { + let mut id = rust.name; + while id.starts_with('_') { + id = &id[1..]; + } + id + }, + rust.name + ); + all_valid = false; + } + } + + match rust.name { + // These aren't defined by Intel but they're defined by what appears + // to be all other compilers. For more information see + // rust-lang/stdarch#307, and otherwise these signatures + // have all been manually verified. + "__readeflags" | + "__writeeflags" | + "__cpuid_count" | + "__cpuid" | + "__get_cpuid_max" | + "_MM_SHUFFLE" | + "_xabort_code" | + // Not listed with intel, but manually verified + "cmpxchg16b" + => continue, + _ => {} + } + + // these are all AMD-specific intrinsics + if let Some(feature) = rust.target_feature { + if feature.contains("sse4a") || feature.contains("tbm") { + continue; + } + } + + let intel = match map.remove(rust.name) { + Some(i) => i, + None => panic!("missing intel definition for {}", rust.name), + }; + + let mut errors = Vec::new(); + for intel in intel { + match matches(rust, intel) { + Ok(()) => continue 'outer, + Err(e) => errors.push(e), + } + } + println!("failed to verify `{}`", rust.name); + for error in errors { + println!(" * {error}"); + } + all_valid = false; + } + assert!(all_valid); + + if GENERATE_MISSING_X86_MD { + print_missing( + &map, + BufWriter::new(File::create("../core_arch/missing-x86.md").unwrap()), + ) + .unwrap(); + } +} + +fn print_missing(map: &HashMap<&str, Vec<&Intrinsic>>, mut f: impl Write) -> io::Result<()> { + let mut missing = BTreeMap::new(); // BTreeMap to keep the cpuids ordered + + // we cannot use SVML and MMX, and MPX is not in 
LLVM, and intrinsics without any cpuid requirement + // are accessible from safe rust + for intrinsic in map.values().flatten().filter(|intrinsic| { + intrinsic.tech != "SVML" + && intrinsic.tech != "MMX" + && !intrinsic.cpuid.is_empty() + && !intrinsic.cpuid.contains(&"MPX".to_string()) + && intrinsic.return_.type_ != "__m64" + && !intrinsic + .parameters + .iter() + .any(|param| param.type_.contains("__m64")) + }) { + missing + .entry(&intrinsic.cpuid) + .or_insert_with(Vec::new) + .push(intrinsic); + } + + for (k, v) in &mut missing { + v.sort_by_key(|intrinsic| &intrinsic.name); // sort to make the order of everything same + writeln!(f, "\n
<details><summary>
{k:?}
</summary>
\n")?; + for intel in v { + let url = format!( + "https://software.intel.com/sites/landingpage\ + /IntrinsicsGuide/#text={}", + intel.name + ); + writeln!(f, " * [ ] [`{}`]({url})", intel.name)?; + } + writeln!(f, "
</details>
\n")?; + } + + f.flush() +} + +fn check_target_features(rust: &Function, intel: &Intrinsic) -> Result<(), String> { + // Verify that all `#[target_feature]` annotations are correct, + // ensuring that we've actually enabled the right instruction + // set for this intrinsic. + match rust.name { + "_bswap" | "_bswap64" => {} + + // These don't actually have a target feature unlike their brethren with + // the `x` inside the name which requires adx + "_addcarry_u32" | "_addcarry_u64" | "_subborrow_u32" | "_subborrow_u64" => {} + + "_bittest" + | "_bittestandset" + | "_bittestandreset" + | "_bittestandcomplement" + | "_bittest64" + | "_bittestandset64" + | "_bittestandreset64" + | "_bittestandcomplement64" => {} + + _ => { + if intel.cpuid.is_empty() { + bail!("missing cpuid for {}", rust.name); + } + } + } + + let rust_features = match rust.target_feature { + Some(features) => features + .split(',') + .map(|feature| feature.to_string()) + .collect(), + None => HashSet::new(), + }; + + let mut intel_cpuids = HashSet::new(); + + for cpuid in &intel.cpuid { + // The pause intrinsic is in the SSE2 module, but it is backwards + // compatible with CPUs without SSE2, and it therefore does not need the + // target-feature attribute. + if rust.name == "_mm_pause" { + continue; + } + + // these flags on the rdtsc/rtdscp intrinsics we don't test for right + // now, but we may wish to add these one day! + // + // For more info see #308 + if *cpuid == "TSC" || *cpuid == "RDTSCP" { + continue; + } + + // Some CPUs support VAES/GFNI/VPCLMULQDQ without AVX512, even though + // the Intel documentation states that those instructions require + // AVX512VL. 
+ if *cpuid == "AVX512VL" + && intel + .cpuid + .iter() + .any(|x| matches!(&**x, "VAES" | "GFNI" | "VPCLMULQDQ")) + { + continue; + } + + let cpuid = cpuid.to_lowercase().replace('_', ""); + + // Fix mismatching feature names: + let fixed_cpuid = match cpuid.as_ref() { + // The XML file names IFMA as "avx512ifma52", while Rust calls + // it "avx512ifma". + "avx512ifma52" => String::from("avx512ifma"), + "xss" => String::from("xsaves"), + "keylocker" => String::from("kl"), + "keylockerwide" => String::from("widekl"), + _ => cpuid, + }; + + intel_cpuids.insert(fixed_cpuid); + } + + if intel_cpuids.contains("gfni") { + if rust.name.contains("mask") { + // LLVM requires avx512bw for all masked GFNI intrinsics, and also avx512vl for the 128- and 256-bit versions + if !rust.name.starts_with("_mm512") { + intel_cpuids.insert(String::from("avx512vl")); + } + intel_cpuids.insert(String::from("avx512bw")); + } else if rust.name.starts_with("_mm256") { + // LLVM requires AVX for all non-masked 256-bit GFNI intrinsics + intel_cpuids.insert(String::from("avx")); + } + } + + // Also, 512-bit vpclmulqdq intrisic requires avx512f + if &rust.name == &"_mm512_clmulepi64_epi128" { + intel_cpuids.insert(String::from("avx512f")); + } + + if rust_features != intel_cpuids { + bail!( + "Intel cpuids `{:?}` doesn't match Rust `{:?}` for {}", + intel_cpuids, + rust_features, + rust.name + ); + } + + Ok(()) +} + +fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { + check_target_features(rust, intel)?; + + if PRINT_INSTRUCTION_VIOLATIONS { + if rust.instrs.is_empty() { + if !intel.instruction.is_empty() && !intel.generates_sequence { + println!( + "instruction not listed for `{}`, but intel lists {:?}", + rust.name, intel.instruction + ); + } + + // If intel doesn't list any instructions and we do then don't + // bother trying to look for instructions in intel, we've just got + // some extra assertions on our end. 
+ } else if !intel.instruction.is_empty() { + for instr in rust.instrs { + let asserting = intel + .instruction + .iter() + .any(|a| a.name.to_lowercase().starts_with(instr)); + if !asserting { + println!( + "intel failed to list `{}` as an instruction for `{}`", + instr, rust.name + ); + } + } + } + } + + // Make sure we've got the right return type. + if let Some(t) = rust.ret { + equate(t, &intel.return_.type_, "", intel, false)?; + } else if !intel.return_.type_.is_empty() && intel.return_.type_ != "void" { + bail!( + "{} returns `{}` with intel, void in rust", + rust.name, + intel.return_.type_ + ); + } + + // If there's no arguments on Rust's side intel may list one "void" + // argument, so handle that here. + if rust.arguments.is_empty() && intel.parameters.len() == 1 { + if intel.parameters[0].type_ != "void" { + bail!("rust has 0 arguments, intel has one for") + } + } else { + // Otherwise we want all parameters to be exactly the same + if rust.arguments.len() != intel.parameters.len() { + bail!("wrong number of arguments on {}", rust.name); + } + for (i, (a, b)) in intel.parameters.iter().zip(rust.arguments).enumerate() { + let is_const = rust.required_const.contains(&i); + equate(b, &a.type_, &a.etype, &intel, is_const)?; + } + } + + let any_i64 = rust + .arguments + .iter() + .cloned() + .chain(rust.ret) + .any(|arg| matches!(*arg, Type::PrimSigned(64) | Type::PrimUnsigned(64))); + let any_i64_exempt = match rust.name { + // These intrinsics have all been manually verified against Clang's + // headers to be available on x86, and the u64 arguments seem + // spurious I guess? 
+ "_xsave" | "_xrstor" | "_xsetbv" | "_xgetbv" | "_xsaveopt" | "_xsavec" | "_xsaves" + | "_xrstors" => true, + + // Apparently all of clang/msvc/gcc accept these intrinsics on + // 32-bit, so let's do the same + "_mm_set_epi64x" + | "_mm_set1_epi64x" + | "_mm256_set_epi64x" + | "_mm256_setr_epi64x" + | "_mm256_set1_epi64x" + | "_mm512_set1_epi64" + | "_mm256_mask_set1_epi64" + | "_mm256_maskz_set1_epi64" + | "_mm_mask_set1_epi64" + | "_mm_maskz_set1_epi64" + | "_mm512_set4_epi64" + | "_mm512_setr4_epi64" + | "_mm512_set_epi64" + | "_mm512_setr_epi64" + | "_mm512_reduce_add_epi64" + | "_mm512_mask_reduce_add_epi64" + | "_mm512_reduce_mul_epi64" + | "_mm512_mask_reduce_mul_epi64" + | "_mm512_reduce_max_epi64" + | "_mm512_mask_reduce_max_epi64" + | "_mm512_reduce_max_epu64" + | "_mm512_mask_reduce_max_epu64" + | "_mm512_reduce_min_epi64" + | "_mm512_mask_reduce_min_epi64" + | "_mm512_reduce_min_epu64" + | "_mm512_mask_reduce_min_epu64" + | "_mm512_reduce_and_epi64" + | "_mm512_mask_reduce_and_epi64" + | "_mm512_reduce_or_epi64" + | "_mm512_mask_reduce_or_epi64" + | "_mm512_mask_set1_epi64" + | "_mm512_maskz_set1_epi64" + | "_mm_cvt_roundss_si64" + | "_mm_cvt_roundss_i64" + | "_mm_cvt_roundss_u64" + | "_mm_cvtss_i64" + | "_mm_cvtss_u64" + | "_mm_cvt_roundsd_si64" + | "_mm_cvt_roundsd_i64" + | "_mm_cvt_roundsd_u64" + | "_mm_cvtsd_i64" + | "_mm_cvtsd_u64" + | "_mm_cvt_roundi64_ss" + | "_mm_cvt_roundi64_sd" + | "_mm_cvt_roundsi64_ss" + | "_mm_cvt_roundsi64_sd" + | "_mm_cvt_roundu64_ss" + | "_mm_cvt_roundu64_sd" + | "_mm_cvti64_ss" + | "_mm_cvti64_sd" + | "_mm_cvtt_roundss_si64" + | "_mm_cvtt_roundss_i64" + | "_mm_cvtt_roundss_u64" + | "_mm_cvttss_i64" + | "_mm_cvttss_u64" + | "_mm_cvtt_roundsd_si64" + | "_mm_cvtt_roundsd_i64" + | "_mm_cvtt_roundsd_u64" + | "_mm_cvttsd_i64" + | "_mm_cvttsd_u64" + | "_mm_cvtu64_ss" + | "_mm_cvtu64_sd" => true, + + // These return a 64-bit argument but they're assembled from other + // 32-bit registers, so these work on 32-bit just fine. 
See #308 for + // more info. + "_rdtsc" | "__rdtscp" => true, + + _ => false, + }; + if any_i64 && !any_i64_exempt && !rust.file.contains("x86_64") { + bail!( + "intrinsic `{}` uses a 64-bit bare type but may be \ + available on 32-bit platforms", + rust.name + ); + } + if !rust.doc.contains("Intel") { + bail!("No link to Intel"); + } + let recognized_links = [ + "https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html", + "https://software.intel.com/sites/landingpage/IntrinsicsGuide/", + ]; + if !recognized_links.iter().any(|link| rust.doc.contains(link)) { + bail!("Unrecognized Intel Link"); + } + if !rust.doc.contains(&rust.name[1..]) { + // We can leave the leading underscore + bail!("Bad link to Intel"); + } + Ok(()) +} + +fn pointed_type(intrinsic: &Intrinsic) -> Result { + Ok( + if intrinsic.tech == "AMX" + || intrinsic + .cpuid + .iter() + .any(|cpuid| matches!(&**cpuid, "KEYLOCKER" | "KEYLOCKER_WIDE" | "XSAVE" | "FXSR")) + { + // AMX, KEYLOCKER and XSAVE intrinsics should take `*u8` + U8 + } else if intrinsic.name == "_mm_clflush" { + // Just a false match in the following logic + U8 + } else if ["_mm_storeu_si", "_mm_loadu_si"] + .iter() + .any(|x| intrinsic.name.starts_with(x)) + { + // These have already been stabilized, so cannot be changed anymore + U8 + } else if intrinsic.name.ends_with("i8") { + I8 + } else if intrinsic.name.ends_with("i16") { + I16 + } else if intrinsic.name.ends_with("i32") { + I32 + } else if intrinsic.name.ends_with("i64") { + I64 + } else if intrinsic.name.ends_with("i128") { + M128I + } else if intrinsic.name.ends_with("i256") { + M256I + } else if intrinsic.name.ends_with("i512") { + M512I + } else if intrinsic.name.ends_with("h") { + F16 + } else if intrinsic.name.ends_with("s") { + F32 + } else if intrinsic.name.ends_with("d") { + F64 + } else { + bail!( + "Don't know what type of *void to use for {}", + intrinsic.name + ); + }, + ) +} + +fn equate( + t: &Type, + intel: &str, + etype: &str, + intrinsic: 
&Intrinsic, + is_const: bool, +) -> Result<(), String> { + // Make pointer adjacent to the type: float * foo => float* foo + let mut intel = intel.replace(" *", "*"); + // Make mutability modifier adjacent to the pointer: + // float const * foo => float const* foo + intel = intel.replace("const *", "const*"); + // Normalize mutability modifier to after the type: + // const float* foo => float const* + if intel.starts_with("const") && intel.ends_with('*') { + intel = intel.replace("const ", ""); + intel = intel.replace('*', " const*"); + } + if etype == "IMM" || intel == "constexpr int" { + // The _bittest intrinsics claim to only accept immediates but actually + // accept run-time values as well. + if !is_const && !intrinsic.name.starts_with("_bittest") { + bail!("argument required to be const but isn't"); + } + } else { + // const int must be an IMM + assert_ne!(intel, "const int"); + if is_const { + bail!("argument is const but shouldn't be"); + } + } + match (t, &intel[..]) { + (&Type::PrimFloat(16), "_Float16") => {} + (&Type::PrimFloat(32), "float") => {} + (&Type::PrimFloat(64), "double") => {} + (&Type::PrimSigned(8), "__int8" | "char") => {} + (&Type::PrimSigned(16), "__int16" | "short") => {} + (&Type::PrimSigned(32), "__int32" | "constexpr int" | "const int" | "int") => {} + (&Type::PrimSigned(64), "__int64" | "long long") => {} + (&Type::PrimUnsigned(8), "unsigned char") => {} + (&Type::PrimUnsigned(16), "unsigned short") => {} + (&Type::BFloat16, "__bfloat16") => {} + ( + &Type::PrimUnsigned(32), + "unsigned __int32" | "unsigned int" | "unsigned long" | "const unsigned int", + ) => {} + (&Type::PrimUnsigned(64), "unsigned __int64") => {} + (&Type::PrimUnsigned(SS), "size_t") => {} + + (&Type::M128, "__m128") => {} + (&Type::M128BH, "__m128bh") => {} + (&Type::M128I, "__m128i") => {} + (&Type::M128D, "__m128d") => {} + (&Type::M128H, "__m128h") => {} + (&Type::M256, "__m256") => {} + (&Type::M256BH, "__m256bh") => {} + (&Type::M256I, "__m256i") => {} + 
(&Type::M256D, "__m256d") => {} + (&Type::M256H, "__m256h") => {} + (&Type::M512, "__m512") => {} + (&Type::M512BH, "__m512bh") => {} + (&Type::M512I, "__m512i") => {} + (&Type::M512D, "__m512d") => {} + (&Type::M512H, "__m512h") => {} + (&Type::MMASK64, "__mmask64") => {} + (&Type::MMASK32, "__mmask32") => {} + (&Type::MMASK16, "__mmask16") => {} + (&Type::MMASK8, "__mmask8") => {} + + (&Type::MutPtr(_type), "void*") | (&Type::ConstPtr(_type), "void const*") => { + let pointed_type = pointed_type(intrinsic)?; + if _type != &pointed_type { + bail!( + "incorrect void pointer type {_type:?} in {}, should be pointer to {pointed_type:?}", + intrinsic.name, + ); + } + } + + (&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {} + (&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {} + (&Type::MutPtr(&Type::PrimSigned(8)), "char*") => {} + (&Type::MutPtr(&Type::PrimSigned(32)), "__int32*" | "int*") => {} + (&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {} + (&Type::MutPtr(&Type::PrimUnsigned(8)), "unsigned char*") => {} + (&Type::MutPtr(&Type::PrimUnsigned(16)), "unsigned short*") => {} + (&Type::MutPtr(&Type::PrimUnsigned(32)), "unsigned int*" | "unsigned __int32*") => {} + (&Type::MutPtr(&Type::PrimUnsigned(64)), "unsigned __int64*") => {} + + (&Type::MutPtr(&Type::MMASK8), "__mmask8*") => {} + (&Type::MutPtr(&Type::MMASK32), "__mmask32*") => {} + (&Type::MutPtr(&Type::MMASK64), "__mmask64*") => {} + (&Type::MutPtr(&Type::MMASK16), "__mmask16*") => {} + + (&Type::MutPtr(&Type::M128), "__m128*") => {} + (&Type::MutPtr(&Type::M128BH), "__m128bh*") => {} + (&Type::MutPtr(&Type::M128I), "__m128i*") => {} + (&Type::MutPtr(&Type::M128D), "__m128d*") => {} + (&Type::MutPtr(&Type::M256), "__m256*") => {} + (&Type::MutPtr(&Type::M256BH), "__m256bh*") => {} + (&Type::MutPtr(&Type::M256I), "__m256i*") => {} + (&Type::MutPtr(&Type::M256D), "__m256d*") => {} + (&Type::MutPtr(&Type::M512), "__m512*") => {} + (&Type::MutPtr(&Type::M512BH), "__m512bh*") => {} + 
(&Type::MutPtr(&Type::M512I), "__m512i*") => {} + (&Type::MutPtr(&Type::M512D), "__m512d*") => {} + + (&Type::ConstPtr(&Type::PrimFloat(16)), "_Float16 const*") => {} + (&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {} + (&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {} + (&Type::ConstPtr(&Type::PrimSigned(8)), "char const*") => {} + (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*" | "int const*") => {} + (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {} + (&Type::ConstPtr(&Type::PrimUnsigned(16)), "unsigned short const*") => {} + (&Type::ConstPtr(&Type::PrimUnsigned(32)), "unsigned int const*") => {} + (&Type::ConstPtr(&Type::PrimUnsigned(64)), "unsigned __int64 const*") => {} + (&Type::ConstPtr(&Type::BFloat16), "__bf16 const*") => {} + + (&Type::ConstPtr(&Type::M128), "__m128 const*") => {} + (&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {} + (&Type::ConstPtr(&Type::M128I), "__m128i const*") => {} + (&Type::ConstPtr(&Type::M128D), "__m128d const*") => {} + (&Type::ConstPtr(&Type::M128H), "__m128h const*") => {} + (&Type::ConstPtr(&Type::M256), "__m256 const*") => {} + (&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {} + (&Type::ConstPtr(&Type::M256I), "__m256i const*") => {} + (&Type::ConstPtr(&Type::M256D), "__m256d const*") => {} + (&Type::ConstPtr(&Type::M256H), "__m256h const*") => {} + (&Type::ConstPtr(&Type::M512), "__m512 const*") => {} + (&Type::ConstPtr(&Type::M512BH), "__m512bh const*") => {} + (&Type::ConstPtr(&Type::M512I), "__m512i const*") => {} + (&Type::ConstPtr(&Type::M512D), "__m512d const*") => {} + + (&Type::ConstPtr(&Type::MMASK8), "__mmask8*") => {} + (&Type::ConstPtr(&Type::MMASK16), "__mmask16*") => {} + (&Type::ConstPtr(&Type::MMASK32), "__mmask32*") => {} + (&Type::ConstPtr(&Type::MMASK64), "__mmask64*") => {} + + (&Type::MM_CMPINT_ENUM, "_MM_CMPINT_ENUM") => {} + (&Type::MM_MANTISSA_NORM_ENUM, "_MM_MANTISSA_NORM_ENUM") => {} + (&Type::MM_MANTISSA_SIGN_ENUM, 
"_MM_MANTISSA_SIGN_ENUM") => {} + (&Type::MM_PERM_ENUM, "_MM_PERM_ENUM") => {} + + // This is a macro (?) in C which seems to mutate its arguments, but + // that means that we're taking pointers to arguments in rust + // as we're not exposing it as a macro. + (&Type::MutPtr(&Type::M128), "__m128") if intrinsic.name == "_MM_TRANSPOSE4_PS" => {} + + // The _rdtsc intrinsic uses a __int64 return type, but this is a bug in + // the intrinsics guide: https://github.com/rust-lang/stdarch/issues/559 + // We have manually fixed the bug by changing the return type to `u64`. + (&Type::PrimUnsigned(64), "__int64") if intrinsic.name == "_rdtsc" => {} + + // The _bittest and _bittest64 intrinsics takes a mutable pointer in the + // intrinsics guide even though it never writes through the pointer: + (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32*") if intrinsic.name == "_bittest" => {} + (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64*") if intrinsic.name == "_bittest64" => {} + // The _xrstor, _fxrstor, _xrstor64, _fxrstor64 intrinsics take a + // mutable pointer in the intrinsics guide even though they never write + // through the pointer: + (&Type::ConstPtr(&Type::PrimUnsigned(8)), "void*") + if matches!( + &*intrinsic.name, + "_xrstor" | "_xrstor64" | "_fxrstor" | "_fxrstor64" + ) => {} + // The _mm_stream_load_si128 intrinsic take a mutable pointer in the intrinsics + // guide even though they never write through the pointer + (&Type::ConstPtr(&Type::M128I), "void*") if intrinsic.name == "_mm_stream_load_si128" => {} + /// Intel requires the mask argument for _mm_shuffle_ps to be an + // unsigned integer, but all other _mm_shuffle_.. intrinsics + // take a signed-integer. 
This breaks `_MM_SHUFFLE` for + // `_mm_shuffle_ps` + (&Type::PrimSigned(32), "unsigned int") if intrinsic.name == "_mm_shuffle_ps" => {} + + _ => bail!( + "failed to equate: `{intel}` and {t:?} for {}", + intrinsic.name + ), + } + Ok(()) +} diff --git a/library/stdarch/crates/stdarch-verify/x86-intel.xml b/library/stdarch/crates/stdarch-verify/x86-intel.xml new file mode 100644 index 000000000000..41f2119e681f --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/x86-intel.xml @@ -0,0 +1,158422 @@ + + + + + + + + Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[32:0] := a[31:0] + b[31:0] + (c_in > 0 ? 1 : 0) +MEM[out+31:out] := tmp[31:0] +dst[0] := tmp[32] +dst[7:1] := 0 + + + + ADX +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[64:0] := a[63:0] + b[63:0] + (c_in > 0 ? 1 : 0) +MEM[out+63:out] := tmp[63:0] +dst[0] := tmp[64] +dst[7:1] := 0 + + + + ADX +
immintrin.h
+ Arithmetic +
+ + + + + Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst"." + a[127:0] := ShiftRows(a[127:0]) +a[127:0] := SubBytes(a[127:0]) +a[127:0] := MixColumns(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst"." + a[127:0] := ShiftRows(a[127:0]) +a[127:0] := SubBytes(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". + a[127:0] := InvShiftRows(a[127:0]) +a[127:0] := InvSubBytes(a[127:0]) +a[127:0] := InvMixColumns(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". + a[127:0] := InvShiftRows(a[127:0]) +a[127:0] := InvSubBytes(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + Perform the InvMixColumns transformation on "a" and store the result in "dst". + dst[127:0] := InvMixColumns(a[127:0]) + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Assist in expanding the AES cipher key by computing steps towards generating a round key for encryption cipher using data from "a" and an 8-bit round constant specified in "imm8", and store the result in "dst"." + X3[31:0] := a[127:96] +X2[31:0] := a[95:64] +X1[31:0] := a[63:32] +X0[31:0] := a[31:0] +RCON[31:0] := ZeroExtend32(imm8[7:0]) +dst[31:0] := SubWord(X1) +dst[63:32] := RotWord(SubWord(X1)) XOR RCON +dst[95:64] := SubWord(X3) +dst[127:96] := RotWord(SubWord(X3)) XOR RCON + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].bf16[2*k+0]) * FP32(b.row[k].bf16[2*n+0]) + tmp.fp32[n] += FP32(a.row[m].bf16[2*k+1]) * FP32(b.row[k].bf16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-BF16 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "src0" and "src1", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].bf16[2*k+0]) * FP32(src1.row[k].bf16[2*n+0]) + tmp.fp32[n] += FP32(src0.row[m].bf16[2*k+1]) * FP32(src1.row[k].bf16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-BF16 +
immintrin.h
+ Application-Targeted +
+ + + + + + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "a" and "b" is interpreted as a complex number with FP16 real part and FP16 imaginary part. Calculates the imaginary part of the result. For each possible combination of (row of "a", column of "b"), it performs a set of multiplication and accumulations on all corresponding complex numbers (one from "a" and one from "b"). The imaginary part of the "a" element is multiplied with the real part of the corresponding "b" element, and the real part of the "a" element is multiplied with the imaginary part of the corresponding "b" elements. The two accumulated results are added, and then accumulated into the corresponding row and column of "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + + + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "a" and "b" is interpreted as a complex number with FP16 real part and FP16 imaginary part. Calculates the real part of the result. For each possible combination of (row of "a", column of "b"), it performs a set of multiplication and accumulations on all corresponding complex numbers (one from "a" and one from "b"). The real part of the "a" element is multiplied with the real part of the corresponding "b" element, and the negated imaginary part of the "a" element is multiplied with the imaginary part of the corresponding "b" elements. The two accumulated results are added, and then accumulated into the corresponding row and column of "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "src0" and "src1" is interpreted as a complex number with FP16 real part and FP16 imaginary part. This function calculates the imaginary part of the result. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+1]) + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+0]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles src0 and src1 is interpreted as a complex number with FP16 real part and FP16 imaginary part. This function calculates the real part of the result. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(-src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + + + + + Compute dot-product of FP16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-FP16 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of FP16 (16-bit) floating-point pairs in tiles "src0" and "src1", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-FP16 +
immintrin.h
+ Application-Targeted +
+ + + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) + tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) + tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) + tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0]) + tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1]) + tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2]) + tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) + tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) + tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) + tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0]) + tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1]) + tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2]) + tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "src0" with corresponding signed 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + DEFINE DPBD(c, x, y) { + tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0]) + tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1]) + tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2]) + tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3]) + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "src0" with corresponding unsigned 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + DEFINE DPBD(c, x, y) { + tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) + tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) + tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) + tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "src0" with corresponding signed 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + DEFINE DPBD(c, x, y) { + tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0]) + tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1]) + tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2]) + tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3]) + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "src0" with corresponding unsigned 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + DEFINE DPBD(c, x, y) { + tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) + tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) + tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) + tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + Load tile configuration from a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type pallette, the number of bytes per row, and the number of rows. If the specified pallette_id is zero, that signifies the init state for both the tile config and the tile data, and the tiles are zeroed. Any invalid configurations will result in #GP fault. + +// format of memory payload. each field is a byte. +// 0: palette +// 1: start_row +// 2-15: reserved, must be zero +// 16-17: tile0.colsb +// 18-19: tile1.colsb +// 20-21: tile2.colsb +// ... +// 30-31: tile7.colsb +// 32-47: reserved, must be zero +// 48: tile0.rows +// 49: tile1.rows +// 50: tile2.rows +// ... +// 55: tile7.rows +// 56-63: reserved, must be zero + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + + Stores the current tile configuration to a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type pallette, the number of bytes per row, and the number of rows. If tiles are not configured, all zeroes will be stored to memory. + +// format of memory payload. each field is a byte. +// 0: palette +// 1: start_row +// 2-15: reserved, must be zero +// 16-17: tile0.colsb +// 18-19: tile1.colsb +// 20-21: tile2.colsb +// ... +// 30-31: tile7.colsb +// 32-47: reserved, must be zero +// 48: tile0.rows +// 49: tile1.rows +// 50: tile2.rows +// ... +// 55: tile7.rows +// 56-63: reserved, must be zero + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + + + + Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". + start := tileconfig.startRow +IF start == 0 // not restarting, zero incoming state + tilezero(dst) +FI +nbytes := dst.colsb +DO WHILE start < dst.rows + memptr := base + start * stride + write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) + start := start + 1 +OD +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + + + + Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly. + start := tileconfig.startRow +IF start == 0 // not restarting, zero incoming state + tilezero(dst) +FI +nbytes := dst.colsb +DO WHILE start < dst.rows + memptr := base + start * stride + write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) + start := start + 1 +OD +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + Release the tile configuration to return to the init state, which releases all storage it currently holds. + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + + + + Store the tile specified by "src" to memory specifieid by "base" address and "stride" using the tile configuration previously configured via "_tile_loadconfig". + start := tileconfig.startRow +DO WHILE start < src.rows + memptr := base + start * stride + write_memory(memptr, src.colsb, src.row[start]) + start := start + 1 +OD +zero_tileconfig_start() + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + + Zero the tile specified by "tdest". + nbytes := palette_table[tileconfig.palette_id].bytes_per_row +FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1 + FOR j := 0 TO nbytes-1 + tdest.row[i].byte[j] := 0 + ENDFOR +ENDFOR + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + start := tileconfig.startRow +IF start == 0 // not restarting, zero incoming state + tilezero(dst) +FI +nbytes := dst.colsb +DO WHILE start < dst.rows + memptr := base + start * stride + write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) + start := start + 1 +OD +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + Store the tile specified by "src" to memory specifieid by "base" address and "stride". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + start := tileconfig.startRow +DO WHILE start < src.rows + memptr := base + start * stride + write_memory(memptr, src.colsb, src.row[start]) + start := start + 1 +OD +zero_tileconfig_start() + + + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly. The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + start := tileconfig.startRow +IF start == 0 // not restarting, zero incoming state + tilezero(dst) +FI +nbytes := dst.colsb +DO WHILE start < dst.rows + memptr := base + start * stride + write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) + start := start + 1 +OD +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + Zero the tile specified by "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + nbytes := palette_table[tileconfig.palette_id].bytes_per_row +FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1 + FOR j := 0 TO nbytes-1 + tdest.row[i].byte[j] := 0 + ENDFOR +ENDFOR + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ + + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ACOS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ACOS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ACOSH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ACOSH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ASIN(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ASIN(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ASINH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ASINH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ATAN(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ATAN(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ATANH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ATANH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := COSD(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := COSD(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := COSH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := COSH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SIND(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SIND(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SINH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SINH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TAN(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TAN(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TAND(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TAND(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TANH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TANH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CEXP(a[31:0], b[31:0]) { + result[31:0] := POW(FP32(e), a[31:0]) * COS(b[31:0]) + result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0]) + RETURN result +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CLOG(a[31:0], b[31:0]) { + result[31:0] := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0))) + result[63:32] := ATAN2(b, a) + RETURN result +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed complex snumbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CSQRT(a[31:0], b[31:0]) { + sign[31:0] := (b < 0.0) ? -FP32(1.0) : FP32(1.0) + result[31:0] := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) + result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) + RETURN result +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(10.0, a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(2.0, a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := InvCubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := InvCubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := InvSQRT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := InvSQRT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(1.0 + a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(1.0 + a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_pd". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ERF(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ERF(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+63:i] := 1.0 - ERF(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+31:i])) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+63:i] := 1.0 / ERF(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + + Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 31 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 15 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 3 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 31 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 15 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 3 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TRUNCATE(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TRUNCATE(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Alternatively add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + b[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Alternatively add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + b[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + + Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". + +DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { + FOR j := 0 to 3 + i := j*32 + IF imm8[(4+j)%8] + temp[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + temp[i+31:i] := FP32(0.0) + FI + ENDFOR + + sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0]) + + FOR j := 0 to 3 + i := j*32 + IF imm8[j%8] + tmpdst[i+31:i] := sum[31:0] + ELSE + tmpdst[i+31:i] := FP32(0.0) + FI + ENDFOR + RETURN tmpdst[127:0] +} +dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) +dst[255:128] := DP(a[255:128], b[255:128], imm8[7:0]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[127:64] + a[63:0] +dst[127:64] := b[127:64] + b[63:0] +dst[191:128] := a[255:192] + a[191:128] +dst[255:192] := b[255:192] + b[191:128] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] +dst[159:128] := a[191:160] + a[159:128] +dst[191:160] := a[255:224] + a[223:192] +dst[223:192] := b[191:160] + b[159:128] +dst[255:224] := b[255:224] + b[223:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[63:0] - a[127:64] +dst[127:64] := b[63:0] - b[127:64] +dst[191:128] := a[191:128] - a[255:192] +dst[255:192] := b[191:128] - b[255:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] +dst[159:128] := a[159:128] - a[191:160] +dst[191:160] := a[223:192] - a[255:224] +dst[223:192] := b[159:128] - b[191:160] +dst[255:224] := b[223:192] - b[255:224] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. + +IF ((a[255:0] AND b[255:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[255:0]) AND b[255:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. + +IF ((a[255:0] AND b[255:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[255:0]) AND b[255:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +IF ((a[255:0] AND b[255:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[255:0]) AND b[255:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst". + +dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract a 32-bit integer from "a", selected with "index", and store the result in "dst". + +dst[31:0] := (a[255:0] >> (index[2:0] * 32))[31:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract a 64-bit integer from "a", selected with "index", and store the result in "dst". + +dst[63:0] := (a[255:0] >> (index[1:0] * 64))[63:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], b[1:0]) +dst[63:32] := SELECT4(a[127:0], b[33:32]) +dst[95:64] := SELECT4(a[127:0], b[65:64]) +dst[127:96] := SELECT4(a[127:0], b[97:96]) +dst[159:128] := SELECT4(a[255:128], b[129:128]) +dst[191:160] := SELECT4(a[255:128], b[161:160]) +dst[223:192] := SELECT4(a[255:128], b[193:192]) +dst[255:224] := SELECT4(a[255:128], b[225:224]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], b[1:0]) +dst[63:32] := SELECT4(a[127:0], b[33:32]) +dst[95:64] := SELECT4(a[127:0], b[65:64]) +dst[127:96] := SELECT4(a[127:0], b[97:96]) +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +IF (b[1] == 0) dst[63:0] := a[63:0]; FI +IF (b[1] == 1) dst[63:0] := a[127:64]; FI +IF (b[65] == 0) dst[127:64] := a[63:0]; FI +IF (b[65] == 1) dst[127:64] := a[127:64]; FI +IF (b[129] == 0) dst[191:128] := a[191:128]; FI +IF (b[129] == 1) dst[191:128] := a[255:192]; FI +IF (b[193] == 0) dst[255:192] := a[191:128]; FI +IF (b[193] == 1) dst[255:192] := a[255:192]; FI +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst". + +IF (b[1] == 0) dst[63:0] := a[63:0]; FI +IF (b[1] == 1) dst[63:0] := a[127:64]; FI +IF (b[65] == 0) dst[127:64] := a[63:0]; FI +IF (b[65] == 1) dst[127:64] := a[127:64]; FI +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". + +IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE imm8[0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 8-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[4:0]*8 +dst[sel+7:sel] := i[7:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[3:0]*16 +dst[sel+15:sel] := i[15:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[2:0]*32 +dst[sel+31:sel] := i[31:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[1:0]*64 +dst[sel+63:sel] := i[63:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i], rounding) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i], rounding) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +dst[63:0] := ( a[63:0] OP b[63:0] ) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +dst[31:0] := ( a[31:0] OP b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX +
immintrin.h
+ Convert +
+ + + + Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". + +dst[63:0] := a[63:0] + + + AVX +
immintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX +
immintrin.h
+ Convert +
+ + + + Zero the contents of all XMM or YMM registers. + YMM0[MAX:0] := 0 +YMM1[MAX:0] := 0 +YMM2[MAX:0] := 0 +YMM3[MAX:0] := 0 +YMM4[MAX:0] := 0 +YMM5[MAX:0] := 0 +YMM6[MAX:0] := 0 +YMM7[MAX:0] := 0 +IF _64_BIT_MODE + YMM8[MAX:0] := 0 + YMM9[MAX:0] := 0 + YMM10[MAX:0] := 0 + YMM11[MAX:0] := 0 + YMM12[MAX:0] := 0 + YMM13[MAX:0] := 0 + YMM14[MAX:0] := 0 + YMM15[MAX:0] := 0 +FI + + + AVX +
immintrin.h
+ General Support +
+ + + + Zero the upper 128 bits of all YMM registers; the lower 128-bits of the registers are unmodified. + YMM0[MAX:128] := 0 +YMM1[MAX:128] := 0 +YMM2[MAX:128] := 0 +YMM3[MAX:128] := 0 +YMM4[MAX:128] := 0 +YMM5[MAX:128] := 0 +YMM6[MAX:128] := 0 +YMM7[MAX:128] := 0 +IF _64_BIT_MODE + YMM8[MAX:128] := 0 + YMM9[MAX:128] := 0 + YMM10[MAX:128] := 0 + YMM11[MAX:128] := 0 + YMM12[MAX:128] := 0 + YMM13[MAX:128] := 0 + YMM14[MAX:128] := 0 + YMM15[MAX:128] := 0 +FI + + + AVX +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256 with undefined elements. + AVX +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256d with undefined elements. + AVX +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256i with undefined elements. + AVX +
immintrin.h
+ General Support +
+ + + + Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst". + +tmp[31:0] := MEM[mem_addr+31:mem_addr] +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst". + +tmp[31:0] := MEM[mem_addr+31:mem_addr] +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast a double-precision (64-bit) floating-point element from memory to all elements of "dst". + +tmp[63:0] := MEM[mem_addr+63:mem_addr] +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of "dst". + +tmp[127:0] := MEM[mem_addr+127:mem_addr] +dst[127:0] := tmp[127:0] +dst[255:128] := tmp[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of "dst". + +tmp[127:0] := MEM[mem_addr+127:mem_addr] +dst[127:0] := tmp[127:0] +dst[255:128] := tmp[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits of integer data from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits of integer data from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm256_loadu_si256" when the data crosses a cache line boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load two 128-bit values (composed of 4 packed single-precision (32-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst". + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +dst[127:0] := MEM[loaddr+127:loaddr] +dst[255:128] := MEM[hiaddr+127:hiaddr] +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Load +
+ + + + + Load two 128-bit values (composed of 2 packed double-precision (64-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst". + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +dst[127:0] := MEM[loaddr+127:loaddr] +dst[255:128] := MEM[hiaddr+127:hiaddr] +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Load +
+ + + + + Load two 128-bit values (composed of integer data) from memory, and combine them into a 256-bit value in "dst". + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +dst[127:0] := MEM[loaddr+127:loaddr] +dst[255:128] := MEM[hiaddr+127:hiaddr] +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Load +
+ + + + + Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits of integer data from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits of integer data from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits of integer data from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store the high and low 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory two different 128-bit locations. + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +MEM[loaddr+127:loaddr] := a[127:0] +MEM[hiaddr+127:hiaddr] := a[255:128] + + AVX +
immintrin.h
+ Store +
+ + + + + + Store the high and low 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory two different 128-bit locations. + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +MEM[loaddr+127:loaddr] := a[127:0] +MEM[hiaddr+127:hiaddr] := a[255:128] + + AVX +
immintrin.h
+ Store +
+ + + + + + Store the high and low 128-bit halves (each composed of integer data) from "a" into memory two different 128-bit locations. + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +MEM[loaddr+127:loaddr] := a[127:0] +MEM[hiaddr+127:hiaddr] := a[255:128] + + AVX +
immintrin.h
+ Store +
+ + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := a[63:32] +dst[95:64] := a[127:96] +dst[127:96] := a[127:96] +dst[159:128] := a[191:160] +dst[191:160] := a[191:160] +dst[223:192] := a[255:224] +dst[255:224] := a[255:224] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[31:0] +dst[95:64] := a[95:64] +dst[127:96] := a[95:64] +dst[159:128] := a[159:128] +dst[191:160] := a[159:128] +dst[223:192] := a[223:192] +dst[255:224] := a[223:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst". + +dst[63:0] := a[63:0] +dst[127:64] := a[63:0] +dst[191:128] := a[191:128] +dst[255:192] := a[191:128] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Move +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := 1.0 / a[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". + +FOR j := 0 to 3 + i := j*64 + IF a[i+63] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:4] := 0 + + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a". + +FOR j := 0 to 7 + i := j*32 + IF a[i+31] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:8] := 0 + + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + Return vector of type __m256d with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + Return vector of type __m256 with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + Return vector of type __m256i with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 +dst[71:64] := e8 +dst[79:72] := e9 +dst[87:80] := e10 +dst[95:88] := e11 +dst[103:96] := e12 +dst[111:104] := e13 +dst[119:112] := e14 +dst[127:120] := e15 +dst[135:128] := e16 +dst[143:136] := e17 +dst[151:144] := e18 +dst[159:152] := e19 +dst[167:160] := e20 +dst[175:168] := e21 +dst[183:176] := e22 +dst[191:184] := e23 +dst[199:192] := e24 +dst[207:200] := e25 +dst[215:208] := e26 +dst[223:216] := e27 +dst[231:224] := e28 +dst[239:232] := e29 +dst[247:240] := e30 +dst[255:248] := e31 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 +dst[79:64] := e4 +dst[95:80] := e5 +dst[111:96] := e6 +dst[127:112] := e7 +dst[143:128] := e8 +dst[159:144] := e9 +dst[175:160] := e10 +dst[191:176] := e11 +dst[207:192] := e12 +dst[223:208] := e13 +dst[239:224] := e14 +dst[255:240] := e15 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[63:0] := e3 +dst[127:64] := e2 +dst[191:128] := e1 +dst[255:192] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[31:0] := e7 +dst[63:32] := e6 +dst[95:64] := e5 +dst[127:96] := e4 +dst[159:128] := e3 +dst[191:160] := e2 +dst[223:192] := e1 +dst[255:224] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values in reverse order. + +dst[7:0] := e31 +dst[15:8] := e30 +dst[23:16] := e29 +dst[31:24] := e28 +dst[39:32] := e27 +dst[47:40] := e26 +dst[55:48] := e25 +dst[63:56] := e24 +dst[71:64] := e23 +dst[79:72] := e22 +dst[87:80] := e21 +dst[95:88] := e20 +dst[103:96] := e19 +dst[111:104] := e18 +dst[119:112] := e17 +dst[127:120] := e16 +dst[135:128] := e15 +dst[143:136] := e14 +dst[151:144] := e13 +dst[159:152] := e12 +dst[167:160] := e11 +dst[175:168] := e10 +dst[183:176] := e9 +dst[191:184] := e8 +dst[199:192] := e7 +dst[207:200] := e6 +dst[215:208] := e5 +dst[223:216] := e4 +dst[231:224] := e3 +dst[239:232] := e2 +dst[247:240] := e1 +dst[255:248] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values in reverse order. + +dst[15:0] := e15 +dst[31:16] := e14 +dst[47:32] := e13 +dst[63:48] := e12 +dst[79:64] := e11 +dst[95:80] := e10 +dst[111:96] := e9 +dst[127:112] := e8 +dst[143:128] := e7 +dst[159:144] := e6 +dst[175:160] := e5 +dst[191:176] := e4 +dst[207:192] := e3 +dst[223:208] := e2 +dst[239:224] := e1 +dst[255:240] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e7 +dst[63:32] := e6 +dst[95:64] := e5 +dst[127:96] := e4 +dst[159:128] := e3 +dst[191:160] := e2 +dst[223:192] := e1 +dst[255:224] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the supplied values in reverse order. + +dst[63:0] := e3 +dst[127:64] := e2 +dst[191:128] := e1 +dst[255:192] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastb". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 16-bit integer "a" to all all elements of "dst". This intrinsic may generate the "vpbroadcastw". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastd". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastq". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256 vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256d vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256i vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256 vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256d vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256i vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + Cast vector of type __m256d to type __m256. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m256d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m256; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m256; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + + + + Extract an 8-bit integer from "a", selected with "index", and store the result in "dst". + +dst[7:0] := (a[255:0] >> (index[4:0] * 8))[7:0] + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Extract a 16-bit integer from "a", selected with "index", and store the result in "dst". + +dst[15:0] := (a[255:0] >> (index[3:0] * 16))[15:0] + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 16-bit integers from "a" and "b" within 128-bit lanes using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[j%8] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + IF mask[i+7] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst". + +dst[127:0] := a[127:0] +dst[255:128] := a[127:0] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst". + +dst[127:0] := a[127:0] +dst[255:128] := a[127:0] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of integer data) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx". + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 8-bit integers in "a" within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI + IF b[128+i+7] == 1 + dst[128+i+7:128+i] := 0 + ELSE + index[3:0] := b[128+i+3:128+i] + dst[128+i+7:128+i] := a[128+index*8+7:128+index*8] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from from "a" to "dst". + +dst[63:0] := a[63:0] +dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +dst[191:128] := a[191:128] +dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst". + +dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +dst[127:64] := a[127:64] +dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +dst[255:192] := a[255:192] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := ABS(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ABS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[31:16] + a[15:0] +dst[31:16] := a[63:48] + a[47:32] +dst[47:32] := a[95:80] + a[79:64] +dst[63:48] := a[127:112] + a[111:96] +dst[79:64] := b[31:16] + b[15:0] +dst[95:80] := b[63:48] + b[47:32] +dst[111:96] := b[95:80] + b[79:64] +dst[127:112] := b[127:112] + b[111:96] +dst[143:128] := a[159:144] + a[143:128] +dst[159:144] := a[191:176] + a[175:160] +dst[175:160] := a[223:208] + a[207:192] +dst[191:176] := a[255:240] + a[239:224] +dst[207:192] := b[159:144] + b[143:128] +dst[223:208] := b[191:176] + b[175:160] +dst[239:224] := b[223:208] + b[207:192] +dst[255:240] := b[255:240] + b[239:224] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] +dst[159:128] := a[191:160] + a[159:128] +dst[191:160] := a[255:224] + a[223:192] +dst[223:192] := b[191:160] + b[159:128] +dst[255:224] := b[255:224] + b[223:192] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[31:16] + a[15:0]) +dst[31:16] := Saturate16(a[63:48] + a[47:32]) +dst[47:32] := Saturate16(a[95:80] + a[79:64]) +dst[63:48] := Saturate16(a[127:112] + a[111:96]) +dst[79:64] := Saturate16(b[31:16] + b[15:0]) +dst[95:80] := Saturate16(b[63:48] + b[47:32]) +dst[111:96] := Saturate16(b[95:80] + b[79:64]) +dst[127:112] := Saturate16(b[127:112] + b[111:96]) +dst[143:128] := Saturate16(a[159:144] + a[143:128]) +dst[159:144] := Saturate16(a[191:176] + a[175:160]) +dst[175:160] := Saturate16(a[223:208] + a[207:192]) +dst[191:176] := Saturate16(a[255:240] + a[239:224]) +dst[207:192] := Saturate16(b[159:144] + b[143:128]) +dst[223:208] := Saturate16(b[191:176] + b[175:160]) +dst[239:224] := Saturate16(b[223:208] + b[207:192]) +dst[255:240] := Saturate16(b[255:240] + b[239:224]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[15:0] - a[31:16] +dst[31:16] := a[47:32] - a[63:48] +dst[47:32] := a[79:64] - a[95:80] +dst[63:48] := a[111:96] - a[127:112] +dst[79:64] := b[15:0] - b[31:16] +dst[95:80] := b[47:32] - b[63:48] +dst[111:96] := b[79:64] - b[95:80] +dst[127:112] := b[111:96] - b[127:112] +dst[143:128] := a[143:128] - a[159:144] +dst[159:144] := a[175:160] - a[191:176] +dst[175:160] := a[207:192] - a[223:208] +dst[191:176] := a[239:224] - a[255:240] +dst[207:192] := b[143:128] - b[159:144] +dst[223:208] := b[175:160] - b[191:176] +dst[239:224] := b[207:192] - b[223:208] +dst[255:240] := b[239:224] - b[255:240] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] +dst[159:128] := a[159:128] - a[191:160] +dst[191:160] := a[223:192] - a[255:224] +dst[223:192] := b[159:128] - b[191:160] +dst[255:224] := b[223:192] - b[255:224] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[15:0] - a[31:16]) +dst[31:16] := Saturate16(a[47:32] - a[63:48]) +dst[47:32] := Saturate16(a[79:64] - a[95:80]) +dst[63:48] := Saturate16(a[111:96] - a[127:112]) +dst[79:64] := Saturate16(b[15:0] - b[31:16]) +dst[95:80] := Saturate16(b[47:32] - b[63:48]) +dst[111:96] := Saturate16(b[79:64] - b[95:80]) +dst[127:112] := Saturate16(b[111:96] - b[127:112]) +dst[143:128] := Saturate16(a[143:128] - a[159:144]) +dst[159:144] := Saturate16(a[175:160] - a[191:176]) +dst[175:160] := Saturate16(a[207:192] - a[223:208]) +dst[191:176] := Saturate16(a[239:224] - a[255:240]) +dst[207:192] := Saturate16(b[143:128] - b[159:144]) +dst[223:208] := Saturate16(b[175:160] - b[191:176]) +dst[239:224] := Saturate16(b[207:192] - b[223:208]) +dst[255:240] := Saturate16(b[239:224] - b[255:240]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". + +FOR j := 0 to 31 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +FOR j := 0 to 3 + i := j*64 + dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] + dst[i+63:i+16] := 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Negate packed signed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 31 + i := j*8 + IF b[i+7:i] < 0 + dst[i+7:i] := -(a[i+7:i]) + ELSE IF b[i+7:i] == 0 + dst[i+7:i] := 0 + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Negate packed signed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 15 + i := j*16 + IF b[i+15:i] < 0 + dst[i+15:i] := -(a[i+15:i]) + ELSE IF b[i+15:i] == 0 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Negate packed signed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 7 + i := j*32 + IF b[i+31:i] < 0 + dst[i+31:i] := -(a[i+31:i]) + ELSE IF b[i+31:i] == 0 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +FOR j := 0 to 1 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + dst[i+127:i] := tmp[127:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[j] := a[i+7] +ENDFOR + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + Eight SADs are performed for each 128-bit lane using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8". + +DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) { + a_offset := imm8[2]*32 + b_offset := imm8[1:0]*32 + FOR j := 0 to 7 + i := j*8 + k := a_offset+i + l := b_offset + tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \ + ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24])) + ENDFOR + RETURN tmp[127:0] +} +dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0]) +dst[255:128] := MPSADBW(a[255:128], b[255:128], imm8[5:3]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(a[79:64]) +dst[47:40] := Saturate8(a[95:80]) +dst[55:48] := Saturate8(a[111:96]) +dst[63:56] := Saturate8(a[127:112]) +dst[71:64] := Saturate8(b[15:0]) +dst[79:72] := Saturate8(b[31:16]) +dst[87:80] := Saturate8(b[47:32]) +dst[95:88] := Saturate8(b[63:48]) +dst[103:96] := Saturate8(b[79:64]) +dst[111:104] := Saturate8(b[95:80]) +dst[119:112] := Saturate8(b[111:96]) +dst[127:120] := Saturate8(b[127:112]) +dst[135:128] := Saturate8(a[143:128]) +dst[143:136] := Saturate8(a[159:144]) +dst[151:144] := Saturate8(a[175:160]) +dst[159:152] := Saturate8(a[191:176]) +dst[167:160] := Saturate8(a[207:192]) +dst[175:168] := Saturate8(a[223:208]) +dst[183:176] := Saturate8(a[239:224]) +dst[191:184] := Saturate8(a[255:240]) +dst[199:192] := Saturate8(b[143:128]) +dst[207:200] := Saturate8(b[159:144]) +dst[215:208] := Saturate8(b[175:160]) +dst[223:216] := Saturate8(b[191:176]) +dst[231:224] := Saturate8(b[207:192]) +dst[239:232] := Saturate8(b[223:208]) +dst[247:240] := Saturate8(b[239:224]) +dst[255:248] := Saturate8(b[255:240]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(a[95:64]) +dst[63:48] := Saturate16(a[127:96]) +dst[79:64] := Saturate16(b[31:0]) +dst[95:80] := Saturate16(b[63:32]) +dst[111:96] := Saturate16(b[95:64]) +dst[127:112] := Saturate16(b[127:96]) +dst[143:128] := Saturate16(a[159:128]) +dst[159:144] := Saturate16(a[191:160]) +dst[175:160] := Saturate16(a[223:192]) +dst[191:176] := Saturate16(a[255:224]) +dst[207:192] := Saturate16(b[159:128]) +dst[223:208] := Saturate16(b[191:160]) +dst[239:224] := Saturate16(b[223:192]) +dst[255:240] := Saturate16(b[255:224]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(a[79:64]) +dst[47:40] := SaturateU8(a[95:80]) +dst[55:48] := SaturateU8(a[111:96]) +dst[63:56] := SaturateU8(a[127:112]) +dst[71:64] := SaturateU8(b[15:0]) +dst[79:72] := SaturateU8(b[31:16]) +dst[87:80] := SaturateU8(b[47:32]) +dst[95:88] := SaturateU8(b[63:48]) +dst[103:96] := SaturateU8(b[79:64]) +dst[111:104] := SaturateU8(b[95:80]) +dst[119:112] := SaturateU8(b[111:96]) +dst[127:120] := SaturateU8(b[127:112]) +dst[135:128] := SaturateU8(a[143:128]) +dst[143:136] := SaturateU8(a[159:144]) +dst[151:144] := SaturateU8(a[175:160]) +dst[159:152] := SaturateU8(a[191:176]) +dst[167:160] := SaturateU8(a[207:192]) +dst[175:168] := SaturateU8(a[223:208]) +dst[183:176] := SaturateU8(a[239:224]) +dst[191:184] := SaturateU8(a[255:240]) +dst[199:192] := SaturateU8(b[143:128]) +dst[207:200] := SaturateU8(b[159:144]) +dst[215:208] := SaturateU8(b[175:160]) +dst[223:216] := SaturateU8(b[191:176]) +dst[231:224] := SaturateU8(b[207:192]) +dst[239:232] := SaturateU8(b[223:208]) +dst[247:240] := SaturateU8(b[239:224]) +dst[255:248] := SaturateU8(b[255:240]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". + +dst[15:0] := SaturateU16(a[31:0]) +dst[31:16] := SaturateU16(a[63:32]) +dst[47:32] := SaturateU16(a[95:64]) +dst[63:48] := SaturateU16(a[127:96]) +dst[79:64] := SaturateU16(b[31:0]) +dst[95:80] := SaturateU16(b[63:32]) +dst[111:96] := SaturateU16(b[95:64]) +dst[127:112] := SaturateU16(b[127:96]) +dst[143:128] := SaturateU16(a[159:128]) +dst[159:144] := SaturateU16(a[191:160]) +dst[175:160] := SaturateU16(a[223:192]) +dst[191:176] := SaturateU16(a[255:224]) +dst[207:192] := SaturateU16(b[159:128]) +dst[223:208] := SaturateU16(b[191:160]) +dst[239:224] := SaturateU16(b[223:192]) +dst[255:240] := SaturateU16(b[255:224]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[255:0] := (a[255:0] AND b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 256 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[255:0] := ((NOT a[255:0]) AND b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[255:0] := (a[255:0] OR b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[255:0] := (a[255:0] XOR b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Probability/Statistics +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j:= 0 to 7 + i := 32*j + k := 16*j + dst[i+31:i] := SignExtend32(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 16*j + dst[i+63:i] := SignExtend64(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 32*j + dst[i+63:i] := SignExtend64(a[k+31:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + l := j*16 + dst[l+15:l] := SignExtend16(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[i+31:i] := SignExtend32(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[i+63:i] := SignExtend64(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[i+31:i] := ZeroExtend32(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 16*j + dst[i+63:i] := ZeroExtend64(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 32*j + dst[i+63:i] := ZeroExtend64(a[k+31:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + l := j*16 + dst[l+15:l] := ZeroExtend16(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[i+31:i] := ZeroExtend32(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[i+63:i] := ZeroExtend64(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:64] := 0 +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:64] := 0 +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + Load 256-bits of integer data from memory into "dst" using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) +dst[255:128] := a[255:128] << (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) +dst[255:128] := a[255:128] << (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) +dst[255:128] := a[255:128] >> (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) +dst[255:128] := a[255:128] >> (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 1 + tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 3 + i := j*64 + dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 1 + tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 3 + i := j*64 + tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 1 + tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 3 + i := j*64 + tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +tmp.dword[0] := b.dword[ imm8[1:0] ] +tmp.dword[1] := b.dword[ imm8[3:2] ] +tmp.dword[2] := b.dword[ imm8[5:4] ] +tmp.dword[3] := b.dword[ imm8[7:6] ] +FOR j := 0 to 1 + i := j*64 + dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +tmp.dword[0] := b.dword[ imm8[1:0] ] +tmp.dword[1] := b.dword[ imm8[3:2] ] +tmp.dword[2] := b.dword[ imm8[5:4] ] +tmp.dword[3] := b.dword[ imm8[7:6] ] +FOR j := 0 to 1 + i := j*64 + tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +tmp.dword[0] := b.dword[ imm8[1:0] ] +tmp.dword[1] := b.dword[ imm8[3:2] ] +tmp.dword[2] := b.dword[ imm8[5:4] ] +tmp.dword[3] := b.dword[ imm8[7:6] ] +FOR j := 0 to 1 + i := j*64 + tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := idx[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := idx[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i]*16 + dst[i+15:i] := a[id+15:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i]*16 + dst[i+15:i] := a[id+15:id] +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". + +FOR j := 0 to 31 + i := j*8 + IF a[i+7] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". + +FOR j := 0 to 15 + i := j*8 + IF a[i+7] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := 0xFF + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := 0xFF + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := 0xFFFF + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := 0xFFFF + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". + +FOR j := 0 to 15 + i := j*16 + IF a[i+15] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". + +FOR j := 0 to 7 + i := j*16 + IF a[i+15] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[4:0] := b[i+3:i] + (j & 0x10) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[4:0] := b[i+3:i] + (j & 0x10) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 16 packed 16-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 32 packed 8-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 8 packed 16-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 16 packed 8-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Store packed 16-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*16 + IF k[j] + MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 16-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*16 + IF k[j] + MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 8-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*8 + IF k[j] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 8-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF k[j] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 16 packed 16-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 32 packed 8-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 8 packed 16-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 16 packed 8-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +tmp_dst[143:128] := Saturate16(a[159:128]) +tmp_dst[159:144] := Saturate16(a[191:160]) +tmp_dst[175:160] := Saturate16(a[223:192]) +tmp_dst[191:176] := Saturate16(a[255:224]) +tmp_dst[207:192] := Saturate16(b[159:128]) +tmp_dst[223:208] := Saturate16(b[191:160]) +tmp_dst[239:224] := Saturate16(b[223:192]) +tmp_dst[255:240] := Saturate16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +tmp_dst[143:128] := Saturate16(a[159:128]) +tmp_dst[159:144] := Saturate16(a[191:160]) +tmp_dst[175:160] := Saturate16(a[223:192]) +tmp_dst[191:176] := Saturate16(a[255:224]) +tmp_dst[207:192] := Saturate16(b[159:128]) +tmp_dst[223:208] := Saturate16(b[191:160]) +tmp_dst[239:224] := Saturate16(b[223:192]) +tmp_dst[255:240] := Saturate16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +tmp_dst[135:128] := Saturate8(a[143:128]) +tmp_dst[143:136] := Saturate8(a[159:144]) +tmp_dst[151:144] := Saturate8(a[175:160]) +tmp_dst[159:152] := Saturate8(a[191:176]) +tmp_dst[167:160] := Saturate8(a[207:192]) +tmp_dst[175:168] := Saturate8(a[223:208]) +tmp_dst[183:176] := Saturate8(a[239:224]) +tmp_dst[191:184] := Saturate8(a[255:240]) +tmp_dst[199:192] := Saturate8(b[143:128]) +tmp_dst[207:200] := Saturate8(b[159:144]) +tmp_dst[215:208] := Saturate8(b[175:160]) +tmp_dst[223:216] := Saturate8(b[191:176]) +tmp_dst[231:224] := Saturate8(b[207:192]) +tmp_dst[239:232] := Saturate8(b[223:208]) +tmp_dst[247:240] := Saturate8(b[239:224]) +tmp_dst[255:248] := Saturate8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +tmp_dst[135:128] := Saturate8(a[143:128]) +tmp_dst[143:136] := Saturate8(a[159:144]) +tmp_dst[151:144] := Saturate8(a[175:160]) +tmp_dst[159:152] := Saturate8(a[191:176]) +tmp_dst[167:160] := Saturate8(a[207:192]) +tmp_dst[175:168] := Saturate8(a[223:208]) +tmp_dst[183:176] := Saturate8(a[239:224]) +tmp_dst[191:184] := Saturate8(a[255:240]) +tmp_dst[199:192] := Saturate8(b[143:128]) +tmp_dst[207:200] := Saturate8(b[159:144]) +tmp_dst[215:208] := Saturate8(b[175:160]) +tmp_dst[223:216] := Saturate8(b[191:176]) +tmp_dst[231:224] := Saturate8(b[207:192]) +tmp_dst[239:232] := Saturate8(b[223:208]) +tmp_dst[247:240] := Saturate8(b[239:224]) +tmp_dst[255:248] := Saturate8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +tmp_dst[143:128] := SaturateU16(a[159:128]) +tmp_dst[159:144] := SaturateU16(a[191:160]) +tmp_dst[175:160] := SaturateU16(a[223:192]) +tmp_dst[191:176] := SaturateU16(a[255:224]) +tmp_dst[207:192] := SaturateU16(b[159:128]) +tmp_dst[223:208] := SaturateU16(b[191:160]) +tmp_dst[239:224] := SaturateU16(b[223:192]) +tmp_dst[255:240] := SaturateU16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +tmp_dst[143:128] := SaturateU16(a[159:128]) +tmp_dst[159:144] := SaturateU16(a[191:160]) +tmp_dst[175:160] := SaturateU16(a[223:192]) +tmp_dst[191:176] := SaturateU16(a[255:224]) +tmp_dst[207:192] := SaturateU16(b[159:128]) +tmp_dst[223:208] := SaturateU16(b[191:160]) +tmp_dst[239:224] := SaturateU16(b[223:192]) +tmp_dst[255:240] := SaturateU16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +tmp_dst[135:128] := SaturateU8(a[143:128]) +tmp_dst[143:136] := SaturateU8(a[159:144]) +tmp_dst[151:144] := SaturateU8(a[175:160]) +tmp_dst[159:152] := SaturateU8(a[191:176]) +tmp_dst[167:160] := SaturateU8(a[207:192]) +tmp_dst[175:168] := SaturateU8(a[223:208]) +tmp_dst[183:176] := SaturateU8(a[239:224]) +tmp_dst[191:184] := SaturateU8(a[255:240]) +tmp_dst[199:192] := SaturateU8(b[143:128]) +tmp_dst[207:200] := SaturateU8(b[159:144]) +tmp_dst[215:208] := SaturateU8(b[175:160]) +tmp_dst[223:216] := SaturateU8(b[191:176]) +tmp_dst[231:224] := SaturateU8(b[207:192]) +tmp_dst[239:232] := SaturateU8(b[223:208]) +tmp_dst[247:240] := SaturateU8(b[239:224]) +tmp_dst[255:248] := SaturateU8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +tmp_dst[135:128] := SaturateU8(a[143:128]) +tmp_dst[143:136] := SaturateU8(a[159:144]) +tmp_dst[151:144] := SaturateU8(a[175:160]) +tmp_dst[159:152] := SaturateU8(a[191:176]) +tmp_dst[167:160] := SaturateU8(a[207:192]) +tmp_dst[175:168] := SaturateU8(a[223:208]) +tmp_dst[183:176] := SaturateU8(a[239:224]) +tmp_dst[191:184] := SaturateU8(a[255:240]) +tmp_dst[199:192] := SaturateU8(b[143:128]) +tmp_dst[207:200] := SaturateU8(b[159:144]) +tmp_dst[215:208] := SaturateU8(b[175:160]) +tmp_dst[223:216] := SaturateU8(b[191:176]) +tmp_dst[231:224] := SaturateU8(b[207:192]) +tmp_dst[239:232] := SaturateU8(b[223:208]) +tmp_dst[247:240] := SaturateU8(b[239:224]) +tmp_dst[255:248] := SaturateU8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + dst[l+7:l] := Saturate8(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + dst[l+7:l] := Saturate8(a[i+15:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + dst[l+7:l] := SaturateU8(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + dst[l+7:l] := SaturateU8(a[i+15:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + dst[l+7:l] := Truncate8(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + dst[l+7:l] := Truncate8(a[i+15:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 16-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 31 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 31 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + Reduce the packed 16-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[15:0] + src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] + src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_ADD(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_ADD(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[15:0] + src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_ADD(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_ADD(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[15:0] + src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] + src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_ADD(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_ADD(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[15:0] + src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_ADD(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_ADD(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[7:0] + src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] + src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_ADD(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_ADD(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[7:0] + src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_ADD(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_ADD(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[7:0] + src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] + src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_ADD(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_ADD(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[7:0] + src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_ADD(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_ADD(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[15:0] * src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] * src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_MUL(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MUL(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[15:0] * src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_MUL(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 1 + FI +ENDFOR +dst[15:0] := REDUCE_MUL(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[15:0] * src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] * src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_MUL(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MUL(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[15:0] * src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_MUL(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 1 + FI +ENDFOR +dst[15:0] := REDUCE_MUL(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[7:0] * src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] * src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_MUL(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MUL(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[7:0] * src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_MUL(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 1 + FI +ENDFOR +dst[7:0] := REDUCE_MUL(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[7:0] * src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] * src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_MUL(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MUL(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[7:0] * src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_MUL(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 1 + FI +ENDFOR +dst[7:0] := REDUCE_MUL(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[15:0] OR src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] OR src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_OR(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_OR(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[15:0] OR src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_OR(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_OR(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[15:0] OR src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] OR src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_OR(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_OR(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[15:0] OR src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_OR(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_OR(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[7:0] OR src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] OR src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_OR(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_OR(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[7:0] OR src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_OR(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_OR(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[7:0] OR src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] OR src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_OR(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_OR(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[7:0] OR src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_OR(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_OR(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[15:0] AND src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] AND src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_AND(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_AND(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[15:0] AND src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_AND(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0xFFFF + FI +ENDFOR +dst[15:0] := REDUCE_AND(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 16-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[15:0] AND src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] AND src[i+16*len+31:i+16*len] + ENDFOR + RETURN REDUCE_AND(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_AND(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[15:0] AND src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_AND(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0xFFFF + FI +ENDFOR +dst[15:0] := REDUCE_AND(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[7:0] AND src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] AND src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_AND(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_AND(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[7:0] AND src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_AND(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0xFF + FI +ENDFOR +dst[7:0] := REDUCE_AND(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 8-bit integers in "a" by multiplication. Returns the sum of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[7:0] AND src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] AND src[i+8*len+15:i+8*len] + ENDFOR + RETURN REDUCE_AND(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_AND(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[7:0] AND src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_AND(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0xFF + FI +ENDFOR +dst[7:0] := REDUCE_AND(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed signed 16-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MAX(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(-0x8000) + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 16-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MAX(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(-0x8000) + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 8-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MAX(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(-0x80) + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 8-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MAX(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(-0x80) + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 16-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MAX(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 16-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MAX(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 8-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MAX(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 8-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MAX(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MIN(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(0x7FFF) + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MIN(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(0x7FFF) + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MIN(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(0x7F) + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MIN(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(0x7F) + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MIN(a, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0xFFFF + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +dst[15:0] := REDUCE_MIN(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0xFFFF + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MIN(a, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0xFF + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +dst[7:0] := REDUCE_MIN(a, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0xFF + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Unpack and interleave 32 bits from masks "a" and "b", and store the 64-bit result in "dst". + +dst[31:0] := b[31:0] +dst[63:32] := a[31:0] +dst[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 16 bits from masks "a" and "b", and store the 32-bit result in "dst". + +dst[15:0] := b[15:0] +dst[31:16] := a[15:0] +dst[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 3 + tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 7 + i := j*64 + dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 3 + tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 7 + i := j*64 + tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the uppper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 3 + tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 7 + i := j*64 + tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ + ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ + ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ + ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ + ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +FOR j := 0 to 3 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + dst[i+127:i] := tmp[127:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := idx[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i]*16 + dst[i+15:i] := a[id+15:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". + +FOR j := 0 to 63 + i := j*8 + IF a[i+7] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := 0xFF + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := 0xFFFF + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". + +FOR j := 0 to 31 + i := j*16 + IF a[i+15] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". + +FOR j := 0 to 63 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +FOR j := 0 to 7 + i := j*64 + dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] + dst[i+63:i+16] := 0 +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 8-bit integers in "a" within 128-bit lanes using the control in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[5:0] := b[i+3:i] + (j & 0x30) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[5:0] := b[i+3:i] + (j & 0x30) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Swizzle +
+ + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[5:0] := b[i+3:i] + (j & 0x30) + dst[i+7:i] := a[index*8+7:index*8] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +tmp_dst[319:256] := a[319:256] +tmp_dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] +tmp_dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] +tmp_dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] +tmp_dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] +tmp_dst[447:384] := a[447:384] +tmp_dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] +tmp_dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] +tmp_dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] +tmp_dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +tmp_dst[319:256] := a[319:256] +tmp_dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] +tmp_dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] +tmp_dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] +tmp_dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] +tmp_dst[447:384] := a[447:384] +tmp_dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] +tmp_dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] +tmp_dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] +tmp_dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from from "a" to "dst". + +dst[63:0] := a[63:0] +dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +dst[191:128] := a[191:128] +dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +dst[319:256] := a[319:256] +dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] +dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] +dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] +dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] +dst[447:384] := a[447:384] +dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] +dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] +dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] +dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +tmp_dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] +tmp_dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] +tmp_dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] +tmp_dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] +tmp_dst[383:320] := a[383:320] +tmp_dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] +tmp_dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] +tmp_dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] +tmp_dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] +tmp_dst[511:448] := a[511:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +tmp_dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] +tmp_dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] +tmp_dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] +tmp_dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] +tmp_dst[383:320] := a[383:320] +tmp_dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] +tmp_dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] +tmp_dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] +tmp_dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] +tmp_dst[511:448] := a[511:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst". + +dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +dst[127:64] := a[127:64] +dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +dst[255:192] := a[255:192] +dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] +dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] +dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] +dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] +dst[383:320] := a[383:320] +dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] +dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] +dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] +dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] +dst[511:448] := a[511:448] +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + + Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 32 packed 16-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 64 packed 8-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 32-bit mask from memory into "k". + +k[31:0] := MEM[mem_addr+31:mem_addr] + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 64-bit mask from memory into "k". + +k[63:0] := MEM[mem_addr+63:mem_addr] + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + + Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + + Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + + Store packed 16-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*16 + IF k[j] + MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + + Store packed 8-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 63 + i := j*8 + IF k[j] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 32 packed 16-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 64 packed 8-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 32-bit mask from "a" into memory. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 64-bit mask from "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := ABS(a[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ABS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +tmp_dst[143:128] := Saturate16(a[159:128]) +tmp_dst[159:144] := Saturate16(a[191:160]) +tmp_dst[175:160] := Saturate16(a[223:192]) +tmp_dst[191:176] := Saturate16(a[255:224]) +tmp_dst[207:192] := Saturate16(b[159:128]) +tmp_dst[223:208] := Saturate16(b[191:160]) +tmp_dst[239:224] := Saturate16(b[223:192]) +tmp_dst[255:240] := Saturate16(b[255:224]) +tmp_dst[271:256] := Saturate16(a[287:256]) +tmp_dst[287:272] := Saturate16(a[319:288]) +tmp_dst[303:288] := Saturate16(a[351:320]) +tmp_dst[319:304] := Saturate16(a[383:352]) +tmp_dst[335:320] := Saturate16(b[287:256]) +tmp_dst[351:336] := Saturate16(b[319:288]) +tmp_dst[367:352] := Saturate16(b[351:320]) +tmp_dst[383:368] := Saturate16(b[383:352]) +tmp_dst[399:384] := Saturate16(a[415:384]) +tmp_dst[415:400] := Saturate16(a[447:416]) +tmp_dst[431:416] := Saturate16(a[479:448]) +tmp_dst[447:432] := Saturate16(a[511:480]) +tmp_dst[463:448] := Saturate16(b[415:384]) +tmp_dst[479:464] := Saturate16(b[447:416]) +tmp_dst[495:480] := Saturate16(b[479:448]) +tmp_dst[511:496] := Saturate16(b[511:480]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +tmp_dst[143:128] := Saturate16(a[159:128]) +tmp_dst[159:144] := Saturate16(a[191:160]) +tmp_dst[175:160] := Saturate16(a[223:192]) +tmp_dst[191:176] := Saturate16(a[255:224]) +tmp_dst[207:192] := Saturate16(b[159:128]) +tmp_dst[223:208] := Saturate16(b[191:160]) +tmp_dst[239:224] := Saturate16(b[223:192]) +tmp_dst[255:240] := Saturate16(b[255:224]) +tmp_dst[271:256] := Saturate16(a[287:256]) +tmp_dst[287:272] := Saturate16(a[319:288]) +tmp_dst[303:288] := Saturate16(a[351:320]) +tmp_dst[319:304] := Saturate16(a[383:352]) +tmp_dst[335:320] := Saturate16(b[287:256]) +tmp_dst[351:336] := Saturate16(b[319:288]) +tmp_dst[367:352] := Saturate16(b[351:320]) +tmp_dst[383:368] := Saturate16(b[383:352]) +tmp_dst[399:384] := Saturate16(a[415:384]) +tmp_dst[415:400] := Saturate16(a[447:416]) +tmp_dst[431:416] := Saturate16(a[479:448]) +tmp_dst[447:432] := Saturate16(a[511:480]) +tmp_dst[463:448] := Saturate16(b[415:384]) +tmp_dst[479:464] := Saturate16(b[447:416]) +tmp_dst[495:480] := Saturate16(b[479:448]) +tmp_dst[511:496] := Saturate16(b[511:480]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(a[95:64]) +dst[63:48] := Saturate16(a[127:96]) +dst[79:64] := Saturate16(b[31:0]) +dst[95:80] := Saturate16(b[63:32]) +dst[111:96] := Saturate16(b[95:64]) +dst[127:112] := Saturate16(b[127:96]) +dst[143:128] := Saturate16(a[159:128]) +dst[159:144] := Saturate16(a[191:160]) +dst[175:160] := Saturate16(a[223:192]) +dst[191:176] := Saturate16(a[255:224]) +dst[207:192] := Saturate16(b[159:128]) +dst[223:208] := Saturate16(b[191:160]) +dst[239:224] := Saturate16(b[223:192]) +dst[255:240] := Saturate16(b[255:224]) +dst[271:256] := Saturate16(a[287:256]) +dst[287:272] := Saturate16(a[319:288]) +dst[303:288] := Saturate16(a[351:320]) +dst[319:304] := Saturate16(a[383:352]) +dst[335:320] := Saturate16(b[287:256]) +dst[351:336] := Saturate16(b[319:288]) +dst[367:352] := Saturate16(b[351:320]) +dst[383:368] := Saturate16(b[383:352]) +dst[399:384] := Saturate16(a[415:384]) +dst[415:400] := Saturate16(a[447:416]) +dst[431:416] := Saturate16(a[479:448]) +dst[447:432] := Saturate16(a[511:480]) +dst[463:448] := Saturate16(b[415:384]) +dst[479:464] := Saturate16(b[447:416]) +dst[495:480] := Saturate16(b[479:448]) +dst[511:496] := Saturate16(b[511:480]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +tmp_dst[135:128] := Saturate8(a[143:128]) +tmp_dst[143:136] := Saturate8(a[159:144]) +tmp_dst[151:144] := Saturate8(a[175:160]) +tmp_dst[159:152] := Saturate8(a[191:176]) +tmp_dst[167:160] := Saturate8(a[207:192]) +tmp_dst[175:168] := Saturate8(a[223:208]) +tmp_dst[183:176] := Saturate8(a[239:224]) +tmp_dst[191:184] := Saturate8(a[255:240]) +tmp_dst[199:192] := Saturate8(b[143:128]) +tmp_dst[207:200] := Saturate8(b[159:144]) +tmp_dst[215:208] := Saturate8(b[175:160]) +tmp_dst[223:216] := Saturate8(b[191:176]) +tmp_dst[231:224] := Saturate8(b[207:192]) +tmp_dst[239:232] := Saturate8(b[223:208]) +tmp_dst[247:240] := Saturate8(b[239:224]) +tmp_dst[255:248] := Saturate8(b[255:240]) +tmp_dst[263:256] := Saturate8(a[271:256]) +tmp_dst[271:264] := Saturate8(a[287:272]) +tmp_dst[279:272] := Saturate8(a[303:288]) +tmp_dst[287:280] := Saturate8(a[319:304]) +tmp_dst[295:288] := Saturate8(a[335:320]) +tmp_dst[303:296] := Saturate8(a[351:336]) +tmp_dst[311:304] := Saturate8(a[367:352]) +tmp_dst[319:312] := Saturate8(a[383:368]) +tmp_dst[327:320] := Saturate8(b[271:256]) +tmp_dst[335:328] := 
Saturate8(b[287:272]) +tmp_dst[343:336] := Saturate8(b[303:288]) +tmp_dst[351:344] := Saturate8(b[319:304]) +tmp_dst[359:352] := Saturate8(b[335:320]) +tmp_dst[367:360] := Saturate8(b[351:336]) +tmp_dst[375:368] := Saturate8(b[367:352]) +tmp_dst[383:376] := Saturate8(b[383:368]) +tmp_dst[391:384] := Saturate8(a[399:384]) +tmp_dst[399:392] := Saturate8(a[415:400]) +tmp_dst[407:400] := Saturate8(a[431:416]) +tmp_dst[415:408] := Saturate8(a[447:432]) +tmp_dst[423:416] := Saturate8(a[463:448]) +tmp_dst[431:424] := Saturate8(a[479:464]) +tmp_dst[439:432] := Saturate8(a[495:480]) +tmp_dst[447:440] := Saturate8(a[511:496]) +tmp_dst[455:448] := Saturate8(b[399:384]) +tmp_dst[463:456] := Saturate8(b[415:400]) +tmp_dst[471:464] := Saturate8(b[431:416]) +tmp_dst[479:472] := Saturate8(b[447:432]) +tmp_dst[487:480] := Saturate8(b[463:448]) +tmp_dst[495:488] := Saturate8(b[479:464]) +tmp_dst[503:496] := Saturate8(b[495:480]) +tmp_dst[511:504] := Saturate8(b[511:496]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +tmp_dst[135:128] := Saturate8(a[143:128]) +tmp_dst[143:136] := Saturate8(a[159:144]) +tmp_dst[151:144] := Saturate8(a[175:160]) +tmp_dst[159:152] := Saturate8(a[191:176]) +tmp_dst[167:160] := Saturate8(a[207:192]) +tmp_dst[175:168] := Saturate8(a[223:208]) +tmp_dst[183:176] := Saturate8(a[239:224]) +tmp_dst[191:184] := Saturate8(a[255:240]) +tmp_dst[199:192] := Saturate8(b[143:128]) +tmp_dst[207:200] := Saturate8(b[159:144]) +tmp_dst[215:208] := Saturate8(b[175:160]) +tmp_dst[223:216] := Saturate8(b[191:176]) +tmp_dst[231:224] := Saturate8(b[207:192]) +tmp_dst[239:232] := Saturate8(b[223:208]) +tmp_dst[247:240] := Saturate8(b[239:224]) +tmp_dst[255:248] := Saturate8(b[255:240]) +tmp_dst[263:256] := Saturate8(a[271:256]) +tmp_dst[271:264] := Saturate8(a[287:272]) +tmp_dst[279:272] := Saturate8(a[303:288]) +tmp_dst[287:280] := Saturate8(a[319:304]) +tmp_dst[295:288] := Saturate8(a[335:320]) +tmp_dst[303:296] := Saturate8(a[351:336]) +tmp_dst[311:304] := Saturate8(a[367:352]) +tmp_dst[319:312] := Saturate8(a[383:368]) +tmp_dst[327:320] := Saturate8(b[271:256]) +tmp_dst[335:328] := 
Saturate8(b[287:272]) +tmp_dst[343:336] := Saturate8(b[303:288]) +tmp_dst[351:344] := Saturate8(b[319:304]) +tmp_dst[359:352] := Saturate8(b[335:320]) +tmp_dst[367:360] := Saturate8(b[351:336]) +tmp_dst[375:368] := Saturate8(b[367:352]) +tmp_dst[383:376] := Saturate8(b[383:368]) +tmp_dst[391:384] := Saturate8(a[399:384]) +tmp_dst[399:392] := Saturate8(a[415:400]) +tmp_dst[407:400] := Saturate8(a[431:416]) +tmp_dst[415:408] := Saturate8(a[447:432]) +tmp_dst[423:416] := Saturate8(a[463:448]) +tmp_dst[431:424] := Saturate8(a[479:464]) +tmp_dst[439:432] := Saturate8(a[495:480]) +tmp_dst[447:440] := Saturate8(a[511:496]) +tmp_dst[455:448] := Saturate8(b[399:384]) +tmp_dst[463:456] := Saturate8(b[415:400]) +tmp_dst[471:464] := Saturate8(b[431:416]) +tmp_dst[479:472] := Saturate8(b[447:432]) +tmp_dst[487:480] := Saturate8(b[463:448]) +tmp_dst[495:488] := Saturate8(b[479:464]) +tmp_dst[503:496] := Saturate8(b[495:480]) +tmp_dst[511:504] := Saturate8(b[511:496]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(a[79:64]) +dst[47:40] := Saturate8(a[95:80]) +dst[55:48] := Saturate8(a[111:96]) +dst[63:56] := Saturate8(a[127:112]) +dst[71:64] := Saturate8(b[15:0]) +dst[79:72] := Saturate8(b[31:16]) +dst[87:80] := Saturate8(b[47:32]) +dst[95:88] := Saturate8(b[63:48]) +dst[103:96] := Saturate8(b[79:64]) +dst[111:104] := Saturate8(b[95:80]) +dst[119:112] := Saturate8(b[111:96]) +dst[127:120] := Saturate8(b[127:112]) +dst[135:128] := Saturate8(a[143:128]) +dst[143:136] := Saturate8(a[159:144]) +dst[151:144] := Saturate8(a[175:160]) +dst[159:152] := Saturate8(a[191:176]) +dst[167:160] := Saturate8(a[207:192]) +dst[175:168] := Saturate8(a[223:208]) +dst[183:176] := Saturate8(a[239:224]) +dst[191:184] := Saturate8(a[255:240]) +dst[199:192] := Saturate8(b[143:128]) +dst[207:200] := Saturate8(b[159:144]) +dst[215:208] := Saturate8(b[175:160]) +dst[223:216] := Saturate8(b[191:176]) +dst[231:224] := Saturate8(b[207:192]) +dst[239:232] := Saturate8(b[223:208]) +dst[247:240] := Saturate8(b[239:224]) +dst[255:248] := Saturate8(b[255:240]) +dst[263:256] := Saturate8(a[271:256]) +dst[271:264] := Saturate8(a[287:272]) +dst[279:272] := Saturate8(a[303:288]) +dst[287:280] := Saturate8(a[319:304]) +dst[295:288] := Saturate8(a[335:320]) +dst[303:296] := Saturate8(a[351:336]) +dst[311:304] := Saturate8(a[367:352]) +dst[319:312] := Saturate8(a[383:368]) +dst[327:320] := Saturate8(b[271:256]) +dst[335:328] := Saturate8(b[287:272]) +dst[343:336] := Saturate8(b[303:288]) +dst[351:344] := Saturate8(b[319:304]) +dst[359:352] := Saturate8(b[335:320]) +dst[367:360] := Saturate8(b[351:336]) +dst[375:368] := Saturate8(b[367:352]) +dst[383:376] := Saturate8(b[383:368]) +dst[391:384] := 
Saturate8(a[399:384]) +dst[399:392] := Saturate8(a[415:400]) +dst[407:400] := Saturate8(a[431:416]) +dst[415:408] := Saturate8(a[447:432]) +dst[423:416] := Saturate8(a[463:448]) +dst[431:424] := Saturate8(a[479:464]) +dst[439:432] := Saturate8(a[495:480]) +dst[447:440] := Saturate8(a[511:496]) +dst[455:448] := Saturate8(b[399:384]) +dst[463:456] := Saturate8(b[415:400]) +dst[471:464] := Saturate8(b[431:416]) +dst[479:472] := Saturate8(b[447:432]) +dst[487:480] := Saturate8(b[463:448]) +dst[495:488] := Saturate8(b[479:464]) +dst[503:496] := Saturate8(b[495:480]) +dst[511:504] := Saturate8(b[511:496]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +tmp_dst[143:128] := SaturateU16(a[159:128]) +tmp_dst[159:144] := SaturateU16(a[191:160]) +tmp_dst[175:160] := SaturateU16(a[223:192]) +tmp_dst[191:176] := SaturateU16(a[255:224]) +tmp_dst[207:192] := SaturateU16(b[159:128]) +tmp_dst[223:208] := SaturateU16(b[191:160]) +tmp_dst[239:224] := SaturateU16(b[223:192]) +tmp_dst[255:240] := SaturateU16(b[255:224]) +tmp_dst[271:256] := SaturateU16(a[287:256]) +tmp_dst[287:272] := SaturateU16(a[319:288]) +tmp_dst[303:288] := SaturateU16(a[351:320]) +tmp_dst[319:304] := SaturateU16(a[383:352]) +tmp_dst[335:320] := SaturateU16(b[287:256]) +tmp_dst[351:336] := SaturateU16(b[319:288]) +tmp_dst[367:352] := SaturateU16(b[351:320]) +tmp_dst[383:368] := SaturateU16(b[383:352]) +tmp_dst[399:384] := SaturateU16(a[415:384]) +tmp_dst[415:400] := SaturateU16(a[447:416]) +tmp_dst[431:416] := SaturateU16(a[479:448]) +tmp_dst[447:432] := SaturateU16(a[511:480]) +tmp_dst[463:448] := SaturateU16(b[415:384]) +tmp_dst[479:464] := SaturateU16(b[447:416]) +tmp_dst[495:480] := SaturateU16(b[479:448]) +tmp_dst[511:496] := SaturateU16(b[511:480]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +tmp_dst[143:128] := SaturateU16(a[159:128]) +tmp_dst[159:144] := SaturateU16(a[191:160]) +tmp_dst[175:160] := SaturateU16(a[223:192]) +tmp_dst[191:176] := SaturateU16(a[255:224]) +tmp_dst[207:192] := SaturateU16(b[159:128]) +tmp_dst[223:208] := SaturateU16(b[191:160]) +tmp_dst[239:224] := SaturateU16(b[223:192]) +tmp_dst[255:240] := SaturateU16(b[255:224]) +tmp_dst[271:256] := SaturateU16(a[287:256]) +tmp_dst[287:272] := SaturateU16(a[319:288]) +tmp_dst[303:288] := SaturateU16(a[351:320]) +tmp_dst[319:304] := SaturateU16(a[383:352]) +tmp_dst[335:320] := SaturateU16(b[287:256]) +tmp_dst[351:336] := SaturateU16(b[319:288]) +tmp_dst[367:352] := SaturateU16(b[351:320]) +tmp_dst[383:368] := SaturateU16(b[383:352]) +tmp_dst[399:384] := SaturateU16(a[415:384]) +tmp_dst[415:400] := SaturateU16(a[447:416]) +tmp_dst[431:416] := SaturateU16(a[479:448]) +tmp_dst[447:432] := SaturateU16(a[511:480]) +tmp_dst[463:448] := SaturateU16(b[415:384]) +tmp_dst[479:464] := SaturateU16(b[447:416]) +tmp_dst[495:480] := SaturateU16(b[479:448]) +tmp_dst[511:496] := SaturateU16(b[511:480]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". + +dst[15:0] := SaturateU16(a[31:0]) +dst[31:16] := SaturateU16(a[63:32]) +dst[47:32] := SaturateU16(a[95:64]) +dst[63:48] := SaturateU16(a[127:96]) +dst[79:64] := SaturateU16(b[31:0]) +dst[95:80] := SaturateU16(b[63:32]) +dst[111:96] := SaturateU16(b[95:64]) +dst[127:112] := SaturateU16(b[127:96]) +dst[143:128] := SaturateU16(a[159:128]) +dst[159:144] := SaturateU16(a[191:160]) +dst[175:160] := SaturateU16(a[223:192]) +dst[191:176] := SaturateU16(a[255:224]) +dst[207:192] := SaturateU16(b[159:128]) +dst[223:208] := SaturateU16(b[191:160]) +dst[239:224] := SaturateU16(b[223:192]) +dst[255:240] := SaturateU16(b[255:224]) +dst[271:256] := SaturateU16(a[287:256]) +dst[287:272] := SaturateU16(a[319:288]) +dst[303:288] := SaturateU16(a[351:320]) +dst[319:304] := SaturateU16(a[383:352]) +dst[335:320] := SaturateU16(b[287:256]) +dst[351:336] := SaturateU16(b[319:288]) +dst[367:352] := SaturateU16(b[351:320]) +dst[383:368] := SaturateU16(b[383:352]) +dst[399:384] := SaturateU16(a[415:384]) +dst[415:400] := SaturateU16(a[447:416]) +dst[431:416] := SaturateU16(a[479:448]) +dst[447:432] := SaturateU16(a[511:480]) +dst[463:448] := SaturateU16(b[415:384]) +dst[479:464] := SaturateU16(b[447:416]) +dst[495:480] := SaturateU16(b[479:448]) +dst[511:496] := SaturateU16(b[511:480]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +tmp_dst[135:128] := SaturateU8(a[143:128]) +tmp_dst[143:136] := SaturateU8(a[159:144]) +tmp_dst[151:144] := SaturateU8(a[175:160]) +tmp_dst[159:152] := SaturateU8(a[191:176]) +tmp_dst[167:160] := SaturateU8(a[207:192]) +tmp_dst[175:168] := SaturateU8(a[223:208]) +tmp_dst[183:176] := SaturateU8(a[239:224]) +tmp_dst[191:184] := SaturateU8(a[255:240]) +tmp_dst[199:192] := SaturateU8(b[143:128]) +tmp_dst[207:200] := SaturateU8(b[159:144]) +tmp_dst[215:208] := SaturateU8(b[175:160]) +tmp_dst[223:216] := SaturateU8(b[191:176]) +tmp_dst[231:224] := SaturateU8(b[207:192]) +tmp_dst[239:232] := SaturateU8(b[223:208]) +tmp_dst[247:240] := SaturateU8(b[239:224]) +tmp_dst[255:248] := SaturateU8(b[255:240]) +tmp_dst[263:256] := SaturateU8(a[271:256]) +tmp_dst[271:264] := SaturateU8(a[287:272]) +tmp_dst[279:272] := SaturateU8(a[303:288]) +tmp_dst[287:280] := SaturateU8(a[319:304]) +tmp_dst[295:288] := SaturateU8(a[335:320]) +tmp_dst[303:296] := SaturateU8(a[351:336]) +tmp_dst[311:304] := SaturateU8(a[367:352]) +tmp_dst[319:312] := SaturateU8(a[383:368]) +tmp_dst[327:320] := 
SaturateU8(b[271:256]) +tmp_dst[335:328] := SaturateU8(b[287:272]) +tmp_dst[343:336] := SaturateU8(b[303:288]) +tmp_dst[351:344] := SaturateU8(b[319:304]) +tmp_dst[359:352] := SaturateU8(b[335:320]) +tmp_dst[367:360] := SaturateU8(b[351:336]) +tmp_dst[375:368] := SaturateU8(b[367:352]) +tmp_dst[383:376] := SaturateU8(b[383:368]) +tmp_dst[391:384] := SaturateU8(a[399:384]) +tmp_dst[399:392] := SaturateU8(a[415:400]) +tmp_dst[407:400] := SaturateU8(a[431:416]) +tmp_dst[415:408] := SaturateU8(a[447:432]) +tmp_dst[423:416] := SaturateU8(a[463:448]) +tmp_dst[431:424] := SaturateU8(a[479:464]) +tmp_dst[439:432] := SaturateU8(a[495:480]) +tmp_dst[447:440] := SaturateU8(a[511:496]) +tmp_dst[455:448] := SaturateU8(b[399:384]) +tmp_dst[463:456] := SaturateU8(b[415:400]) +tmp_dst[471:464] := SaturateU8(b[431:416]) +tmp_dst[479:472] := SaturateU8(b[447:432]) +tmp_dst[487:480] := SaturateU8(b[463:448]) +tmp_dst[495:488] := SaturateU8(b[479:464]) +tmp_dst[503:496] := SaturateU8(b[495:480]) +tmp_dst[511:504] := SaturateU8(b[511:496]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +tmp_dst[135:128] := SaturateU8(a[143:128]) +tmp_dst[143:136] := SaturateU8(a[159:144]) +tmp_dst[151:144] := SaturateU8(a[175:160]) +tmp_dst[159:152] := SaturateU8(a[191:176]) +tmp_dst[167:160] := SaturateU8(a[207:192]) +tmp_dst[175:168] := SaturateU8(a[223:208]) +tmp_dst[183:176] := SaturateU8(a[239:224]) +tmp_dst[191:184] := SaturateU8(a[255:240]) +tmp_dst[199:192] := SaturateU8(b[143:128]) +tmp_dst[207:200] := SaturateU8(b[159:144]) +tmp_dst[215:208] := SaturateU8(b[175:160]) +tmp_dst[223:216] := SaturateU8(b[191:176]) +tmp_dst[231:224] := SaturateU8(b[207:192]) +tmp_dst[239:232] := SaturateU8(b[223:208]) +tmp_dst[247:240] := SaturateU8(b[239:224]) +tmp_dst[255:248] := SaturateU8(b[255:240]) +tmp_dst[263:256] := SaturateU8(a[271:256]) +tmp_dst[271:264] := SaturateU8(a[287:272]) +tmp_dst[279:272] := SaturateU8(a[303:288]) +tmp_dst[287:280] := SaturateU8(a[319:304]) +tmp_dst[295:288] := SaturateU8(a[335:320]) +tmp_dst[303:296] := SaturateU8(a[351:336]) +tmp_dst[311:304] := SaturateU8(a[367:352]) +tmp_dst[319:312] := SaturateU8(a[383:368]) +tmp_dst[327:320] := 
SaturateU8(b[271:256]) +tmp_dst[335:328] := SaturateU8(b[287:272]) +tmp_dst[343:336] := SaturateU8(b[303:288]) +tmp_dst[351:344] := SaturateU8(b[319:304]) +tmp_dst[359:352] := SaturateU8(b[335:320]) +tmp_dst[367:360] := SaturateU8(b[351:336]) +tmp_dst[375:368] := SaturateU8(b[367:352]) +tmp_dst[383:376] := SaturateU8(b[383:368]) +tmp_dst[391:384] := SaturateU8(a[399:384]) +tmp_dst[399:392] := SaturateU8(a[415:400]) +tmp_dst[407:400] := SaturateU8(a[431:416]) +tmp_dst[415:408] := SaturateU8(a[447:432]) +tmp_dst[423:416] := SaturateU8(a[463:448]) +tmp_dst[431:424] := SaturateU8(a[479:464]) +tmp_dst[439:432] := SaturateU8(a[495:480]) +tmp_dst[447:440] := SaturateU8(a[511:496]) +tmp_dst[455:448] := SaturateU8(b[399:384]) +tmp_dst[463:456] := SaturateU8(b[415:400]) +tmp_dst[471:464] := SaturateU8(b[431:416]) +tmp_dst[479:472] := SaturateU8(b[447:432]) +tmp_dst[487:480] := SaturateU8(b[463:448]) +tmp_dst[495:488] := SaturateU8(b[479:464]) +tmp_dst[503:496] := SaturateU8(b[495:480]) +tmp_dst[511:504] := SaturateU8(b[511:496]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(a[79:64]) +dst[47:40] := SaturateU8(a[95:80]) +dst[55:48] := SaturateU8(a[111:96]) +dst[63:56] := SaturateU8(a[127:112]) +dst[71:64] := SaturateU8(b[15:0]) +dst[79:72] := SaturateU8(b[31:16]) +dst[87:80] := SaturateU8(b[47:32]) +dst[95:88] := SaturateU8(b[63:48]) +dst[103:96] := SaturateU8(b[79:64]) +dst[111:104] := SaturateU8(b[95:80]) +dst[119:112] := SaturateU8(b[111:96]) +dst[127:120] := SaturateU8(b[127:112]) +dst[135:128] := SaturateU8(a[143:128]) +dst[143:136] := SaturateU8(a[159:144]) +dst[151:144] := SaturateU8(a[175:160]) +dst[159:152] := SaturateU8(a[191:176]) +dst[167:160] := SaturateU8(a[207:192]) +dst[175:168] := SaturateU8(a[223:208]) +dst[183:176] := SaturateU8(a[239:224]) +dst[191:184] := SaturateU8(a[255:240]) +dst[199:192] := SaturateU8(b[143:128]) +dst[207:200] := SaturateU8(b[159:144]) +dst[215:208] := SaturateU8(b[175:160]) +dst[223:216] := SaturateU8(b[191:176]) +dst[231:224] := SaturateU8(b[207:192]) +dst[239:232] := SaturateU8(b[223:208]) +dst[247:240] := SaturateU8(b[239:224]) +dst[255:248] := SaturateU8(b[255:240]) +dst[263:256] := SaturateU8(a[271:256]) +dst[271:264] := SaturateU8(a[287:272]) +dst[279:272] := SaturateU8(a[303:288]) +dst[287:280] := SaturateU8(a[319:304]) +dst[295:288] := SaturateU8(a[335:320]) +dst[303:296] := SaturateU8(a[351:336]) +dst[311:304] := SaturateU8(a[367:352]) +dst[319:312] := SaturateU8(a[383:368]) +dst[327:320] := SaturateU8(b[271:256]) +dst[335:328] := SaturateU8(b[287:272]) +dst[343:336] := SaturateU8(b[303:288]) +dst[351:344] := SaturateU8(b[319:304]) +dst[359:352] := SaturateU8(b[335:320]) +dst[367:360] := SaturateU8(b[351:336]) +dst[375:368] := SaturateU8(b[367:352]) 
+dst[383:376] := SaturateU8(b[383:368]) +dst[391:384] := SaturateU8(a[399:384]) +dst[399:392] := SaturateU8(a[415:400]) +dst[407:400] := SaturateU8(a[431:416]) +dst[415:408] := SaturateU8(a[447:432]) +dst[423:416] := SaturateU8(a[463:448]) +dst[431:424] := SaturateU8(a[479:464]) +dst[439:432] := SaturateU8(a[495:480]) +dst[447:440] := SaturateU8(a[511:496]) +dst[455:448] := SaturateU8(b[399:384]) +dst[463:456] := SaturateU8(b[415:400]) +dst[471:464] := SaturateU8(b[431:416]) +dst[479:472] := SaturateU8(b[447:432]) +dst[487:480] := SaturateU8(b[463:448]) +dst[495:488] := SaturateU8(b[479:464]) +dst[503:496] := SaturateU8(b[495:480]) +dst[511:504] := SaturateU8(b[511:496]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := 16*j + l := 8*j + dst[l+7:l] := Saturate8(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + l := j*16 + dst[l+15:l] := SignExtend16(a[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := 16*j + l := 8*j + dst[l+7:l] := SaturateU8(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 31 + i := 16*j + l := 8*j + dst[l+7:l] := Truncate8(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + l := j*16 + dst[l+15:l] := ZeroExtend16(a[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Convert +
+ + + + + + Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Set +
+ + + + + Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Set +
+ + + + + + Broadcast 16-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Set +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Set +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 63 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 63 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 31 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 63 + i := j*8 + IF k1[j] + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 63 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 31 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) +dst[255:128] := a[255:128] << (tmp*8) +dst[383:256] := a[383:256] << (tmp*8) +dst[511:384] := a[511:384] << (tmp*8) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) +dst[255:128] := a[255:128] >> (tmp*8) +dst[383:256] := a[383:256] >> (tmp*8) +dst[511:384] := a[511:384] >> (tmp*8) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Add 32-bit masks in "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] + b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Add 64-bit masks in "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] + b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] AND b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] AND b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 32-bit masks "a" and then AND with "b", and store the result in "k". + +k[31:0] := (NOT a[31:0]) AND b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 64-bit masks "a" and then AND with "b", and store the result in "k". + +k[63:0] := (NOT a[63:0]) AND b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 32-bit mask "a", and store the result in "k". + +k[31:0] := NOT a[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 64-bit mask "a", and store the result in "k". + +k[63:0] := NOT a[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] OR b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] OR b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := NOT (a[31:0] XOR b[31:0]) +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := NOT (a[63:0] XOR b[63:0]) +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] XOR b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] XOR b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 32-bit mask "a" left by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 31 + k[31:0] := a[31:0] << count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 64-bit mask "a" left by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 63 + k[63:0] := a[63:0] << count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 32-bit mask "a" right by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 31 + k[31:0] := a[31:0] >> count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 64-bit mask "a" right by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 63 + k[63:0] := a[63:0] >> count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[31:0] := a[31:0] OR b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[31:0] == 0xFFFFFFFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := a[31:0] OR b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := a[31:0] OR b[31:0] +IF tmp[31:0] == 0xFFFFFFFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[63:0] := a[63:0] OR b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := a[63:0] OR b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := a[63:0] OR b[63:0] +IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[31:0] := a[31:0] AND b[31:0] +IF tmp1[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[31:0] := (NOT a[31:0]) AND b[31:0] +IF tmp2[31:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := a[31:0] AND b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 32-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := (NOT a[31:0]) AND b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[63:0] := a[63:0] AND b[63:0] +IF tmp1[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[63:0] := (NOT a[63:0]) AND b[63:0] +IF tmp2[63:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := a[63:0] AND b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 64-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := (NOT a[63:0]) AND b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert 32-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend32(a[31:0]) + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert 64-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend64(a[63:0]) + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into a 32-bit mask, and store the result in "k". + +k := ZeroExtend32(a[31:0]) + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into a 64-bit mask, and store the result in "k". + +k := ZeroExtend64(a[63:0]) + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Broadcast the low 8-bits from input mask "k" to all 64-bit elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ZeroExtend64(k[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low 8-bits from input mask "k" to all 64-bit elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ZeroExtend64(k[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low 16-bits from input mask "k" to all 32-bit elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ZeroExtend32(k[15:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low 16-bits from input mask "k" to all 32-bit elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ZeroExtend32(k[15:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*32 + FOR k := 0 to j-1 + m := k*32 + dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*32 + FOR k := 0 to j-1 + m := k*32 + dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*64 + FOR k := 0 to j-1 + m := k*64 + dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 1 + i := j*64 + FOR k := 0 to j-1 + m := k*64 + dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Broadcast the low 8-bits from input mask "k" to all 64-bit elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ZeroExtend64(k[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low 16-bits from input mask "k" to all 32-bit elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ZeroExtend32(k[15:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Swizzle +
+ + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 15 + i := j*32 + FOR k := 0 to j-1 + m := k*32 + dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*64 + FOR k := 0 to j-1 + m := k*64 + dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst. + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst. + +FOR j := 0 to 3 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 3 + i := j*64 + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 1 + i := j*64 + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) +ENDFOR +k[MAX:2] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 7 + i := j*32 + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 3 + i := j*32 + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE imm8[0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE imm8[0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". + +FOR j := 0 to 7 + i := j*32 + IF a[i+31] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". + +FOR j := 0 to 3 + i := j*32 + IF a[i+31] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := 0xFFFFFFFF + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := 0xFFFFFFFF + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := 0xFFFFFFFFFFFFFFFF + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := 0xFFFFFFFFFFFFFFFF + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". + +FOR j := 0 to 3 + i := j*64 + IF a[i+63] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". + +FOR j := 0 to 1 + i := j*64 + IF a[i+63] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[63:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst. + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 7 + i := j*64 + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 15 + i := j*32 + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) +ENDFOR +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". + [fpclass_note] + k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0]) +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + [fpclass_note] + IF k1[0] + k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0]) +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". + [fpclass_note] + k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0]) +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + [fpclass_note] + IF k1[0] + k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0]) +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[0]) OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE imm8[1:0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE imm8[0] OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE imm8[1:0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". + +FOR j := 0 to 15 + i := j*32 + IF a[i+31] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 0xFFFFFFFF + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 0xFFFFFFFFFFFFFFFF + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". + +FOR j := 0 to 7 + i := j*64 + IF a[i+63] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Arithmetic +
+ + + + + Add 8-bit masks in "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] + b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Add 16-bit masks in "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] + b[15:0] +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] AND b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 8-bit masks "a" and then AND with "b", and store the result in "k". + +k[7:0] := (NOT a[7:0]) AND b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 8-bit mask "a", and store the result in "k". + +k[7:0] := NOT a[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] OR b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := NOT (a[7:0] XOR b[7:0]) +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] XOR b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 8-bit mask "a" left by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 7 + k[7:0] := a[7:0] << count[7:0] +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 8-bit mask "a" right by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 7 + k[7:0] := a[7:0] >> count[7:0] +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[7:0] := a[7:0] OR b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[7:0] == 0xFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := a[7:0] OR b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := a[7:0] OR b[7:0] +IF tmp[7:0] == 0xFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[7:0] := a[7:0] AND b[7:0] +IF tmp1[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[7:0] := (NOT a[7:0]) AND b[7:0] +IF tmp2[7:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := a[7:0] AND b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 8-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := (NOT a[7:0]) AND b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[15:0] := a[15:0] AND b[15:0] +IF tmp1[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[15:0] := (NOT a[15:0]) AND b[15:0] +IF tmp2[15:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := a[15:0] AND b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 16-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := (NOT a[15:0]) AND b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Convert 8-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend32(a[7:0]) + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into an 8-bit mask, and store the result in "k". + +k := a[7:0] + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Load 8-bit mask from memory into "k". + +k[7:0] := MEM[mem_addr+7:mem_addr] + + + AVX512DQ +
immintrin.h
+ Load +
+ + + + + Store 8-bit mask from "a" into memory. + +MEM[mem_addr+7:mem_addr] := a[7:0] + + + AVX512DQ +
immintrin.h
+ Store +
+ + + + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ACOS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ACOS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ACOS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ACOS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ACOSH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ACOSH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ACOSH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ACOSH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ASIN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ASIN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ASIN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ASIN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ASINH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ASINH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ASINH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ASINH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ATAN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ATAN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ATAN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ATAN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ATANH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ATANH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperblic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ATANH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ATANH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := COS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := COS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := COSD(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := COSD(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := COSD(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := COSD(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := COSH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := COSH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := COSH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := COSH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SIN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SIN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SINH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SINH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SINH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SINH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SIND(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SIND(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SIND(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SIND(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TAN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TAN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TAN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TAN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TAND(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TAND(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TAND(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TAND(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TANH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TANH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TANH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TANH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) + ELSE + dst[i+63:i] := sin_src[i+63:i] + MEM[mem_addr+i+63:mem_addr+i] := cos_src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) + ELSE + dst[i+31:i] := sin_src[i+31:i] + MEM[mem_addr+i+31:mem_addr+i] := cos_src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := CubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := CubeRoot(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := CubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := CubeRoot(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(10.0, a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(10.0, a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(2.0, a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(2.0, a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(e, a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(e), a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := InvSQRT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := InvSQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := InvSQRT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := InvSQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(1.0 + a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(1.0 + a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(1.0 + a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(1.0 + a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Computes the reciprocal of packed double-precision (64-bit) floating-point elements in "a", storing the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Computes the reciprocal of packed double-precision (64-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Computes the reciprocal of packed single-precision (32-bit) floating-point elements in "a", storing the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Computes the reciprocal of packed single-precision (32-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := CDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := CDFNormal(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := CDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := CDFNormal(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ERF(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ERF(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ERF(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ERF(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := 1.0 - ERF(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 1.0 - ERF(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := 1.0 / ERF(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 1.0 / ERF(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i])) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := CEIL(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := CEIL(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FLOOR(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FLOOR(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds each packed double-precision (64-bit) floating-point element in "a" to the nearest integer value and stores the results as packed double-precision floating-point elements in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := NearbyInt(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds each packed double-precision (64-bit) floating-point element in "a" to the nearest integer value and stores the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := NearbyInt(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds each packed single-precision (32-bit) floating-point element in "a" to the nearest integer value and stores the results as packed single-precision floating-point elements in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := NearbyInt(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds each packed single-precision (32-bit) floating-point element in "a" to the nearest integer value and stores the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := NearbyInt(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds the packed double-precision (64-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RoundToNearestEven(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds the packed double-precision (64-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundToNearestEven(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds the packed single-precision (32-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RoundToNearestEven(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds the packed single-precision (32-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundToNearestEven(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ROUND(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TRUNCATE(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TRUNCATE(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TRUNCATE(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TRUNCATE(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 63 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 63 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 31 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 63 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 63 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 31 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). RM. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ABS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ABS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] :=0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst". + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (32*imm8[2:0]) +dst[255:0] := temp[255:0] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (32*imm8[2:0]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (32*imm8[2:0]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst". + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (32*imm8[1:0]) +dst[127:0] := temp[127:0] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (32*imm8[1:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (32*imm8[1:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst". + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (64*imm8[1:0]) +dst[255:0] := temp[255:0] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (64*imm8[1:0]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (64*imm8[1:0]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst". + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (64*imm8[0]) +dst[127:0] := temp[127:0] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (64*imm8[0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (64*imm8[0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN: j := 0 + SNAN_TOKEN: j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set) + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx". + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" across lanes lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*64 + k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*32 + k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 1 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 1 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := src[m+63:m] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := src[m+63:m] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+31:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[k+31:k] := Truncate32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[k+31:k] := Truncate32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+31:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+63:i]) +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[k+31:k] := Saturate32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[k+31:k] := Saturate32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*16 + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*16 + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+31:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+63:i]) +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[k+31:k] := SaturateU32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[k+31:k] := SaturateU32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in th elow 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memoy into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memoy into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memoy into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memoy into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed 64-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed 32-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed 64-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed 32-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed 64-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed 32-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed 64-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed 32-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*32 + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*32 + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*64 + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 1 + i := j*64 + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst"." + FOR j := 0 to 3 + i := j*128 + a[i+127:i] := ShiftRows(a[i+127:i]) + a[i+127:i] := SubBytes(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + VAES +
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst"." + FOR j := 0 to 3 + i := j*128 + a[i+127:i] := ShiftRows(a[i+127:i]) + a[i+127:i] := SubBytes(a[i+127:i]) + a[i+127:i] := MixColumns(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + VAES +
immintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 3 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + VAES +
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 3 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + a[i+127:i] := InvMixColumns(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + VAES +
immintrin.h
+ Cryptography +
+ + + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] + b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] + b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] / b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] / b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", subtract the lower element in "c" from the negated intermediate result, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] * b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] * b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] - b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] - b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 16-bit mask from "a" into memory. + +MEM[mem_addr+15:mem_addr] := a[15:0] + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits of integer data from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits of integer data from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store the lower double-precision (64-bit) floating-point element from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + MEM[mem_addr+63:mem_addr] := a[63:0] +FI + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store the lower single-precision (32-bit) floating-point element from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + MEM[mem_addr+31:mem_addr] := a[31:0] +FI + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 16-bit mask from memory into "k". + +k[15:0] := MEM[mem_addr+15:mem_addr] + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits of integer data from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits of integer data from memory into "dst" using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[63:0] := MEM[mem_addr+63:mem_addr] +ELSE + dst[63:0] := src[63:0] +FI +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[63:0] := MEM[mem_addr+63:mem_addr] +ELSE + dst[63:0] := 0 +FI +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[31:0] := MEM[mem_addr+31:mem_addr] +ELSE + dst[31:0] := src[31:0] +FI +dst[MAX:32] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[31:0] := MEM[mem_addr+31:mem_addr] +ELSE + dst[31:0] := 0 +FI +dst[MAX:32] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k". + +k[15:0] := (NOT a[15:0]) AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 16-bit mask "a", and store the result in "k". + +k[15:0] := NOT a[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] OR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := NOT (a[15:0] XOR b[15:0]) +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] XOR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 16-bit mask "a" left by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 15 + k[15:0] := a[15:0] << count[7:0] +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 16-bit mask "a" right by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 15 + k[15:0] := a[15:0] >> count[7:0] +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[15:0] := a[15:0] OR b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[15:0] == 0xFFFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := a[15:0] OR b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := a[15:0] OR b[15:0] +IF tmp[15:0] == 0xFFFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Convert 16-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend32(a[15:0]) + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into an 16-bit mask, and store the result in "k". + +k := ZeroExtend16(a[15:0]) + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k". + +k[15:0] := (NOT a[15:0]) AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Copy 16-bit mask "a" to "k". + +k[15:0] := a[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 16-bit mask "a", and store the result in "k". + +k[15:0] := NOT a[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] OR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Unpack and interleave 8 bits from masks "a" and "b", and store the 16-bit result in "k". + +k[7:0] := b[7:0] +k[15:8] := a[7:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := NOT (a[15:0] XOR b[15:0]) +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] XOR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Performs bitwise OR between "k1" and "k2", storing the result in "dst". ZF flag is set if "dst" is 0. + dst[15:0] := k1[15:0] | k2[15:0] +IF dst == 0 + SetZF() +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Performs bitwise OR between "k1" and "k2", storing the result in "dst". CF flag is set if "dst" consists of all 1's. + dst[15:0] := k1[15:0] | k2[15:0] +IF PopCount(dst[15:0]) == 16 + SetCF() +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Converts bit mask "k1" into an integer value, storing the results in "dst". + +dst := ZeroExtend32(k1) + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Converts integer "mask" into bitmask, storing the result in "dst". + +dst := mask[15:0] + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and stores the low 64 bytes (16 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst". + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) +dst[511:0] := temp[511:0] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and stores the low 64 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +IF k[0] + dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +IF k[0] + dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +IF k[0] + dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? 
-INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +IF k[0] + dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0]) +dst[127:32] := b[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0]) +dst[127:32] := b[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +IF k[0] + dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0]) +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := b[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +IF k[0] + dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0]) +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := b[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +IF k[0] + dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := b[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? 
-INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +IF k[0] + dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := b[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + dst[63:0] := ConvertExpFP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + dst[63:0] := ConvertExpFP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + dst[31:0] := ConvertExpFP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + dst[31:0] := ConvertExpFP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + + Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + IF k[0] + dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + IF k[0] + dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + + Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + IF k[0] + dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + IF k[0] + dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SCALE(a[i+63:0], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SCALE(a[i+31:0], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +dst[63:0] := SCALE(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +dst[63:0] := SCALE(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[63:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[63:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[63:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[63:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[63:0] +} +dst[31:0] := SCALE(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[63:0] +} +dst[31:0] := SCALE(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[0]) OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[0]) OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set) + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI +IF (imm8[4] == 0) dst[319:256] := a[319:256]; FI +IF (imm8[4] == 1) dst[319:256] := a[383:320]; FI +IF (imm8[5] == 0) dst[383:320] := a[319:256]; FI +IF (imm8[5] == 1) dst[383:320] := a[383:320]; FI +IF (imm8[6] == 0) dst[447:384] := a[447:384]; FI +IF (imm8[6] == 1) dst[447:384] := a[511:448]; FI +IF (imm8[7] == 0) dst[511:448] := a[447:384]; FI +IF (imm8[7] == 1) dst[511:448] := a[511:448]; FI +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +IF (b[1] == 0) dst[63:0] := a[63:0]; FI +IF (b[1] == 1) dst[63:0] := a[127:64]; FI +IF (b[65] == 0) dst[127:64] := a[63:0]; FI +IF (b[65] == 1) dst[127:64] := a[127:64]; FI +IF (b[129] == 0) dst[191:128] := a[191:128]; FI +IF (b[129] == 1) dst[191:128] := a[255:192]; FI +IF (b[193] == 0) dst[255:192] := a[191:128]; FI +IF (b[193] == 1) dst[255:192] := a[255:192]; FI +IF (b[257] == 0) dst[319:256] := a[319:256]; FI +IF (b[257] == 1) dst[319:256] := a[383:320]; FI +IF (b[321] == 0) dst[383:320] := a[319:256]; FI +IF (b[321] == 1) dst[383:320] := a[383:320]; FI +IF (b[385] == 0) dst[447:384] := a[447:384]; FI +IF (b[385] == 1) dst[447:384] := a[511:448]; FI +IF (b[449] == 0) dst[511:448] := a[447:384]; FI +IF (b[449] == 1) dst[511:448] := a[511:448]; FI +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +tmp_dst[287:256] := SELECT4(a[383:256], b[257:256]) +tmp_dst[319:288] := SELECT4(a[383:256], b[289:288]) +tmp_dst[351:320] := SELECT4(a[383:256], b[321:320]) +tmp_dst[383:352] := SELECT4(a[383:256], b[353:352]) +tmp_dst[415:384] := SELECT4(a[511:384], b[385:384]) +tmp_dst[447:416] := SELECT4(a[511:384], b[417:416]) +tmp_dst[479:448] := SELECT4(a[511:384], b[449:448]) +tmp_dst[511:480] := SELECT4(a[511:384], b[481:480]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +tmp_dst[287:256] := SELECT4(a[383:256], b[257:256]) +tmp_dst[319:288] := SELECT4(a[383:256], b[289:288]) +tmp_dst[351:320] := SELECT4(a[383:256], b[321:320]) +tmp_dst[383:352] := SELECT4(a[383:256], b[353:352]) +tmp_dst[415:384] := SELECT4(a[511:384], b[385:384]) +tmp_dst[447:416] := SELECT4(a[511:384], b[417:416]) +tmp_dst[479:448] := SELECT4(a[511:384], b[449:448]) +tmp_dst[511:480] := SELECT4(a[511:384], b[481:480]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], b[1:0]) +dst[63:32] := SELECT4(a[127:0], b[33:32]) +dst[95:64] := SELECT4(a[127:0], b[65:64]) +dst[127:96] := SELECT4(a[127:0], b[97:96]) +dst[159:128] := SELECT4(a[255:128], b[129:128]) +dst[191:160] := SELECT4(a[255:128], b[161:160]) +dst[223:192] := SELECT4(a[255:128], b[193:192]) +dst[255:224] := SELECT4(a[255:128], b[225:224]) +dst[287:256] := SELECT4(a[383:256], b[257:256]) +dst[319:288] := SELECT4(a[383:256], b[289:288]) +dst[351:320] := SELECT4(a[383:256], b[321:320]) +dst[383:352] := SELECT4(a[383:256], b[353:352]) +dst[415:384] := SELECT4(a[511:384], b[385:384]) +dst[447:416] := SELECT4(a[511:384], b[417:416]) +dst[479:448] := SELECT4(a[511:384], b[449:448]) +dst[511:480] := SELECT4(a[511:384], b[481:480]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx". + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] +tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] +tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] +tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] +tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] +tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] +tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst". + +dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] +dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] +dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] +dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +dst[351:320] := SELECT4(b[383:256], imm8[5:4]) +dst[383:352] := SELECT4(b[383:256], imm8[7:6]) +dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +dst[479:448] := SELECT4(b[511:384], imm8[5:4]) +dst[511:480] := SELECT4(b[511:384], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a[63:0] OP b[63:0] ) ? 1 : 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a[31:0] OP b[31:0] ) ? 1 : 0 + + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := src[m+63:m] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_UInt32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP64_To_UInt64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_UInt32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_UInt64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [sae_note] + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [sae_note] + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP32_To_UInt32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP32_To_UInt64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_UInt32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_UInt64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[k+31:k] := Truncate32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[k+31:k] := Saturate32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[i+31:i] := SignExtend32(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[i+63:i] := SignExtend64(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := SignExtend64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[i+31:i] := SignExtend32(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + l := j*16 + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[i+63:i] := SignExtend64(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[k+31:k] := SaturateU32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[i+31:i] := ZeroExtend32(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[i+63:i] := ZeroExtend64(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := ZeroExtend64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[i+31:i] := ZeroExtend32(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[i+63:i] := ZeroExtend64(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". + +dst[63:0] := a[63:0] + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [sae_note][max_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [sae_note][max_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][max_float_note] + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][max_float_note] + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note][max_float_note] + +dst[63:0] := MAX(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +dst[31:0] := MAX(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note][min_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note][min_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][min_float_note] + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][min_float_note] + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" , and copy the upper element from "a" to the upper element of "dst". [sae_note][min_float_note] + +dst[63:0] := MIN(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +dst[31:0] := MIN(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ABS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +tmp[319:256] := a[319:256] +tmp[383:320] := a[319:256] +tmp[447:384] := a[447:384] +tmp[511:448] := a[447:384] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +tmp[319:256] := a[319:256] +tmp[383:320] := a[319:256] +tmp[447:384] := a[447:384] +tmp[511:448] := a[447:384] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst". + +dst[63:0] := a[63:0] +dst[127:64] := a[63:0] +dst[191:128] := a[191:128] +dst[255:192] := a[191:128] +dst[319:256] := a[319:256] +dst[383:320] := a[319:256] +dst[447:384] := a[447:384] +dst[511:448] := a[447:384] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + + Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +tmp[287:256] := a[319:288] +tmp[319:288] := a[319:288] +tmp[351:320] := a[383:352] +tmp[383:352] := a[383:352] +tmp[415:384] := a[447:416] +tmp[447:416] := a[447:416] +tmp[479:448] := a[511:480] +tmp[511:480] := a[511:480] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +tmp[287:256] := a[319:288] +tmp[319:288] := a[319:288] +tmp[351:320] := a[383:352] +tmp[383:352] := a[383:352] +tmp[415:384] := a[447:416] +tmp[447:416] := a[447:416] +tmp[479:448] := a[511:480] +tmp[511:480] := a[511:480] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := a[63:32] +dst[95:64] := a[127:96] +dst[127:96] := a[127:96] +dst[159:128] := a[191:160] +dst[191:160] := a[191:160] +dst[223:192] := a[255:224] +dst[255:224] := a[255:224] +dst[287:256] := a[319:288] +dst[319:288] := a[319:288] +dst[351:320] := a[383:352] +dst[383:352] := a[383:352] +dst[415:384] := a[447:416] +dst[447:416] := a[447:416] +dst[479:448] := a[511:480] +dst[511:480] := a[511:480] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +tmp[287:256] := a[287:256] +tmp[319:288] := a[287:256] +tmp[351:320] := a[351:320] +tmp[383:352] := a[351:320] +tmp[415:384] := a[415:384] +tmp[447:416] := a[415:384] +tmp[479:448] := a[479:448] +tmp[511:480] := a[479:448] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +tmp[287:256] := a[287:256] +tmp[319:288] := a[287:256] +tmp[351:320] := a[351:320] +tmp[383:352] := a[351:320] +tmp[415:384] := a[415:384] +tmp[447:416] := a[415:384] +tmp[479:448] := a[479:448] +tmp[511:480] := a[479:448] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[31:0] +dst[95:64] := a[95:64] +dst[127:96] := a[95:64] +dst[159:128] := a[159:128] +dst[191:160] := a[159:128] +dst[223:192] := a[223:192] +dst[255:224] := a[223:192] +dst[287:256] := a[287:256] +dst[319:288] := a[287:256] +dst[351:320] := a[351:320] +dst[383:352] := a[351:320] +dst[415:384] := a[415:384] +dst[447:416] := a[415:384] +dst[479:448] := a[479:448] +dst[511:480] := a[479:448] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + + Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 15 + i := j*32 + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*64 + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + + Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + + Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast the low packed 16-bit integer from "a" to all all elements of "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the repeated 4 element sequence. + +dst[31:0] := a +dst[63:32] := b +dst[95:64] := c +dst[127:96] := d +dst[159:128] := a +dst[191:160] := b +dst[223:192] := c +dst[255:224] := d +dst[287:256] := a +dst[319:288] := b +dst[351:320] := c +dst[383:352] := d +dst[415:384] := a +dst[447:416] := b +dst[479:448] := c +dst[511:480] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the repeated 4 element sequence. + +dst[63:0] := a +dst[127:64] := b +dst[191:128] := c +dst[255:192] := d +dst[319:256] := a +dst[383:320] := b +dst[447:384] := c +dst[511:448] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence. + +dst[63:0] := a +dst[127:64] := b +dst[191:128] := c +dst[255:192] := d +dst[319:256] := a +dst[383:320] := b +dst[447:384] := c +dst[511:448] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence. + +dst[31:0] := a +dst[63:32] := b +dst[95:64] := c +dst[127:96] := d +dst[159:128] := a +dst[191:160] := b +dst[223:192] := c +dst[255:224] := d +dst[287:256] := a +dst[319:288] := b +dst[351:320] := c +dst[383:352] := d +dst[415:384] := a +dst[447:416] := b +dst[479:448] := c +dst[511:480] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 +dst[71:64] := e8 +dst[79:72] := e9 +dst[87:80] := e10 +dst[95:88] := e11 +dst[103:96] := e12 +dst[111:104] := e13 +dst[119:112] := e14 +dst[127:120] := e15 +dst[135:128] := e16 +dst[143:136] := e17 +dst[151:144] := e18 +dst[159:152] := e19 +dst[167:160] := e20 +dst[175:168] := e21 +dst[183:176] := e22 +dst[191:184] := e23 +dst[199:192] := e24 +dst[207:200] := e25 +dst[215:208] := e26 +dst[223:216] := e27 +dst[231:224] := e28 +dst[239:232] := e29 +dst[247:240] := e30 +dst[255:248] := e31 +dst[263:256] := e32 +dst[271:264] := e33 +dst[279:272] := e34 +dst[287:280] := e35 +dst[295:288] := e36 +dst[303:296] := e37 +dst[311:304] := e38 +dst[319:312] := e39 +dst[327:320] := e40 +dst[335:328] := e41 +dst[343:336] := e42 +dst[351:344] := e43 +dst[359:352] := e44 +dst[367:360] := e45 +dst[375:368] := e46 +dst[383:376] := e47 +dst[391:384] := e48 +dst[399:392] := e49 +dst[407:400] := e50 +dst[415:408] := e51 +dst[423:416] := e52 +dst[431:424] := e53 +dst[439:432] := e54 +dst[447:440] := e55 +dst[455:448] := e56 +dst[463:456] := e57 +dst[471:464] := e58 +dst[479:472] := e59 +dst[487:480] := e60 +dst[495:488] := e61 +dst[503:496] := e62 +dst[511:504] := e63 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 +dst[79:64] := e4 +dst[95:80] := e5 +dst[111:96] := e6 +dst[127:112] := e7 +dst[143:128] := e8 +dst[159:144] := e9 +dst[175:160] := e10 +dst[191:176] := e11 +dst[207:192] := e12 +dst[223:208] := e13 +dst[239:224] := e14 +dst[255:240] := e15 +dst[271:256] := e16 +dst[287:272] := e17 +dst[303:288] := e18 +dst[319:304] := e19 +dst[335:320] := e20 +dst[351:336] := e21 +dst[367:352] := e22 +dst[383:368] := e23 +dst[399:384] := e24 +dst[415:400] := e25 +dst[431:416] := e26 +dst[447:432] := e27 +dst[463:448] := e28 +dst[479:464] := e29 +dst[495:480] := e30 +dst[511:496] := e31 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[287:256] := e8 +dst[319:288] := e9 +dst[351:320] := e10 +dst[383:352] := e11 +dst[415:384] := e12 +dst[447:416] := e13 +dst[479:448] := e14 +dst[511:480] := e15 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[319:256] := e4 +dst[383:320] := e5 +dst[447:384] := e6 +dst[511:448] := e7 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[319:256] := e4 +dst[383:320] := e5 +dst[447:384] := e6 +dst[511:448] := e7 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[287:256] := e8 +dst[319:288] := e9 +dst[351:320] := e10 +dst[383:352] := e11 +dst[415:384] := e12 +dst[447:416] := e13 +dst[479:448] := e14 +dst[511:480] := e15 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the repeated 4 element sequence in reverse order. + +dst[31:0] := d +dst[63:32] := c +dst[95:64] := b +dst[127:96] := a +dst[159:128] := d +dst[191:160] := c +dst[223:192] := b +dst[255:224] := a +dst[287:256] := d +dst[319:288] := c +dst[351:320] := b +dst[383:352] := a +dst[415:384] := d +dst[447:416] := c +dst[479:448] := b +dst[511:480] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the repeated 4 element sequence in reverse order. + +dst[63:0] := d +dst[127:64] := c +dst[191:128] := b +dst[255:192] := a +dst[319:256] := d +dst[383:320] := c +dst[447:384] := b +dst[511:448] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order. + +dst[63:0] := d +dst[127:64] := c +dst[191:128] := b +dst[255:192] := a +dst[319:256] := d +dst[383:320] := c +dst[447:384] := b +dst[511:448] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order. + +dst[31:0] := d +dst[63:32] := c +dst[95:64] := b +dst[127:96] := a +dst[159:128] := d +dst[191:160] := c +dst[223:192] := b +dst[255:224] := a +dst[287:256] := d +dst[319:288] := c +dst[351:320] := b +dst[383:352] := a +dst[415:384] := d +dst[447:416] := c +dst[479:448] := b +dst[511:480] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e15 +dst[63:32] := e14 +dst[95:64] := e13 +dst[127:96] := e12 +dst[159:128] := e11 +dst[191:160] := e10 +dst[223:192] := e9 +dst[255:224] := e8 +dst[287:256] := e7 +dst[319:288] := e6 +dst[351:320] := e5 +dst[383:352] := e4 +dst[415:384] := e3 +dst[447:416] := e2 +dst[479:448] := e1 +dst[511:480] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 64-bit integers in "dst" with the supplied values in reverse order. + +dst[63:0] := e7 +dst[127:64] := e6 +dst[191:128] := e5 +dst[255:192] := e4 +dst[319:256] := e3 +dst[383:320] := e2 +dst[447:384] := e1 +dst[511:448] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[63:0] := e7 +dst[127:64] := e6 +dst[191:128] := e5 +dst[255:192] := e4 +dst[319:256] := e3 +dst[383:320] := e2 +dst[447:384] := e1 +dst[511:448] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[31:0] := e15 +dst[63:32] := e14 +dst[95:64] := e13 +dst[127:96] := e12 +dst[159:128] := e11 +dst[191:160] := e10 +dst[223:192] := e9 +dst[255:224] := e8 +dst[287:256] := e7 +dst[319:288] := e6 +dst[351:320] := e5 +dst[383:352] := e4 +dst[415:384] := e3 +dst[447:416] := e2 +dst[479:448] := e1 +dst[511:480] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + Return vector of type __m512 with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512i with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512d with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512 with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512i with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src >>count) OR (src << (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[63:0] := (1.0 / b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[31:0] := (1.0 / b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / SQRT(b[63:0])) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / SQRT(b[63:0])) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[63:0] := (1.0 / SQRT(b[63:0])) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / SQRT(b[31:0])) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / SQRT(b[31:0])) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[31:0] := (1.0 / SQRT(b[31:0])) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note]. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + [round_note]. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + [round_note]. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := SQRT(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := SQRT(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512d to type __m128d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m128. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512d to type __m256d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m256. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m128i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m256i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Return vector of type __m512 with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + Return vector of type __m512i with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + Return vector of type __m512d with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + Return vector of type __m512 with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). RM. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). RM. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 32-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_ADD(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 64-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_ADD(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_ADD(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_ADD(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 32-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 1 + FI +ENDFOR +dst[31:0] := REDUCE_MUL(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 64-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 1 + FI +ENDFOR +dst[63:0] := REDUCE_MUL(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 1.0 + FI +ENDFOR +dst[63:0] := REDUCE_MUL(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := FP32(1.0) + FI +ENDFOR +dst[31:0] := REDUCE_MUL(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 32-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_ADD(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 64-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_ADD(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_ADD(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_ADD(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 32-bit integers in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MUL(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 64-bit integers in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MUL(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MUL(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MUL(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ABS(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(v2[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ABS(v2[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(v2[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst". + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) +dst[511:0] := temp[511:0] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_mask_permutexvar_epi32", and it is recommended that you use that intrinsic name. + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_permutexvar_epi32", and it is recommended that you use that intrinsic name. + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] <= b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] < b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (!(a[i+63:i] <= b[i+63:i])) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (!(a[i+63:i] < b[i+63:i])) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k". + FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k". + FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] <= b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] < b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (!(a[i+63:i] <= b[i+63:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (!(a[i+63:i] < b[i+63:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] <= b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] < b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (!(a[i+31:i] <= b[i+31:i])) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (!(a[i+31:i] < b[i+31:i])) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k". + FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k". + FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] <= b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] < b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (!(a[i+31:i] <= b[i+31:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (!(a[i+31:i] < b[i+31:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits of integer data from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst". + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Loads 8 double-precision (64-bit) floating-point elements stored at memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" them in "dst". + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Loads 8 double-precision (64-bit) floating-point elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits of integer data from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Stores 8 packed double-precision (64-bit) floating-point elements in "a" and to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] AND b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[511:0] := (a[511:0] AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 512 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[511:0] := ((NOT a[511:0]) AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in "a" and then AND with "b", and store the results in "dst". + +dst[511:0] := ((NOT a[511:0]) AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in "a" and "b", and store the results in "dst". + +dst[511:0] := (a[511:0] AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[511:0] := (a[511:0] OR b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the resut in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[511:0] := (a[511:0] XOR b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Reduce the packed 32-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[31:0] AND src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] AND src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_AND(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0xFFFFFFFF + FI +ENDFOR +dst[31:0] := REDUCE_AND(tmp, 16) + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Reduce the packed 64-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[63:0] AND src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] AND src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_AND(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0xFFFFFFFFFFFFFFFF + FI +ENDFOR +dst[63:0] := REDUCE_AND(tmp, 8) + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Reduce the packed 32-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[31:0] OR src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] OR src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_OR(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_OR(tmp, 16) + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Reduce the packed 64-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[63:0] OR src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] OR src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_OR(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_OR(tmp, 8) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 32-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[31:0] AND src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] AND src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_AND(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_AND(a, 16) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 64-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[63:0] AND src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] AND src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_AND(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_AND(a, 8) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 32-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[31:0] OR src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] OR src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_OR(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_OR(a, 16) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 64-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[63:0] OR src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] OR src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_OR(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_OR(a, 8) + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Performs element-by-element bitwise AND between packed 32-bit integer elements of "v2" and "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := v2[i+31:i] & v3[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Int32(-0x80000000) + FI +ENDFOR +dst[31:0] := REDUCE_MAX(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Int64(-0x8000000000000000) + FI +ENDFOR +dst[63:0] := REDUCE_MAX(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_MAX(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_MAX(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Cast_FP64(0xFFEFFFFFFFFFFFFF) + FI +ENDFOR +dst[63:0] := REDUCE_MAX(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Cast_FP32(0xFF7FFFFF) + FI +ENDFOR +dst[31:0] := REDUCE_MAX(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 32-bit integers in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Int32(0x7FFFFFFF) + FI +ENDFOR +dst[31:0] := REDUCE_MIN(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 64-bit integers in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Int64(0x7FFFFFFFFFFFFFFF) + FI +ENDFOR +dst[63:0] := REDUCE_MIN(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 32-bit integers in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0xFFFFFFFF + FI +ENDFOR +dst[31:0] := REDUCE_MIN(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 64-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0xFFFFFFFFFFFFFFFF + FI +ENDFOR +dst[63:0] := REDUCE_MIN(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Cast_FP64(0x7FEFFFFFFFFFFFFF) + FI +ENDFOR +dst[63:0] := REDUCE_MIN(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Cast_FP32(0x7F7FFFFF) + FI +ENDFOR +dst[31:0] := REDUCE_MIN(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MAX(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MAX(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MAX(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MAX(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MAX(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MAX(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MIN(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MIN(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MIN(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MIN(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MIN(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MIN(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + Cast vector of type __m512d to type __m512. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512d to type __m512i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m512d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m512i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m512d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m512. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". + +FOR j := 0 to 7 + i := j*32 + n := j*64 + dst[n+63:n] := Convert_FP32_To_FP64(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[l+63:l] := Convert_FP32_To_FP64(v2[i+31:i]) + ELSE + dst[l+63:l] := src[l+63:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". + +FOR j := 0 to 7 + i := j*32 + l := j*64 + dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := j*64 + IF k[j] + dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i]) + ELSE + dst[n+63:n] := src[n+63:n] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". + +FOR j := 0 to 7 + i := j*32 + n := j*64 + dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i]) + ELSE + dst[l+63:l] := src[l+63:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst". The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. + +FOR j := 0 to 7 + i := j*64 + k := j*32 + dst[k+31:k] := Convert_FP64_To_FP32(v2[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_FP64_To_FP32(v2[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Stores 8 packed 64-bit integer elements located in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Stores 8 packed 64-bit integer elements located in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements whose corresponding mask bit is not set are not written to memory). + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POPCNT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POPCNT(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POPCNT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POPCNT(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POPCNT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POPCNT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 15 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:512] := 0 + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the BF16 (16-bit) floating-point element in "a" to a floating-point element, and store the result in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +dst[31:0] := Convert_BF16_To_FP32(a[15:0]) + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". + +FOR j := 0 to 31 + IF j < 16 + t := b.fp32[j] + ELSE + t := a.fp32[j-16] + FI + dst.word[j] := Convert_FP32_To_BF16(t) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF j < 16 + t := b.fp32[j] + ELSE + t := a.fp32[j-16] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF j < 16 + t := b.fp32[j] + ELSE + t := a.fp32[j-16] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 15 + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 3 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:128] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 7 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:256] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert the single-precision (32-bit) floating-point element in "a" to a BF16 (16-bit) floating-point element, and store the result in "dst". + +dst[15:0] := Convert_FP32_To_BF16(a[31:0]) + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". + +FOR j := 0 to 7 + IF j < 4 + t := b.fp32[j] + ELSE + t := a.fp32[j-4] + FI + dst.word[j] := Convert_FP32_To_BF16(t) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF j < 4 + t := b.fp32[j] + ELSE + t := a.fp32[j-4] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF j < 4 + t := b.fp32[j] + ELSE + t := a.fp32[j-4] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". + +FOR j := 0 to 15 + IF j < 8 + t := b.fp32[j] + ELSE + t := a.fp32[j-8] + FI + dst.word[j] := Convert_FP32_To_BF16(t) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF j < 8 + t := b.fp32[j] + ELSE + t := a.fp32[j-8] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF j < 8 + t := b.fp32[j] + ELSE + t := a.fp32[j-8] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 3 + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 7 + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 3 //Qword + FOR j := 0 to 7 // Byte + IF k[i*8+j] + m := c.qword[i].byte[j] & 0x3F + dst[i*8+j] := b.qword[i].bit[m] + ELSE + dst[i*8+j] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:32] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst". + +FOR i := 0 to 3 //Qword + FOR j := 0 to 7 // Byte + m := c.qword[i].byte[j] & 0x3F + dst[i*8+j] := b.qword[i].bit[m] + ENDFOR +ENDFOR +dst[MAX:32] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 1 //Qword + FOR j := 0 to 7 // Byte + IF k[i*8+j] + m := c.qword[i].byte[j] & 0x3F + dst[i*8+j] := b.qword[i].bit[m] + ELSE + dst[i*8+j] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:16] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst". + +FOR i := 0 to 1 //Qword + FOR j := 0 to 7 // Byte + m := c.qword[i].byte[j] & 0x3F + dst[i*8+j] := b.qword[i].bit[m] + ENDFOR +ENDFOR +dst[MAX:16] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POPCNT(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POPCNT(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := POPCNT(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := POPCNT(a[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 7 //Qword + FOR j := 0 to 7 // Byte + IF k[i*8+j] + m := c.qword[i].byte[j] & 0x3F + dst[i*8+j] := b.qword[i].bit[m] + ELSE + dst[i*8+j] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:64] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst". + +FOR i := 0 to 7 //Qword + FOR j := 0 to 7 // Byte + m := c.qword[i].byte[j] & 0x3F + dst[i*8+j] := b.qword[i].bit[m] + ENDFOR +ENDFOR +dst[MAX:64] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POPCNT(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := POPCNT(a[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ACOS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ACOSH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ASIN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ASINH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ATAN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ATANH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := CubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := CDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := COSD(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := COSH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ERF(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := 1.0 / ERF(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0)) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := InvCubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := InvSQRT(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(1.0 + a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +cos_res[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SIND(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SINH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := CEIL(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := FLOOR(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ROUND(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SQRT(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TAN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TAND(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TANH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst" + + Special Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TRUNCATE(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ACOS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ACOSH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ASIN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ASINH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ATAN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperblic tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ATANH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := CubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := CDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := CEIL(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := COSD(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := COSH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ERF(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := 1.0 / ERF(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := FLOOR(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0)) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := InvSQRT(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(1.0 + a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ACOS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ACOSH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ASIN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ASINH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ATAN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ATANH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := CubeRoot(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := CDFNormal(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := CEIL(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := COS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + TrigonometryFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := COSD(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := COSH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ERF(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := 1.0 / ERF(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(e), a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := FLOOR(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := InvSQRT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(1.0 + a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + + + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Rounds each packed half-precision (16-bit) floating-point element in "a" to the nearest integer value and stores the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := NearbyInt(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Computes the reciprocal of packed half-precision (16-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := (1.0 / a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Rounds the packed half-precision (16-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := RoundToNearestEven(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SIN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set). + + + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) + ELSE + dst[i+15:i] := sin_src[i+15:i] + MEM[mem_addr+i+15:mem_addr+i] := cos_src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + TrigonometryFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SIND(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SINH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ROUND(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := TAN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + TrigonometryFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := TAND(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := TANH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := TRUNCATE(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Rounds each packed half-precision (16-bit) floating-point element in "a" to the nearest integer value and stores the results as packed half-precision floating-point elements in "dst". + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := NearbyInt(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Computes the reciprocal of packed half-precision (16-bit) floating-point elements in "a", storing the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := (1.0 / a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Rounds the packed half-precision (16-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst". + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := RoundToNearestEven(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SIND(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SINH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ROUND(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := TAN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := TAND(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := TANH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst". + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := TRUNCATE(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ACOS(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ACOSH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ASIN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ASINH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ATAN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ATANH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := CubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := CDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := COSD(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := COSH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ERF(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := 1.0 / ERF(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0)) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := InvCubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := InvSQRT(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(1.0 + a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + + Elementary Math FunctionsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +cos_res[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SIND(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SINH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := CEIL(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := FLOOR(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ROUND(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SQRT(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := TAN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := TAND(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := TANH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst". + + Special Math FunctionsFOR j := 0 to 7 + i := j*16 + dst[i+15:i] := TRUNCATE(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := a.fp16[j] - b.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := a.fp16[j] - b.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR i := 0 TO 7 + dst.fp16[i] := a.fp16[i] * b.fp16[i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR i := 0 TO 15 + dst.fp16[i] := a.fp16[i] * b.fp16[i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+8] +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+8] +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := ABS(v2.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := ABS(v2.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 to 3 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 to 3 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 to 3 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [max_float_note] + +dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [min_float_note] + +dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 7 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dest[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dest[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dest[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 15 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dest[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dest[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dest[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 7 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 15 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 7 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 15 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 7 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 15 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 7 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR i := 0 to 7 + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR i := 0 to 7 + IF k1[i] + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) + ELSE + k[i] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR i := 0 to 15 + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR i := 0 to 15 + IF k1[i] + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) + ELSE + k[i] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle half-precision (16-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + off := idx[i+2:i] + dst.fp16[j] := idx[i+3] ? b.fp16[off] : a.fp16[off] +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle half-precision (16-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + off := idx[i+3:i] + dst.fp16[j] := idx[i+4] ? b.fp16[off] : a.fp16[off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := b.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := b.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle half-precision (16-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i] + dst.fp16[j] := a.fp16[id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle half-precision (16-bit) floating-point elements in "a" using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i] + dst.fp16[j] := a.fp16[id] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR i := 0 to 7 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR i := 0 to 15 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + dst.fp16[i] := (1.0 / a.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + dst.fp16[i] := (1.0 / a.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + + Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + Return vector of type __m256h with undefined elements. + AVX512_FP16 + AVX512VL +
immintrin.h
+ General Support +
+ + + + Return vector of type __m128h with undefined elements. + AVX512_FP16 + AVX512VL +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256h with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Set +
+ + + + Return vector of type __m128h with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Set +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] + b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := a.fp16[0] + b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] / b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := a.fp16[0] / b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 31 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 31 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := a.fp16[j] - b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := a.fp16[j] - b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] - b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := a.fp16[0] - b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] - b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] - b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] - b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] - b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR i := 0 TO 31 + dst.fp16[i] := a.fp16[i] * b.fp16[i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR i := 0 TO 31 + dst.fp16[i] := a.fp16[i] * b.fp16[i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the lower half-precision (16-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] * b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := a.fp16[0] * b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] * b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] * b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] * b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] * b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "src", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "src", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +tmp := a +FOR i := 0 to 15 + tmp.fp16[i] := tmp.fp16[i] + a.fp16[i+16] +ENDFOR +FOR i := 0 to 7 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+8] +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +tmp := a +FOR i := 0 to 15 + tmp.fp16[i] := tmp.fp16[i] * a.fp16[i+16] +ENDFOR +FOR i := 0 to 7 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+8] +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". [max_float_note] + +tmp := a +FOR i := 0 to 15 + tmp.fp16[i] := (a.fp16[i] > a.fp16[i+16] ? a.fp16[i] : a.fp16[i+16]) +ENDFOR +FOR i := 0 to 7 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] + +tmp := a +FOR i := 0 to 15 + tmp.fp16[i] := (a.fp16[i] < a.fp16[i+16] ? tmp.fp16[i] : a.fp16[i+16]) +ENDFOR +FOR i := 0 to 7 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := ABS(v2.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + CASE (imm8[3:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := (a.fp16[0] OP b.fp16[0]) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := (a.fp16[0] OP b.fp16[0]) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for equality, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] == b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] < b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] <= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] > b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] >= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for not-equal, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] ==NaN OR b.fp16[0] ==NaN OR a.fp16[0] != b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] == b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] < b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] <= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] > b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] >= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] ==NaN OR b.fp16[0] ==NaN OR a.fp16[0] != b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper element of "dst". + +dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper element of "dst". + [round_note] + +dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper element of "dst". + +IF k[0] + dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper element of "dst". + +IF k[0] + dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point elements, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". [sae_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note] + +dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := src.fp64[0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note] + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := src.fp64[0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note] + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note] + +dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := src.fp32[0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note] + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := src.fp32[0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note] + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst.dword := Convert_FP16_To_Int32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst.dword := Convert_FP16_To_Int32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst.qword := Convert_FP16_To_Int64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst.qword := Convert_FP16_To_Int64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst.dword := Convert_FP16_To_Int32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.dword := Convert_FP16_To_Int32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst.qword := Convert_FP16_To_Int64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.qword := Convert_FP16_To_Int64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + +dst.dword := Convert_FP16_To_UInt32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". [sae_note] + +dst.dword := Convert_FP16_To_UInt32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + +dst.qword := Convert_FP16_To_UInt64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". [round_note] + +dst.qword := Convert_FP16_To_UInt64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + +dst.dword := Convert_FP16_To_UInt32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.dword := Convert_FP16_To_UInt32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + +dst.qword := Convert_FP16_To_UInt64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.qword := Convert_FP16_To_UInt64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_Int32_To_FP16(b.fp32[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_Int32_To_FP16(b.fp32[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_Int32_To_FP16(b.fp32[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_Int32_To_FP16(b.fp32[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_Int64_To_FP16(b.fp64[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_Int64_To_FP16(b.fp64[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_Int64_To_FP16(b.fp64[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_Int64_To_FP16(b.fp64[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy 16-bit integer "a" to the lower elements of "dst", and zero the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower 16-bit integer in "a" to "dst". + +dst.fp16[0] := a.fp16[0] +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower half-precision (16-bit) floating-point element of "a" to "dst". + +dst[15:0] := a.fp16[0] + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower half-precision (16-bit) floating-point element of "a" to "dst". + +dst[15:0] := a.fp16[0] + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower half-precision (16-bit) floating-point element of "a" to "dst". + +dst[15:0] := a.fp16[0] + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [sae_note][max_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note] [min_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst", and zero the upper elements. + +dst.fp16[0] := MEM[mem_addr].fp16[0] +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + + + Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper elements of "dst" to zero. + +IF k[0] + dst.fp16[0] := MEM[mem_addr].fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + + Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of "dst" to zero. + +IF k[0] + dst.fp16[0] := MEM[mem_addr].fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + + Store the lower half-precision (16-bit) floating-point element from "a" into memory. + +MEM[mem_addr].fp16[0] := a.fp16[0] + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + + Store the lower half-precision (16-bit) floating-point element from "a" into memory using writemask "k". + +IF k[0] + MEM[mem_addr].fp16[0] := a.fp16[0] +FI + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Move +
+ + + + + + + Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Move +
+ + + + + + Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Move +
+ + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dest[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dest[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dest[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dest[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dest[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dest[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dest[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dest[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dest[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dest[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dest[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dest[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 31 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. [sae_note] + FOR i := 0 to 31 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. [sae_note] + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. [sae_note] + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. [sae_note] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. [sae_note] + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. [sae_note] + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 31 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + FOR i := 0 TO 31 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +IF k[0] + dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +IF k[0] + dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +IF k[0] + dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (a.exp == 0) and (a.fraction != 0) + denormal2 := (b.exp == 0) and (b.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +IF k[0] + dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR i := 0 to 31 + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR i := 0 to 31 + IF k1[i] + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) + ELSE + k[i] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Test the lower half-precision (16-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". + [fpclass_note] + k[0] := CheckFPClass_FP16(a.fp16[0], imm8[7:0]) +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Test the lower half-precision (16-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + [fpclass_note] + IF k1[0] + k[0] := CheckFPClass_FP16(a.fp16[0], imm8[7:0]) +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle half-precision (16-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + off := idx[i+4:i] + dst.fp16[j] := idx[i+5] ? b.fp16[off] : a.fp16[off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := b.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle half-precision (16-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i] + dst.fp16[j] := a.fp16[id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst.fp16[0] := (1.0 / SQRT(b.fp16[0])) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +IF k[0] + dst.fp16[0] := (1.0 / SQRT(b.fp16[0])) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +IF k[0] + dst.fp16[0] := (1.0 / SQRT(b.fp16[0])) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR i := 0 to 31 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR i := 0 to 31 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := SQRT(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := SQRT(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + dst.fp16[i] := (1.0 / a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst.fp16[0] := (1.0 / b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +IF k[0] + dst.fp16[0] := (1.0 / b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +IF k[0] + dst.fp16[0] := (1.0 / b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. + +dst.fp16[0] := e0 +dst.fp16[1] := e1 +dst.fp16[2] := e2 +dst.fp16[3] := e3 +dst.fp16[4] := e4 +dst.fp16[5] := e5 +dst.fp16[6] := e6 +dst.fp16[7] := e7 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. + +dst.fp16[0] := e0 +dst.fp16[1] := e1 +dst.fp16[2] := e2 +dst.fp16[3] := e3 +dst.fp16[4] := e4 +dst.fp16[5] := e5 +dst.fp16[6] := e6 +dst.fp16[7] := e7 +dst.fp16[8] := e8 +dst.fp16[9] := e9 +dst.fp16[10] := e10 +dst.fp16[11] := e11 +dst.fp16[12] := e12 +dst.fp16[13] := e13 +dst.fp16[14] := e14 +dst.fp16[15] := e15 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. + +dst.fp16[0] := e0 +dst.fp16[1] := e1 +dst.fp16[2] := e2 +dst.fp16[3] := e3 +dst.fp16[4] := e4 +dst.fp16[5] := e5 +dst.fp16[6] := e6 +dst.fp16[7] := e7 +dst.fp16[8] := e8 +dst.fp16[9] := e9 +dst.fp16[10] := e10 +dst.fp16[11] := e11 +dst.fp16[12] := e12 +dst.fp16[13] := e13 +dst.fp16[14] := e14 +dst.fp16[15] := e15 +dst.fp16[16] := e16 +dst.fp16[17] := e17 +dst.fp16[18] := e18 +dst.fp16[19] := e19 +dst.fp16[20] := e20 +dst.fp16[21] := e21 +dst.fp16[22] := e22 +dst.fp16[23] := e23 +dst.fp16[24] := e24 +dst.fp16[25] := e25 +dst.fp16[26] := e26 +dst.fp16[27] := e27 +dst.fp16[28] := e28 +dst.fp16[29] := e29 +dst.fp16[30] := e30 +dst.fp16[31] := e31 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst.fp16[0] := e7 +dst.fp16[1] := e6 +dst.fp16[2] := e5 +dst.fp16[3] := e4 +dst.fp16[4] := e3 +dst.fp16[5] := e2 +dst.fp16[6] := e1 +dst.fp16[7] := e0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst.fp16[0] := e15 +dst.fp16[1] := e14 +dst.fp16[2] := e13 +dst.fp16[3] := e12 +dst.fp16[4] := e11 +dst.fp16[5] := e10 +dst.fp16[6] := e9 +dst.fp16[7] := e8 +dst.fp16[8] := e7 +dst.fp16[9] := e6 +dst.fp16[10] := e5 +dst.fp16[11] := e4 +dst.fp16[12] := e3 +dst.fp16[13] := e2 +dst.fp16[14] := e1 +dst.fp16[15] := e0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst.fp16[0] := e31 +dst.fp16[1] := e30 +dst.fp16[2] := e29 +dst.fp16[3] := e28 +dst.fp16[4] := e27 +dst.fp16[5] := e26 +dst.fp16[6] := e25 +dst.fp16[7] := e24 +dst.fp16[8] := e23 +dst.fp16[9] := e22 +dst.fp16[10] := e21 +dst.fp16[11] := e20 +dst.fp16[12] := e19 +dst.fp16[13] := e18 +dst.fp16[14] := e17 +dst.fp16[15] := e16 +dst.fp16[16] := e15 +dst.fp16[17] := e14 +dst.fp16[18] := e13 +dst.fp16[19] := e12 +dst.fp16[20] := e11 +dst.fp16[21] := e10 +dst.fp16[22] := e9 +dst.fp16[23] := e8 +dst.fp16[24] := e7 +dst.fp16[25] := e6 +dst.fp16[26] := e5 +dst.fp16[27] := e4 +dst.fp16[28] := e3 +dst.fp16[29] := e2 +dst.fp16[30] := e1 +dst.fp16[31] := e0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". + +FOR i := 0 to 7 + dst.fp16[i] := a[15:0] +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". + +FOR i := 0 to 15 + dst.fp16[i] := a[15:0] +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". + +FOR i := 0 to 31 + dst.fp16[i] := a[15:0] +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := a[15:0] + dst.fp16[2*i+1] := a[31:16] +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := a[15:0] + dst.fp16[2*i+1] := a[31:16] +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := a[15:0] + dst.fp16[2*i+1] := a[31:16] +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Copy half-precision (16-bit) floating-point element "a" to the lower element of "dst", and zero the upper 7 elements. + +dst.fp16[0] := a[15:0] +dst[127:16] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + Return vector of type __m512h with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Cast vector of type "__m128h" to type "__m128". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m256". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m512". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m128d". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m256d". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m512d". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m128i". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m256i". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m512i". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128d" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256d" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512d" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128i" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256i" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512i" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m256h"; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m512h"; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m512h"; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + Return vector of type __m512h with undefined elements. + AVX512_FP16 +
immintrin.h
+ General Support +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". + +FOR i := 0 to 3 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 3 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 3 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". + +FOR i := 0 to 1 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 1 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 1 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + id := idx[i+4:i]*8 + dst[i+7:i] := a[id+7:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + id := idx[i+4:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + id := idx[i+4:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + id := idx[i+3:i]*8 + dst[i+7:i] := a[id+7:id] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + id := idx[i+3:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + id := idx[i+3:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := idx[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := idx[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". + +FOR i := 0 to 7 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Bit Manipulation +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 7 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Bit Manipulation +
+ + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 7 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Bit Manipulation +
+ + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + id := idx[i+5:i]*8 + dst[i+7:i] := a[id+7:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + id := idx[i+5:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + id := idx[i+5:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := idx[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst"). + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + Swizzle + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 16 +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 16 +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 16 +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 16 +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 8 +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 8 +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 8 +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 8 +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + Swizzle + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 16 +m := base_addr +FOR j := 0 to 15 + i := j*16 + IF k[j] + MEM[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 16 +m := base_addr +FOR j := 0 to 7 + i := j*16 + IF k[j] + MEM[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 8 +m := base_addr +FOR j := 0 to 31 + i := j*8 + IF k[j] + MEM[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 8 +m := base_addr +FOR j := 0 to 15 + i := j*8 + IF k[j] + MEM[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src"" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst"). + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst"). + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + Swizzle + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 16 +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 16 +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 8 +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 8 +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + Swizzle + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 16 +m := base_addr +FOR j := 0 to 31 + i := j*16 + IF k[j] + MEM[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 8 +m := base_addr +FOR j := 0 to 63 + i := j*8 + IF k[j] + MEM[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 +
immintrin.h
+ Store +
+ + + + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + + + Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+15:k1] := 0 +MEM[k2+15:k2] := 0 +FOR i := 0 TO 15 + FOR j := 0 TO 15 + match := (a.dword[i] == b.dword[j] ? 1 : 0) + MEM[k1+15:k1].bit[i] |= match + MEM[k2+15:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512F +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 7 + FOR j := 0 TO 7 + match := (a.qword[i] == b.qword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512F +
immintrin.h
+ Mask +
+ + + + + + + + + Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 3 + FOR j := 0 TO 3 + match := (a.dword[i] == b.dword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 7 + FOR j := 0 TO 7 + match := (a.dword[i] == b.dword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 1 + FOR j := 0 TO 1 + match := (a.qword[i] == b.qword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 3 + FOR j := 0 TO 3 + match := (a.qword[i] == b.qword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + + Convert scalar BF16 (16-bit) floating-point element stored at memory locations starting at location "__A" to a single-precision (32-bit) floating-point, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_BF16_To_FP32(MEM[__A+15:__A]) +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := b +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting at location "__A" to a single-precision (32-bit) floating-point, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_FP16_To_FP32(MEM[__A+15:__A]) +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := b +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert scalar BF16 (16-bit) floating-point element stored at memory locations starting at location "__A" to a single-precision (32-bit) floating-point, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_BF16_To_FP32(MEM[__A+15:__A]) +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := b +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting at location "__A" to a single-precision (32-bit) floating-point, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_FP16_To_FP32(MEM[__A+15:__A]) +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := b +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + + + + + Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start". + +tmp[511:0] := a +dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 0:7 of "control". + +start := control[7:0] +len := control[15:8] +tmp[511:0] := a +dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + + Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start". + +tmp[511:0] := a +dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 0:7 of "control".. + +start := control[7:0] +len := control[15:8] +tmp[511:0] := a +dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Extract the lowest set bit from unsigned 32-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a". + +dst := (-a) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Extract the lowest set bit from unsigned 64-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a". + +dst := (-a) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 32-bit integer "a". + +dst := (a - 1) XOR a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 64-bit integer "a". + +dst := (a - 1) XOR a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a". + +dst := (a - 1) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a". + +dst := (a - 1) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Compute the bitwise NOT of 32-bit integer "a" and then AND with b, and store the results in dst. + +dst[31:0] := ((NOT a[31:0]) AND b[31:0]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Compute the bitwise NOT of 64-bit integer "a" and then AND with b, and store the results in dst. + +dst[63:0] := ((NOT a[63:0]) AND b[63:0]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 16-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 16) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 64) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 64) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + + + Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index". + +n := index[7:0] +dst := a +IF (n < 32) + dst[31:n] := 0 +FI + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index". + +n := index[7:0] +dst := a +IF (n < 64) + dst[63:n] := 0 +FI + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Deposit contiguous low bits from unsigned 32-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 32 + IF mask[m] == 1 + dst[m] := tmp[k] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Deposit contiguous low bits from unsigned 64-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 64 + IF mask[m] == 1 + dst[m] := tmp[k] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract bits from unsigned 32-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 32 + IF mask[m] == 1 + dst[k] := tmp[m] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract bits from unsigned 64-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 64 + IF mask[m] == 1 + dst[k] := tmp[m] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + + Multiply unsigned 32-bit integers "a" and "b", store the low 32-bits of the result in "dst", and store the high 32-bits in "hi". This does not read or write arithmetic flags. + +dst[31:0] := (a * b)[31:0] +MEM[hi+31:hi] := (a * b)[63:32] + + + BMI2 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply unsigned 64-bit integers "a" and "b", store the low 64-bits of the result in "dst", and store the high 64-bits in "hi". This does not read or write arithmetic flags. + +dst[63:0] := (a * b)[63:0] +MEM[hi+63:hi] := (a * b)[127:64] + + + BMI2 +
immintrin.h
+ Arithmetic +
+ + + + + + Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a". + +SSP := SSP + a[7:0] * 4 + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Increment the shadow stack pointer by 8 times the value specified in bits [7:0] of "a". + +SSP := SSP + a[7:0] * 8 + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Read the low 32-bits of the current shadow stack pointer, and store the result in "dst". + dst := SSP[31:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Read the current shadow stack pointer, and store the result in "dst". + dst := SSP[63:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Save the previous shadow stack pointer context. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Restore the saved shadow stack pointer from the shadow stack restore token previously created on shadow stack by saveprevssp. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 32-bit value in "val" to a shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 64-bit value in "val" to a shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 32-bit value in "val" to a user shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 64-bit value in "val" to a user shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Mark shadow stack pointed to by IA32_PL0_SSP as busy. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Mark shadow stack pointed to by "p" as not busy. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + If CET is enabled, read the low 32-bits of the current shadow stack pointer, and store the result in "dst". Otherwise return 0. + dst := SSP[31:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + If CET is enabled, read the current shadow stack pointer, and store the result in "dst". Otherwise return 0. + dst := SSP[63:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a". + +SSP := SSP + a[7:0] * 4 + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Hint to hardware that the cache line that contains "p" should be demoted from the cache closest to the processor core to a level more distant from the processor core. + + CLDEMOTE +
immintrin.h
+ Miscellaneous +
+ + + + + + Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy. + + CLFLUSHOPT +
immintrin.h
+ General Support +
+ + + + + + Write back to memory the cache line that contains "p" from any level of the cache hierarchy in the cache coherence domain. + + CLWB +
immintrin.h
+ General Support +
+ + + + + + + + + Compares the value from the memory "__A" with the value of "__B". If the specified condition "__D" is met, then add the third operand "__C" to the "__A" and write it into "__A", else the value of "__A" is unchanged. The return value is the original value of "__A". + CASE (__D[3:0]) OF +0: OP := _CMPCCX_O +1: OP := _CMPCCX_NO +2: OP := _CMPCCX_B +3: OP := _CMPCCX_NB +4: OP := _CMPCCX_Z +5: OP := _CMPCCX_NZ +6: OP := _CMPCCX_BE +7: OP := _CMPCCX_NBE +8: OP := _CMPCCX_S +9: OP := _CMPCCX_NS +10: OP := _CMPCCX_P +11: OP := _CMPCCX_NP +12: OP := _CMPCCX_L +13: OP := _CMPCCX_NL +14: OP := _CMPCCX_LE +15: OP := _CMPCCX_NLE +ESAC +tmp1 := LOAD_LOCK(__A) +tmp2 := tmp1 + __C +IF (tmp1[31:0] OP __B[31:0]) + STORE_UNLOCK(__A, tmp2) +ELSE + STORE_UNLOCK(__A, tmp1) +FI +dst[31:0] := tmp1[31:0] + + + + + + + + + + + + + + + + + + CMPCCXADD +
immintrin.h
+ Arithmetic +
+ + + + + + + Compares the value from the memory "__A" with the value of "__B". If the specified condition "__D" is met, then add the third operand "__C" to the "__A" and write it into "__A", else the value of "__A" is unchanged. The return value is the original value of "__A". + CASE (__D[3:0]) OF +0: OP := _CMPCCX_O +1: OP := _CMPCCX_NO +2: OP := _CMPCCX_B +3: OP := _CMPCCX_NB +4: OP := _CMPCCX_Z +5: OP := _CMPCCX_NZ +6: OP := _CMPCCX_BE +7: OP := _CMPCCX_NBE +8: OP := _CMPCCX_S +9: OP := _CMPCCX_NS +10: OP := _CMPCCX_P +11: OP := _CMPCCX_NP +12: OP := _CMPCCX_L +13: OP := _CMPCCX_NL +14: OP := _CMPCCX_LE +15: OP := _CMPCCX_NLE +ESAC +tmp1 := LOAD_LOCK(__A) +tmp2 := tmp1 + __C +IF (tmp1[63:0] OP __B[63:0]) + STORE_UNLOCK(__A, tmp2) +ELSE + STORE_UNLOCK(__A, tmp1) +FI +dst[63:0] := tmp1[63:0] + + + + + + + + + + + + + + + + + + CMPCCXADD +
immintrin.h
+ Arithmetic +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst". + tmp1[7:0] := v[0:7] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[39:0] := tmp1[7:0] << 32 +tmp4[39:0] := tmp2[31:0] << 8 +tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0] +tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst". + tmp1[15:0] := v[0:15] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[47:0] := tmp1[15:0] << 32 +tmp4[47:0] := tmp2[31:0] << 16 +tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0] +tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst". + tmp1[31:0] := v[0:31] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[63:0] := tmp1[31:0] << 32 +tmp4[63:0] := tmp2[31:0] << 32 +tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0] +tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst". + tmp1[63:0] := v[0:63] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[95:0] := tmp1[31:0] << 32 +tmp4[95:0] := tmp2[63:0] << 64 +tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0] +tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + + + Reads 64-byte command pointed by "__src", formats 64-byte enqueue store data, and performs 64-byte enqueue store to memory pointed by "__dst". This intrinsics may only be used in User mode. + + ENQCMD +
immintrin.h
+ Unknown +
+ + + + + Reads 64-byte command pointed by "__src", formats 64-byte enqueue store data, and performs 64-byte enqueue store to memory pointed by "__dst" This intrinsic may only be used in Privileged mode. + + ENQCMD +
immintrin.h
+ Unknown +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:256] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:128] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:64] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + Read the FS segment base register and store the 32-bit result in "dst". + dst[31:0] := FS_Segment_Base_Register +dst[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + Read the FS segment base register and store the 64-bit result in "dst". + dst[63:0] := FS_Segment_Base_Register + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + Read the GS segment base register and store the 32-bit result in "dst". + dst[31:0] := GS_Segment_Base_Register +dst[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + Read the GS segment base register and store the 64-bit result in "dst". + dst[63:0] := GS_Segment_Base_Register + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 32-bit integer "a" to the FS segment base register. + +FS_Segment_Base_Register[31:0] := a[31:0] +FS_Segment_Base_Register[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 64-bit integer "a" to the FS segment base register. + +FS_Segment_Base_Register[63:0] := a[63:0] + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 32-bit integer "a" to the GS segment base register. + +GS_Segment_Base_Register[31:0] := a[31:0] +GS_Segment_Base_Register[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 64-bit integer "a" to the GS segment base register. + +GS_Segment_Base_Register[63:0] := a[63:0] + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + + + Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary. + state_x87_fpu_mmx_sse := fxrstor(MEM[mem_addr+512*8:mem_addr]) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE64 instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary. + state_x87_fpu_mmx_sse := fxrstor64(MEM[mem_addr+512*8:mem_addr]) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor. + MEM[mem_addr+512*8:mem_addr] := fxsave(state_x87_fpu_mmx_sse) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor. + MEM[mem_addr+512*8:mem_addr] := fxsave64(state_x87_fpu_mmx_sse) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 63 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src"" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 63 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := src.byte[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 63 + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[b] + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 31 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src"" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 31 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := src.byte[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 31 + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 15 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src"" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 15 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := src.byte[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 15 + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Provides a hint to the processor to selectively reset the prediction history of the current logical processor specified by a signed 32-bit integer "__eax". + + HRESET +
immintrin.h
+ General Support +
+ + + + + + Invalidate mappings in the Translation Lookaside Buffers (TLBs) and paging-structure caches for the processor context identifier (PCID) specified by "descriptor" based on the invalidation type specified in "type". + The PCID "descriptor" is specified as a 16-byte memory operand (with no alignment restrictions) where bits [11:0] specify the PCID, and bits [127:64] specify the linear address; bits [63:12] are reserved. + The types supported are: + 0) Individual-address invalidation: If "type" is 0, the logical processor invalidates mappings for a single linear address and tagged with the PCID specified in "descriptor", except global translations. The instruction may also invalidate global translations, mappings for other linear addresses, or mappings tagged with other PCIDs. + 1) Single-context invalidation: If "type" is 1, the logical processor invalidates all mappings tagged with the PCID specified in "descriptor" except global translations. In some cases, it may invalidate mappings for other PCIDs as well. + 2) All-context invalidation: If "type" is 2, the logical processor invalidates all mappings tagged with any PCID. + 3) All-context invalidation, retaining global translations: If "type" is 3, the logical processor invalidates all mappings tagged with any PCID except global translations, ignoring "descriptor". The instruction may also invalidate global translations as well. 
+ +CASE type[1:0] OF +0: // individual-address invalidation retaining global translations + OP_PCID := MEM[descriptor+11:descriptor] + ADDR := MEM[descriptor+127:descriptor+64] + BREAK +1: // single PCID invalidation retaining globals + OP_PCID := MEM[descriptor+11:descriptor] + // invalidate all mappings tagged with OP_PCID except global translations + BREAK +2: // all PCID invalidation + // invalidate all mappings tagged with any PCID + BREAK +3: // all PCID invalidation retaining global translations + // invalidate all mappings tagged with any PCID except global translations + BREAK +ESAC + + + INVPCID +
immintrin.h
+ OS-Targeted +
+ + + + Flag + + + + + Decrypt 10 rounds of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], __h[383:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Decrypt 10 rounds of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], __h[511:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], __h[383:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], __h[511:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Wrap a 128-bit AES key from "__key" into a 384-bit key __h stored in "__h" and set IWKey's NoBackup and KeySource bits in "dst". The explicit source operand "__htype" specifies __h restrictions. + __h[383:0] := WrapKey128(__key[127:0], __htype) +dst[0] := IWKey.NoBackup +dst[4:1] := IWKey.KeySource[3:0] + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + + Wrap a 256-bit AES key from "__key_hi" and "__key_lo" into a 512-bit key stored in "__h" and set IWKey's NoBackup and KeySource bits in "dst". The 32-bit "__htype" specifies __h restrictions. + __h[511:0] := WrapKey256(__key_lo[127:0], __key_hi[127:0], __htype) +dst[0] := IWKey.NoBackup +dst[4:1] := IWKey.KeySource[3:0] + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + + Load internal wrapping key (IWKey). The 32-bit unsigned integer "__ctl" specifies IWKey's KeySource and whether backing up the key is permitted. IWKey's 256-bit encryption key is loaded from "__enkey_lo" and "__enkey_hi". IWKey's 128-bit integrity key is loaded from "__intkey". + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Decrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES128Decrypt (__idata[i], __h[383:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Decrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES256Decrypt (__idata[i], __h[511:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES128Encrypt (__idata[i], __h[383:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES256Encrypt (__idata[i], __h[511:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + + + + Count the number of leading zero bits in unsigned 32-bit integer "a", and return that count in "dst". + +tmp := 31 +dst := 0 +DO WHILE (tmp >= 0 AND a[tmp] == 0) + tmp := tmp - 1 + dst := dst + 1 +OD + + + LZCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of leading zero bits in unsigned 64-bit integer "a", and return that count in "dst". + +tmp := 63 +dst := 0 +DO WHILE (tmp >= 0 AND a[tmp] == 0) + tmp := tmp - 1 + dst := dst + 1 +OD + + + LZCNT +
immintrin.h
+ Bit Manipulation +
+ + + + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper element of "dst". + +dst[31:0] := a[31:0] +dst[63:32] := 0 + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper element of "dst". + +dst[31:0] := a[31:0] +dst[63:32] := 0 + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures. + + MMX +
mmintrin.h
+ General Support +
+ + + + Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures. + + MMX +
mmintrin.h
+ General Support +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(b[15:0]) +dst[47:40] := Saturate8(b[31:16]) +dst[55:48] := Saturate8(b[47:32]) +dst[63:56] := Saturate8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(b[31:0]) +dst[63:48] := Saturate16(b[63:32]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(b[15:0]) +dst[47:40] := SaturateU8(b[31:16]) +dst[55:48] := SaturateU8(b[47:32]) +dst[63:56] := SaturateU8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(b[15:0]) +dst[47:40] := Saturate8(b[31:16]) +dst[55:48] := Saturate8(b[47:32]) +dst[63:56] := Saturate8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(b[31:0]) +dst[63:48] := Saturate16(b[63:32]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(b[15:0]) +dst[47:40] := SaturateU8(b[31:16]) +dst[55:48] := SaturateU8(b[47:32]) +dst[63:56] := SaturateU8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[39:32] + dst[15:8] := src2[39:32] + dst[23:16] := src1[47:40] + dst[31:24] := src2[47:40] + dst[39:32] := src1[55:48] + dst[47:40] := src2[55:48] + dst[55:48] := src1[63:56] + dst[63:56] := src2[63:56] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[47:32] + dst[31:16] := src2[47:32] + dst[47:32] := src1[63:48] + dst[63:48] := src2[63:48] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := b[63:32] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := b[31:0] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[39:32] + dst[15:8] := src2[39:32] + dst[23:16] := src1[47:40] + dst[31:24] := src2[47:40] + dst[39:32] := src1[55:48] + dst[47:40] := src2[55:48] + dst[55:48] := src1[63:56] + dst[63:56] := src2[63:56] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[47:32] + dst[31:16] := src2[47:32] + dst[47:32] := src1[63:48] + dst[63:48] := src2[63:48] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := b[63:32] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := b[31:0] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[15:0] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[15:0] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[63:0] := ((NOT a[63:0]) AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] OR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] XOR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[63:0] := ((NOT a[63:0]) AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] OR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] XOR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + Return vector of type __m64 with all elements set to zero. + +dst[MAX:0] := 0 + + + MMX +
mmintrin.h
+ Set +
+ + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 + + MMX +
mmintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + MMX +
mmintrin.h
+ Set +
+ + + + Broadcast 16-bit integer "a" to all all elements of "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR + + MMX +
mmintrin.h
+ Set +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR + + MMX +
mmintrin.h
+ Set +
+ + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e1 +dst[63:32] := e0 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + Set packed 16-bit integers in "dst" with the supplied values in reverse order. + +dst[15:0] := e3 +dst[31:16] := e2 +dst[47:32] := e1 +dst[63:48] := e0 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values in reverse order. + +dst[7:0] := e7 +dst[15:8] := e6 +dst[23:16] := e5 +dst[31:24] := e4 +dst[39:32] := e3 +dst[47:40] := e2 +dst[55:48] := e1 +dst[63:56] := e0 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + + Arm address monitoring hardware using the address specified in "p". A store to an address within the specified address range triggers the monitoring hardware. Specify optional extensions in "extensions", and optional hints in "hints". + + MONITOR +
pmmintrin.h
+ General Support +
+ + + + + Hint to the processor that it can enter an implementation-dependent-optimized state while waiting for an event or store operation to the address range specified by MONITOR. + + MONITOR +
pmmintrin.h
+ General Support +
+ + + + + + Load 16 bits from memory, perform a byte swap operation, and store the result in "dst". + +FOR j := 0 to 1 + i := j*8 + dst[i+7:i] := MEM[ptr+15-i:ptr+8-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Load +
+ + + + Load 32 bits from memory, perform a byte swap operation, and store the result in "dst". + +FOR j := 0 to 3 + i := j*8 + dst[i+7:i] := MEM[ptr+31-i:ptr+24-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Load +
+ + + + Load 64 bits from memory, perform a byte swap operation, and store the result in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MEM[ptr+63-i:ptr+56-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Load +
+ + + + + Perform a bit swap operation of the 16 bits in "data", and store the results to memory. + +FOR j := 0 to 1 + i := j*8 + MEM[ptr+i+7:ptr+i] := data[15-i:8-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Store +
+ + + + + Perform a bit swap operation of the 32 bits in "data", and store the results to memory. + +addr := MEM[ptr] +FOR j := 0 to 3 + i := j*8 + MEM[ptr+i+7:ptr+i] := data[31-i:24-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Store +
+ + + + + Perform a bit swap operation of the 64 bits in "data", and store the results to memory. + +addr := MEM[ptr] +FOR j := 0 to 7 + i := j*8 + MEM[ptr+i+7:ptr+i] := data[63-i:56-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Store +
+ + + + + + + Move 64-byte (512-bit) value using direct store from source memory address "src" to destination memory address "dst". + +MEM[dst+511:dst] := MEM[src+511:src] + + + MOVDIR64B +
immintrin.h
+ Store +
+ + + + + + + Store 64-bit integer from "val" into memory using direct store. + +MEM[dst+63:dst] := val[63:0] + + + MOVDIRI +
immintrin.h
+ Store +
+ + + + + Store 32-bit integer from "val" into memory using direct store. + +MEM[dst+31:dst] := val[31:0] + + + MOVDIRI +
immintrin.h
+ Store +
+ + + + + + + Make a pointer with the value of "srcmem" and bounds set to ["srcmem", "srcmem" + "size" - 1], and store the result in "dst". + dst := srcmem +dst.LB := srcmem.LB +dst.UB := srcmem + size - 1 + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + + Narrow the bounds for pointer "q" to the intersection of the bounds of "r" and the bounds ["q", "q" + "size" - 1], and store the result in "dst". + dst := q +IF r.LB > (q + size - 1) OR r.UB < q + dst.LB := 1 + dst.UB := 0 +ELSE + dst.LB := MAX(r.LB, q) + dst.UB := MIN(r.UB, (q + size - 1)) +FI + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Make a pointer with the value of "q" and bounds set to the bounds of "r" (e.g. copy the bounds of "r" to pointer "q"), and store the result in "dst". + dst := q +dst.LB := r.LB +dst.UB := r.UB + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Make a pointer with the value of "q" and open bounds, which allow the pointer to access the entire virtual address space, and store the result in "dst". + dst := q +dst.LB := 0 +dst.UB := 0 + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Stores the bounds of "ptr_val" pointer in memory at address "ptr_addr". + MEM[ptr_addr].LB := ptr_val.LB +MEM[ptr_addr].UB := ptr_val.UB + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Checks if "q" is within its lower bound, and throws a #BR if not. + IF q < q.LB + #BR +FI + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Checks if "q" is within its upper bound, and throws a #BR if not. + IF q > q.UB + #BR +FI + + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Checks if ["q", "q" + "size" - 1] is within the lower and upper bounds of "q" and throws a #BR if not. + IF (q + size - 1) < q.LB OR (q + size - 1) > q.UB + #BR +FI + + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Return the lower bound of "q". + dst := q.LB + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Return the upper bound of "q". + dst := q.UB + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Set "dst" to the index of the lowest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined. + +tmp := 0 +IF a == 0 + // dst is undefined +ELSE + DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + OD +FI +dst := tmp + + +
immintrin.h
+ Bit Manipulation +
+ + + + Set "dst" to the index of the highest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined. + +tmp := 31 +IF a == 0 + // dst is undefined +ELSE + DO WHILE ((tmp > 0) AND a[tmp] == 0) + tmp := tmp - 1 + OD +FI +dst := tmp + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the lowest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 0 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 31) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the highest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 31 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp > 0) AND a[tmp] == 0) + tmp := tmp - 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 0) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the lowest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 0 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp < 64) AND a[tmp] == 0) + tmp := tmp + 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 63) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the highest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 63 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp > 0) AND a[tmp] == 0) + tmp := tmp - 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 0) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a". + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a", and set that bit to its complement. + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] +MEM[addr] := ~dst[0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a", and set that bit to zero. + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] +MEM[addr] := 0 + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a", and set that bit to one. + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] +MEM[addr] := 1 + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a". + +addr := a + b +dst[0] := MEM[addr] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a", and set that bit to its complement. + +addr := a + b +dst[0] := MEM[addr] +MEM[addr] := ~dst[0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a", and set that bit to zero. + +addr := a + b +dst[0] := MEM[addr] +MEM[addr] := 0 + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a", and set that bit to one. + +addr := a + b +dst[0] := MEM[addr] +MEM[addr] := 1 + + +
immintrin.h
+ Bit Manipulation +
+ + + + Reverse the byte order of 32-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values. + +dst[7:0] := a[31:24] +dst[15:8] := a[23:16] +dst[23:16] := a[15:8] +dst[31:24] := a[7:0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + Reverse the byte order of 64-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values. + +dst[7:0] := a[63:56] +dst[15:8] := a[55:48] +dst[23:16] := a[47:40] +dst[31:24] := a[39:32] +dst[39:32] := a[31:24] +dst[47:40] := a[23:16] +dst[55:48] := a[15:8] +dst[63:56] := a[7:0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + Cast from type float to type unsigned __int32 without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + Cast from type double to type unsigned __int64 without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + Cast from type unsigned __int32 to type float without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + Cast from type unsigned __int64 to type double without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + + Shift the bits of unsigned long integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + // size := 32 or 64 +dst := a +count := shift AND (size - 1) +DO WHILE (count > 0) + tmp[0] := dst[size - 1] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned long integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + // size := 32 or 64 +dst := a +count := shift AND (size - 1) +DO WHILE (count > 0) + tmp[size - 1] := dst[0] + dst := (dst >> 1) OR tmp[size - 1] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 32-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 31 +DO WHILE (count > 0) + tmp[0] := dst[31] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 32-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 31 +DO WHILE (count > 0) + tmp[31] := dst[0] + dst := (dst >> 1) OR tmp + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 16-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 15 +DO WHILE (count > 0) + tmp[0] := dst[15] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 16-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 15 +DO WHILE (count > 0) + tmp[15] := dst[0] + dst := (dst >> 1) OR tmp + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 64-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 63 +DO WHILE (count > 0) + tmp[0] := dst[63] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 64-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 63 +DO WHILE (count > 0) + tmp[63] := dst[0] + dst := (dst >> 1) OR tmp[63] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + Treat the processor-specific feature(s) specified in "a" as available. Multiple features may be OR'd together. See the valid feature flags below: + +_FEATURE_GENERIC_IA32 +_FEATURE_FPU +_FEATURE_CMOV +_FEATURE_MMX +_FEATURE_FXSAVE +_FEATURE_SSE +_FEATURE_SSE2 +_FEATURE_SSE3 +_FEATURE_SSSE3 +_FEATURE_SSE4_1 +_FEATURE_SSE4_2 +_FEATURE_MOVBE +_FEATURE_POPCNT +_FEATURE_PCLMULQDQ +_FEATURE_AES +_FEATURE_F16C +_FEATURE_AVX +_FEATURE_RDRND +_FEATURE_FMA +_FEATURE_BMI +_FEATURE_LZCNT +_FEATURE_HLE +_FEATURE_RTM +_FEATURE_AVX2 +_FEATURE_KNCNI +_FEATURE_AVX512F +_FEATURE_ADX +_FEATURE_RDSEED +_FEATURE_AVX512ER +_FEATURE_AVX512PF +_FEATURE_AVX512CD +_FEATURE_SHA +_FEATURE_MPX +_FEATURE_AVX512BW +_FEATURE_AVX512VL +_FEATURE_AVX512VBMI +_FEATURE_AVX512_4FMAPS +_FEATURE_AVX512_4VNNIW +_FEATURE_AVX512_VPOPCNTDQ +_FEATURE_AVX512_BITALG +_FEATURE_AVX512_VBMI2 +_FEATURE_GFNI +_FEATURE_VAES +_FEATURE_VPCLMULQDQ +_FEATURE_AVX512_VNNI +_FEATURE_CLWB +_FEATURE_RDPID +_FEATURE_IBT +_FEATURE_SHSTK +_FEATURE_SGX +_FEATURE_WBNOINVD +_FEATURE_PCONFIG +_FEATURE_AXV512_4VNNIB +_FEATURE_AXV512_4FMAPH +_FEATURE_AXV512_BITALG2 +_FEATURE_AXV512_VP2INTERSECT + +
immintrin.h
+ General Support +
+ + + + Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true or false (1 or 0) if the set of features is available. Multiple features may be OR'd together. This function is limited to bitmask values in the first 'page' of the libirc cpu-id information. This intrinsic does not check the processor vendor. See the valid feature flags below: + +_FEATURE_GENERIC_IA32 +_FEATURE_FPU +_FEATURE_CMOV +_FEATURE_MMX +_FEATURE_FXSAVE +_FEATURE_SSE +_FEATURE_SSE2 +_FEATURE_SSE3 +_FEATURE_SSSE3 +_FEATURE_SSE4_1 +_FEATURE_SSE4_2 +_FEATURE_MOVBE +_FEATURE_POPCNT +_FEATURE_PCLMULQDQ +_FEATURE_AES +_FEATURE_F16C +_FEATURE_AVX +_FEATURE_RDRND +_FEATURE_FMA +_FEATURE_BMI +_FEATURE_LZCNT +_FEATURE_HLE +_FEATURE_RTM +_FEATURE_AVX2 +_FEATURE_KNCNI +_FEATURE_AVX512F +_FEATURE_ADX +_FEATURE_RDSEED +_FEATURE_AVX512ER +_FEATURE_AVX512PF +_FEATURE_AVX512CD +_FEATURE_SHA +_FEATURE_MPX +_FEATURE_AVX512BW +_FEATURE_AVX512VL +_FEATURE_AVX512VBMI +_FEATURE_AVX512_4FMAPS +_FEATURE_AVX512_4VNNIW +_FEATURE_AVX512_VPOPCNTDQ +_FEATURE_AVX512_BITALG +_FEATURE_AVX512_VBMI2 +_FEATURE_GFNI +_FEATURE_VAES +_FEATURE_VPCLMULQDQ +_FEATURE_AVX512_VNNI +_FEATURE_CLWB +_FEATURE_RDPID +_FEATURE_IBT +_FEATURE_SHSTK +_FEATURE_SGX +_FEATURE_WBNOINVD +_FEATURE_PCONFIG +_FEATURE_AXV512_4VNNIB +_FEATURE_AXV512_4FMAPH +_FEATURE_AXV512_BITALG2 +_FEATURE_AXV512_VP2INTERSECT +_FEATURE_AXV512_FP16 + +
immintrin.h
+ General Support +
+ + + + + Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true or false (1 or 0) if the set of features is available. Multiple features may be OR'd together. This works identically to the previous variant, except it also accepts a 'page' index that permits checking features on the 2nd page of the libirc information. When provided with a '0' in the 'page' parameter, this works identically to _may_i_use_cpu_feature. This intrinsic does not check the processor vendor. See the valid feature flags on the 2nd page below: (provided with a '1' in the 'page' parameter) + +_FEATURE_CLDEMOTE +_FEATURE_MOVDIRI +_FEATURE_MOVDIR64B +_FEATURE_WAITPKG +_FEATURE_AVX512_Bf16 +_FEATURE_ENQCMD +_FEATURE_AVX_VNNI +_FEATURE_AMX_TILE +_FEATURE_AMX_INT8 +_FEATURE_AMX_BF16 +_FEATURE_KL +_FEATURE_WIDE_KL +_FEATURE_HRESET +_FEATURE_UINTR +_FEATURE_PREFETCHI +_FEATURE_AVXVNNIINT8 +_FEATURE_CMPCCXADD +_FEATURE_AVXIFMA +_FEATURE_AVXNECONVERT +_FEATURE_RAOINT +_FEATURE_AMX_FP16 +_FEATURE_AMX_COMPLEX +_FEATURE_SHA512 +_FEATURE_SM3 +_FEATURE_SM4 +_FEATURE_AVXVNNIINT16 +_FEATURE_USERMSR +_FEATURE_AVX10_1_256 +_FEATURE_AVX10_1_512 +_FEATURE_APXF +_FEATURE_MSRLIST +_FEATURE_WRMSRNS +_FEATURE_PBNDKB + +
immintrin.h
+ General Support +
+ + + + Dynamically query the processor to determine if the processor-specific feature(s) specified a series of compile-time string literals in "feature, ..." are available, and return true or false (1 or 0) if the set of features is available. These feature names are converted to a bitmask and uses the same infrastructure as _may_i_use_cpu_feature_ext to validate it. The behavior is the same as the previous variants. This intrinsic does not check the processor vendor. Supported string literals are one-to-one corresponding in the "Operation" sections of _may_i_use_cpu_feature and _may_i_use_cpu_feature_ext. Example string literals are "avx2", "bmi", "avx512fp16", "amx-int8"... + + +
immintrin.h
+ General Support +
+ + + + Read the Performance Monitor Counter (PMC) specified by "a", and store up to 64-bits in "dst". The width of performance counters is implementation specific. + dst[63:0] := ReadPMC(a) + + +
immintrin.h
+ General Support +
+ + + + + + + Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[32:0] := a[31:0] + b[31:0] + (c_in > 0 ? 1 : 0) +MEM[out+31:out] := tmp[31:0] +dst[0] := tmp[32] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[64:0] := a[63:0] + b[63:0] + (c_in > 0 ? 1 : 0) +MEM[out+63:out] := tmp[63:0] +dst[0] := tmp[64] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 32-bit integer "b", and subtract the result from unsigned 32-bit integer "a". Store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[32:0] := a[31:0] - (b[31:0] + (c_in > 0 ? 1 : 0)) +MEM[out+31:out] := tmp[31:0] +dst[0] := tmp[32] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 64-bit integer "b", and subtract the result from unsigned 64-bit integer "a". Store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[64:0] := a[63:0] - (b[63:0] + (c_in > 0 ? 1 : 0)) +MEM[out+63:out] := tmp[63:0] +dst[0] := tmp[64] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + Insert the 32-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled. + +
immintrin.h
+ Miscellaneous +
+ + + + Insert the 64-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled. + +
immintrin.h
+ Miscellaneous +
+ + + + + Invoke the Intel SGX enclave user (non-privilege) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. + +
immintrin.h
+ Miscellaneous +
+ + + + + Invoke the Intel SGX enclave system (privileged) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. + +
immintrin.h
+ Miscellaneous +
+ + + + + Invoke the Intel SGX enclave virtualized (VMM) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. + +
immintrin.h
+ Miscellaneous +
+ + + + Write back and flush internal caches. + Initiate writing-back and flushing of external + caches. + +
immintrin.h
+ Miscellaneous +
+ + + + Convert the half-precision (16-bit) floating-point value "a" to a single-precision (32-bit) floating-point value, and store the result in "dst". + +dst[31:0] := Convert_FP16_To_FP32(a[15:0]) + +
emmintrin.h
+ Convert +
+ + + + + Convert the single-precision (32-bit) floating-point value "a" to a half-precision (16-bit) floating-point value, and store the result in "dst". + [round_note] + +dst[15:0] := Convert_FP32_To_FP16(a[31:0]) + +
emmintrin.h
+ Convert +
+ + + + + + + Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst". + +IF (imm8[0] == 0) + TEMP1 := a[63:0] +ELSE + TEMP1 := a[127:64] +FI +IF (imm8[4] == 0) + TEMP2 := b[63:0] +ELSE + TEMP2 := b[127:64] +FI +FOR i := 0 to 63 + TEMP[i] := (TEMP1[0] and TEMP2[i]) + FOR j := 1 to i + TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j]) + ENDFOR + dst[i] := TEMP[i] +ENDFOR +FOR i := 64 to 127 + TEMP[i] := 0 + FOR j := (i - 63) to 63 + TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j]) + ENDFOR + dst[i] := TEMP[i] +ENDFOR +dst[127] := 0 + + + PCLMULQDQ +
wmmintrin.h
+ Application-Targeted +
+ + + + + + + Invoke the PCONFIG leaf function specified by "a". The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. May return the value in eax, depending on the semantics of the specified leaf function. + + PCONFIG +
immintrin.h
+ Miscellaneous +
+ + + + + + Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 31 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of bits set to 1 in unsigned 64-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 63 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of bits set to 1 in 32-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 31 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of bits set to 1 in 64-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 63 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + + + Loads an instruction sequence containing the specified memory address into all level cache. + + PREFETCHI +
x86gprintrin.h
+ General Support +
+ + + + Loads an instruction sequence containing the specified memory address into all but the first-level cache. + + PREFETCHI +
x86gprintrin.h
+ General Support +
+ + + + + Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i", which can be one of:<ul> + <li>_MM_HINT_ET0 // 7, move data using the ET0 hint. The PREFETCHW instruction will be generated.</li> + <li>_MM_HINT_T0 // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.</li> + <li>_MM_HINT_T1 // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.</li> + <li>_MM_HINT_T2 // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.</li> + <li>_MM_HINT_NTA // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.</li> + + + + + + + PRFCHW +
immintrin.h
+ General Support +
+ + + + + Atomically add a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. + + +MEM[__A+31:__A] := MEM[__A+31:__A] + __B[31:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically add a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. + + +MEM[__A+63:__A] := MEM[__A+63:__A] + __B[63:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically and a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. + + +MEM[__A+31:__A] := MEM[__A+31:__A] AND __B[31:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically and a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. + + +MEM[__A+63:__A] := MEM[__A+63:__A] AND __B[63:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically or a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. + + +MEM[__A+31:__A] := MEM[__A+31:__A] OR __B[31:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically or a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. + + +MEM[__A+63:__A] := MEM[__A+63:__A] OR __B[63:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically xor a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. + + +MEM[__A+31:__A] := MEM[__A+31:__A] XOR __B[31:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically xor a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. + + +MEM[__A+63:__A] := MEM[__A+63:__A] XOR __B[63:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + + Copy the IA32_TSC_AUX MSR (signature value) into "dst". + dst[31:0] := IA32_TSC_AUX[31:0] + + + RDPID +
immintrin.h
+ General Support +
+ + + + + + Read a hardware generated 16-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_RND_GEN.ready == 1 + val[15:0] := HW_RND_GEN.data + dst := 1 +ELSE + val[15:0] := 0 + dst := 0 +FI + + + RDRAND +
immintrin.h
+ Random +
+ + + + Read a hardware generated 32-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_RND_GEN.ready == 1 + val[31:0] := HW_RND_GEN.data + dst := 1 +ELSE + val[31:0] := 0 + dst := 0 +FI + + + RDRAND +
immintrin.h
+ Random +
+ + + + Read a hardware generated 64-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_RND_GEN.ready == 1 + val[63:0] := HW_RND_GEN.data + dst := 1 +ELSE + val[63:0] := 0 + dst := 0 +FI + + + RDRAND +
immintrin.h
+ Random +
+ + + + + + Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_NRND_GEN.ready == 1 + val[15:0] := HW_NRND_GEN.data + dst := 1 +ELSE + val[15:0] := 0 + dst := 0 +FI + + + RDSEED +
immintrin.h
+ Random +
+ + + + Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_NRND_GEN.ready == 1 + val[31:0] := HW_NRND_GEN.data + dst := 1 +ELSE + val[31:0] := 0 + dst := 0 +FI + + + RDSEED +
immintrin.h
+ Random +
+ + + + Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_NRND_GEN.ready == 1 + val[63:0] := HW_NRND_GEN.data + dst := 1 +ELSE + val[63:0] := 0 + dst := 0 +FI + + + RDSEED +
immintrin.h
+ Random +
+ + + + + + Copy the current 64-bit value of the processor's time-stamp counter into "dst", and store the IA32_TSC_AUX MSR (signature value) into memory at "mem_addr". + dst[63:0] := TimeStampCounter +MEM[mem_addr+31:mem_addr] := IA32_TSC_AUX[31:0] + + + RDTSCP +
immintrin.h
+ General Support +
+ + + + + + Force an RTM abort. The EAX register is updated to reflect an XABORT instruction caused the abort, and the "imm8" parameter will be provided in bits [31:24] of EAX. + Following an RTM abort, the logical processor resumes execution at the fallback address computed through the outermost XBEGIN instruction. + IF RTM_ACTIVE == 0 + // nop +ELSE + // restore architectural register state + // discard memory updates performed in transaction + // update EAX with status and imm8 value + eax[31:24] := imm8[7:0] + RTM_NEST_COUNT := 0 + RTM_ACTIVE := 0 + IF _64_BIT_MODE + RIP := fallbackRIP + ELSE + EIP := fallbackEIP + FI +FI + + + RTM +
immintrin.h
+ General Support +
+ + + + Specify the start of an RTM code region. + If the logical processor was not already in transactional execution, then this call causes the logical processor to transition into transactional execution. + On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution, restores architectural state, and starts execution beginning at the fallback address computed from the outermost XBEGIN instruction. Return status of ~0 (0xFFFF) if continuing inside transaction; all other codes are aborts. + IF RTM_NEST_COUNT < MAX_RTM_NEST_COUNT + RTM_NEST_COUNT := RTM_NEST_COUNT + 1 + IF RTM_NEST_COUNT == 1 + IF _64_BIT_MODE + fallbackRIP := RIP + ELSE IF _32_BIT_MODE + fallbackEIP := EIP + FI + + RTM_ACTIVE := 1 + // enter RTM execution, record register state, start tracking memory state + FI +ELSE + // RTM abort (see _xabort) +FI + + + RTM +
immintrin.h
+ General Support +
+ + + + Specify the end of an RTM code region. + If this corresponds to the outermost scope, the logical processor will attempt to commit the logical processor state atomically. + If the commit fails, the logical processor will perform an RTM abort. + IF RTM_ACTIVE == 1 + RTM_NEST_COUNT := RTM_NEST_COUNT - 1 + IF RTM_NEST_COUNT == 0 + // try to commit transaction + IF FAIL_TO_COMMIT_TRANSACTION + // RTM abort (see _xabort) + ELSE + RTM_ACTIVE := 0 + FI + FI +FI + + + RTM +
immintrin.h
+ General Support +
+ + + + Query the transactional execution status, return 1 if inside a transactionally executing RTM or HLE region, and return 0 otherwise. + IF (RTM_ACTIVE == 1 OR HLE_ACTIVE == 1) + dst := 1 +ELSE + dst := 0 +FI + + + RTM +
immintrin.h
+ General Support +
+ + + + + Serialize instruction execution, ensuring all modifications to flags, registers, and memory by previous instructions are completed before the next instruction is fetched. + + SERIALIZE +
immintrin.h
+ General Support +
+ + + + + + + Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". + +W0 := a[127:96] +W1 := a[95:64] +W2 := a[63:32] +W3 := a[31:0] +W4 := b[127:96] +W5 := b[95:64] +dst[127:96] := W2 XOR W0 +dst[95:64] := W3 XOR W1 +dst[63:32] := W4 XOR W2 +dst[31:0] := W5 XOR W3 + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in "a" and the previous message values in "b", and store the result in "dst". + +W13 := b[95:64] +W14 := b[63:32] +W15 := b[31:0] +W16 := (a[127:96] XOR W13) <<< 1 +W17 := (a[95:64] XOR W14) <<< 1 +W18 := (a[63:32] XOR W15) <<< 1 +W19 := (a[31:0] XOR W16) <<< 1 +dst[127:96] := W16 +dst[95:64] := W17 +dst[63:32] := W18 +dst[31:0] := W19 + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable "a", add that value to the scheduled values (unsigned 32-bit integers) in "b", and store the result in "dst". + +tmp := (a[127:96] <<< 30) +dst[127:96] := b[127:96] + tmp +dst[95:64] := b[95:64] +dst[63:32] := b[63:32] +dst[31:0] := b[31:0] + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + + Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from "a" and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from "b", and store the updated SHA1 state (A,B,C,D) in "dst". "func" contains the logic functions and round constants. + IF (func[1:0] == 0) + f := f0() + K := K0 +ELSE IF (func[1:0] == 1) + f := f1() + K := K1 +ELSE IF (func[1:0] == 2) + f := f2() + K := K2 +ELSE IF (func[1:0] == 3) + f := f3() + K := K3 +FI +A := a[127:96] +B := a[95:64] +C := a[63:32] +D := a[31:0] +W[0] := b[127:96] +W[1] := b[95:64] +W[2] := b[63:32] +W[3] := b[31:0] +A[1] := f(B, C, D) + (A <<< 5) + W[0] + K +B[1] := A +C[1] := B <<< 30 +D[1] := C +E[1] := D +FOR i := 1 to 3 + A[i+1] := f(B[i], C[i], D[i]) + (A[i] <<< 5) + W[i] + E[i] + K + B[i+1] := A[i] + C[i+1] := B[i] <<< 30 + D[i+1] := C[i] + E[i+1] := D[i] +ENDFOR +dst[127:96] := A[4] +dst[95:64] := B[4] +dst[63:32] := C[4] +dst[31:0] := D[4] + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". + W4 := b[31:0] +W3 := a[127:96] +W2 := a[95:64] +W1 := a[63:32] +W0 := a[31:0] +dst[127:96] := W3 + sigma0(W4) +dst[95:64] := W2 + sigma0(W3) +dst[63:32] := W1 + sigma0(W2) +dst[31:0] := W0 + sigma0(W1) + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst"." + W14 := b[95:64] +W15 := b[127:96] +W16 := a[31:0] + sigma1(W14) +W17 := a[63:32] + sigma1(W15) +W18 := a[95:64] + sigma1(W16) +W19 := a[127:96] + sigma1(W17) +dst[127:96] := W19 +dst[95:64] := W18 +dst[63:32] := W17 +dst[31:0] := W16 + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + + Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from "a", an initial SHA256 state (A,B,E,F) from "b", and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from "k", and store the updated SHA256 state (A,B,E,F) in "dst". + A[0] := b[127:96] +B[0] := b[95:64] +C[0] := a[127:96] +D[0] := a[95:64] +E[0] := b[63:32] +F[0] := b[31:0] +G[0] := a[63:32] +H[0] := a[31:0] +W_K[0] := k[31:0] +W_K[1] := k[63:32] +FOR i := 0 to 1 + A[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + Maj(A[i], B[i], C[i]) + sum0(A[i]) + B[i+1] := A[i] + C[i+1] := B[i] + D[i+1] := C[i] + E[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + D[i] + F[i+1] := E[i] + G[i+1] := F[i] + H[i+1] := G[i] +ENDFOR +dst[127:96] := A[2] +dst[95:64] := B[2] +dst[63:32] := E[2] +dst[31:0] := F[2] + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + This intrinisc is one of the two SHA512 message scheduling instructions. The intrinsic performs an intermediate calculation for the next four SHA512 message qwords. The calculated results are stored in "dst". + + +DEFINE ROR64(qword, n) { + count := n % 64 + dest := (qword >> count) | (qword << (64 - count)) + RETURN dest +} +DEFINE SHR64(qword, n) { + RETURN qword >> n +} +DEFINE s0(qword) { + RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7) +} +W.qword[4] := __B.qword[0] +W.qword[3] := __A.qword[3] +W.qword[2] := __A.qword[2] +W.qword[1] := __A.qword[1] +W.qword[0] := __A.qword[0] +dst.qword[3] := W.qword[3] + s0(W.qword[4]) +dst.qword[2] := W.qword[2] + s0(W.qword[3]) +dst.qword[1] := W.qword[1] + s0(W.qword[2]) +dst.qword[0] := W.qword[0] + s0(W.qword[1]) + + + + SHA512 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinisc is one of the two SHA512 message scheduling instructions. The intrinsic performs the final calculation for the next four SHA512 message qwords. The calculated results are stored in "dst". + + +DEFINE ROR64(qword, n) { + count := n % 64 + dest := (qword >> count) | (qword << (64 - count)) + RETURN dest +} +DEFINE SHR64(qword, n) { + RETURN qword >> n +} +DEFINE s1(qword) { + RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6) +} +W.qword[14] := __B.qword[2] +W.qword[15] := __B.qword[3] +W.qword[16] := __A.qword[0] + s1(W.qword[14]) +W.qword[17] := __A.qword[1] + s1(W.qword[15]) +W.qword[18] := __A.qword[2] + s1(W.qword[16]) +W.qword[19] := __A.qword[3] + s1(W.qword[17]) +dst.qword[3] := W.qword[19] +dst.qword[2] := W.qword[18] +dst.qword[1] := W.qword[17] +dst.qword[0] := W.qword[16] + + + + SHA512 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinisc performs two rounds of SHA512 operation using initial SHA512 state (C,D,G,H) from "__A", an initial SHA512 state (A,B,E,F) from "__B", and a pre-computed sum of the next two round message qwords and the corresponding round constants from "__C" (only the two lower qwords of the third operand). The updated SHA512 state (A,B,E,F) is written to "dst", and "dst" can be used as the updated state (C,D,G,H) in later rounds. + + +DEFINE ROR64(qword, n) { + count := n % 64 + dest := (qword >> count) | (qword << (64 - count)) + RETURN dest +} +DEFINE SHR64(qword, n) { + RETURN qword >> n +} +DEFINE cap_sigma0(qword) { + RETURN ROR64(qword, 28) ^ ROR64(qword, 34) ^ ROR64(qword, 39) +} +DEFINE cap_sigma1(qword) { + RETURN ROR64(qword, 14) ^ ROR64(qword, 18) ^ ROR64(qword, 41) +} +DEFINE MAJ(a,b,c) { + RETURN (a & b) ^ (a & c) ^ (b & c) +} +DEFINE CH(a,b,c) { + RETURN (a & b) ^ (c & ~a) +} +A.qword[0] := __B.qword[3] +B.qword[0] := __B.qword[2] +C.qword[0] := __A.qword[3] +D.qword[0] := __A.qword[2] +E.qword[0] := __B.qword[1] +F.qword[0] := __B.qword[0] +G.qword[0] := __A.qword[1] +H.qword[0] := __A.qword[0] +WK.qword[0]:= __C.qword[0] +WK.qword[1]:= __C.qword[1] +FOR i := 0 to 1 + A.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + MAJ(A.qword[i], B.qword[i], C.qword[i]) + cap_sigma0(A.qword[i]) + B.qword[i+1] := A.qword[i] + C.qword[i+1] := B.qword[i] + D.qword[i+1] := C.qword[i] + E.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + D.qword[i] + F.qword[i+1] := E.qword[i] + G.qword[i+1] := F.qword[i] + H.qword[i+1] := G.qword[i] +ENDFOR +dst.qword[3] := A.qword[2] +dst.qword[2] := B.qword[2] +dst.qword[1] := E.qword[2] +dst.qword[0] := F.qword[2] + + + + + SHA512 + AVX +
immintrin.h
+ Cryptography +
+ + + The VSM3MSG1 intrinsic is one of the two SM3 message scheduling intrinsics. The intrinsic performs an initial calculation for the next four SM3 message words. The calculated results are stored in "dst". + + +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32 - count)) + RETURN dest +} +DEFINE P1(x) { + RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23) +} +W.dword[0] := __C.dword[0] +W.dword[1] := __C.dword[1] +W.dword[2] := __C.dword[2] +W.dword[3] := __C.dword[3] +W.dword[7] := __A.dword[0] +W.dword[8] := __A.dword[1] +W.dword[9] := __A.dword[2] +W.dword[10] := __A.dword[3] +W.dword[13] := __B.dword[0] +W.dword[14] := __B.dword[1] +W.dword[15] := __B.dword[2] +TMP0 := W.dword[7] ^ W.dword[0] ^ ROL32(W.dword[13], 15) +TMP1 := W.dword[8] ^ W.dword[1] ^ ROL32(W.dword[14], 15) +TMP2 := W.dword[9] ^ W.dword[2] ^ ROL32(W.dword[15], 15) +TMP3 := W.dword[10] ^ W.dword[3] +dst.dword[0] := P1(TMP0) +dst.dword[1] := P1(TMP1) +dst.dword[2] := P1(TMP2) +dst.dword[3] := P1(TMP3) + + + + + SM3 + AVX +
immintrin.h
+ Cryptography +
+ + + The VSM3MSG2 intrinsic is one of the two SM3 message scheduling intrinsics. The intrinsic performs the final calculation for the next four SM3 message words. The calculated results are stored in "dst". + + +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +WTMP.dword[0] := __A.dword[0] +WTMP.dword[1] := __A.dword[1] +WTMP.dword[2] := __A.dword[2] +WTMP.dword[3] := __A.dword[3] +W.dword[3] := __B.dword[0] +W.dword[4] := __B.dword[1] +W.dword[5] := __B.dword[2] +W.dword[6] := __B.dword[3] +W.dword[10] := __C.dword[0] +W.dword[11] := __C.dword[1] +W.dword[12] := __C.dword[2] +W.dword[13] := __C.dword[3] +W.dword[16] := ROL32(W.dword[3], 7) ^ W.dword[10] ^ WTMP.dword[0] +W.dword[17] := ROL32(W.dword[4], 7) ^ W.dword[11] ^ WTMP.dword[1] +W.dword[18] := ROL32(W.dword[5], 7) ^ W.dword[12] ^ WTMP.dword[2] +W.dword[19] := ROL32(W.dword[6], 7) ^ W.dword[13] ^ WTMP.dword[3] +W.dword[19] := W.dword[19] ^ ROL32(W.dword[16], 6) ^ ROL32(W.dword[16], 15) ^ ROL32(W.dword[16], 30) +dst.dword[0] := W.dword[16] +dst.dword[1] := W.dword[17] +dst.dword[2] := W.dword[18] +dst.dword[3] := W.dword[19] + + + + + SM3 + AVX +
immintrin.h
+ Cryptography +
+ + + The intrinsic performs two rounds of SM3 operation using initial SM3 state (C, D, G, H) from "__A", an initial SM3 states (A, B, E, F) from "__B" and a pre-computed words from the "__C". "__A" with initial SM3 state of (C, D, G, H) assumes input of non-rotated left variables from previous state. The updated SM3 state (A, B, E, F) is written to "__A". The "imm8" should contain the even round number for the first of the two rounds computed by this instruction. The computation masks the "imm8" value by ANDing it with 0x3E so that only even round numbers from 0 through 62 are used for this operation. The calculated results are stored in "dst". + + +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE P0(x) { + RETURN x ^ ROL32(x, 9) ^ ROL32(x, 17) +} +DEFINE FF(x, y, z, round) { + IF round < 16 + RETURN (x ^ y ^ z) + ELSE + RETURN (x & y) | (x & z) | (y & z) + FI +} +DEFINE GG(x, y, z, round){ + IF round < 16 + RETURN (x ^ y ^ z) + ELSE + RETURN (x & y) | (~x & z) + FI +} +A.dword[0] := __B.dword[3] +B.dword[0] := __B.dword[2] +C.dword[0] := __A.dword[3] +D.dword[0] := __A.dword[2] +E.dword[0] := __B.dword[1] +F.dword[0] := __B.dword[0] +G.dword[0] := __A.dword[1] +H.dword[0] := __A.dword[0] +W.dword[0] := __C.dword[0] +W.dword[1] := __C.dword[1] +W.dword[4] := __C.dword[2] +W.dword[5] := __C.dword[3] +C.dword[0] := ROL32(C.dword[0], 9) +D.dword[0] := ROL32(D.dword[0], 9) +G.dword[0] := ROL32(G.dword[0], 19) +H.dword[0] := ROL32(H.dword[0], 19) +ROUND := imm8 & 0x3E +IF ROUND < 16 + CONST.dword[0] := 0x79CC4519 +ELSE + CONST.dword[0] := 0x7A879D8A +FI +CONST.dword[0] := ROL32(CONST.dword[0], ROUND) +FOR i:= 0 to 1 + temp.dword[0] := ROL32(A.dword[i], 12) + E.dword[i] + CONST.dword[0] + S1.dword[0] := ROL32(temp.dword[0], 7) + S2.dword[0] := S1.dword[0] ^ ROL32(A.dword[i], 12) + T1.dword[0] := FF(A.dword[i], B.dword[i], C.dword[i], ROUND) + D.dword[i] + S2.dword[0] + (W.dword[i] ^ W.dword[i+4]) + 
T2.dword[0] := GG(E.dword[i], F.dword[i], G.dword[i], ROUND) + H.dword[i] + S1.dword[0] + W.dword[i] + D.dword[i+1] := C.dword[i] + C.dword[i+1] := ROL32(B.dword[i], 9) + B.dword[i+1] := A.dword[i] + A.dword[i+1] := T1.dword[0] + H.dword[i+1] := G.dword[i] + G.dword[i+1] := ROL32(F.dword[i], 19) + F.dword[i+1] := E.dword[i] + E.dword[i+1] := P0(T2.dword[0]) + CONST.dword[0] := ROL32(CONST.dword[0], 1) +ENDFOR +dst.dword[3] := A.dword[2] +dst.dword[2] := B.dword[2] +dst.dword[1] := E.dword[2] +dst.dword[0] := F.dword[2] + + + + + + SM3 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". + + +BYTE sbox[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +} +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE SBOX_BYTE(dword, i) { + RETURN sbox[dword.byte[i]] +} +DEFINE lower_t(dword) { + tmp.byte[0] := SBOX_BYTE(dword, 0) + tmp.byte[1] := 
SBOX_BYTE(dword, 1) + tmp.byte[2] := SBOX_BYTE(dword, 2) + tmp.byte[3] := SBOX_BYTE(dword, 3) + RETURN tmp +} +DEFINE L_KEY(dword) { + RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) +} +DEFINE T_KEY(dword) { + RETURN L_KEY(lower_t(dword)) +} +DEFINE F_KEY(X0, X1, X2, X3, round_key) { + RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) +} +FOR i:= 0 to 1 + P.dword[0] := __A.dword[4*i] + P.dword[1] := __A.dword[4*i+1] + P.dword[2] := __A.dword[4*i+2] + P.dword[3] := __A.dword[4*i+3] + C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i]) + C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1]) + C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2]) + C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3]) + dst.dword[4*i] := C.dword[0] + dst.dword[4*i+1] := C.dword[1] + dst.dword[4*i+2] := C.dword[2] + dst.dword[4*i+3] := C.dword[3] +ENDFOR +dst[MAX:256] := 0 + + + + SM4 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinisc performs four rounds of SM4 encryption. The intrinisc operates on independent 128-bit lanes. The calculated results are stored in "dst". + + BYTE sbox[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +} +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE SBOX_BYTE(dword, i) { + RETURN sbox[dword.byte[i]] +} +DEFINE lower_t(dword) { + tmp.byte[0] := SBOX_BYTE(dword, 0) + tmp.byte[1] := 
SBOX_BYTE(dword, 1) + tmp.byte[2] := SBOX_BYTE(dword, 2) + tmp.byte[3] := SBOX_BYTE(dword, 3) + RETURN tmp +} +DEFINE L_RND(dword) { + tmp := dword + tmp := tmp ^ ROL32(dword, 2) + tmp := tmp ^ ROL32(dword, 10) + tmp := tmp ^ ROL32(dword, 18) + tmp := tmp ^ ROL32(dword, 24) + RETURN tmp +} +DEFINE T_RND(dword) { + RETURN L_RND(lower_t(dword)) +} +DEFINE F_RND(X0, X1, X2, X3, round_key) { + RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) +} +FOR i:= 0 to 1 + P.dword[0] := __A.dword[4*i] + P.dword[1] := __A.dword[4*i+1] + P.dword[2] := __A.dword[4*i+2] + P.dword[3] := __A.dword[4*i+3] + C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i]) + C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1]) + C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2]) + C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3]) + dst.dword[4*i] := C.dword[0] + dst.dword[4*i+1] := C.dword[1] + dst.dword[4*i+2] := C.dword[2] + dst.dword[4*i+3] := C.dword[3] +ENDFOR +dst[MAX:256] := 0 + + + + SM4 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". + + +BYTE sbox[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +} +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE SBOX_BYTE(dword, i) { + RETURN sbox[dword.byte[i]] +} +DEFINE lower_t(dword) { + tmp.byte[0] := SBOX_BYTE(dword, 0) + tmp.byte[1] := 
SBOX_BYTE(dword, 1) + tmp.byte[2] := SBOX_BYTE(dword, 2) + tmp.byte[3] := SBOX_BYTE(dword, 3) + RETURN tmp +} +DEFINE L_KEY(dword) { + RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) +} +DEFINE T_KEY(dword) { + RETURN L_KEY(lower_t(dword)) +} +DEFINE F_KEY(X0, X1, X2, X3, round_key) { + RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) +} +P.dword[0] := __A.dword[0] +P.dword[1] := __A.dword[1] +P.dword[2] := __A.dword[2] +P.dword[3] := __A.dword[3] +C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[0]) +C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[1]) +C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[2]) +C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[3]) +dst.dword[0] := C.dword[0] +dst.dword[1] := C.dword[1] +dst.dword[2] := C.dword[2] +dst.dword[3] := C.dword[3] +dst[MAX:128] := 0 + + + + SM4 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinisc performs four rounds of SM4 encryption. The intrinisc operates on independent 128-bit lanes. The calculated results are stored in "dst". + + +BYTE sbox[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +} +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE SBOX_BYTE(dword, i) { + RETURN sbox[dword.byte[i]] +} +DEFINE lower_t(dword) { + tmp.byte[0] := SBOX_BYTE(dword, 0) + tmp.byte[1] := 
SBOX_BYTE(dword, 1) + tmp.byte[2] := SBOX_BYTE(dword, 2) + tmp.byte[3] := SBOX_BYTE(dword, 3) + RETURN tmp +} +DEFINE L_RND(dword) { + tmp := dword + tmp := tmp ^ ROL32(dword, 2) + tmp := tmp ^ ROL32(dword, 10) + tmp := tmp ^ ROL32(dword, 18) + tmp := tmp ^ ROL32(dword, 24) + RETURN tmp +} +DEFINE T_RND(dword) { + RETURN L_RND(lower_t(dword)) +} +DEFINE F_RND(X0, X1, X2, X3, round_key) { + RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) +} +P.dword[0] := __A.dword[0] +P.dword[1] := __A.dword[1] +P.dword[2] := __A.dword[2] +P.dword[3] := __A.dword[3] +C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[0]) +C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[1]) +C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[2]) +C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[3]) +dst.dword[0] := C.dword[0] +dst.dword[1] := C.dword[1] +dst.dword[2] := C.dword[2] +dst.dword[3] := C.dword[3] +dst[MAX:128] := 0 + + + + SM4 + AVX +
immintrin.h
+ Cryptography +
+ + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ACOS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ACOS(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ACOSH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ACOSH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ASIN(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ASIN(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ASINH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ASINH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ATAN(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ATAN(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ATANH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ATANH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := COSD(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := COSD(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := COSH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := COSH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SIND(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SIND(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SINH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SINH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TAN(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TAN(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TAND(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TAND(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TANH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TANH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CEXP(a[31:0], b[31:0]) { + result[31:0] := POW(FP32(e), a[31:0]) * COS(b[31:0]) + result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0]) + RETURN result +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CLOG(a[31:0], b[31:0]) { + result[31:0] := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0))) + result[63:32] := ATAN2(b, a) + RETURN result +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed complex snumbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CSQRT(a[31:0], b[31:0]) { + sign[31:0] := (b < 0.0) ? -FP32(1.0) : FP32(1.0) + result[31:0] := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) + result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) + RETURN result +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(10.0, a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(2.0, a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := InvCubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := InvCubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := InvSQRT(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := InvSQRT(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(1.0 + a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(1.0 + a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_pd". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ERF(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+63:i] := 1.0 - ERF(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+31:i])) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+63:i] := 1.0 / ERF(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + + Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 1 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 1 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ERF(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 15 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 7 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 1 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 15 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 7 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 1 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TRUNCATE(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Miscellaneous +
+ + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TRUNCATE(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision (32-bit) floating-point elements in "row0", "row1", "row2", and "row3", and store the transposed matrix in these vectors ("row0" now contains column 0, etc.). + +__m128 tmp3, tmp2, tmp1, tmp0; +tmp0 := _mm_unpacklo_ps(row0, row1); +tmp2 := _mm_unpacklo_ps(row2, row3); +tmp1 := _mm_unpackhi_ps(row0, row1); +tmp3 := _mm_unpackhi_ps(row2, row3); +row0 := _mm_movelh_ps(tmp0, tmp2); +row1 := _mm_movehl_ps(tmp2, tmp0); +row2 := _mm_movelh_ps(tmp1, tmp3); +row3 := _mm_movehl_ps(tmp3, tmp1); + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[15:0] := (a[63:0] >> (imm8[1:0] * 16))[15:0] +dst[31:16] := 0 + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[15:0] := (a[63:0] >> (imm8[1:0] * 16))[15:0] +dst[31:16] := 0 + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". + +dst[63:0] := a[63:0] +sel := imm8[1:0]*16 +dst[sel+15:sel] := i[15:0] + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". + +dst[63:0] := a[63:0] +sel := imm8[1:0]*16 +dst[sel+15:sel] := i[15:0] + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[15:0] := src[15:0] + 1: tmp[15:0] := src[31:16] + 2: tmp[15:0] := src[47:32] + 3: tmp[15:0] := src[63:48] + ESAC + RETURN tmp[15:0] +} +dst[15:0] := SELECT4(a[63:0], imm8[1:0]) +dst[31:16] := SELECT4(a[63:0], imm8[3:2]) +dst[47:32] := SELECT4(a[63:0], imm8[5:4]) +dst[63:48] := SELECT4(a[63:0], imm8[7:6]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[15:0] := src[15:0] + 1: tmp[15:0] := src[31:16] + 2: tmp[15:0] := src[47:32] + 3: tmp[15:0] := src[63:48] + ESAC + RETURN tmp[15:0] +} +dst[15:0] := SELECT4(a[63:0], imm8[1:0]) +dst[31:16] := SELECT4(a[63:0], imm8[3:2]) +dst[47:32] := SELECT4(a[63:0], imm8[5:4]) +dst[63:48] := SELECT4(a[63:0], imm8[7:6]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +dst[127:96] := SELECT4(b[127:0], imm8[7:6]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + Get the unsigned 32-bit value of the MXCSR control and status register. + dst[31:0] := MXCSR + + + SSE +
immintrin.h
+ General Support +
+ + + + Set the MXCSR control and status register with the value in unsigned 32-bit integer "a". + +MXCSR := a[31:0] + + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the exception state bits from the MXCSR control and status register. The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT + dst[31:0] := MXCSR & _MM_EXCEPT_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the exception state bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT + MXCSR := a[31:0] AND ~_MM_EXCEPT_MASK + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the exception mask bits from the MXCSR control and status register. The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT + dst[31:0] := MXCSR & _MM_MASK_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the exception mask bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT + MXCSR := a[31:0] AND ~_MM_MASK_MASK + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the rounding mode bits from the MXCSR control and status register. The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO + dst[31:0] := MXCSR & _MM_ROUND_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the rounding mode bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO + MXCSR := a[31:0] AND ~_MM_ROUND_MASK + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the flush zero bits from the MXCSR control and status register. The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF + dst[31:0] := MXCSR & _MM_FLUSH_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the flush zero bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF + MXCSR := a[31:0] AND ~_MM_FLUSH_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + + Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i", which can be one of:<ul> + <li>_MM_HINT_T0 // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.</li> + <li>_MM_HINT_T1 // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.</li> + <li>_MM_HINT_T2 // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.</li> + <li>_MM_HINT_NTA // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.</li> + + + + + + SSE +
immintrin.h
+ General Support +
+ + + + Perform a serializing operation on all store-to-memory instructions that were issued prior to this instruction. Guarantees that every store instruction that precedes, in program order, is globally visible before any store instruction which follows the fence in program order. + + SSE +
immintrin.h
+ General Support +
+ + + + + Allocate "size" bytes of memory, aligned to the alignment specified in "align", and return a pointer to the allocated memory. "_mm_free" should be used to free memory that is allocated with "_mm_malloc". + SSE +
immintrin.h
+ General Support +
+ + + + Free aligned memory that was allocated with "_mm_malloc". + SSE +
immintrin.h
+ General Support +
+ + + + Return vector of type __m128 with undefined elements. + SSE +
immintrin.h
+ General Support +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst". [min_float_note] + +dst[31:0] := MIN(a[31:0], b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst". [max_float_note] + +dst[31:0] := MAX(a[31:0], b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of "dst". + +FOR j := 0 to 7 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56] +dst[63:16] := 0 + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of "dst". + +FOR j := 0 to 7 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56] +dst[63:16] := 0 + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] + b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] - b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] * b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] / b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[63:32] := Convert_Int32_To_FP32(b[63:32]) +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[63:32] := Convert_Int32_To_FP32(b[63:32]) +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + m := j*32 + dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + m := j*32 + dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower packed 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*8 + m := j*32 + dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower packed unsigned 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*8 + m := j*32 + dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", then covert the packed signed 32-bit integers in "b" to single-precision (32-bit) floating-point element, and store the results in the upper 2 elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(a[31:0]) +dst[63:32] := Convert_Int32_To_FP32(a[63:32]) +dst[95:64] := Convert_Int32_To_FP32(b[31:0]) +dst[127:96] := Convert_Int32_To_FP32(b[63:32]) + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". + +dst[31:0] := a[31:0] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". Note: this intrinsic will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and 0x7FFFFFFF. + +FOR j := 0 to 3 + i := 16*j + k := 32*j + IF a[k+31:k] >= FP32(0x7FFF) && a[k+31:k] <= FP32(0x7FFFFFFF) + dst[i+15:i] := 0x7FFF + ELSE + dst[i+15:i] := Convert_FP32_To_Int16(a[k+31:k]) + FI +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 8-bit integers, and store the results in lower 4 elements of "dst". Note: this intrinsic will generate 0x7F, rather than 0x80, for input values between 0x7F and 0x7FFFFFFF. + +FOR j := 0 to 3 + i := 8*j + k := 32*j + IF a[k+31:k] >= FP32(0x7F) && a[k+31:k] <= FP32(0x7FFFFFFF) + dst[i+7:i] := 0x7F + ELSE + dst[i+7:i] := Convert_FP32_To_Int8(a[k+31:k]) + FI +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Store 64-bits of integer data from "a" into memory using a non-temporal memory hint. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + + Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. + +FOR j := 0 to 7 + i := j*8 + IF mask[i+7] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + SSE +
immintrin.h
+ Store +
+ + + + + + Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 7 + i := j*8 + IF mask[i+7] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + SSE +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the upper 2 single-precision (32-bit) floating-point elements from "a" into memory. + +MEM[mem_addr+31:mem_addr] := a[95:64] +MEM[mem_addr+63:mem_addr+32] := a[127:96] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower 2 single-precision (32-bit) floating-point elements from "a" into memory. + +MEM[mem_addr+31:mem_addr] := a[31:0] +MEM[mem_addr+63:mem_addr+32] := a[63:32] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower single-precision (32-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+31:mem_addr] := a[31:0] +MEM[mem_addr+63:mem_addr+32] := a[31:0] +MEM[mem_addr+95:mem_addr+64] := a[31:0] +MEM[mem_addr+127:mem_addr+96] := a[31:0] + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+31:mem_addr] := a[31:0] +MEM[mem_addr+63:mem_addr+32] := a[31:0] +MEM[mem_addr+95:mem_addr+64] := a[31:0] +MEM[mem_addr+127:mem_addr+96] := a[31:0] + + SSE +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store 4 single-precision (32-bit) floating-point elements from "a" into memory in reverse order. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+31:mem_addr] := a[127:96] +MEM[mem_addr+63:mem_addr+32] := a[95:64] +MEM[mem_addr+95:mem_addr+64] := a[63:32] +MEM[mem_addr+127:mem_addr+96] := a[31:0] + + + SSE +
immintrin.h
+ Store +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[j] := a[i+7] +ENDFOR +dst[MAX:8] := 0 + + + SSE +
xmmintrin.h
+ Miscellaneous +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[j] := a[i+7] +ENDFOR +dst[MAX:8] := 0 + + + SSE +
xmmintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a". + +FOR j := 0 to 3 + i := j*32 + IF a[i+31] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:4] := 0 + + + SSE +
xmmintrin.h
+ Miscellaneous +
+ + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := SQRT(a[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst[31:0] := (1.0 / a[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst[31:0] := (1.0 / SQRT(a[31:0])) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] == b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] < b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] <= b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] <= b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] > b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] >= b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] >= b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] != b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] != b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] < b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := !( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] <= b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (!( a[i+31:i] <= b[i+31:i] )) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] > b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (!( a[i+31:i] > b[i+31:i] )) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] >= b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (!( a[i+31:i] >= b[i+31:i] )) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + dst[31:0] := ( a[31:0] != NaN AND b[31:0] != NaN ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] != NaN AND b[i+31:i] != NaN ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + dst[31:0] := ( a[31:0] == NaN OR b[31:0] == NaN ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == NaN OR b[i+31:i] == NaN ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] < b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] <= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] > b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] >= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). + RETURN ( a[31:0] == NaN OR b[31:0] == NaN OR a[31:0] != b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] < b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] <= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] > b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] >= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] == NaN OR b[31:0] == NaN OR a[31:0] != b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + Copy single-precision (32-bit) floating-point element "a" to the lower element of "dst", and zero the upper 3 elements. + +dst[31:0] := a[31:0] +dst[127:32] := 0 + + SSE +
xmmintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + SSE +
xmmintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + SSE +
xmmintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 + + SSE +
xmmintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[31:0] := e3 +dst[63:32] := e2 +dst[95:64] := e1 +dst[127:96] := e0 + + SSE +
xmmintrin.h
+ Set +
+ + + + Return vector of type __m128 with all elements set to zero. + +dst[MAX:0] := 0 + + + SSE +
xmmintrin.h
+ Set +
+ + + + + Load 2 single-precision (32-bit) floating-point elements from memory into the upper 2 elements of "dst", and copy the lower 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[31:0] := a[31:0] +dst[63:32] := a[63:32] +dst[95:64] := MEM[mem_addr+31:mem_addr] +dst[127:96] := MEM[mem_addr+63:mem_addr+32] + + + SSE +
immintrin.h
+ Load +
+ + + + + Load 2 single-precision (32-bit) floating-point elements from memory into the lower 2 elements of "dst", and copy the upper 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[63:32] := MEM[mem_addr+63:mem_addr+32] +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
immintrin.h
+ Load +
+ + + + Load a single-precision (32-bit) floating-point element from memory into the lower of "dst", and zero the upper 3 elements. "mem_addr" does not need to be aligned on any particular boundary. + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[127:32] := 0 + + + SSE +
immintrin.h
+ Load +
+ + + + Load a single-precision (32-bit) floating-point element from memory into all elements of "dst". + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[63:32] := MEM[mem_addr+31:mem_addr] +dst[95:64] := MEM[mem_addr+31:mem_addr] +dst[127:96] := MEM[mem_addr+31:mem_addr] + + SSE +
immintrin.h
+ Load +
+ + + + Load a single-precision (32-bit) floating-point element from memory into all elements of "dst". + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[63:32] := MEM[mem_addr+31:mem_addr] +dst[95:64] := MEM[mem_addr+31:mem_addr] +dst[127:96] := MEM[mem_addr+31:mem_addr] + + SSE +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE +
immintrin.h
+ Load +
+ + + + Load 4 single-precision (32-bit) floating-point elements from memory into "dst" in reverse order. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[31:0] := MEM[mem_addr+127:mem_addr+96] +dst[63:32] := MEM[mem_addr+95:mem_addr+64] +dst[95:64] := MEM[mem_addr+63:mem_addr+32] +dst[127:96] := MEM[mem_addr+31:mem_addr] + + SSE +
immintrin.h
+ Load +
+ + + + + Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Move +
+ + + + + Move the upper 2 single-precision (32-bit) floating-point elements from "b" to the lower 2 elements of "dst", and copy the upper 2 elements from "a" to the upper 2 elements of "dst". + +dst[31:0] := b[95:64] +dst[63:32] := b[127:96] +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
xmmintrin.h
+ Move +
+ + + + + Move the lower 2 single-precision (32-bit) floating-point elements from "b" to the upper 2 elements of "dst", and copy the lower 2 elements from "a" to the lower 2 elements of "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[63:32] +dst[95:64] := b[31:0] +dst[127:96] := b[63:32] + + + SSE +
xmmintrin.h
+ Move +
+ + + + + + Return vector of type __m128d with undefined elements. + SSE2 +
emmintrin.h
+ General Support +
+ + + + Return vector of type __m128i with undefined elements. + SSE2 +
emmintrin.h
+ General Support +
+ + + + Provide a hint to the processor that the code sequence is a spin-wait loop. This can help improve the performance and power consumption of spin-wait loops. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Perform a serializing operation on all load-from-memory instructions that were issued prior to this instruction. Guarantees that every load instruction that precedes, in program order, is globally visible before any load instruction which follows the fence in program order. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Perform a serializing operation on all load-from-memory and store-to-memory instructions that were issued prior to this instruction. Guarantees that every memory access that precedes, in program order, the memory fence instruction is globally visible before any memory instruction which follows the fence in program order. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Load unaligned 64-bit integer from memory into the first element of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[MAX:64] := 0 + + + SSE2 +
immintrin.h
+ Load +
+ + + + Load unaligned 16-bit integer from memory into the first element of "dst". + +dst[15:0] := MEM[mem_addr+15:mem_addr] +dst[MAX:16] := 0 + + SSE2 +
immintrin.h
+ Load +
+ + + + Load unaligned 32-bit integer from memory into the first element of "dst". + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[MAX:32] := 0 + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 64-bit integer from memory into the first element of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[MAX:64] := 0 + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits of integer data from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits of integer data from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 2 double-precision (64-bit) floating-point elements from memory into "dst" in reverse order. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[63:0] := MEM[mem_addr+127:mem_addr+64] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into the lower of "dst", and zero the upper element. "mem_addr" does not need to be aligned on any particular boundary. + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Load +
+ + + + + Load a double-precision (64-bit) floating-point element from memory into the upper element of "dst", and copy the lower element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[63:0] := a[63:0] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and copy the upper element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + + Store 16-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+15:mem_addr] := a[15:0] + + SSE2 +
immintrin.h
+ Store +
+ + + + + Store 64-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
immintrin.h
+ Store +
+ + + + + Store 32-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + + Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF mask[i+7] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits of integer data from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits of integer data from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 64-bit integer from the first element of "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits of integer data from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 32-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 64-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+63:mem_addr] := a[63:0] +MEM[mem_addr+127:mem_addr+64] := a[63:0] + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+63:mem_addr] := a[63:0] +MEM[mem_addr+127:mem_addr+64] := a[63:0] + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 2 double-precision (64-bit) floating-point elements from "a" into memory in reverse order. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+63:mem_addr] := a[127:64] +MEM[mem_addr+127:mem_addr+64] := a[63:0] + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the upper double-precision (64-bit) floating-point element from "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[127:64] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add 64-bit integers "a" and "b", and store the result in "dst". + +dst[63:0] := a[63:0] + b[63:0] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from "a" and "b", and store the unsigned 64-bit result in "dst". + +dst[63:0] := a[31:0] * b[31:0] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+31:i] * b[i+31:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce two unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". + +FOR j := 0 to 15 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +FOR j := 0 to 1 + i := j*64 + dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] + dst[i+63:i+16] := 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract 64-bit integer "b" from 64-bit integer "a", and store the result in "dst". + +dst[63:0] := a[63:0] - b[63:0] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] + b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] / b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] * b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] - b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR + + + SSE2 +
emmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR + + + SSE2 +
emmintrin.h
+ Probability/Statistics +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [max_float_note] + +dst[63:0] := MAX(a[63:0], b[63:0]) +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [min_float_note] + +dst[63:0] := MIN(a[63:0], b[63:0]) +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[127:0] := (a[127:0] AND b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 128 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[127:0] := ((NOT a[127:0]) AND b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[127:0] := (a[127:0] OR b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[127:0] := (a[127:0] XOR b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtb instruction with the order of the operands switched. + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ( a[i+7:i] < b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtw instruction with the order of the operands switched. + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ( a[i+15:i] < b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtd instruction with the order of the operands switched. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] == b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] < b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] <= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] > b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] >= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + dst[63:0] := (a[63:0] != NaN AND b[63:0] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + dst[63:0] := (a[63:0] == NaN OR b[63:0] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] != b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] < b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] <= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] > b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] >= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] == b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] < b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] <= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] > b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] >= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] != b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] < b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] <= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] > b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] >= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] == b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] < b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] <= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] > b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] >= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). + RETURN ( a[63:0] == NaN OR b[63:0] == NaN OR a[63:0] != b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] == b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] < b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] <= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] > b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] >= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] == NaN OR b[63:0] == NaN OR a[63:0] != b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper elements of "dst". + +dst[31:0] := a[31:0] +dst[127:32] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower 64-bit integer in "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower 64-bit integer in "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 +dst[79:64] := e4 +dst[95:80] := e5 +dst[111:96] := e6 +dst[127:112] := e7 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 +dst[71:64] := e8 +dst[79:72] := e9 +dst[87:80] := e10 +dst[95:88] := e11 +dst[103:96] := e12 +dst[111:104] := e13 +dst[119:112] := e14 +dst[127:120] := e15 + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastq". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastd". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 16-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastw". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastb". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed 64-bit integers in "dst" with the supplied values in reverse order. + +dst[63:0] := e1 +dst[127:64] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e3 +dst[63:32] := e2 +dst[95:64] := e1 +dst[127:96] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values in reverse order. + +dst[15:0] := e7 +dst[31:16] := e6 +dst[47:32] := e5 +dst[63:48] := e4 +dst[79:64] := e3 +dst[95:80] := e2 +dst[111:96] := e1 +dst[127:112] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values in reverse order. + +dst[7:0] := e15 +dst[15:8] := e14 +dst[23:16] := e13 +dst[31:24] := e12 +dst[39:32] := e11 +dst[47:40] := e10 +dst[55:48] := e9 +dst[63:56] := e8 +dst[71:64] := e7 +dst[79:72] := e6 +dst[87:80] := e5 +dst[95:88] := e4 +dst[103:96] := e3 +dst[111:104] := e2 +dst[119:112] := e1 +dst[127:120] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + Return vector of type __m128i with all elements set to zero. + +dst[MAX:0] := 0 + + + SSE2 +
emmintrin.h
+ Set +
+ + + + Copy double-precision (64-bit) floating-point element "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[63:0] := e1 +dst[127:64] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + Return vector of type __m128d with all elements set to zero. + +dst[MAX:0] := 0 + + + SSE2 +
emmintrin.h
+ Set +
+ + + + Copy the lower 64-bit integer in "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(a[79:64]) +dst[47:40] := Saturate8(a[95:80]) +dst[55:48] := Saturate8(a[111:96]) +dst[63:56] := Saturate8(a[127:112]) +dst[71:64] := Saturate8(b[15:0]) +dst[79:72] := Saturate8(b[31:16]) +dst[87:80] := Saturate8(b[47:32]) +dst[95:88] := Saturate8(b[63:48]) +dst[103:96] := Saturate8(b[79:64]) +dst[111:104] := Saturate8(b[95:80]) +dst[119:112] := Saturate8(b[111:96]) +dst[127:120] := Saturate8(b[127:112]) + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(a[95:64]) +dst[63:48] := Saturate16(a[127:96]) +dst[79:64] := Saturate16(b[31:0]) +dst[95:80] := Saturate16(b[63:32]) +dst[111:96] := Saturate16(b[95:64]) +dst[127:112] := Saturate16(b[127:96]) + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(a[79:64]) +dst[47:40] := SaturateU8(a[95:80]) +dst[55:48] := SaturateU8(a[111:96]) +dst[63:56] := SaturateU8(a[127:112]) +dst[71:64] := SaturateU8(b[15:0]) +dst[79:72] := SaturateU8(b[31:16]) +dst[87:80] := SaturateU8(b[47:32]) +dst[95:88] := SaturateU8(b[63:48]) +dst[103:96] := SaturateU8(b[79:64]) +dst[111:104] := SaturateU8(b[95:80]) +dst[119:112] := SaturateU8(b[111:96]) +dst[127:120] := SaturateU8(b[127:112]) + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[j] := a[i+7] +ENDFOR +dst[MAX:16] := 0 + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". + +FOR j := 0 to 1 + i := j*64 + IF a[i+63] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:2] := 0 + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + Copy the 64-bit integer "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Move +
+ + + + Copy the lower 64-bit integer in "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Move +
+ + + + + Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Move +
+ + + + + Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[15:0] := (a[127:0] >> (imm8[2:0] * 16))[15:0] +dst[31:16] := 0 + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[2:0]*16 +dst[sel+15:sel] := i[15:0] + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst". + +dst[63:0] := a[63:0] +dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst". + +dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst". + +dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := SQRT(b[63:0]) +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Elementary Math Functions +
+ + + + Cast vector of type __m128d to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + + + + Alternatively add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + b[i+31:i] + FI +ENDFOR + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Alternatively add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + b[i+63:i] + FI +ENDFOR + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[127:64] + a[63:0] +dst[127:64] := b[127:64] + b[63:0] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[63:0] - a[127:64] +dst[127:64] := b[63:0] - b[127:64] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + Load 128-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm_loadu_si128" when the data crosses a cache line boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE3 +
pmmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE3 +
pmmintrin.h
+ Load +
+ + + + Duplicate the low double-precision (64-bit) floating-point element from "a", and store the results in "dst". + +dst[63:0] := a[63:0] +dst[127:64] := a[63:0] + + + SSE3 +
pmmintrin.h
+ Move +
+ + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := a[63:32] +dst[95:64] := a[127:96] +dst[127:96] := a[127:96] + + + SSE3 +
pmmintrin.h
+ Move +
+ + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[31:0] +dst[95:64] := a[95:64] +dst[127:96] := a[95:64] + + + SSE3 +
pmmintrin.h
+ Move +
+ + + + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF mask[i+7] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract a single-precision (32-bit) floating-point element from "a", selected with "imm8", and store the result in "dst". + +dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract an 8-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[7:0] := (a[127:0] >> (imm8[3:0] * 8))[7:0] +dst[31:8] := 0 + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract a 32-bit integer from "a", selected with "imm8", and store the result in "dst". + +dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract a 64-bit integer from "a", selected with "imm8", and store the result in "dst". + +dst[63:0] := (a[127:0] >> (imm8[0] * 64))[63:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "tmp", then insert a single-precision (32-bit) floating-point element from "b" into "tmp" using the control in "imm8". Store "tmp" to "dst" using the mask in "imm8" (elements are zeroed out when the corresponding bit is set). + +tmp2[127:0] := a[127:0] +CASE (imm8[7:6]) OF +0: tmp1[31:0] := b[31:0] +1: tmp1[31:0] := b[63:32] +2: tmp1[31:0] := b[95:64] +3: tmp1[31:0] := b[127:96] +ESAC +CASE (imm8[5:4]) OF +0: tmp2[31:0] := tmp1[31:0] +1: tmp2[63:32] := tmp1[31:0] +2: tmp2[95:64] := tmp1[31:0] +3: tmp2[127:96] := tmp1[31:0] +ESAC +FOR j := 0 to 3 + i := j*32 + IF imm8[j%8] + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := tmp2[i+31:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the lower 8-bit integer from "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[3:0]*8 +dst[sel+7:sel] := i[7:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[1:0]*32 +dst[sel+31:sel] := i[31:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[0]*64 +dst[sel+63:sel] := i[63:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Conditionally multiply the packed double-precision (64-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". + +DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { + FOR j := 0 to 1 + i := j*64 + IF imm8[(4+j)%8] + temp[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + temp[i+63:i] := 0.0 + FI + ENDFOR + + sum[63:0] := temp[127:64] + temp[63:0] + + FOR j := 0 to 1 + i := j*64 + IF imm8[j%8] + tmpdst[i+63:i] := sum[63:0] + ELSE + tmpdst[i+63:i] := 0.0 + FI + ENDFOR + RETURN tmpdst[127:0] +} +dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + + + + + Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". + +DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { + FOR j := 0 to 3 + i := j*32 + IF imm8[(4+j)%8] + temp[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + temp[i+31:i] := 0 + FI + ENDFOR + + sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0]) + + FOR j := 0 to 3 + i := j*32 + IF imm8[j%8] + tmpdst[i+31:i] := sum[31:0] + ELSE + tmpdst[i+31:i] := 0 + FI + ENDFOR + RETURN tmpdst[127:0] +} +dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + Eight SADs are performed using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at on the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8". + +DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) { + a_offset := imm8[2]*32 + b_offset := imm8[1:0]*32 + FOR j := 0 to 7 + i := j*8 + k := a_offset+i + l := b_offset + tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \ + ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24])) + ENDFOR + RETURN tmp[127:0] +} +dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0]) + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i], rounding) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i], rounding) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + + Round the lower double-precision (64-bit) floating-point element in "b" using the "rounding" parameter, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := ROUND(b[63:0], rounding) +dst[127:64] := a[127:64] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower double-precision (64-bit) floating-point element in "b" down to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := FLOOR(b[63:0]) +dst[127:64] := a[127:64] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower double-precision (64-bit) floating-point element in "b" up to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := CEIL(b[63:0]) +dst[127:64] := a[127:64] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + + Round the lower single-precision (32-bit) floating-point element in "b" using the "rounding" parameter, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := ROUND(b[31:0], rounding) +dst[127:32] := a[127:32] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower single-precision (32-bit) floating-point element in "b" down to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := FLOOR(b[31:0]) +dst[127:32] := a[127:32] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower single-precision (32-bit) floating-point element in "b" up to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := CEIL(b[31:0]) +dst[127:32] := a[127:32] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + Miscellaneous + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". + +dst[15:0] := SaturateU16(a[31:0]) +dst[31:16] := SaturateU16(a[63:32]) +dst[47:32] := SaturateU16(a[95:64]) +dst[63:48] := SaturateU16(a[127:96]) +dst[79:64] := SaturateU16(b[31:0]) +dst[95:80] := SaturateU16(b[63:32]) +dst[111:96] := SaturateU16(b[95:64]) +dst[127:112] := SaturateU16(b[127:96]) + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + l := j*16 + dst[l+15:l] := SignExtend16(a[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[i+31:i] := SignExtend32(a[k+7:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[i+63:i] := SignExtend64(a[k+7:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[i+31:i] := SignExtend32(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[i+63:i] := SignExtend64(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[i+63:i] := SignExtend64(a[k+31:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + l := j*16 + dst[l+15:l] := ZeroExtend16(a[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[i+31:i] := ZeroExtend32(a[k+7:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in the low 8 byte sof "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[i+63:i] := ZeroExtend64(a[k+7:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[i+31:i] := ZeroExtend32(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[i+63:i] := ZeroExtend64(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[i+63:i] := ZeroExtend64(a[k+31:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. + +IF ((a[127:0] AND b[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND b[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN ZF + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. + +IF ((a[127:0] AND b[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND b[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN CF + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +IF ((a[127:0] AND b[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND b[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and return 1 if the result is zero, otherwise return 0. + +IF ((a[127:0] AND mask[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +dst := ZF + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "mask", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +IF ((a[127:0] AND mask[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND mask[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + Compute the bitwise NOT of "a" and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0. + +FOR j := 0 to 127 + tmp[j] := 1 +ENDFOR +IF (((NOT a[127:0]) AND tmp[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + Horizontally compute the minimum amongst the packed unsigned 16-bit integers in "a", store the minimum and index in "dst", and zero the remaining bits in "dst". + +index[2:0] := 0 +min[15:0] := a[15:0] +FOR j := 0 to 7 + i := j*16 + IF a[i+15:i] < min[15:0] + index[2:0] := j + min[15:0] := a[i+15:i] + FI +ENDFOR +dst[15:0] := min[15:0] +dst[18:16] := index[2:0] +dst[127:19] := 0 + + + SSE4.1 +
smmintrin.h
+ Miscellaneous +
+ + + + Load 128-bits of integer data from memory into "dst" using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE4.1 +
smmintrin.h
+ Load +
+ + + + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated mask in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] 
:= BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // byte / word mask + FOR i := 0 to UpperBound + j := i*size + IF IntRes2[i] + dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF) + ELSE + dst[j+size-1:j] := 0 + FI + ENDFOR +ELSE // bit mask + dst[UpperBound:0] := IntRes2[UpperBound:0] + dst[127:UpperBound+1] := 0 +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated index in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := 
BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // most significant bit + tmp := UpperBound + dst := tmp + DO WHILE ((tmp >= 0) AND a[tmp] == 0) + tmp := tmp - 1 + dst := tmp + OD +ELSE // least significant bit + tmp := 0 + dst := tmp + DO WHILE ((tmp <= UpperBound) AND a[tmp] == 0) + tmp := tmp + 1 + dst := tmp + OD +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +bInvalid := 0 +FOR j := 0 to UpperBound + n := j*size + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI +ENDFOR +dst := bInvalid + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to 
UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 != 0) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +aInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI +ENDFOR +dst := aInvalid + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns bit 0 of the resulting bit mask. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + 
IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := IntRes2[0] + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // 
equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 == 0) AND bInvalid + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated mask in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := 
BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // byte / word mask + FOR i := 0 to UpperBound + j := i*size + IF IntRes2[i] + dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF) + ELSE + dst[j+size-1:j] := 0 + FI + ENDFOR +ELSE // bit mask + dst[UpperBound:0] := IntRes2[UpperBound:0] + dst[127:UpperBound+1] := 0 +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated index in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := 
BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // most significant bit + tmp := UpperBound + dst := tmp + DO WHILE ((tmp >= 0) AND a[tmp] == 0) + tmp := tmp - 1 + dst := tmp + OD +ELSE // least significant bit + tmp := 0 + dst := tmp + DO WHILE ((tmp <= UpperBound) AND a[tmp] == 0) + tmp := tmp + 1 + dst := tmp + OD +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +dst := (lb <= UpperBound) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + 
IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 != 0) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +dst := (la <= UpperBound) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns bit 0 of the resulting bit mask. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := 
BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := IntRes2[0] + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + 
IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 == 0) AND (lb > UpperBound) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE4.2 +
nmmintrin.h
+ Compare +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst". + tmp1[7:0] := v[0:7] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[39:0] := tmp1[7:0] << 32 +tmp4[39:0] := tmp2[31:0] << 8 +tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0] +tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst". + tmp1[15:0] := v[0:15] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[47:0] := tmp1[15:0] << 32 +tmp4[47:0] := tmp2[31:0] << 16 +tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0] +tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst". + tmp1[31:0] := v[0:31] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[63:0] := tmp1[31:0] << 32 +tmp4[63:0] := tmp2[31:0] << 32 +tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0] +tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst". + tmp1[63:0] := v[0:63] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[95:0] := tmp1[31:0] << 32 +tmp4[95:0] := tmp2[63:0] << 64 +tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0] +tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ABS(Int(a[i+7:i])) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ABS(a[i+7:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ABS(Int(a[i+15:i])) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ABS(a[i+15:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Swizzle +
+ + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[2:0] := b[i+2:i] + dst[i+7:i] := a[index*8+7:index*8] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Swizzle +
+ + + + + + Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +dst[127:0] := tmp[127:0] + + + SSSE3 +
tmmintrin.h
+ Miscellaneous +
+ + + + + + Concatenate 8-byte blocks in "a" and "b" into a 16-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8) +dst[63:0] := tmp[63:0] + + + SSSE3 +
tmmintrin.h
+ Miscellaneous +
+ + + + + Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[31:16] + a[15:0] +dst[31:16] := a[63:48] + a[47:32] +dst[47:32] := a[95:80] + a[79:64] +dst[63:48] := a[127:112] + a[111:96] +dst[79:64] := b[31:16] + b[15:0] +dst[95:80] := b[63:48] + b[47:32] +dst[111:96] := b[95:80] + b[79:64] +dst[127:112] := b[127:112] + b[111:96] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[31:16] + a[15:0]) +dst[31:16] := Saturate16(a[63:48] + a[47:32]) +dst[47:32] := Saturate16(a[95:80] + a[79:64]) +dst[63:48] := Saturate16(a[127:112] + a[111:96]) +dst[79:64] := Saturate16(b[31:16] + b[15:0]) +dst[95:80] := Saturate16(b[63:48] + b[47:32]) +dst[111:96] := Saturate16(b[95:80] + b[79:64]) +dst[127:112] := Saturate16(b[127:112] + b[111:96]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[31:16] + a[15:0] +dst[31:16] := a[63:48] + a[47:32] +dst[47:32] := b[31:16] + b[15:0] +dst[63:48] := b[63:48] + b[47:32] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := b[63:32] + b[31:0] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[31:16] + a[15:0]) +dst[31:16] := Saturate16(a[63:48] + a[47:32]) +dst[47:32] := Saturate16(b[31:16] + b[15:0]) +dst[63:48] := Saturate16(b[63:48] + b[47:32]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[15:0] - a[31:16] +dst[31:16] := a[47:32] - a[63:48] +dst[47:32] := a[79:64] - a[95:80] +dst[63:48] := a[111:96] - a[127:112] +dst[79:64] := b[15:0] - b[31:16] +dst[95:80] := b[47:32] - b[63:48] +dst[111:96] := b[79:64] - b[95:80] +dst[127:112] := b[111:96] - b[127:112] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[15:0] - a[31:16]) +dst[31:16] := Saturate16(a[47:32] - a[63:48]) +dst[47:32] := Saturate16(a[79:64] - a[95:80]) +dst[63:48] := Saturate16(a[111:96] - a[127:112]) +dst[79:64] := Saturate16(b[15:0] - b[31:16]) +dst[95:80] := Saturate16(b[47:32] - b[63:48]) +dst[111:96] := Saturate16(b[79:64] - b[95:80]) +dst[127:112] := Saturate16(b[111:96] - b[127:112]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[15:0] - a[31:16] +dst[31:16] := a[47:32] - a[63:48] +dst[47:32] := b[15:0] - b[31:16] +dst[63:48] := b[47:32] - b[63:48] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := b[31:0] - b[63:32] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[15:0] - a[31:16]) +dst[31:16] := Saturate16(a[47:32] - a[63:48]) +dst[47:32] := Saturate16(b[15:0] - b[31:16]) +dst[63:48] := Saturate16(b[47:32] - b[63:48]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 15 + i := j*8 + IF b[i+7:i] < 0 + dst[i+7:i] := -(a[i+7:i]) + ELSE IF b[i+7:i] == 0 + dst[i+7:i] := 0 + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 7 + i := j*16 + IF b[i+15:i] < 0 + dst[i+15:i] := -(a[i+15:i]) + ELSE IF b[i+15:i] == 0 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 3 + i := j*32 + IF b[i+31:i] < 0 + dst[i+31:i] := -(a[i+31:i]) + ELSE IF b[i+31:i] == 0 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 7 + i := j*8 + IF b[i+7:i] < 0 + dst[i+7:i] := -(a[i+7:i]) + ELSE IF b[i+7:i] == 0 + dst[i+7:i] := 0 + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 3 + i := j*16 + IF b[i+15:i] < 0 + dst[i+15:i] := -(a[i+15:i]) + ELSE IF b[i+15:i] == 0 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero. + +FOR j := 0 to 1 + i := j*32 + IF b[i+31:i] < 0 + dst[i+31:i] := -(a[i+31:i]) + ELSE IF b[i+31:i] == 0 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + + Copy the current 64-bit value of the processor's time-stamp counter into "dst". + dst[63:0] := TimeStampCounter + + + TSC +
immintrin.h
+ General Support +
+ + + + + Mark the start of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a transactional region, subsequent loads are not added to the read set of the transaction. If this is used inside a suspend load address tracking region it will cause transaction abort. If this is used outside of a transactional region it behaves like a NOP. + + TSXLDTRK +
immintrin.h
+ Miscellaneous +
+ + + Mark the end of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a suspend load address tracking region it will end the suspend region and all following load addresses will be added to the transaction read set. If this is used inside an active transaction but not in a suspend region it will cause transaction abort. If this is used outside of a transactional region it behaves like a NOP. + + TSXLDTRK +
immintrin.h
+ Miscellaneous +
+ + + + + + Clear the user interrupt flag (UIF). + + UINTR +
immintrin.h
+ General Support +
+ + + + Send user interprocessor interrupts specified in unsigned 64-bit integer "__a". + + UINTR +
immintrin.h
+ General Support +
+ + + + Sets the user interrupt flag (UIF). + + UINTR +
immintrin.h
+ General Support +
+ + + + Store the current user interrupt flag (UIF) in unsigned 8-bit integer "dst". + + UINTR +
immintrin.h
+ General Support +
+ + + + + Reads the contents of a 64-bit MSR specified in "__A" into "dst". + DEST := MSR[__A] + + + USER_MSR +
x86gprintrin.h
+ General Support +
+ + + + + Writes the contents of "__B" into the 64-bit MSR specified in "__A". + MSR[__A] := __B + + + USER_MSR +
x86gprintrin.h
+ General Support +
+ + + + + Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst"." + FOR j := 0 to 1 + i := j*128 + a[i+127:i] := ShiftRows(a[i+127:i]) + a[i+127:i] := SubBytes(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:256] := 0 + + + VAES + AVX512VL +
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst"." + FOR j := 0 to 1 + i := j*128 + a[i+127:i] := ShiftRows(a[i+127:i]) + a[i+127:i] := SubBytes(a[i+127:i]) + a[i+127:i] := MixColumns(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:256] := 0 + + + VAES + AVX512VL +
immintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 1 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:256] := 0 + + + VAES + AVX512VL +
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 1 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + a[i+127:i] := InvMixColumns(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:256] := 0 + + + VAES + AVX512VL +
immintrin.h
+ Cryptography +
+ + + + + + + + Carry-less multiplication of one quadword of + 'b' by one quadword of 'c', stores + the 128-bit result in 'dst'. The immediate 'Imm8' is + used to determine which quadwords of 'b' + and 'c' should be used. + +DEFINE PCLMUL128(X,Y) { + FOR i := 0 to 63 + TMP[i] := X[ 0 ] and Y[ i ] + FOR j := 1 to i + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + FOR i := 64 to 126 + TMP[i] := 0 + FOR j := i - 63 to 63 + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + DEST[127] := 0 + RETURN DEST // 128b vector +} +FOR i := 0 to 1 + IF Imm8[0] == 0 + TEMP1 := b.m128[i].qword[0] + ELSE + TEMP1 := b.m128[i].qword[1] + FI + IF Imm8[4] == 0 + TEMP2 := c.m128[i].qword[0] + ELSE + TEMP2 := c.m128[i].qword[1] + FI + dst.m128[i] := PCLMUL128(TEMP1, TEMP2) +ENDFOR +dst[MAX:256] := 0 + + + VPCLMULQDQ + AVX512VL +
immintrin.h
+ Application-Targeted +
+ + + + + + + + Carry-less multiplication of one quadword of + 'b' by one quadword of 'c', stores + the 128-bit result in 'dst'. The immediate 'Imm8' is + used to determine which quadwords of 'b' + and 'c' should be used. + +DEFINE PCLMUL128(X,Y) { + FOR i := 0 to 63 + TMP[i] := X[ 0 ] and Y[ i ] + FOR j := 1 to i + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + FOR i := 64 to 126 + TMP[i] := 0 + FOR j := i - 63 to 63 + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + DEST[127] := 0 + RETURN DEST // 128b vector +} +FOR i := 0 to 3 + IF Imm8[0] == 0 + TEMP1 := b.m128[i].qword[0] + ELSE + TEMP1 := b.m128[i].qword[1] + FI + IF Imm8[4] == 0 + TEMP2 := c.m128[i].qword[0] + ELSE + TEMP2 := c.m128[i].qword[1] + FI + dst.m128[i] := PCLMUL128(TEMP1, TEMP2) +ENDFOR +dst[MAX:512] := 0 + + + VPCLMULQDQ +
immintrin.h
+ Application-Targeted +
+ + + + + + + Directs the processor to enter an implementation-dependent optimized state until the TSC reaches or exceeds the value specified in "counter". Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF). If the processor that executed a UMWAIT instruction wakes due to the expiration of the operating system timelimit, the instructions sets RFLAGS.CF; otherwise, that flag is cleared. + + WAITPKG +
immintrin.h
+ Miscellaneous +
+ + + + + Directs the processor to enter an implementation-dependent optimized state while monitoring a range of addresses. The instruction wakes up when the TSC reaches or exceeds the value specified in "counter" (if the monitoring hardware did not trigger beforehand). Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF). If the processor that executed a UMWAIT instruction wakes due to the expiration of the operating system timelimit, the instructions sets RFLAGS.CF; otherwise, that flag is cleared. + + WAITPKG +
immintrin.h
+ Miscellaneous +
+ + + + Sets up a linear address range to be + monitored by hardware and activates the + monitor. The address range should be a writeback + memory caching type. The address is + contained in "a". + + WAITPKG +
immintrin.h
+ Miscellaneous +
+ + + + + + Write back and do not flush internal caches. + Initiate writing-back without flushing of external + caches. + + WBNOINVD +
immintrin.h
+ Miscellaneous +
+ + + + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEC +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEC +
immintrin.h
+ OS-Targeted +
+ + + + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE instruction. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + 2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEOPT +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE64 instruction. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + 2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEOPT +
immintrin.h
+ OS-Targeted +
+ + + + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + + Copy up to 64-bits from the value of the extended control register (XCR) specified by "a" into "dst". Currently only XFEATURE_ENABLED_MASK XCR is supported. + dst[63:0] := XCR[a] + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Copy 64-bits from "val" to the extended control register (XCR) specified by "a". Currently only XFEATURE_ENABLED_MASK XCR is supported. + +XCR[a] := val[63:0] + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + +
\ No newline at end of file diff --git a/library/stdarch/examples/Cargo.toml b/library/stdarch/examples/Cargo.toml new file mode 100644 index 000000000000..61184494e157 --- /dev/null +++ b/library/stdarch/examples/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "stdarch_examples" +version = "0.0.0" +authors = [ + "Alex Crichton ", + "Andrew Gallant ", + "Gonzalo Brito Gadeschi ", +] +description = "Examples of the stdarch crate." +edition = "2024" +default-run = "hex" + +[dependencies] +core_arch = { path = "../crates/core_arch" } +std_detect = { path = "../crates/std_detect" } +quickcheck = "1.0" +rand = "0.8" + +[[bin]] +name = "hex" +path = "hex.rs" + +[[bin]] +name = "connect5" +path = "connect5.rs" + +[[example]] +name = "wasm" +crate-type = ["cdylib"] +path = "wasm.rs" diff --git a/library/stdarch/examples/connect5.rs b/library/stdarch/examples/connect5.rs new file mode 100644 index 000000000000..2b451f45d71c --- /dev/null +++ b/library/stdarch/examples/connect5.rs @@ -0,0 +1,1244 @@ +//! Outer-Open Gomoku is a board game which is a enhanced version of connect5 (Gomoku).\ +//! The game is a two-player game which played on a 15x15 Go board.\ +//! Two players take turns placing a move on an empty intersection in this board.\ +//! The winner is the first player to form an unbroken chain of five moves horizontally, vertically, or diagonally.\ +//! Unlike Gomoku, the first move is required to be placed at the two outer rows or columns of this board.\ +//! This program provides an AI playing with Minimax search with alpha-beta pruning which uses +//! patterns on evaluation.\ +//! The avx512 intrinsic can do 32 pattern matching at one time.\ +//! This avx512 is tested with non-avx512 code to verify its correctness.\ +//! +//! On Intel i7-7800x using single thread with fixed AVX-512 clock at 4.0GHz, the avx512 is speed up about 9x.\ +//! The average time for each move in the avx512 is around 14.00s ± 1.31s and in the non-avx512 +//! is 129.02s ± 4.96s.\ +//! 
On Intel Tiger Lake i7-1165G7, the avx512 is around 11.11s ± 1.31s. +//! +//! Pattern Matching\ +//! Use 512-bit to present the board state. The location 0 is top left.\ +//! 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15\ +//! 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\ +//! ...\ +//! Pattern "OOOOO" is matching through "0 1 2 3 4", "1 2 3 4 5", ...\ +//! Using avx512, "0 1 2 3 4", "16 17 18 19 20", ... can be matched simultaneously.\ +//! +//! //! You can test out this program via: +//! +//! cargo +nightly run --release --bin connect5 +//! +//! You should see a game self-playing. In the end of the game, it shows the average time for +//! each move. + +#![allow(internal_features)] +#![cfg_attr(target_arch = "x86", feature(stdarch_internal))] +#![cfg_attr(target_arch = "x86_64", feature(stdarch_internal))] +#![feature(stmt_expr_attributes)] + +use rand::seq::SliceRandom; +use rand::thread_rng; + +use std::cmp; +use std::time::Instant; + +#[cfg(target_arch = "x86")] +use {core_arch::arch::x86::*, std_detect::is_x86_feature_detected}; +#[cfg(target_arch = "x86_64")] +use {core_arch::arch::x86_64::*, std_detect::is_x86_feature_detected}; + +// types + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Color { + Black = 0, + White = 1, + Empty = 2, + Border = 3, +} + +type Square = i32; +type Move = i32; +type Side = Color; + +// constants + +const FILE_SIZE: i32 = 15; +const RANK_SIZE: i32 = 15; +const SQUARE_SIZE: i32 = (FILE_SIZE + 1) * (FILE_SIZE + 4) + 16 + 4; + +const EVAL_INF: i32 = FILE_SIZE * RANK_SIZE * 100; +const MOVE_NONE: Move = -1; +const SCORE_NONE: i32 = -EVAL_INF - 1; + +/// DIRECTION 0: left to right\ +/// DIRECTION 1: top to bottom\ +/// DIRECTION 2: top left to bottom right\ +/// DIRECTION 3: top right to bottom left +#[rustfmt::skip] +#[allow(clippy::identity_op)] +const DIRECTION: [[i32; 5]; 4] = [ [1, 2, 3, 4, 5], + [1 * (FILE_SIZE + 1), 2 * (FILE_SIZE + 1), 3 * (FILE_SIZE + 1), 4 * (FILE_SIZE + 1), 5 * (FILE_SIZE + 1)], + [1 * (FILE_SIZE + 2), 2 * 
(FILE_SIZE + 2), 3 * (FILE_SIZE + 2), 4 * (FILE_SIZE + 2), 5 * (FILE_SIZE + 2)], + [1 * (FILE_SIZE + 0), 2 * (FILE_SIZE + 0), 3 * (FILE_SIZE + 0), 4 * (FILE_SIZE + 0), 5 * (FILE_SIZE + 0)]]; + +/// A table to encode each location to a value in bit 31-0 in the bitboard for 4 direction +#[rustfmt::skip] +const MAPMOVEVALUE: [[i32; 239]; 4] = [ [// Direction 0 + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 1<<17, 0, + 1<<31, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25, 1<<24, 1<<23, 1<<22, 1<<21, 1<<20, 1<<19, 1<<18, 
1<<17], + [// Direction 1 + 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 0, + 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 1<<30, 0, + 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 1<<29, 0, + 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 1<<28, 0, + 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 1<<27, 0, + 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 1<<26, 0, + 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 1<<25, 0, + 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 1<<24, 0, + 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 1<<23, 0, + 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 1<<22, 0, + 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 1<<21, 0, + 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 1<<20, 0, + 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 1<<19, 0, + 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 1<<18, 0, + 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17, 1<<17], + [// Direction 2 + 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 0, 0, 0, 0, 0, + 1<<15, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 0, 0, 0, 0, + 1<<15, 1<<14, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 0, 0, 0, + 1<<15, 
1<<14, 1<<13, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 0, 0, + 1<<15, 1<<14, 1<<13, 1<<12, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 0, + 1<<15, 1<<14, 1<<13, 1<<12, 1<<11, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 0, + 1<<9, 1<<14, 1<<13, 1<<12, 1<<11, 1<<10, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 0, + 1<<8, 1<<8, 1<<13, 1<<12, 1<<11, 1<<10, 1<<9, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 0, + 1<<7, 1<<7, 1<<7, 1<<12, 1<<11, 1<<10, 1<<9, 1<<8, 1<<7, 1<<7, 1<<7, 1<<7, 1<<7, 1<<7, 1<<7, 0, + 1<<6, 1<<6, 1<<6, 1<<6, 1<<11, 1<<10, 1<<9, 1<<8, 1<<7, 1<<6, 1<<6, 1<<6, 1<<6, 1<<6, 1<<6, 0, + 1<<5, 1<<5, 1<<5, 1<<5, 1<<5, 1<<10, 1<<9, 1<<8, 1<<7, 1<<6, 1<<5, 1<<5, 1<<5, 1<<5, 1<<5, 0, + 0, 1<<4, 1<<4, 1<<4, 1<<4, 1<<4, 1<<9, 1<<8, 1<<7, 1<<6, 1<<5, 1<<4, 1<<4, 1<<4, 1<<4, 0, + 0, 0, 1<<3, 1<<3, 1<<3, 1<<3, 1<<3, 1<<8, 1<<7, 1<<6, 1<<5, 1<<4, 1<<3, 1<<3, 1<<3, 0, + 0, 0, 0, 1<<2, 1<<2, 1<<2, 1<<2, 1<<2, 1<<7, 1<<6, 1<<5, 1<<4, 1<<3, 1<<2, 1<<2, 0, + 0, 0, 0, 0, 1<<1, 1<<1, 1<<1, 1<<1, 1<<1, 1<<6, 1<<5, 1<<4, 1<<3, 1<<2, 1<<1], + [// Direction 3 + 0, 0, 0, 0, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 1<<15, 0, + 0, 0, 0, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<14, 1<<15, 0, + 0, 0, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<13, 1<<14, 1<<15, 0, + 0, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<12, 1<<13, 1<<14, 1<<15, 0, + 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<11, 1<<12, 1<<13, 1<<14, 1<<15, 0, + 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14, 1<<15, 0, + 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<9, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14, 1<<9, 0, + 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<8, 1<<9, 1<<10, 1<<11, 1<<12, 1<<13, 1<<8, 1<<8, 0, + 1<<7, 
1<<7, 1<<7, 1<<7, 1<<7, 1<<7, 1<<7, 1<<8, 1<<9, 1<<10, 1<<11, 1<<12, 1<<7, 1<<7, 1<<7, 0, + 1<<6, 1<<6, 1<<6, 1<<6, 1<<6, 1<<6, 1<<7, 1<<8, 1<<9, 1<<10, 1<<11, 1<<6, 1<<6, 1<<6, 1<<6, 0, + 1<<5, 1<<5, 1<<5, 1<<5, 1<<5, 1<<6, 1<<7, 1<<8, 1<<9, 1<<10, 1<<5, 1<<5, 1<<5, 1<<5, 1<<5, 0, + 1<<4, 1<<4, 1<<4, 1<<4, 1<<5, 1<<6, 1<<7, 1<<8, 1<<9, 1<<4, 1<<4, 1<<4, 1<<4, 1<<4, 0, 0, + 1<<3, 1<<3, 1<<3, 1<<4, 1<<5, 1<<6, 1<<7, 1<<8, 1<<3, 1<<3, 1<<3, 1<<3, 1<<3, 0, 0, 0, + 1<<2, 1<<2, 1<<3, 1<<4, 1<<5, 1<<6, 1<<7, 1<<2, 1<<2, 1<<2, 1<<2, 1<<2, 0, 0, 0, 0, + 1<<1, 1<<2, 1<<3, 1<<4, 1<<5, 1<<6, 1<<1, 1<<1, 1<<1, 1<<1, 1<<1, 0, 0, 0, 0] + ]; + +/// A table to encode each location to an index in the bitboard for 4 direction +#[rustfmt::skip] +const MAPMOVEIDX: [[i32; 239]; 4] = [ [// Direction 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14], + [// Direction 1 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + [// Direction 2 + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, + 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, + 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 0, + 2, 1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 0, + 3, 2, 1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 0, + 4, 3, 2, 1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 0, + 5, 4, 3, 2, 1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 0, + 0, 5, 4, 3, 2, 1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 0, + 0, 0, 5, 4, 3, 2, 1, 15, 14, 13, 12, 11, 10, 9, 8, 0, + 0, 0, 0, 5, 4, 3, 2, 1, 15, 14, 13, 12, 11, 10, 9, 0, + 0, 0, 0, 0, 5, 4, 3, 2, 1, 15, 14, 13, 12, 11, 10], + [// Direction 3 + 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 0, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 0, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 0, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 0, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 0, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 0, 0, + 8, 9, 10, 11, 12, 13, 
14, 15, 1, 2, 3, 4, 5, 0, 0, 0, + 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 0, 0, 0, 0, + 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 0, 0, 0, 0] + ]; + +// structures + +/// Use one-dimensional array to store the board state. The location 0 is top left.\ +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15\ +/// 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\ +/// ... \ +/// position 15, 31, ... are Borders.\ +/// position 0 is file 0, rank 0.\ +/// position 17 is file 1, rank 1.\ +/// +/// Use a three-dimensional array to store the bitboard.\ +/// The first dimension is color: Black, White and Empty.\ +/// The second and third one are 2 x 512-bit. Direction 0 and 2 use the first 512-bit. Direction 1 and +/// 3 use the second 512-bit.\ +/// Each 512-bit is a 32-bit x 16 array. Direction 0 and 1 store at bit 31-16 and Direction 2 and 3 store at bit 15-0. +pub struct Pos { + // position + state: [Color; SQUARE_SIZE as usize], + p_turn: Side, + bitboard: [[[i32; 16]; 2]; 3], +} + +impl Pos { + pub fn init(&mut self) { + // starting position + // Set up the Border + for i in 0..SQUARE_SIZE as usize { + self.state[i] = Color::Border; + } + + // In the beginning, all is Empty + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + self.state[sq as usize] = Color::Empty; + } + } + + // first move is Black + self.p_turn = Color::Black; + + let black = Color::Black as usize; + let white = Color::White as usize; + let empty = Color::Empty as usize; + + // set up the corresponding bitboard + for i in 0..2 { + for j in 0..16 { + self.bitboard[black][i][j] = 0; + self.bitboard[white][i][j] = 0; + self.bitboard[empty][i][j] = 0; + } + } + + for i in 0..2 { + // use bit 31-16 to store direction 0 and 1 + #[rustfmt::skip] + for j in 0..FILE_SIZE as usize { + self.bitboard[empty][i][j] = (1<<31)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<24)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17); + } + } + + // use bit 15-0 to store direction 2 
and 3. There are 21 for each one. We combine row1 and row16, row2 and row17, row3 and row18, row4 and row19, and row 5 and row20 + #[rustfmt::skip] + for i in 0..2 { + self.bitboard[empty][i][0] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11); //row 0 + self.bitboard[empty][i][1] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)/*row1*/|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row16 + self.bitboard[empty][i][2] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)/*row2*/|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row17 + self.bitboard[empty][i][3] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)/*row3*/|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row18 + self.bitboard[empty][i][4] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)/*row4*/|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row19 + self.bitboard[empty][i][5] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)/*row5*/|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row20 + self.bitboard[empty][i][6] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5);//row6 + self.bitboard[empty][i][7] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4);//row7 + self.bitboard[empty][i][8] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3);//row8 + self.bitboard[empty][i][9] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2);//row9 + self.bitboard[empty][i][10] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1);//row10 + self.bitboard[empty][i][11] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2);//row11 + self.bitboard[empty][i][12] |= 
(1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3);//row12 + self.bitboard[empty][i][13] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4);//row13 + self.bitboard[empty][i][14] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5);//row14 + self.bitboard[empty][i][15] |= (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6);//row15 + } + } + + pub fn do_move(&mut self, mv: Move) { + let atk: Side = self.p_turn; + let def: Side = side_opp(atk); + + let mv = mv as usize; + let black = Color::Black as usize; + let white = Color::White as usize; + let empty = Color::Empty as usize; + + match self.p_turn { + Color::Black => { + self.state[mv] = Color::Black; + // update black move and remove empty move in bitboard + self.bitboard[black][0][MAPMOVEIDX[0][mv] as usize] |= MAPMOVEVALUE[0][mv]; + self.bitboard[empty][0][MAPMOVEIDX[0][mv] as usize] ^= MAPMOVEVALUE[0][mv]; + self.bitboard[black][1][MAPMOVEIDX[1][mv] as usize] |= MAPMOVEVALUE[1][mv]; + self.bitboard[empty][1][MAPMOVEIDX[1][mv] as usize] ^= MAPMOVEVALUE[1][mv]; + self.bitboard[black][0][MAPMOVEIDX[2][mv] as usize] |= MAPMOVEVALUE[2][mv]; + self.bitboard[empty][0][MAPMOVEIDX[2][mv] as usize] ^= MAPMOVEVALUE[2][mv]; + self.bitboard[black][1][MAPMOVEIDX[3][mv] as usize] |= MAPMOVEVALUE[3][mv]; + self.bitboard[empty][1][MAPMOVEIDX[3][mv] as usize] ^= MAPMOVEVALUE[3][mv]; + } + Color::White => { + self.state[mv] = Color::White; + // update white move and remove empty move in bitboard + self.bitboard[white][0][MAPMOVEIDX[0][mv] as usize] |= MAPMOVEVALUE[0][mv]; + self.bitboard[empty][0][MAPMOVEIDX[0][mv] as usize] ^= MAPMOVEVALUE[0][mv]; + self.bitboard[white][1][MAPMOVEIDX[1][mv] as usize] |= MAPMOVEVALUE[1][mv]; + self.bitboard[empty][1][MAPMOVEIDX[1][mv] as usize] ^= MAPMOVEVALUE[1][mv]; + self.bitboard[white][0][MAPMOVEIDX[2][mv] as usize] |= MAPMOVEVALUE[2][mv]; + 
self.bitboard[empty][0][MAPMOVEIDX[2][mv] as usize] ^= MAPMOVEVALUE[2][mv]; + self.bitboard[white][1][MAPMOVEIDX[3][mv] as usize] |= MAPMOVEVALUE[3][mv]; + self.bitboard[empty][1][MAPMOVEIDX[3][mv] as usize] ^= MAPMOVEVALUE[3][mv]; + } + _ => panic! {}, + } + + self.p_turn = def; + } + + fn turn(&self) -> Side { + self.p_turn + } + + pub fn can_play(&self, from: Square) -> bool { + self.state[from as usize] == Color::Empty + } +} + +pub struct List { + // legal move list + p_move: [Move; (FILE_SIZE * RANK_SIZE) as usize], + p_size: i32, +} + +/// Use List to store legal moves. +impl List { + pub fn clear(&mut self) { + self.p_size = 0; + } + + pub fn add(&mut self, mv: Move) { + self.p_move[self.p_size as usize] = mv; + self.p_size += 1; + } + + pub fn size(&self) -> i32 { + self.p_size + } + + pub fn shuffle(&mut self) { + let mut rng = thread_rng(); + let num = self.p_size as usize; + + self.p_move[..num].shuffle(&mut rng); + } +} + +// functions + +fn square_make(fl: i32, rk: i32) -> Square { + rk * (FILE_SIZE + 1) + fl +} + +fn side_opp(sd: Side) -> Side { + match sd { + Side::White => Side::Black, + Side::Black => Side::White, + _ => panic!(""), + } +} + +fn pos_is_winner(pos: &Pos) -> bool { + let current_side = side_opp(pos.p_turn); + check_pattern5(pos, current_side) +} + +fn pos_is_draw(pos: &Pos) -> bool { + let mut found: bool = true; + + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + if pos.can_play(sq) { + found = false; + break; + } + + if !found { + break; + } + } + } + + found && !pos_is_winner(pos) +} + +#[target_feature(enable = "avx512f,avx512bw,popcnt")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn pos_is_draw_avx512(pos: &Pos) -> bool { + let empty = Color::Empty as usize; + + let board0org = unsafe { _mm512_loadu_epi32(&pos.bitboard[empty][0][0]) }; + + let answer = _mm512_set1_epi32(0); + + // if all empty is 0, all board is filled. 
+ let temp_mask = _mm512_mask_cmpneq_epi32_mask(0b11111111_11111111, answer, board0org); + + _popcnt32(temp_mask as i32) == 0 && !pos_is_winner_avx512(pos) +} + +fn pos_is_end(pos: &Pos) -> bool { + pos_is_winner(pos) || pos_is_draw(pos) +} + +fn pos_disp(pos: &Pos) { + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + + match pos.state[sq as usize] { + Color::Black => print!("# "), + Color::White => print!("O "), + Color::Empty => print!("- "), + Color::Border => print!("| "), + } + } + + println!(); + } + + match pos.turn() { + Color::Black => println!("black to play"), + Color::White => println!("white to play"), + _ => panic!(), + } +} + +fn gen_moves(list: &mut List, pos: &Pos) { + list.clear(); + + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + if pos.can_play(sq) { + list.add(sq); + } + } + } +} + +/// AI: use Minimax search with alpha-beta pruning +#[allow(clippy::manual_range_contains)] +fn search(pos: &Pos, alpha: i32, beta: i32, depth: i32, _ply: i32) -> i32 { + assert!(-EVAL_INF <= alpha && alpha < beta && beta <= EVAL_INF); + // leaf? 
+ + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if check_x86_avx512_features() { + unsafe { + if pos_is_winner_avx512(pos) { + return -EVAL_INF + _ply; + } + + if pos_is_draw_avx512(pos) { + return 0; + } + } + } else { + if pos_is_winner(pos) { + return -EVAL_INF + _ply; + } + + if pos_is_draw(pos) { + return 0; + } + } + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + if pos_is_winner(pos) { + return -EVAL_INF + _ply; + } + + if pos_is_draw(pos) { + return 0; + } + } + + if depth == 0 { + return eval(pos, _ply); + } + + let p_move_new: [Move; (FILE_SIZE * RANK_SIZE) as usize] = + [0; (FILE_SIZE * RANK_SIZE) as usize]; + + let mut list = List { + p_move: p_move_new, + p_size: 0, + }; + + let mut bm: Move = MOVE_NONE; + let mut bs: i32 = SCORE_NONE; + + gen_moves(&mut list, pos); + + // move loop + + if _ply == 0 { + list.shuffle(); + } + + for i in 0..list.size() { + if bs < beta { + let mv: Move = list.p_move[i as usize]; + + let mut new_pos = Pos { + state: pos.state, + p_turn: pos.p_turn, + bitboard: pos.bitboard, + }; + + new_pos.do_move(mv); + + let sc: i32 = -search(&new_pos, -beta, -cmp::max(alpha, bs), depth - 1, _ply + 1); + + if sc > bs { + bm = mv; + bs = sc; + } + } + } + + assert_ne!(bm, MOVE_NONE); + assert!(bs >= -EVAL_INF && bs <= EVAL_INF); + + if _ply == 0 { bm } else { bs } //best move at the root node, best score elsewhere +} + +/// Evaluation function: give different scores to different patterns after a fixed depth. 
+fn eval(pos: &Pos, _ply: i32) -> i32 { + let atk: Side = pos.turn(); + let def: Side = side_opp(atk); + + // check if opp has live4 which will win playing next move + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if check_x86_avx512_features() { + unsafe { + if check_patternlive4_avx512(pos, def) { + return -4096; + } + } + } else { + if check_patternlive4(pos, def) { + return -4096; + } + } + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + if check_patternlive4(pos, def) { + return -4096; + } + } + + // check if self has live4 which will win playing next move + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if check_x86_avx512_features() { + unsafe { + if check_patternlive4_avx512(pos, atk) { + return 2560; + } + } + } else { + if check_patternlive4(pos, atk) { + return 2560; + } + } + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + if check_patternlive4(pos, atk) { + return 2560; + } + } + + // check if self has dead4 which will win playing next move + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if check_x86_avx512_features() { + unsafe { + if check_patterndead4_avx512(pos, atk) > 0 { + return 2560; + } + } + } else { + if check_patterndead4(pos, atk) > 0 { + return 2560; + } + } + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + if check_patterndead4(pos, atk) > 0 { + return 2560; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if check_x86_avx512_features() { + unsafe { + let n_c4: i32 = check_patterndead4_avx512(pos, def); + let n_c3: i32 = check_patternlive3_avx512(pos, def); + + // check if opp has 2 dead4 which will win playing next move + if n_c4 > 1 { + return -2048; + } + + // check if opp has a dead 4 and live 3 which will win playing the next two move + if n_c4 == 1 && n_c3 > 0 { + return -2048; + } + + if check_patternlive3_avx512(pos, atk) > 1 { + return 2560; + } + + // check if opp has 2 live3 
which will win playing the next two move + if n_c3 > 1 { + return -2048; + } + } + } else { + let n_c4: i32 = check_patterndead4(pos, def); + let n_c3: i32 = check_patternlive3(pos, def); + + // check if opp has 2 dead4 which will win playing next move + if n_c4 > 1 { + return -2048; + } + + // check if opp has a dead 4 and live 3 which will win playing the next two move + if n_c4 == 1 && n_c3 > 0 { + return -2048; + } + + // check if self has 2 live3 which will win playing the next two move + if check_patternlive3(pos, atk) > 1 { + return 2560; + } + + // check if opp has 2 live3 which will win playing the next two move + if n_c3 > 1 { + return -2048; + } + } + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + let n_c4: i32 = check_patterndead4(pos, def); + let n_c3: i32 = check_patternlive3(pos, def); + + // check if opp has 2 dead4 which will win playing next move + if n_c4 > 1 { + return -2048; + } + + // check if opp has a dead 4 and live 3 which will win playing the next two move + if n_c4 == 1 && n_c3 > 0 { + return -2048; + } + + // check if self has 2 live3 which will win playing the next two move + if check_patternlive3(pos, atk) > 1 { + return 2560; + } + + // check if opp has 2 live3 which will win playing the next two move + if n_c3 > 1 { + return -2048; + } + } + + 0 +} + +/// Check OOOOO +fn check_pattern5(pos: &Pos, sd: Side) -> bool { + let mut n: i32 = 0; + + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + + for direction in &DIRECTION { + let idx0 = sq; + let idx1 = sq + direction[0]; + let idx2 = sq + direction[1]; + let idx3 = sq + direction[2]; + let idx4 = sq + direction[3]; + + let val0 = pos.state[idx0 as usize]; + let val1 = pos.state[idx1 as usize]; + let val2 = pos.state[idx2 as usize]; + let val3 = pos.state[idx3 as usize]; + let val4 = pos.state[idx4 as usize]; + + #[rustfmt::skip] + if val0 == sd && val1 == sd && val2 == sd && val3 == sd && val4 == sd { n += 1; } + 
} + } + } + + n > 0 +} + +/// Check -OOOO- +fn check_patternlive4(pos: &Pos, sd: Side) -> bool { + let mut n: i32 = 0; + + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + + for direction in &DIRECTION { + let idx0 = sq; + let idx1 = sq + direction[0]; + let idx2 = sq + direction[1]; + let idx3 = sq + direction[2]; + let idx4 = sq + direction[3]; + let idx5 = sq + direction[4]; + + let val0 = pos.state[idx0 as usize]; + let val1 = pos.state[idx1 as usize]; + let val2 = pos.state[idx2 as usize]; + let val3 = pos.state[idx3 as usize]; + let val4 = pos.state[idx4 as usize]; + let val5 = pos.state[idx5 as usize]; + + #[rustfmt::skip] + if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == sd && val4 == sd && val5 == Color::Empty { n += 1; } + } + } + } + + n > 0 +} + +/// Check OOOO-, OOO-O, OO-OO, O-OOO, -OOOO +fn check_patterndead4(pos: &Pos, sd: Side) -> i32 { + let mut n: i32 = 0; + + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + + for direction in &DIRECTION { + let idx0 = sq; + let idx1 = sq + direction[0]; + let idx2 = sq + direction[1]; + let idx3 = sq + direction[2]; + let idx4 = sq + direction[3]; + + let val0 = pos.state[idx0 as usize]; + let val1 = pos.state[idx1 as usize]; + let val2 = pos.state[idx2 as usize]; + let val3 = pos.state[idx3 as usize]; + let val4 = pos.state[idx4 as usize]; + + #[rustfmt::skip] + if val0 == sd && val1 == sd && val2 == sd && val3 == sd && val4 == Color::Empty { n += 1; } + #[rustfmt::skip] + if val0 == sd && val1 == sd && val2 == sd && val3 == Color::Empty && val4 == sd { n += 1; } + #[rustfmt::skip] + if val0 == sd && val1 == sd && val2 == Color::Empty && val3 == sd && val4 == sd { n += 1; } + #[rustfmt::skip] + if val0 == sd && val1 == Color::Empty && val2 == sd && val3 == sd && val4 == sd { n += 1; } + #[rustfmt::skip] + if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == sd && val4 == sd { n += 1; } + } + } + } 
+ + n +} + +/// Check -OOO-, -OO-O-, -O-OO- +fn check_patternlive3(pos: &Pos, sd: Side) -> i32 { + let mut n: i32 = 0; + + for rk in 0..RANK_SIZE { + for fl in 0..FILE_SIZE { + let sq: Square = square_make(fl, rk); + + for direction in &DIRECTION { + let idx0 = sq; + let idx1 = sq + direction[0]; + let idx2 = sq + direction[1]; + let idx3 = sq + direction[2]; + let idx4 = sq + direction[3]; + let idx5 = sq + direction[4]; + + let val0 = pos.state[idx0 as usize]; + let val1 = pos.state[idx1 as usize]; + let val2 = pos.state[idx2 as usize]; + let val3 = pos.state[idx3 as usize]; + let val4 = pos.state[idx4 as usize]; + let val5 = pos.state[idx5 as usize]; + + #[rustfmt::skip] + if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == sd && val4 == Color::Empty { n +=1; } + #[rustfmt::skip] + if val0 == Color::Empty && val1 == sd && val2 == sd && val3 == Color::Empty && val4 == sd && val5 == Color::Empty { n += 1; } + #[rustfmt::skip] + if val0 == Color::Empty && val1 == sd && val2 == Color::Empty && val3 == sd && val4 == sd && val5 == Color::Empty { n += 1; } + } + } + } + + n +} + +#[target_feature(enable = "avx512f,avx512bw,popcnt")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn pos_is_winner_avx512(pos: &Pos) -> bool { + let current_side = side_opp(pos.p_turn); + let coloridx = current_side as usize; + + let board0org: [__m512i; 2] = unsafe { + [ + _mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]), + _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0]), + ] + }; // load states from bitboard + + #[rustfmt::skip] + let answer = _mm512_set1_epi16((1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)); // an unbroken chain of five moves + + // use Mask to filter out which data is not processed. + // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 + // 1 x x x x _ _ _ _ _ _ _ _ _ _ _ 0 x o x o x 0 0 0 0 0 0 0 0 0 0 0 + // 2 x _ _ _ _ o _ x o _ _ _ _ _ _ 0 x o _ _ _ _ _| x x o o o x x _ _ + // . ... + // . ... + // . ... 
+ // 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 x o x o o o o o o o 0 0 0 0 0 0 + // + // answer_mask[0]: 01_11..............: "0" is in row 16 and column 1-16. + // There is no data to match (x = black, o = white, _ = empty, 0 = no data). + // + // + // Then, shift one space left. + // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 + // 1 x x x _ _ _ _ _ _ _ _ _ _ _ 0 x o x o x 0 0 0 0 0 0 0 0 0 0 0 0 + // . ... + // . ... + // . ... + // 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 x o x o o o o o o o 0 0 0 0 0 0 0 + // answer_mask[1]: ................_10: "0" is in row 1 and column 17-32; + // There is no enough data to match (o x o x but we want to match o o o o o). + // + // answer_mask[2]: mix 2 data together (column 17-23 and column 24-32). Using Mask to make it match correctly. + // For example, column 23,24,25,26,27 is not a pattern and 24,25,26,27,28 is a pattern. + // That is why some mask bits are set to 0 from answer_mask[2] to answer_mask[10]. + + #[rustfmt::skip] + let answer_mask: [__mmask32; 11] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10, + 0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_11_10, + 0b00_10_11_11_11_11_11_11_11_10_10_10_10_11_11_10, + 0b00_10_10_11_11_11_11_11_10_10_10_10_11_11_11_10, + 0b00_10_10_10_11_11_11_10_10_10_10_11_11_11_11_10, + 0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10]; + let mut count_match: i32 = 0; + + for dir in 0..2 { + // direction 0 and 1 + let mut board0 = board0org[dir]; + let boardf = _mm512_and_si512(answer, board0); + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + + for i in 1..11 { + // OOOOOOOOOOO----, the last 4 "-" cannot make an 
unbroken chain of five. + board0 = _mm512_slli_epi32(board0, 1); // shift one space left + let boardf = _mm512_and_si512(answer, board0); // focus on the pattern + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf); // see if it matches the pattern + count_match += _popcnt32(temp_mask as i32); + } + } + + count_match > 0 +} + +#[target_feature(enable = "avx512f,avx512bw,popcnt")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn check_patternlive4_avx512(pos: &Pos, sd: Side) -> bool { + let coloridx = sd as usize; + let emptyidx = Color::Empty as usize; + + #[rustfmt::skip] + let answer_color = _mm512_set1_epi16( (1<<14)|(1<<13)|(1<<12)|(1<<11) ); + #[rustfmt::skip] + let answer_empty = _mm512_set1_epi16( (1<<15)| (1<<10) ); + #[rustfmt::skip] + let answer = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10) ); + + #[rustfmt::skip] + let answer_mask: [__mmask32; 10] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10, + 0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_10_10, + 0b00_10_11_11_11_11_11_11_11_10_10_10_10_10_11_10, + 0b00_10_10_11_11_11_11_11_10_10_10_10_10_11_11_10, + 0b00_10_10_10_11_11_11_10_10_10_10_10_11_11_11_10, + 0b00_10_10_10_10_11_10_10_10_10_10_11_11_11_11_10]; + let board0org: [__m512i; 2] = unsafe { + [ + _mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]), + _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0]), + ] + }; + let board1org: [__m512i; 2] = unsafe { + [ + _mm512_loadu_epi32(&pos.bitboard[emptyidx][0][0]), + _mm512_loadu_epi32(&pos.bitboard[emptyidx][1][0]), + ] + }; + + let mut count_match: i32 = 0; + + for dir in 0..2 { + let mut board0 = board0org[dir]; + let mut board1 = board1org[dir]; + + let boardf1 = _mm512_and_si512(answer_color, board0); + let boardf2 = 
_mm512_and_si512(answer_empty, board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + + for i in 1..10 { + board0 = _mm512_slli_epi32(board0, 1); + board1 = _mm512_slli_epi32(board1, 1); + + let boardf1 = _mm512_and_si512(answer_color, board0); + let boardf2 = _mm512_and_si512(answer_empty, board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + } + } + + count_match > 0 +} + +#[target_feature(enable = "avx512f,avx512bw,popcnt")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn check_patterndead4_avx512(pos: &Pos, sd: Side) -> i32 { + let coloridx = sd as usize; + let emptyidx = Color::Empty as usize; + + #[rustfmt::skip] + let answer_color: [__m512i; 5] = [_mm512_set1_epi16( (1<<14)|(1<<13)|(1<<12)|(1<<11) ), + _mm512_set1_epi16( (1<<15)| (1<<13)|(1<<12)|(1<<11) ), + _mm512_set1_epi16( (1<<15)|(1<<14) |(1<<12)|(1<<11) ), + _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13) |(1<<11) ), + _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12) )]; + #[rustfmt::skip] + let answer_empty: [__m512i; 5]= [_mm512_set1_epi16( 1<<15 ), + _mm512_set1_epi16( 1<<14 ), + _mm512_set1_epi16( 1<<13 ), + _mm512_set1_epi16( 1<<12 ), + _mm512_set1_epi16( 1<<11)]; + #[rustfmt::skip] + let answer = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)); + + #[rustfmt::skip] + let answer_mask: [__mmask32; 11] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10, + 0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_11_10, + 
0b00_10_11_11_11_11_11_11_11_10_10_10_10_11_11_10, + 0b00_10_10_11_11_11_11_11_10_10_10_10_11_11_11_10, + 0b00_10_10_10_11_11_11_10_10_10_10_11_11_11_11_10, + 0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10]; + let board0org: [__m512i; 2] = unsafe { + [ + _mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]), + _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0]), + ] + }; + let board1org: [__m512i; 2] = unsafe { + [ + _mm512_loadu_epi32(&pos.bitboard[emptyidx][0][0]), + _mm512_loadu_epi32(&pos.bitboard[emptyidx][1][0]), + ] + }; + + let mut count_match: i32 = 0; + + for pattern in 0..5 { + for dir in 0..2 { + let mut board0 = board0org[dir]; + let mut board1 = board1org[dir]; + + let boardf1 = _mm512_and_si512(answer_color[pattern], board0); + let boardf2 = _mm512_and_si512(answer_empty[pattern], board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + + for i in 1..11 { + board0 = _mm512_slli_epi32(board0, 1); + board1 = _mm512_slli_epi32(board1, 1); + + let boardf1 = _mm512_and_si512(answer_color[pattern], board0); + let boardf2 = _mm512_and_si512(answer_empty[pattern], board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + } + } + } + + count_match +} + +#[target_feature(enable = "avx512f,avx512bw,popcnt")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn check_patternlive3_avx512(pos: &Pos, sd: Side) -> i32 { + let coloridx = sd as usize; + let emptyidx = Color::Empty as usize; + + #[rustfmt::skip] + let board0org: [__m512i; 2] = unsafe { [_mm512_loadu_epi32(&pos.bitboard[coloridx][0][0]), _mm512_loadu_epi32(&pos.bitboard[coloridx][1][0])] }; + #[rustfmt::skip] + let board1org: [__m512i; 2] = unsafe { [_mm512_loadu_epi32(&pos.bitboard[emptyidx][0][0]), 
_mm512_loadu_epi32(&pos.bitboard[emptyidx][1][0])] }; + + #[rustfmt::skip] + let answer_color: [__m512i; 1] = [_mm512_set1_epi16( (1<<14)|(1<<13)|(1<<12) )]; + #[rustfmt::skip] + let answer_empty: [__m512i; 1] = [_mm512_set1_epi16( (1<<15)| (1<<11) )]; + #[rustfmt::skip] + let answer: __m512i = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11) ); + + let mut count_match: i32 = 0; + + #[rustfmt::skip] + let answer_mask: [__mmask32; 11] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10, + 0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_11_10, + 0b00_10_11_11_11_11_11_11_11_10_10_10_10_11_11_10, + 0b00_10_10_11_11_11_11_11_10_10_10_10_11_11_11_10, + 0b00_10_10_10_11_11_11_10_10_10_10_11_11_11_11_10, + 0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10]; + for pattern in 0..1 { + for dir in 0..2 { + let mut board0 = board0org[dir]; + let mut board1 = board1org[dir]; + + let boardf1 = _mm512_and_si512(answer_color[pattern], board0); + let boardf2 = _mm512_and_si512(answer_empty[pattern], board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + + for i in 1..11 { + board0 = _mm512_slli_epi32(board0, 1); + board1 = _mm512_slli_epi32(board1, 1); + + let boardf1 = _mm512_and_si512(answer_color[pattern], board0); + let boardf2 = _mm512_and_si512(answer_empty[pattern], board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + } + } + } + + #[rustfmt::skip] + let answer_color: [__m512i; 2] = [_mm512_set1_epi16( (1<<14)| (1<<12)|(1<<11) ), + 
_mm512_set1_epi16( (1<<14)|(1<<13) |(1<<11) )]; + #[rustfmt::skip] + let answer_empty: [__m512i; 2] = [_mm512_set1_epi16( (1<<15)| (1<<13)| (1<<10) ), + _mm512_set1_epi16( (1<<15)| (1<<12)| (1<<10) )]; + #[rustfmt::skip] + let answer: __m512i = _mm512_set1_epi16( (1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<11)|(1<<10) ); + + #[rustfmt::skip] + let answer_mask: [__mmask32; 10] = [0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_11_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_11_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_11_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_11_10_10_10_10, + 0b01_11_11_11_11_11_11_11_11_11_11_10_10_10_10_10, + 0b00_11_11_11_11_11_11_11_11_11_10_10_10_10_10_10, + 0b00_10_11_11_11_11_11_11_11_10_10_10_10_10_11_10, + 0b00_10_10_11_11_11_11_11_10_10_10_10_10_11_11_10, + 0b00_10_10_10_11_11_11_10_10_10_10_10_11_11_11_10, + 0b00_10_10_10_10_11_10_10_10_10_10_11_11_11_11_10]; + for pattern in 0..2 { + for dir in 0..2 { + let mut board0 = board0org[dir]; + let mut board1 = board1org[dir]; + + let boardf1 = _mm512_and_si512(answer_color[pattern], board0); + let boardf2 = _mm512_and_si512(answer_empty[pattern], board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + + for i in 1..10 { + board0 = _mm512_slli_epi32(board0, 1); + board1 = _mm512_slli_epi32(board1, 1); + + let boardf1 = _mm512_and_si512(answer_color[pattern], board0); + let boardf2 = _mm512_and_si512(answer_empty[pattern], board1); + let boardf = _mm512_or_si512(boardf1, boardf2); + + let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[i], answer, boardf); + count_match += _popcnt32(temp_mask as i32); + } + } + } + + count_match +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn check_x86_avx512_features() -> bool { + is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("popcnt") +} + +fn main() { + #[cfg(any(target_arch = 
"x86", target_arch = "x86_64"))] + { + if check_x86_avx512_features() { + println!("\n\nThe program is running with avx512f and avx512bw intrinsics\n\n"); + } else { + println!("\n\nThe program is running with NO intrinsics.\n\n"); + } + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + println!("\n\nThe program is running with NO intrinsics.\n\n"); + } + + loop { + let start = Instant::now(); + + println!("Hello, this is Connect5 (Outer-Open Gomoku)!"); + println!("Self-playing with search depth = 4"); + + let test_state: [Color; SQUARE_SIZE as usize] = [Color::Empty; SQUARE_SIZE as usize]; + let test_bitboard: [[[i32; 16]; 2]; 3] = [[[0; 16]; 2]; 3]; + + let mut test1 = Pos { + state: test_state, + p_turn: Color::Black, + bitboard: test_bitboard, + }; + + test1.init(); + + let mut count: i32 = 0; + + for i in 0..(FILE_SIZE * RANK_SIZE) { + let mut next_move: Move = square_make(1, 7); // set the first move is (1,7) + + if i > 0 { + next_move = search(&test1, -EVAL_INF, EVAL_INF, 4, 0); + } // search depth = 4 + + test1.do_move(next_move); + pos_disp(&test1); + + if pos_is_end(&test1) { + println!("Game over!!!!!! at Move {i}"); + count = i + 1; + break; + } + } + + let duration = start.elapsed(); + + println!( + "Average time for each move is: {:?}", + duration / count as u32 + ); + } +} diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs new file mode 100644 index 000000000000..e393ad727168 --- /dev/null +++ b/library/stdarch/examples/hex.rs @@ -0,0 +1,420 @@ +//! An example showing runtime dispatch to an architecture-optimized +//! implementation. +//! +//! This program implements hex encoding a slice into a predetermined +//! destination using various different instruction sets. This selects at +//! runtime the most optimized implementation and uses that rather than being +//! required to be compiled differently. +//! +//! You can test out this program via: +//! +//! 
echo test | cargo +nightly run --release hex +//! +//! and you should see `746573740a` get printed out. + +#![allow(internal_features)] +#![feature(wasm_target_feature)] +#![cfg_attr(test, feature(test))] +#![cfg_attr( + any(target_arch = "x86", target_arch = "x86_64"), + feature(stdarch_internal) +)] +#![allow( + clippy::unwrap_used, + clippy::print_stdout, + clippy::unwrap_used, + clippy::shadow_reuse, + clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, + clippy::cast_sign_loss, + clippy::missing_docs_in_private_items +)] + +use std::{ + io::{self, Read}, + str, +}; + +#[cfg(target_arch = "x86")] +use {core_arch::arch::x86::*, std_detect::is_x86_feature_detected}; +#[cfg(target_arch = "x86_64")] +use {core_arch::arch::x86_64::*, std_detect::is_x86_feature_detected}; + +fn main() { + let mut input = Vec::new(); + io::stdin().read_to_end(&mut input).unwrap(); + let mut dst = vec![0; 2 * input.len()]; + let s = hex_encode(&input, &mut dst).unwrap(); + println!("{s}"); +} + +fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { + let len = src.len().checked_mul(2).unwrap(); + if dst.len() < len { + return Err(len); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("avx2") { + return unsafe { hex_encode_avx2(src, dst) }; + } + if is_x86_feature_detected!("sse4.1") { + return unsafe { hex_encode_sse41(src, dst) }; + } + } + #[cfg(target_arch = "wasm32")] + { + if true { + return hex_encode_simd128(src, dst); + } + } + + hex_encode_fallback(src, dst) +} + +#[target_feature(enable = "avx2")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { + assert!(dst.len() >= src.len().checked_mul(2).unwrap()); + + let ascii_zero = _mm256_set1_epi8(b'0' as i8); + let nines = _mm256_set1_epi8(9); + let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8); + let and4bits = _mm256_set1_epi8(0xf); + + let mut i = 
0_usize; + while src.len() >= 32 { + // SAFETY: the loop condition ensures that we have at least 32 bytes + let invec = unsafe { _mm256_loadu_si256(src.as_ptr() as *const _) }; + + let masked1 = _mm256_and_si256(invec, and4bits); + let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits); + + // return 0xff corresponding to the elements > 9, or 0x00 otherwise + let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines); + let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines); + + // add '0' or the offset depending on the masks + let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1)); + let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2)); + + // interleave masked1 and masked2 bytes + let res1 = _mm256_unpacklo_epi8(masked2, masked1); + let res2 = _mm256_unpackhi_epi8(masked2, masked1); + + // Store everything into the right destination now + unsafe { + // SAFETY: the assertion at the beginning of the function ensures + // that `dst` is large enough. 
+ let base = dst.as_mut_ptr().add(i * 2); + let base1 = base.add(0) as *mut _; + let base2 = base.add(16) as *mut _; + let base3 = base.add(32) as *mut _; + let base4 = base.add(48) as *mut _; + _mm256_storeu2_m128i(base3, base1, res1); + _mm256_storeu2_m128i(base4, base2, res2); + } + + src = &src[32..]; + i += 32; + } + + let _ = hex_encode_sse41(src, &mut dst[i * 2..]); + + // SAFETY: `dst` only contains ASCII characters + unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) } +} + +// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp +#[target_feature(enable = "sse4.1")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { + assert!(dst.len() >= src.len().checked_mul(2).unwrap()); + + let ascii_zero = _mm_set1_epi8(b'0' as i8); + let nines = _mm_set1_epi8(9); + let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8); + let and4bits = _mm_set1_epi8(0xf); + + let mut i = 0_usize; + while src.len() >= 16 { + // SAFETY: the loop condition ensures that we have at least 16 bytes + let invec = unsafe { _mm_loadu_si128(src.as_ptr() as *const _) }; + + let masked1 = _mm_and_si128(invec, and4bits); + let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); + + // return 0xff corresponding to the elements > 9, or 0x00 otherwise + let cmpmask1 = _mm_cmpgt_epi8(masked1, nines); + let cmpmask2 = _mm_cmpgt_epi8(masked2, nines); + + // add '0' or the offset depending on the masks + let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1)); + let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2)); + + // interleave masked1 and masked2 bytes + let res1 = _mm_unpacklo_epi8(masked2, masked1); + let res2 = _mm_unpackhi_epi8(masked2, masked1); + + unsafe { + // SAFETY: the assertion at the beginning of the function ensures + // that `dst` is large enough. 
+ _mm_storeu_si128(dst.as_mut_ptr().add(i * 2) as *mut _, res1); + _mm_storeu_si128(dst.as_mut_ptr().add(i * 2 + 16) as *mut _, res2); + } + src = &src[16..]; + i += 16; + } + + let _ = hex_encode_fallback(src, &mut dst[i * 2..]); + + // SAFETY: `dst` only contains ASCII characters + unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) } +} + +#[cfg(target_arch = "wasm32")] +#[target_feature(enable = "simd128")] +fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { + assert!(dst.len() >= src.len().checked_mul(2).unwrap()); + + use core_arch::arch::wasm32::*; + + let ascii_zero = u8x16_splat(b'0'); + let nines = u8x16_splat(9); + let ascii_a = u8x16_splat(b'a' - 9 - 1); + let and4bits = u8x16_splat(0xf); + + let mut i = 0_usize; + while src.len() >= 16 { + // SAFETY: the loop condition ensures that we have at least 16 bytes + let invec = unsafe { v128_load(src.as_ptr() as *const _) }; + + let masked1 = v128_and(invec, and4bits); + let masked2 = v128_and(u8x16_shr(invec, 4), and4bits); + + // return 0xff corresponding to the elements > 9, or 0x00 otherwise + let cmpmask1 = u8x16_gt(masked1, nines); + let cmpmask2 = u8x16_gt(masked2, nines); + + // add '0' or the offset depending on the masks + let masked1 = u8x16_add(masked1, v128_bitselect(ascii_a, ascii_zero, cmpmask1)); + let masked2 = u8x16_add(masked2, v128_bitselect(ascii_a, ascii_zero, cmpmask2)); + + // Next we need to shuffle around masked{1,2} to get back to the + // original source text order. The first element (res1) we'll store uses + // all the low bytes from the 2 masks and the second element (res2) uses + // all the upper bytes. 
+ let res1 = u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>( + masked2, masked1, + ); + let res2 = u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>( + masked2, masked1, + ); + + unsafe { + // SAFETY: the assertion at the beginning of the function ensures + // that `dst` is large enough. + v128_store(dst.as_mut_ptr().add(i * 2) as *mut _, res1); + v128_store(dst.as_mut_ptr().add(i * 2 + 16) as *mut _, res2); + } + src = &src[16..]; + i += 16; + } + + let _ = hex_encode_fallback(src, &mut dst[i * 2..]); + + // SAFETY: `dst` only contains ASCII characters + unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) } +} + +fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { + fn hex(byte: u8) -> u8 { + static TABLE: &[u8] = b"0123456789abcdef"; + TABLE[byte as usize] + } + + for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) { + slots[0] = hex((*byte >> 4) & 0xf); + slots[1] = hex(*byte & 0xf); + } + + unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2])) } +} + +// Run these with `cargo +nightly test --example hex -p stdarch` +#[cfg(test)] +mod tests { + use super::*; + + fn test(input: &[u8], output: &str) { + let tmp = || vec![0; input.len() * 2]; + + assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output); + assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + if self::is_x86_feature_detected!("avx2") { + assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output); + } + if self::is_x86_feature_detected!("sse4.1") { + assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output); + } + } + } + + #[test] + fn empty() { + test(b"", ""); + } + + #[test] + fn big() { + test(&[0; 1024], &"0".repeat(2048)); + } + + #[test] + fn odd() { + test(&[0; 313], &"0".repeat(313 * 2)); + } + + #[test] + fn avx_works() { + let mut input = [0; 33]; + input[4] = 3; + input[16] 
= 3; + input[17] = 0x30; + input[21] = 1; + input[31] = 0x24; + test( + &input, + "\ + 0000000003000000\ + 0000000000000000\ + 0330000000010000\ + 0000000000000024\ + 00\ + ", + ); + } + + quickcheck::quickcheck! { + fn encode_equals_fallback(input: Vec) -> bool { + let mut space1 = vec![0; input.len() * 2]; + let mut space2 = vec![0; input.len() * 2]; + let a = hex_encode(&input, &mut space1).unwrap(); + let b = hex_encode_fallback(&input, &mut space2).unwrap(); + a == b + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn avx_equals_fallback(input: Vec) -> bool { + if !self::is_x86_feature_detected!("avx2") { + return true + } + let mut space1 = vec![0; input.len() * 2]; + let mut space2 = vec![0; input.len() * 2]; + let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() }; + let b = hex_encode_fallback(&input, &mut space2).unwrap(); + a == b + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn sse41_equals_fallback(input: Vec) -> bool { + if !self::is_x86_feature_detected!("avx2") { + return true + } + let mut space1 = vec![0; input.len() * 2]; + let mut space2 = vec![0; input.len() * 2]; + let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() }; + let b = hex_encode_fallback(&input, &mut space2).unwrap(); + a == b + } + } +} + +// Run these with `cargo +nightly bench --example hex -p stdarch` +#[cfg(test)] +mod benches { + extern crate rand; + extern crate test; + + use self::rand::Rng; + + use super::*; + + const SMALL_LEN: usize = 117; + const LARGE_LEN: usize = 1 * 1024 * 1024; + + fn doit( + b: &mut test::Bencher, + len: usize, + f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>, + ) { + let mut rng = rand::thread_rng(); + let input = std::iter::repeat(()) + .map(|()| rng.r#gen::()) + .take(len) + .collect::>(); + let mut dst = vec![0; input.len() * 2]; + b.bytes = len as u64; + b.iter(|| unsafe { + f(&input, &mut dst).unwrap(); + dst[0] + }); + } + + #[bench] + fn small_default(b: &mut 
test::Bencher) { + doit(b, SMALL_LEN, hex_encode); + } + + #[bench] + fn small_fallback(b: &mut test::Bencher) { + doit(b, SMALL_LEN, hex_encode_fallback); + } + + #[bench] + fn large_default(b: &mut test::Bencher) { + doit(b, LARGE_LEN, hex_encode); + } + + #[bench] + fn large_fallback(b: &mut test::Bencher) { + doit(b, LARGE_LEN, hex_encode_fallback); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + mod x86 { + use super::*; + + #[bench] + fn small_avx2(b: &mut test::Bencher) { + if self::is_x86_feature_detected!("avx2") { + doit(b, SMALL_LEN, hex_encode_avx2); + } + } + + #[bench] + fn small_sse41(b: &mut test::Bencher) { + if self::is_x86_feature_detected!("sse4.1") { + doit(b, SMALL_LEN, hex_encode_sse41); + } + } + + #[bench] + fn large_avx2(b: &mut test::Bencher) { + if self::is_x86_feature_detected!("avx2") { + doit(b, LARGE_LEN, hex_encode_avx2); + } + } + + #[bench] + fn large_sse41(b: &mut test::Bencher) { + if self::is_x86_feature_detected!("sse4.1") { + doit(b, LARGE_LEN, hex_encode_sse41); + } + } + } +} diff --git a/library/stdarch/examples/wasm.rs b/library/stdarch/examples/wasm.rs new file mode 100644 index 000000000000..ed313b15d1e3 --- /dev/null +++ b/library/stdarch/examples/wasm.rs @@ -0,0 +1,48 @@ +//! 
A simple slab allocator for pages in wasm + +#![cfg(target_arch = "wasm32")] + +use std::ptr; + +use core_arch::arch::wasm32::*; + +static mut HEAD: *mut *mut u8 = 0 as _; + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn page_alloc() -> *mut u8 { + unsafe { + if !HEAD.is_null() { + let next = *HEAD; + let ret = HEAD; + HEAD = next as *mut _; + return ret as *mut u8; + } + } + + let ret = memory_grow(0, 1); + + // if we failed to allocate a page then return null + if ret == usize::MAX { + return ptr::null_mut(); + } + + ((ret as u32) * page_size()) as *mut u8 +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn page_free(page: *mut u8) { + let page = page as *mut *mut u8; + unsafe { + *page = HEAD as *mut u8; + HEAD = page; + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn memory_used() -> usize { + (page_size() * (memory_size(0) as u32)) as usize +} + +fn page_size() -> u32 { + 64 * 1024 +} diff --git a/library/stdarch/intrinsics_data/arm_intrinsics.json b/library/stdarch/intrinsics_data/arm_intrinsics.json new file mode 100644 index 000000000000..9d58aad49cd4 --- /dev/null +++ b/library/stdarch/intrinsics_data/arm_intrinsics.json @@ -0,0 +1,119757 @@ +[ + { + "SIMD_ISA": "Neon", + "name": "__crc32b", + "arguments": [ + "uint32_t a", + "uint8_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Wm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32B" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32cb", + "arguments": [ + "uint32_t a", + "uint8_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Wm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32CB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32cd", + "arguments": [ + "uint32_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint32_t" + 
}, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Xm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32CX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32ch", + "arguments": [ + "uint32_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Wm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32CH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32cw", + "arguments": [ + "uint32_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Wm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32CW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32d", + "arguments": [ + "uint32_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Xm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32h", + "arguments": [ + "uint32_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Wm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32H" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "__crc32w", + "arguments": [ + "uint32_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Wn" + }, + "b": { + "register": "Wm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CRC32W" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaba_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t c" + 
], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaba_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaba_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaba_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaba_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaba_u8", + "arguments": [ + "uint8x8_t a", + 
"uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_high_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SABAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SABAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_high_s8", + "arguments": [ + "int16x8_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SABAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_high_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UABAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_high_u32", + "arguments": [ + "uint64x2_t a", 
+ "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UABAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_high_u8", + "arguments": [ + "uint16x8_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UABAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_s8", + "arguments": [ + "int16x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_u16", + "arguments": [ + 
"uint32x4_t a", + "uint16x4_t b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabal_u8", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabaq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabaq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", 
+ "name": "vabaq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabaq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabaq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabaq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vabd_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "UABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabd_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_high_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SABDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_high_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + 
"b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SABDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_high_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SABDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_high_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UABDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_high_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UABDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_high_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UABDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": 
"int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdl_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vabdq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": 
"Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabds_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabs_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": 
"float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vadd_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", 
+ "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vadd_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_high_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { 
+ "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_high_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_high_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_high_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_high_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_high_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + 
"register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddhn_u64", + "arguments": [ + 
"uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_high_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_high_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_high_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_high_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_high_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDL2" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_high_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": 
{ + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddl_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlv_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlv_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlv_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlv_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlv_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlv_u8", + "arguments": [ + "uint8x8_t a" + ], + 
"return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlvq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlvq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlvq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlvq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlvq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddlvq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDLV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_f16", + "arguments": [ + "float16x8_t a", + 
"float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_p128", + "arguments": [ + "poly128_t a", + "poly128_t b" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddv_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP", + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddvq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ 
+ "A64" + ], + "instructions": [ + [ + "ADDV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_high_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_high_s8", + "arguments": [ + "int16x8_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SADDW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_high_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_high_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_high_u8", + "arguments": [ + "uint16x8_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + 
}, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UADDW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_s8", + "arguments": [ + "int16x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaddw_u8", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b" 
+ ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaesdq_u8", + "arguments": [ + "uint8x16_t data", + "uint8x16_t key" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "data": { + "register": "Vd.16B" + }, + "key": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "AESD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaeseq_u8", + "arguments": [ + "uint8x16_t data", + "uint8x16_t key" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "data": { + "register": "Vd.16B" + }, + "key": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "AESE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaesimcq_u8", + "arguments": [ + "uint8x16_t data" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "data": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "AESIMC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaesmcq_u8", + "arguments": [ + "uint8x16_t data" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "data": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "AESMC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_s32", + "arguments": [ 
+ "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vand_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vandq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "AND" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_s64", + "arguments": [ + "int64x2_t 
a", + "int64x2_t b", + "int64x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "uint64x2_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbcaxq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BCAX" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vbic_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbic_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": 
"int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbicq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BIC" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_f16", + "arguments": [ + "uint16x4_t a", + "float16x4_t b", + "float16x4_t c" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", 
+ "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_f32", + "arguments": [ + "uint32x2_t a", + "float32x2_t b", + "float32x2_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_f64", + "arguments": [ + "uint64x1_t a", + "float64x1_t b", + "float64x1_t c" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_p16", + "arguments": [ + "uint16x4_t a", + "poly16x4_t b", + "poly16x4_t c" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b", + "poly64x1_t c" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_p8", + "arguments": [ + "uint8x8_t a", + "poly8x8_t b", + "poly8x8_t c" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ 
+ "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_s16", + "arguments": [ + "uint16x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_s32", + "arguments": [ + "uint32x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_s64", + "arguments": [ + "uint64x1_t a", + "int64x1_t b", + "int64x1_t c" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_s8", + "arguments": [ + "uint8x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + 
}, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b", + "uint64x1_t c" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_f16", + "arguments": [ + "uint16x8_t a", + "float16x8_t b", + "float16x8_t c" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_f32", + "arguments": [ + "uint32x4_t a", + "float32x4_t b", + "float32x4_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": 
"Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_f64", + "arguments": [ + "uint64x2_t a", + "float64x2_t b", + "float64x2_t c" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_p16", + "arguments": [ + "uint16x8_t a", + "poly16x8_t b", + "poly16x8_t c" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b", + "poly64x2_t c" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_p8", + "arguments": [ + "uint8x16_t a", + "poly8x16_t b", + "poly8x16_t c" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_s16", + "arguments": [ + "uint16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_s32", + "arguments": [ + "uint32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_s64", + "arguments": [ + "uint64x2_t a", + "int64x2_t b", + "int64x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_s8", + "arguments": [ + "uint8x16_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "uint64x2_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcadd_rot270_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H " + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcadd_rot270_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S " + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcadd_rot90_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vn.4H " + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcadd_rot90_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S " + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot270_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H " + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot270_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S " + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot270_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D " + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot90_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H " + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot90_f32", + 
"arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S " + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot90_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D " + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcage_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcage_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcage_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaged_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcageh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcageq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcageq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcageq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcages_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + 
"register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagt_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagt_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagtd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagtq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagtq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagtq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagts_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcale_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcale_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcale_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaled_f64", + "arguments": [ + 
"float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaleh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaleq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaleq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaleq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcales_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vcalt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcalt_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcalt_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaltd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcalth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaltq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaltq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaltq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcalts_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" 
+ }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceq_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqd_s64", + "arguments": [ + "int64_t 
a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vceqq_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqs_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzd_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" 
+ } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzd_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzs_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcge_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcged_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcged_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcged_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_s8", + "arguments": [ + "int8x16_t a", + 
"int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcges_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] 
+ ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + 
] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezd_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] 
+ }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezs_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" 
+ }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_u32", + "arguments": [ + 
"uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcgth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgts_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_s64", + "arguments": [ + 
"int64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzd_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_f64", + "arguments": [ + 
"float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzs_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vcle_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcle_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcled_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcled_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" 
+ }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcled_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcleq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcles_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclez_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezd_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { 
+ "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezs_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcls_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcls_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcls_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcls_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcls_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcls_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclsq_s16", + "arguments": [ + 
"int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclsq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclsq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclsq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclsq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclsq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclt_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + 
"return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_s16", + "arguments": [ + 
"int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMHI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclts_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_s16", + 
"arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzd_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_f16", + "arguments": [ + 
"float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltzs_f32", + 
"arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclz_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclz_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclz_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclz_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclz_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclz_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclzq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclzq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclzq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclzq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclzq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclzq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CLZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + 
"Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_lane_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_lane_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_laneq_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_laneq_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { 
+ "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot180_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot180_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot180_lane_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot180_lane_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot180_laneq_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + 
"float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot180_laneq_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_lane_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_lane_f32", + 
"arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_laneq_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_laneq_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot90_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot90_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vcmla_rot90_lane_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot90_lane_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot90_laneq_f16", + "arguments": [ + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot90_laneq_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": 
"Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_f64", + "arguments": [ + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_lane_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_lane_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_laneq_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + 
"register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_laneq_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_f64", + "arguments": [ + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_lane_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_lane_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_laneq_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot180_laneq_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": 
{}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_f64", + "arguments": [ + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_lane_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_lane_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_laneq_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot270_laneq_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": 
"float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_f64", + "arguments": [ + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_lane_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_lane_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + 
"Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_laneq_f16", + "arguments": [ + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmlaq_rot90_laneq_f32", + "arguments": [ + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcnt_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcnt_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcnt_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vcntq_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcntq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcntq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_f16", + "arguments": [ + "float16x4_t low", + "float16x4_t high" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" + }, + "low": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_f32", + "arguments": [ + "float32x2_t low", + "float32x2_t high" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.2S" + }, + "low": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_f64", + "arguments": [ + "float64x1_t low", + "float64x1_t high" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" + }, + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP", + 
"INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_p16", + "arguments": [ + "poly16x4_t low", + "poly16x4_t high" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" + }, + "low": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_p64", + "arguments": [ + "poly64x1_t low", + "poly64x1_t high" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" + }, + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_p8", + "arguments": [ + "poly8x8_t low", + "poly8x8_t high" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.8B" + }, + "low": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s16", + "arguments": [ + "int16x4_t low", + "int16x4_t high" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" + }, + "low": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s32", + "arguments": [ + "int32x2_t low", + "int32x2_t high" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.2S" + }, + "low": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s64", + "arguments": [ + "int64x1_t low", + "int64x1_t high" + ], 
+ "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" + }, + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s8", + "arguments": [ + "int8x8_t low", + "int8x8_t high" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.8B" + }, + "low": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u16", + "arguments": [ + "uint16x4_t low", + "uint16x4_t high" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" + }, + "low": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u32", + "arguments": [ + "uint32x2_t low", + "uint32x2_t high" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.2S" + }, + "low": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u64", + "arguments": [ + "uint64x1_t low", + "uint64x1_t high" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" + }, + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u8", + "arguments": [ + "uint8x8_t low", + "uint8x8_t high" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.8B" + }, + "low": 
{ + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_f32", + "arguments": [ + "float32x2_t a", + "const int lane1", + "float32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_f64", + "arguments": [ + "float64x1_t a", + "const int lane1", + "float64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_p16", + "arguments": [ + "poly16x4_t a", + "const int lane1", + "poly16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_p64", + "arguments": [ + "poly64x1_t a", + "const int lane1", + "poly64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, 
+ "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_p8", + "arguments": [ + "poly8x8_t a", + "const int lane1", + "poly8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s16", + "arguments": [ + "int16x4_t a", + "const int lane1", + "int16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s32", + "arguments": [ + "int32x2_t a", + "const int lane1", + "int32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s64", + "arguments": [ + "int64x1_t a", + "const int lane1", + "int64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] 
+ ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s8", + "arguments": [ + "int8x8_t a", + "const int lane1", + "int8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u16", + "arguments": [ + "uint16x4_t a", + "const int lane1", + "uint16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u32", + "arguments": [ + "uint32x2_t a", + "const int lane1", + "uint32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u64", + "arguments": [ + "uint64x1_t a", + "const int lane1", + "uint64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u8", + 
"arguments": [ + "uint8x8_t a", + "const int lane1", + "uint8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_f32", + "arguments": [ + "float32x2_t a", + "const int lane1", + "float32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_f64", + "arguments": [ + "float64x1_t a", + "const int lane1", + "float64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_p16", + "arguments": [ + "poly16x4_t a", + "const int lane1", + "poly16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_p64", + "arguments": [ + "poly64x1_t a", + "const int lane1", + 
"poly64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_p8", + "arguments": [ + "poly8x8_t a", + "const int lane1", + "poly8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s16", + "arguments": [ + "int16x4_t a", + "const int lane1", + "int16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s32", + "arguments": [ + "int32x2_t a", + "const int lane1", + "int32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s64", + "arguments": [ + "int64x1_t a", + "const int lane1", + "int64x2_t b", + "const int lane2" + ], + "return_type": { + 
"value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s8", + "arguments": [ + "int8x8_t a", + "const int lane1", + "int8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u16", + "arguments": [ + "uint16x4_t a", + "const int lane1", + "uint16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u32", + "arguments": [ + "uint32x2_t a", + "const int lane1", + "uint32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u64", + "arguments": [ + "uint64x1_t a", + "const int lane1", + "uint64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u8", + "arguments": [ + "uint8x8_t a", + "const int lane1", + "uint8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_f32", + "arguments": [ + "float32x4_t a", + "const int lane1", + "float32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_f64", + "arguments": [ + "float64x2_t a", + "const int lane1", + "float64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_p16", + "arguments": [ + "poly16x8_t a", + "const int lane1", + "poly16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.4H" + 
}, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_p64", + "arguments": [ + "poly64x2_t a", + "const int lane1", + "poly64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_p8", + "arguments": [ + "poly8x16_t a", + "const int lane1", + "poly8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s16", + "arguments": [ + "int16x8_t a", + "const int lane1", + "int16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s32", + "arguments": [ + "int32x4_t a", + "const int lane1", + "int32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + 
"lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s64", + "arguments": [ + "int64x2_t a", + "const int lane1", + "int64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s8", + "arguments": [ + "int8x16_t a", + "const int lane1", + "int8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u16", + "arguments": [ + "uint16x8_t a", + "const int lane1", + "uint16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u32", + "arguments": [ + "uint32x4_t a", + "const int lane1", + "uint32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": 
[ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u64", + "arguments": [ + "uint64x2_t a", + "const int lane1", + "uint64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u8", + "arguments": [ + "uint8x16_t a", + "const int lane1", + "uint8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_f32", + "arguments": [ + "float32x4_t a", + "const int lane1", + "float32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_f64", + "arguments": [ + "float64x2_t a", + "const int lane1", + "float64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] 
+ }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_p16", + "arguments": [ + "poly16x8_t a", + "const int lane1", + "poly16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_p64", + "arguments": [ + "poly64x2_t a", + "const int lane1", + "poly64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_p8", + "arguments": [ + "poly8x16_t a", + "const int lane1", + "poly8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s16", + "arguments": [ + "int16x8_t a", + "const int lane1", + "int16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcopyq_laneq_s32", + "arguments": [ + "int32x4_t a", + "const int lane1", + "int32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s64", + "arguments": [ + "int64x2_t a", + "const int lane1", + "int64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s8", + "arguments": [ + "int8x16_t a", + "const int lane1", + "int8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u16", + "arguments": [ + "uint16x8_t a", + "const int lane1", + "uint16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u32", + "arguments": [ + "uint32x4_t a", + "const 
int lane1", + "uint32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u64", + "arguments": [ + "uint64x2_t a", + "const int lane1", + "uint64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u8", + "arguments": [ + "uint8x16_t a", + "const int lane1", + "uint8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_f16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_f32", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcreate_f64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_p16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_p64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_p8", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s32", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ 
+ "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s8", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u32", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u8", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f16_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f16_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f16_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f64_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f64_s64", + "arguments": [ + 
"int64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f64_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f16_f32", + "arguments": [ + "float16x4_t r", + "float32x4_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f32_f64", + "arguments": [ + "float32x2_t r", + "float64x2_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f64_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f16_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f16_u16", + "arguments": [ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f32_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f32_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f64_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f64_u64", + "arguments": [ + "uint64x1_t a", + "const int n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vcvt_n_s16_f16", + "arguments": [ + "float16x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_s32_f32", + "arguments": [ + "float32x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_s64_f64", + "arguments": [ + "float64x1_t a", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_u16_f16", + "arguments": [ + "float16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_u32_f32", + "arguments": [ + "float32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_u64_f64", + "arguments": [ + "float64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcvta_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtad_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ 
+ [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtad_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_s16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_s32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_s64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_u16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_u32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_u64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_s32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_u32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtas_s32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "int32_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtas_u32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_f64_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_f64_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_f64_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_f64_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_s64_f64", + "arguments": [ + "float64_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_u64_f64", + "arguments": [ + "float64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_s16", + "arguments": [ + "int16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_u16", + "arguments": [ + "uint16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_u32", + "arguments": [ + "uint32_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_u16", + "arguments": [ + "uint16_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } 
+ }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_u32", + "arguments": [ + "uint32_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_s16_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_s32_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_s64_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_u16_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" 
+ }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_u32_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_u64_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_s16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_s32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_s64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_u16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + 
"A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_u32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_u64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtm_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtm_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtm_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtm_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtm_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtm_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmd_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmd_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmh_s16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmh_s32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmh_s64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmh_u16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmh_u32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmh_u64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmq_s32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmq_u32_f32", + "arguments": [ + 
"float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtmq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtms_s32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtms_u32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTMU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtn_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtn_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtn_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vcvtn_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtn_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtn_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnd_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnd_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnh_s16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnh_s32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ 
+ "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnh_s64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnh_u16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnh_u32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnh_u64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnq_s32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnq_u32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtnq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtns_s32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtns_u32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTNU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtp_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtp_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtp_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtp_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtp_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtp_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpd_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpd_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtph_s16_f16", + "arguments": [ + "float16_t a" + ], 
+ "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtph_s32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtph_s64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtph_u16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtph_u32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtph_u64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpq_s32_f32", + 
"arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpq_u32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtpq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtps_s32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtps_u32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTPU" + ] + ] 
+ }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_f16_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_f16_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_f32_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_f32_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_f64_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_f64_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_f16_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_f16_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_f32_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_f32_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_f64_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_f64_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vcvtq_n_s16_f16", + "arguments": [ + "float16x8_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_s32_f32", + "arguments": [ + "float32x4_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_s64_f64", + "arguments": [ + "float64x2_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_u16_f16", + "arguments": [ + "float16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_u32_f32", + "arguments": [ + "float32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_n_u64_f64", + "arguments": [ + "float64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_s32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_u32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { 
+ "SIMD_ISA": "Neon", + "name": "vcvts_f32_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvts_f32_u32", + "arguments": [ + "uint32_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvts_n_f32_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvts_n_f32_u32", + "arguments": [ + "uint32_t a", + "const int n" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvts_n_s32_f32", + "arguments": [ + "float32_t a", + "const int n" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvts_n_u32_f32", + "arguments": [ + "float32_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcvts_s32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvts_u32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtx_f32_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTXN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtx_high_f32_f64", + "arguments": [ + "float32x2_t r", + "float64x2_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTXN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtxd_f32_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTXN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdiv_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdiv_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdiv_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdivh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdivq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdivq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdivq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FDIV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdot_lane_s32", + "arguments": [ + "int32x2_t r", + "int8x8_t a", + "int8x8_t b", + "const int lane" + ], + 
"return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdot_lane_u32", + "arguments": [ + "uint32x2_t r", + "uint8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdot_laneq_s32", + "arguments": [ + "int32x2_t r", + "int8x8_t a", + "int8x16_t b", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdot_laneq_u32", + "arguments": [ + "uint32x2_t r", + "uint8x8_t a", + "uint8x16_t b", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdot_s32", + "arguments": [ + "int32x2_t r", + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "r": { + 
"register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdot_u32", + "arguments": [ + "uint32x2_t r", + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdotq_lane_s32", + "arguments": [ + "int32x4_t r", + "int8x16_t a", + "int8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdotq_lane_u32", + "arguments": [ + "uint32x4_t r", + "uint8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdotq_laneq_s32", + "arguments": [ + "int32x4_t r", + "int8x16_t a", + "int8x16_t b", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdotq_laneq_u32", + "arguments": [ 
+ "uint32x4_t r", + "uint8x16_t a", + "uint8x16_t b", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdotq_s32", + "arguments": [ + "int32x4_t r", + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdotq_u32", + "arguments": [ + "uint32x4_t r", + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_f16", + "arguments": [ + "float16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_f32", + "arguments": [ + "float32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vdup_lane_f64", + "arguments": [ + "float64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_p16", + "arguments": [ + "poly16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_p64", + "arguments": [ + "poly64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_p8", + "arguments": [ + "poly8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_s16", + "arguments": [ + "int16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_s32", + "arguments": [ + "int32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + 
"Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_s64", + "arguments": [ + "int64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_s8", + "arguments": [ + "int8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_u16", + "arguments": [ + "uint16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_u32", + "arguments": [ + "uint32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_u64", + "arguments": [ + "uint64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_lane_u8", + "arguments": [ + "uint8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_f16", + "arguments": [ + "float16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_f32", + "arguments": [ + "float32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_f64", + "arguments": [ + "float64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_p16", + "arguments": [ + "poly16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_p64", + "arguments": [ + 
"poly64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_p8", + "arguments": [ + "poly8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_s16", + "arguments": [ + "int16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_s32", + "arguments": [ + "int32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_s64", + "arguments": [ + "int64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_s8", + "arguments": [ + "int8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + 
} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_u16", + "arguments": [ + "uint16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_u32", + "arguments": [ + "uint32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_u64", + "arguments": [ + "uint64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_laneq_u8", + "arguments": [ + "uint8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_f16", + "arguments": [ + "float16_t value" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_f32", + "arguments": [ + "float32_t value" + ], + "return_type": { + "value": "float32x2_t" + }, + 
"Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_f64", + "arguments": [ + "float64_t value" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_p16", + "arguments": [ + "poly16_t value" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_p64", + "arguments": [ + "poly64_t value" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_p8", + "arguments": [ + "poly8_t value" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_s16", + "arguments": [ + "int16_t value" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_s32", + "arguments": [ + "int32_t value" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vdup_n_s64", + "arguments": [ + "int64_t value" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_s8", + "arguments": [ + "int8_t value" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_u16", + "arguments": [ + "uint16_t value" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_u32", + "arguments": [ + "uint32_t value" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_u64", + "arguments": [ + "uint64_t value" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdup_n_u8", + "arguments": [ + "uint8_t value" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupb_lane_p8", + "arguments": [ + "poly8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "poly8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, 
+ "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupb_lane_s8", + "arguments": [ + "int8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupb_lane_u8", + "arguments": [ + "uint8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupb_laneq_p8", + "arguments": [ + "poly8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "poly8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupb_laneq_s8", + "arguments": [ + "int8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupb_laneq_u8", + "arguments": [ + "uint8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupd_lane_f64", + "arguments": [ 
+ "float64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupd_lane_s64", + "arguments": [ + "int64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupd_lane_u64", + "arguments": [ + "uint64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupd_laneq_f64", + "arguments": [ + "float64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupd_laneq_s64", + "arguments": [ + "int64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupd_laneq_u64", + "arguments": [ + "uint64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + 
} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_lane_f16", + "arguments": [ + "float16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_lane_p16", + "arguments": [ + "poly16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "poly16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_lane_s16", + "arguments": [ + "int16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_lane_u16", + "arguments": [ + "uint16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_laneq_f16", + "arguments": [ + "float16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_laneq_p16", + "arguments": [ + "poly16x8_t vec", + "const int lane" + ], 
+ "return_type": { + "value": "poly16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_laneq_s16", + "arguments": [ + "int16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vduph_laneq_u16", + "arguments": [ + "uint16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_f16", + "arguments": [ + "float16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_f32", + "arguments": [ + "float32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_f64", + "arguments": [ + "float64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + 
} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_p16", + "arguments": [ + "poly16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_p64", + "arguments": [ + "poly64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_p8", + "arguments": [ + "poly8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_s16", + "arguments": [ + "int16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_s32", + "arguments": [ + "int32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vdupq_lane_s64", + "arguments": [ + "int64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_s8", + "arguments": [ + "int8x8_t vec", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_u16", + "arguments": [ + "uint16x4_t vec", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_u32", + "arguments": [ + "uint32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_u64", + "arguments": [ + "uint64x1_t vec", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_lane_u8", + "arguments": [ + "uint8x8_t vec", + "const int lane" + ], + "return_type": { + "value": 
"uint8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_f16", + "arguments": [ + "float16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_f32", + "arguments": [ + "float32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_f64", + "arguments": [ + "float64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_p16", + "arguments": [ + "poly16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_p64", + "arguments": [ + "poly64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" 
+ ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_p8", + "arguments": [ + "poly8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_s16", + "arguments": [ + "int16x8_t vec", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_s32", + "arguments": [ + "int32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_s64", + "arguments": [ + "int64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_s8", + "arguments": [ + "int8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_u16", + "arguments": [ + "uint16x8_t vec", + "const int lane" + ], + "return_type": { + "value": 
"uint16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_u32", + "arguments": [ + "uint32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_u64", + "arguments": [ + "uint64x2_t vec", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_laneq_u8", + "arguments": [ + "uint8x16_t vec", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_f16", + "arguments": [ + "float16_t value" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_f32", + "arguments": [ + "float32_t value" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_f64", + "arguments": [ + "float64_t 
value" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_p16", + "arguments": [ + "poly16_t value" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_p64", + "arguments": [ + "poly64_t value" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_p8", + "arguments": [ + "poly8_t value" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_s16", + "arguments": [ + "int16_t value" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_s32", + "arguments": [ + "int32_t value" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_s64", + "arguments": [ + "int64_t value" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_s8", + "arguments": [ + "int8_t value" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_u16", + "arguments": [ + "uint16_t value" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_u32", + "arguments": [ + "uint32_t value" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_u64", + "arguments": [ + "uint64_t value" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdupq_n_u8", + "arguments": [ + "uint8_t value" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdups_lane_f32", + "arguments": [ + "float32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdups_lane_s32", + "arguments": [ 
+ "int32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdups_lane_u32", + "arguments": [ + "uint32x2_t vec", + "const int lane" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdups_laneq_f32", + "arguments": [ + "float32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdups_laneq_s32", + "arguments": [ + "int32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vdups_laneq_u32", + "arguments": [ + "uint32x4_t vec", + "const int lane" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b", + "int64x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "uint64x2_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor3q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EOR3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + 
"return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veor_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" 
+ ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "veorq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EOR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "const int n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "const int n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "const int n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b", + "const int n" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b", + "const int n" + ], + "return_type": { + "value": "poly64x1_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b", + "const int n" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "const int n" 
+ ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vext_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vextq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "const int n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "const int n" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "const int n" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b", + "const int n" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b", + "const int n" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b", + "const int n" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + 
"minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vextq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "EXT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x4_t c" + ], + "return_type": { + "value": "float16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x1_t c" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Da" + }, + "b": { + "register": "Dn" + }, + "c": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_lane_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vfma_lane_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_laneq_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_laneq_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_n_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16_t n" + ], + "return_type": { + "value": 
"float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H " + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_n_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32_t n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfma_n_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64_t n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Da" + }, + "b": { + "register": "Dn" + }, + "n": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmad_lane_f64", + "arguments": [ + "float64_t a", + "float64_t b", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmad_laneq_f64", + "arguments": [ + "float64_t a", + "float64_t b", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vfmah_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16_t c" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Ha" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmah_lane_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmah_laneq_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x8_t c" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x2_t c" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "c": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_lane_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_lane_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_lane_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_laneq_f16", + "arguments": [ + "float16x8_t a", + 
"float16x8_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_laneq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_laneq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_n_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16_t n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H " + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_n_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32_t n" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + 
"n": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmaq_n_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64_t n" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "register": "Vm.D[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmas_lane_f32", + "arguments": [ + "float32_t a", + "float32_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmas_laneq_f32", + "arguments": [ + "float32_t a", + "float32_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlal_high_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlal_lane_high_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + 
"value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlal_lane_low_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlal_laneq_high_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlal_laneq_low_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlal_low_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlalq_high_f16", + "arguments": 
[ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlalq_lane_high_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlalq_lane_low_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlalq_laneq_high_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlalq_laneq_low_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "FMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlalq_low_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlsl_high_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlsl_lane_high_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlsl_lane_low_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlsl_laneq_high_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.2S" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlsl_laneq_low_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlsl_low_f16", + "arguments": [ + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlslq_high_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlslq_lane_high_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlslq_lane_low_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + 
"r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlslq_laneq_high_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlslq_laneq_low_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmlslq_low_f16", + "arguments": [ + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x4_t c" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x1_t c" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Da" + }, + "b": { + "register": "Dn" + }, + "c": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_lane_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_lane_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vfms_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_laneq_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_laneq_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_n_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16_t n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H " + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_n_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32_t n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfms_n_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64_t n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Da" + }, + "b": { + "register": "Dn" + }, + "n": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsd_lane_f64", + "arguments": [ + "float64_t a", + "float64_t b", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsd_laneq_f64", + "arguments": [ + "float64_t a", + "float64_t b", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsh_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16_t c" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Ha" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsh_lane_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16x4_t 
v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsh_laneq_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x8_t c" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x2_t c" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "c": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + 
"FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_lane_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_lane_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_lane_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_laneq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_laneq_f32", + "arguments": [ + "float32x4_t a", + 
"float32x4_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_laneq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_n_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16_t n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H " + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_n_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32_t n" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_n_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64_t n" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "register": "Vm.D[0]" + } + }, + "Architectures": [ + "A64" + 
], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmss_lane_f32", + "arguments": [ + "float32_t a", + "float32_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmss_laneq_f32", + "arguments": [ + "float32_t a", + "float32_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": 
"poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { 
+ "SIMD_ISA": "Neon", + "name": "vget_high_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_f16", + "arguments": [ + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_f32", + "arguments": [ + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vget_lane_f64", + "arguments": [ + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_p16", + "arguments": [ + "poly16x4_t v", + "const int lane" + ], + "return_type": { + "value": "poly16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_p64", + "arguments": [ + "poly64x1_t v", + "const int lane" + ], + "return_type": { + "value": "poly64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_p8", + "arguments": [ + "poly8x8_t v", + "const int lane" + ], + "return_type": { + "value": "poly8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_s16", + "arguments": [ + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_s32", + "arguments": [ + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "lane": { + 
"minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_s64", + "arguments": [ + "int64x1_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_s8", + "arguments": [ + "int8x8_t v", + "const int lane" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_u16", + "arguments": [ + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_u32", + "arguments": [ + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_u64", + "arguments": [ + "uint64x1_t v", + "const int lane" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + 
[ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_u8", + "arguments": [ + "uint8x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_p8", + "arguments": [ + "poly8x16_t a" + ], + 
"return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": 
[ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_f16", + "arguments": [ + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_f32", + "arguments": [ + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_f64", + "arguments": [ + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_p16", + "arguments": [ + "poly16x8_t v", + "const int lane" + ], + "return_type": { + "value": "poly16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": 
"Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_p64", + "arguments": [ + "poly64x2_t v", + "const int lane" + ], + "return_type": { + "value": "poly64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_p8", + "arguments": [ + "poly8x16_t v", + "const int lane" + ], + "return_type": { + "value": "poly8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "v": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_s16", + "arguments": [ + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_s32", + "arguments": [ + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_s64", + "arguments": [ + "int64x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vgetq_lane_s8", + "arguments": [ + "int8x16_t v", + "const int lane" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "v": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_u16", + "arguments": [ + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_u32", + "arguments": [ + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_u64", + "arguments": [ + "uint64x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_u8", + "arguments": [ + "uint8x16_t v", + "const int lane" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "v": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhadd_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhadd_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhadd_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhadd_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhadd_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhadd_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vhaddq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhaddq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhaddq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhaddq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhaddq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhaddq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": 
"Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsub_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsub_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsub_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsub_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsub_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsub_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + 
"return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsubq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsubq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsubq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsubq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsubq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "UHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vhsubq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UHSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_p8", + 
"arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "ptr": { + 
"register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16_x2", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16_x3", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16_x4", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vld1_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f32_x2", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f32_x3", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f32_x4", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f64_x2", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f64_x3", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x3_t" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f64_x4", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x2_t src", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x1_t src", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + 
"maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x1_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x8_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x4_t src", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x2_t src", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_s64", + "arguments": [ + 
"int64_t const * ptr", + "int64x1_t src", + "const int lane" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x8_t src", + "const int lane" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x1_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": 
"Vt.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x8_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p16_x2", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p16_x3", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p16_x4", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + 
}, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p64_x2", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p64_x3", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p64_x4", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p8_x2", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p8_x3", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_p8_x4", + "arguments": [ + 
"poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s16_x2", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s16_x3", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s16_x4", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s32_x2", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s32_x3", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s32_x4", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s64_x2", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s64_x3", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s64_x4", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s8", + 
"arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s8_x2", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s8_x3", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_s8_x4", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u16_x2", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u16_x3", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" 
+ } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u16_x4", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u32_x2", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u32_x3", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u32_x4", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vld1_u64_x2", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u64_x3", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u64_x4", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u8_x2", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u8_x3", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_u8_x4", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x4_t" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + 
"return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16_x2", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16_x3", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16_x4", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f32_x2", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f32_x3", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f32_x4", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f64_x2", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f64_x3", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f64_x4", + "arguments": [ + 
"float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x4_t src", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x2_t src", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x8_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vld1q_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x2_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x16_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x8_t src", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x4_t src", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x2_t src", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + 
"register": "Xn" + }, + "src": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x16_t src", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x8_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_u8", + "arguments": [ + "uint8_t const 
* ptr", + "uint8x16_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p16_x2", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p16_x3", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p16_x4", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p64_x2", + "arguments": [ + "poly64_t const * ptr" + ], + 
"return_type": { + "value": "poly64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p64_x3", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p64_x4", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p8_x2", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p8_x3", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_p8_x4", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s16_x2", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s16_x3", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s16_x4", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s32_x2", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s32_x3", + "arguments": [ + "int32_t const * 
ptr" + ], + "return_type": { + "value": "int32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s32_x4", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s64_x2", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s64_x3", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s64_x4", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": 
[ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s8_x2", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s8_x3", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_s8_x4", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u16_x2", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u16_x3", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u16_x4", + 
"arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u32_x2", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u32_x3", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u32_x4", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u64_x2", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x2_t" + }, + "Arguments_Preparation": { + 
"ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u64_x3", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u64_x4", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u8_x2", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u8_x3", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_u8_x4", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + 
{ + "SIMD_ISA": "Neon", + "name": "vld2_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": 
"int16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", 
+ "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x2x2_t src", + "const int lane" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 
+ }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x1x2_t src", + "const int lane" + ], + "return_type": { + "value": "float64x1x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x1x2_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x1x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_s16", + "arguments": [ + "int16_t 
const * ptr", + "int16x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "int16x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x2x2_t src", + "const int lane" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x1x2_t src", + "const int lane" + ], + "return_type": { + "value": "int64x1x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "int8x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4H" + } 
+ }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x2x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x1x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x1x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + 
"LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x2_t" 
+ }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + 
"return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ 
+ "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x2x2_t src", + "const int lane" + ], + "return_type": { + "value": "float64x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x2x2_t src", + "const int lane" + ], + "return_type": { + "value": 
"poly64x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x16x2_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x2x2_t src", + "const int lane" + ], + "return_type": { + "value": "int64x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vld2q_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x16x2_t src", + "const int lane" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x2x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x2x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x16x2_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + 
}, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vld2q_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x3_t" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_f16", + "arguments": [ + "float16_t const * ptr" + ], + 
"return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "float32x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x1x3_t src", + "const int lane" + ], + "return_type": { + "value": "float64x1x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + 
"register": "Xn" + }, + "src": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x1x3_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x1x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "int16x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_s32", + "arguments": [ + "int32_t const * ptr", + 
"int32x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "int32x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x1x3_t src", + "const int lane" + ], + "return_type": { + "value": "int64x1x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "int8x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2S" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x1x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x1x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": 
"int16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + 
"return_type": { + "value": "poly8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ 
+ "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "float32x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "float64x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x16x3_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x16x3_t" + }, + 
"Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "int16x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "int32x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "int64x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x16x3_t src", + "const int lane" + ], + "return_type": { + "value": "int8x16x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vld3q_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x2x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x2x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x16x3_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x16x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x3_t" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] 
+ }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": 
"int8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", 
+ "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "float32x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x1x4_t src", + "const int lane" + ], + "return_type": { + "value": "float64x1x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + 
"register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x1x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x1x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "int16x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "int32x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x1x4_t src", + "const int 
lane" + ], + "return_type": { + "value": "int64x1x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "int8x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_u32", + "arguments": [ + "uint32_t const * ptr", + "uint32x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x1x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x1x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" 
+ ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x1x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": 
"float16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_u8", + "arguments": [ + "uint8_t const * ptr" + ], + 
"return_type": { + "value": "uint8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_f32", + "arguments": [ + "float32_t const * ptr" + ], + "return_type": { + "value": "float32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_f64", + "arguments": [ + "float64_t const * ptr" + ], + "return_type": { + "value": "float64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_f32", + "arguments": [ + "float32_t const * ptr", + "float32x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "float32x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": 
[ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_f64", + "arguments": [ + "float64_t const * ptr", + "float64x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "float64x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_p16", + "arguments": [ + "poly16_t const * ptr", + "poly16x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly16x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_p64", + "arguments": [ + "poly64_t const * ptr", + "poly64x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly64x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_p8", + "arguments": [ + "poly8_t const * ptr", + "poly8x16x4_t src", + "const int lane" + ], + "return_type": { + "value": "poly8x16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_s16", + "arguments": [ + "int16_t const * ptr", + "int16x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "int16x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 
0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_s32", + "arguments": [ + "int32_t const * ptr", + "int32x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "int32x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_s64", + "arguments": [ + "int64_t const * ptr", + "int64x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "int64x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_s8", + "arguments": [ + "int8_t const * ptr", + "int8x16x4_t src", + "const int lane" + ], + "return_type": { + "value": "int8x16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_u16", + "arguments": [ + "uint16_t const * ptr", + "uint16x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint16x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_u32", + "arguments": 
[ + "uint32_t const * ptr", + "uint32x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint32x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_u64", + "arguments": [ + "uint64_t const * ptr", + "uint64x2x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint64x2x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_u8", + "arguments": [ + "uint8_t const * ptr", + "uint8x16x4_t src", + "const int lane" + ], + "return_type": { + "value": "uint8x16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_p16", + "arguments": [ + "poly16_t const * ptr" + ], + "return_type": { + "value": "poly16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_p64", + "arguments": [ + "poly64_t const * ptr" + ], + "return_type": { + "value": "poly64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_p8", + "arguments": [ + "poly8_t const * ptr" + ], + "return_type": { + "value": "poly8x16x4_t" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_s16", + "arguments": [ + "int16_t const * ptr" + ], + "return_type": { + "value": "int16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_s32", + "arguments": [ + "int32_t const * ptr" + ], + "return_type": { + "value": "int32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_s64", + "arguments": [ + "int64_t const * ptr" + ], + "return_type": { + "value": "int64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_s8", + "arguments": [ + "int8_t const * ptr" + ], + "return_type": { + "value": "int8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_u16", + "arguments": [ + "uint16_t const * ptr" + ], + "return_type": { + "value": "uint16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_u32", + "arguments": [ + "uint32_t const * ptr" + ], + "return_type": { + "value": "uint32x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { 
+ "SIMD_ISA": "Neon", + "name": "vld4q_u64", + "arguments": [ + "uint64_t const * ptr" + ], + "return_type": { + "value": "uint64x2x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_u8", + "arguments": [ + "uint8_t const * ptr" + ], + "return_type": { + "value": "uint8x16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vldrq_p128", + "arguments": [ + "poly128_t const * ptr" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "LDR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_s16", + 
"arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmax_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "UMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnm_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnm_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmv_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmvq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMV" + ] + 
] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmvq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "SMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" 
+ ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "UMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ 
+ "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmin_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": 
{ + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnm_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnm_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmq_f64", + "arguments": [ + "float64x2_t 
a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmv_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmvq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmvq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ 
+ "A32", + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + 
], + "instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminv_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + 
"register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x1_t c" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_lane_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_lane_u16", + "arguments": [ + 
"uint16x4_t a", + "uint16x4_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_lane_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_laneq_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_laneq_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_laneq_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_n_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_n_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vmla_n_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + 
{ + "SIMD_ISA": "Neon", + "name": "vmla_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmla_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + 
}, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_lane_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_lane_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vmlal_high_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_laneq_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_laneq_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_n_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_n_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_n_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_n_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_s8", + "arguments": [ + "int16x8_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_high_u8", + "arguments": [ + "uint16x8_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t v", + "const 
int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_lane_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_lane_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_laneq_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_laneq_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_n_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_n_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ 
+ "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_n_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_n_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_s8", + "arguments": [ + "int16x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "SMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlal_u8", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x2_t c" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + 
"register": "N/A" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_lane_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vmlaq_lane_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_laneq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_laneq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_laneq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_n_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_n_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_n_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vmlaq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlaq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b", + "float64x1_t c" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_lane_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_lane_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_lane_u32", + "arguments": [ + "uint32x2_t a", + 
"uint32x2_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_laneq_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_laneq_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": 
"Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_laneq_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_n_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b", + "float32_t c" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_n_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_n_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "uint16_t c" + ], + 
"return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t 
b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmls_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ 
+ "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_lane_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_lane_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vmlsl_high_laneq_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_laneq_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_n_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_n_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_n_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + 
"register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_n_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_s8", + "arguments": [ + "int16x8_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + 
"register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_high_u8", + "arguments": [ + "uint16x8_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_lane_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16x4_t v", + 
"const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_lane_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_laneq_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_laneq_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_n_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_n_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_n_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_n_u32", + 
"arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_s8", + "arguments": [ + "int16x8_t a", + "int8x8_t b", + "int8x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b", + "uint16x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vmlsl_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b", + "uint32x2_t c" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsl_u8", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b", + "uint8x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b", + "float64x2_t c" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_lane_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_lane_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vmlsq_laneq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_laneq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_laneq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_n_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b", + "float32_t c" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "N/A" + }, + "b": { + "register": "N/A" + }, + "c": { + "register": "N/A" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RESULT[I]" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_n_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_n_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32_t c" + ], + 
"return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "int8x16_t c" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_u32", + "arguments": [ + "uint32x4_t a", + 
"uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmmlaq_s32", + "arguments": [ + "int32x4_t r", + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SMMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmmlaq_u32", + "arguments": [ + "uint32x4_t r", + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UMMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_f16", + "arguments": [ + "float16_t value" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_f32", + "arguments": [ + "float32_t value" + ], + "return_type": { + "value": "float32x2_t" + }, + 
"Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_f64", + "arguments": [ + "float64_t value" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_p16", + "arguments": [ + "poly16_t value" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_p8", + "arguments": [ + "poly8_t value" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_s16", + "arguments": [ + "int16_t value" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_s32", + "arguments": [ + "int32_t value" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_s64", + "arguments": [ + "int64_t value" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vmov_n_s8", + "arguments": [ + "int8_t value" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_u16", + "arguments": [ + "uint16_t value" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_u32", + "arguments": [ + "uint32_t value" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_u64", + "arguments": [ + "uint64_t value" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmov_n_u8", + "arguments": [ + "uint8_t value" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_high_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_high_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_high_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_high_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_high_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_high_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovl_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_high_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_high_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_high_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_high_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_high_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_high_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovn_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "XTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_f16", + "arguments": [ + "float16_t value" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_f32", + "arguments": [ + "float32_t value" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_f64", + "arguments": [ + "float64_t value" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_p16", + 
"arguments": [ + "poly16_t value" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_p8", + "arguments": [ + "poly8_t value" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_s16", + "arguments": [ + "int16_t value" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_s32", + "arguments": [ + "int32_t value" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_s64", + "arguments": [ + "int64_t value" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_s8", + "arguments": [ + "int8_t value" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_u16", + "arguments": [ + "uint16_t value" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": 
[ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_u32", + "arguments": [ + "uint32_t value" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_u64", + "arguments": [ + "uint64_t value" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_u8", + "arguments": [ + "uint8_t value" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 
0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_f32", + "arguments": [ + "float32x2_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_f64", + "arguments": [ + "float64x1_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + 
"value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_u16", + "arguments": [ + "uint16x4_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_u32", + "arguments": [ + "uint32x2_t a", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_f16", + "arguments": [ + "float16x4_t a", + "float16_t n" + 
], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_f32", + "arguments": [ + "float32x2_t a", + "float32_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_f64", + "arguments": [ + "float64x1_t a", + "float64_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Vm.D[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_s16", + "arguments": [ + "int16x4_t a", + "int16_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_s32", + "arguments": [ + "int32x2_t a", + "int32_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "PMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + 
"register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmuld_lane_f64", + "arguments": [ + "float64_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmuld_laneq_f64", + "arguments": [ + "float64_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vmulh_lane_f16", + "arguments": [ + "float16_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulh_laneq_f16", + "arguments": [ + "float16_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": 
[ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_lane_u32", + "arguments": [ + "uint32x4_t a", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_laneq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_laneq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + 
"minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_n_s16", + "arguments": [ + "int16x8_t a", + "int16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_n_s32", + "arguments": [ + "int32x4_t a", + "int32_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "PMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t 
b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "PMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_high_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vmull_high_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_lane_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_lane_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_laneq_u16", + "arguments": [ + "uint16x4_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_laneq_u32", + "arguments": [ + "uint32x2_t a", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_n_s16", + "arguments": [ + "int16x4_t a", + "int16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" 
+ ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_n_s32", + "arguments": [ + "int32x2_t a", + "int32_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_p64", + "arguments": [ + "poly64_t a", + "poly64_t b" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.1D" + }, + "b": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "PMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "PMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmull_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", 
+ "name": "vmulq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_f16", + "arguments": [ + "float16x8_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_f32", + "arguments": [ + "float32x4_t a", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_f64", + "arguments": [ + "float64x2_t a", + "float64x1_t v", + "const 
int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_u32", + "arguments": [ + "uint32x4_t a", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_n_f16", + "arguments": [ + "float16x8_t a", + "float16_t n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_n_f32", + "arguments": [ + "float32x4_t a", + "float32_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_n_f64", + "arguments": [ + "float64x2_t a", + "float64_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.D[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vmulq_n_s16", + "arguments": [ + "int16x8_t a", + "int16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_n_s32", + "arguments": [ + "int32x4_t a", + "int32_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "PMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, 
+ "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmuls_lane_f32", + "arguments": [ + "float32_t a", + "float32x2_t v", + "const int lane" + ], + 
"return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmuls_laneq_f32", + "arguments": [ + "float32_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + 
}, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_lane_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_lane_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_laneq_f32", + "arguments": [ + "float32x2_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_laneq_f64", + "arguments": [ + "float64x1_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { 
+ "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_n_f16", + "arguments": [ + "float16x4_t a", + "float16_t n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxd_lane_f64", + "arguments": [ + "float64_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxd_laneq_f64", + "arguments": [ + "float64_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxh_lane_f16", + "arguments": [ + "float16_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxh_laneq_f16", + "arguments": [ + "float16_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_lane_f16", + "arguments": [ + "float16x8_t a", + 
"float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_lane_f32", + "arguments": [ + "float32x4_t a", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_lane_f64", + "arguments": [ + "float64x2_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_laneq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_laneq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vmulxq_laneq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_n_f16", + "arguments": [ + "float16x8_t a", + "float16_t n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxs_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxs_lane_f32", + "arguments": [ + "float32_t a", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxs_laneq_f32", + "arguments": [ + "float32_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_p8", + "arguments": [ + "poly8x8_t a" + ], + 
"return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvn_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + 
{ + "SIMD_ISA": "Neon", + "name": "vmvnq_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvnq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvnq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvnq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvnq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvnq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmvnq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + 
} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MVN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vneg_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_s32", + "arguments": [ + "int32x4_t a" + ], 
+ "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorn_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vornq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" 
+ } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vornq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": 
"uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorr_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vorrq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vorrq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { 
+ "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ORR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadal_s16", + "arguments": [ + "int32x2_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadal_s32", + "arguments": [ + "int64x1_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + }, + "b": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadal_s8", + "arguments": [ + "int16x4_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadal_u16", + "arguments": [ + "uint32x2_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadal_u32", + "arguments": [ + "uint64x1_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + }, + "b": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadal_u8", + "arguments": [ + "uint16x4_t a", + 
"uint8x8_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadalq_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadalq_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadalq_s8", + "arguments": [ + "int16x8_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadalq_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadalq_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "UADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadalq_u8", + "arguments": [ + "uint16x8_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADALP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddd_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddd_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddd_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddl_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddl_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddl_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddl_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddl_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddl_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddlq_s16", + 
"arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddlq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddlq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddlq_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddlq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddlq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UADDLP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, 
+ "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { 
+ "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpadds_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_u32", + "arguments": [ 
+ "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnm_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnmq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnmq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnmqd_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnms_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + 
], + "instructions": [ + [ + "UMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxqd_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxs_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnm_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnmq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnmq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnmqd_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminnms_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpminqd_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vpmins_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabs_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabs_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabs_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabs_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsb_s8", + "arguments": [ + "int8_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsh_s16", + "arguments": [ + "int16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", 
+ "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabsq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqabss_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vqadd_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadd_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddb_s8", + "arguments": [ + "int8_t a", + "int8_t b" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddb_u8", + "arguments": [ + "uint8_t a", + "uint8_t b" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddh_u16", + "arguments": [ + "uint16_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", 
+ "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": 
"uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqaddq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadds_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqadds_u32", + "arguments": [ + "uint32_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": 
"Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_n_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_n_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_s16", + 
"arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": 
"Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_n_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_n_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlal_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + 
"int32x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlalh_lane_s16", + "arguments": [ + "int32_t a", + "int16_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlalh_laneq_s16", + "arguments": [ + "int32_t a", + "int16_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlalh_s16", + "arguments": [ + "int32_t a", + "int16_t b", + "int16_t c" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlals_lane_s32", + "arguments": [ + "int64_t a", + "int32_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" 
+ ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlals_laneq_s32", + "arguments": [ + "int64_t a", + "int32_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlals_s32", + "arguments": [ + "int64_t a", + "int32_t b", + "int32_t c" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Sn" + }, + "c": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLAL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + 
"return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_n_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_n_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { 
+ "SIMD_ISA": "Neon", + "name": "vqdmlsl_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_lane_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_lane_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_laneq_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_laneq_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { 
+ "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_n_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_n_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsl_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vqdmlslh_lane_s16", + "arguments": [ + "int32_t a", + "int16_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlslh_laneq_s16", + "arguments": [ + "int32_t a", + "int16_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlslh_s16", + "arguments": [ + "int32_t a", + "int16_t b", + "int16_t c" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsls_lane_s32", + "arguments": [ + "int64_t a", + "int32_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsls_laneq_s32", + "arguments": [ + "int64_t a", + "int32_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Sn" + }, + 
"lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmlsls_s32", + "arguments": [ + "int64_t a", + "int32_t b", + "int32_t c" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Sn" + }, + "c": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMLSL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + 
}, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_n_s16", + "arguments": [ + "int16x4_t a", + "int16_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_n_s32", + "arguments": [ + "int32x2_t a", + "int32_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulh_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhh_lane_s16", + "arguments": [ + "int16_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, 
+ "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhh_laneq_s16", + "arguments": [ + "int16_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + 
"register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_n_s16", + "arguments": [ + "int16x8_t a", + "int16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_n_s32", + "arguments": [ + "int32x4_t a", + "int32_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vqdmulhs_lane_s32", + "arguments": [ + "int32_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhs_laneq_s32", + "arguments": [ + "int32_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulhs_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vqdmull_high_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_n_s16", + "arguments": [ + "int16x8_t a", + "int16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_n_s32", + "arguments": [ + "int32x4_t a", + "int32_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_high_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": 
"int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_n_s16", + "arguments": [ + "int16x4_t a", + "int16_t b" 
+ ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_n_s32", + "arguments": [ + "int32x2_t a", + "int32_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmull_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmullh_lane_s16", + "arguments": [ + "int16_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmullh_laneq_s16", + "arguments": [ + "int16_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + 
"minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmullh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulls_lane_s32", + "arguments": [ + "int32_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulls_laneq_s32", + "arguments": [ + "int32_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqdmulls_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQDMULL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_high_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTN2" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vqmovn_high_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_high_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_high_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQXTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_high_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQXTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_high_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQXTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQXTN" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovn_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovnd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovnd_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } 
+ }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovnh_s16", + "arguments": [ + "int16_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovnh_u16", + "arguments": [ + "uint16_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovns_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovns_u32", + "arguments": [ + "uint32_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQXTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovun_high_s16", + "arguments": [ + "uint8x8_t r", + "int16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovun_high_s32", + "arguments": [ + "uint16x4_t r", + "int32x4_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovun_high_s64", + "arguments": [ + "uint32x2_t r", + 
"int64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovun_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQXTUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovun_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQXTUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovun_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQXTUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovund_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovunh_s16", + "arguments": [ + "int16_t a" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQXTUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqmovuns_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + 
"SQXTUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqneg_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqneg_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqneg_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqneg_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegb_s8", + "arguments": [ + "int8_t a" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegd_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegh_s16", + "arguments": [ + "int16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegq_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegq_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegq_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqnegs_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlah_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vqrdmlah_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlah_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlah_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlah_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlah_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { 
+ "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahh_lane_s16", + "arguments": [ + "int16_t a", + "int16_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahh_laneq_s16", + "arguments": [ + "int16_t a", + "int16_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahh_s16", + "arguments": [ + "int16_t a", + "int16_t b", + "int16_t c" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vqrdmlahq_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahs_lane_s32", + "arguments": [ + "int32_t a", + "int32_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahs_laneq_s32", + "arguments": [ + "int32_t a", + "int32_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLAH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlahs_s32", + "arguments": [ + "int32_t a", + "int32_t b", + "int32_t c" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "c": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlsh_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlsh_lane_s32", + 
"arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlsh_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlsh_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlsh_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "int16x4_t c" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlsh_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "int32x2_t c" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + 
"register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshh_lane_s16", + "arguments": [ + "int16_t a", + "int16_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshh_laneq_s16", + "arguments": [ + "int16_t a", + "int16_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshh_s16", + "arguments": [ + "int16_t a", + "int16_t b", + "int16_t c" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshq_lane_s32", + "arguments": [ + 
"int32x4_t a", + "int32x4_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" 
+ }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshs_lane_s32", + "arguments": [ + "int32_t a", + "int32_t b", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshs_laneq_s32", + "arguments": [ + "int32_t a", + "int32_t b", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmlshs_s32", + "arguments": [ + "int32_t a", + "int32_t b", + "int32_t c" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + }, + "c": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMLSH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_lane_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_lane_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t v", + "const int lane" + ], + 
"return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_laneq_s16", + "arguments": [ + "int16x4_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_laneq_s32", + "arguments": [ + "int32x2_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_n_s16", + "arguments": [ + "int16x4_t a", + "int16_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_n_s32", + "arguments": [ + "int32x2_t a", + "int32_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": 
"int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulh_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhh_lane_s16", + "arguments": [ + "int16_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhh_laneq_s16", + "arguments": [ + "int16_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_lane_s16", + "arguments": [ + "int16x8_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + 
"minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_lane_s32", + "arguments": [ + "int32x4_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_laneq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_n_s16", + "arguments": [ + "int16x8_t a", + "int16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_n_s32", + "arguments": [ + "int32x4_t a", + "int32_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.4S" + }, + "b": { + "register": "Vm.S[0]" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhs_lane_s32", + "arguments": [ + "int32_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhs_laneq_s32", + "arguments": [ + "int32_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrdmulhs_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "SQRDMULH" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_u16", + "arguments": [ + "uint16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_u32", + "arguments": [ + "uint32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_u64", + "arguments": [ + "uint64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshl_u8", + "arguments": [ + "uint8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlb_s8", + "arguments": [ + "int8_t a", + "int8_t b" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlb_u8", + "arguments": [ + "uint8_t a", + "int8_t b" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshld_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshld_u64", + "arguments": [ + "uint64_t a", + "int64_t b" + ], + 
"return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlh_u16", + "arguments": [ + "uint16_t a", + "int16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vqrshlq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_u16", + "arguments": [ + "uint16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_u32", + "arguments": [ + "uint32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_u64", + "arguments": [ + "uint64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshlq_u8", + "arguments": [ + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshls_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } 
+ }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshls_u32", + "arguments": [ + "uint32_t a", + "int32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_high_n_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_high_n_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_high_n_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_high_n_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": 
[ + "A64" + ], + "instructions": [ + [ + "UQRSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_high_n_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_high_n_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHRN" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrn_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrnd_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrnd_n_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrnh_n_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrnh_n_u16", + "arguments": [ + "uint16_t a", + "const int n" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrns_n_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrns_n_u32", + "arguments": [ + "uint32_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQRSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrun_high_n_s16", + "arguments": [ + "uint8x8_t r", + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrun_high_n_s32", + "arguments": [ + "uint16x4_t r", + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ 
+ [ + "SQRSHRUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrun_high_n_s64", + "arguments": [ + "uint32x2_t r", + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrun_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrun_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrun_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQRSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrund_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshrunh_n_s16", + "arguments": [ + "int16_t 
a", + "const int n" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqrshruns_n_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQRSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_s8", + "arguments": [ + "int8x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_u16", + "arguments": [ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_u64", + "arguments": [ + "uint64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_n_u8", + "arguments": [ + "uint8x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + 
"return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_u16", + "arguments": [ + "uint16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_u32", + "arguments": [ + "uint32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_u64", + "arguments": [ + "uint64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] 
+ ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshl_u8", + "arguments": [ + "uint8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlb_n_s8", + "arguments": [ + "int8_t a", + "const int n" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlb_n_u8", + "arguments": [ + "uint8_t a", + "const int n" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlb_s8", + "arguments": [ + "int8_t a", + "int8_t b" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlb_u8", + "arguments": [ + "uint8_t a", + "int8_t b" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshld_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshld_n_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshld_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshld_u64", + "arguments": [ + "uint64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlh_n_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlh_n_u16", + "arguments": [ + "uint16_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + 
"A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlh_u16", + "arguments": [ + "uint16_t a", + "int16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_s8", + "arguments": [ + "int8x16_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": 
"uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_n_u8", + "arguments": [ + "uint8x16_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], 
+ "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_u16", + "arguments": [ + "uint16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_u32", + "arguments": [ + "uint32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_u64", + "arguments": [ + "uint64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlq_u8", + "arguments": [ + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshls_n_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshls_n_u32", + "arguments": [ + "uint32_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshls_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshls_u32", + "arguments": [ + "uint32_t a", + "int32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlu_n_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlu_n_s32", + "arguments": [ + "int32x2_t a", 
+ "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlu_n_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlu_n_s8", + "arguments": [ + "int8x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlub_n_s8", + "arguments": [ + "int8_t a", + "const int n" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlud_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshluh_n_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], 
+ "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshluq_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshluq_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshluq_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshluq_n_s8", + "arguments": [ + "int8x16_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshlus_n_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHLU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_high_n_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "const int n" + 
], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_high_n_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_high_n_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_high_n_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_high_n_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_high_n_u64", + "arguments": 
[ + "uint32x2_t r", + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrn_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrnd_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrnd_n_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrnh_n_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrnh_n_u16", + "arguments": [ + "uint16_t a", + "const int n" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrns_n_s32", + 
"arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrns_n_u32", + "arguments": [ + "uint32_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrun_high_n_s16", + "arguments": [ + "uint8x8_t r", + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrun_high_n_s32", + "arguments": [ + "uint16x4_t r", + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrun_high_n_s64", + "arguments": [ + "uint32x2_t r", + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRUN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrun_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + 
"return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrun_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrun_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrund_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshrunh_n_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqshruns_n_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "SQSHRUN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsub_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubb_s8", + "arguments": [ + "int8_t a", + "int8_t b" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubb_u8", + "arguments": [ + "uint8_t a", + "uint8_t b" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bn" + }, + "b": { + "register": "Bm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubh_s16", + "arguments": [ + "int16_t a", + "int16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubh_u16", + "arguments": [ + "uint16_t a", + "uint16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + 
], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubs_s32", + "arguments": [ + "int32_t a", + "int32_t b" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + 
"SQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqsubs_u32", + "arguments": [ + "uint32_t a", + "uint32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UQSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl1_p8", + "arguments": [ + "poly8x16_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl1_s8", + "arguments": [ + "int8x16_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl1_u8", + "arguments": [ + "uint8x16_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl1q_p8", + "arguments": [ + "poly8x16_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl1q_s8", + "arguments": [ + "int8x16_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": 
[ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl1q_u8", + "arguments": [ + "uint8x16_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl2_p8", + "arguments": [ + "poly8x16x2_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl2_s8", + "arguments": [ + "int8x16x2_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl2_u8", + "arguments": [ + "uint8x16x2_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl2q_p8", + "arguments": [ + "poly8x16x2_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl2q_s8", + "arguments": [ + "int8x16x2_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" 
+ }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl2q_u8", + "arguments": [ + "uint8x16x2_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl3_p8", + "arguments": [ + "poly8x16x3_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl3_s8", + "arguments": [ + "int8x16x3_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl3_u8", + "arguments": [ + "uint8x16x3_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl3q_p8", + "arguments": [ + "poly8x16x3_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl3q_s8", + "arguments": [ + "int8x16x3_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + 
}, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl3q_u8", + "arguments": [ + "uint8x16x3_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl4_p8", + "arguments": [ + "poly8x16x4_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl4_s8", + "arguments": [ + "int8x16x4_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl4_u8", + "arguments": [ + "uint8x16x4_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl4q_p8", + "arguments": [ + "poly8x16x4_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl4q_s8", + "arguments": [ + "int8x16x4_t t", + 
"uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbl4q_u8", + "arguments": [ + "uint8x16x4_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx1_p8", + "arguments": [ + "poly8x8_t a", + "poly8x16_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx1_s8", + "arguments": [ + "int8x8_t a", + "int8x16_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx1_u8", + "arguments": [ + "uint8x8_t a", + "uint8x16_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx1q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx1q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx1q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx2_p8", + "arguments": [ + "poly8x8_t a", + "poly8x16x2_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx2_s8", + "arguments": [ + "int8x8_t a", + "int8x16x2_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx2_u8", + "arguments": [ + "uint8x8_t a", + "uint8x16x2_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx2q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16x2_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx2q_s8", + "arguments": [ + "int8x16_t a", + "int8x16x2_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx2q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16x2_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx3_p8", + "arguments": [ + "poly8x8_t a", + "poly8x16x3_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx3_s8", + "arguments": [ + "int8x8_t a", + "int8x16x3_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" 
+ }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx3_u8", + "arguments": [ + "uint8x8_t a", + "uint8x16x3_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx3q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16x3_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx3q_s8", + "arguments": [ + "int8x16_t a", + "int8x16x3_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx3q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16x3_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx4_p8", + "arguments": [ + "poly8x8_t a", + "poly8x16x4_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, 
+ "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx4_s8", + "arguments": [ + "int8x8_t a", + "int8x16x4_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx4_u8", + "arguments": [ + "uint8x8_t a", + "uint8x16x4_t t", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "idx": { + "register": "Vm.8B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx4q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16x4_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx4q_s8", + "arguments": [ + "int8x16_t a", + "int8x16x4_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": { + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vqtbx4q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16x4_t t", + "uint8x16_t idx" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "idx": 
{ + "register": "Vm.16B" + }, + "t": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_high_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_high_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_high_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_high_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_high_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": 
{ + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_high_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RADDHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RADDHN" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vraddhn_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vraddhn_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RADDHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrax1q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RAX1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrbit_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RBIT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrbit_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RBIT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrbit_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RBIT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrbitq_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": 
"poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RBIT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrbitq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RBIT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrbitq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RBIT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpe_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpe_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpe_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpe_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecped_f64", + 
"arguments": [ + "float64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpes_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] 
+ }, + { + "SIMD_ISA": "Neon", + "name": "vrecps_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecps_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecps_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpss_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpxd_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpxh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpxs_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPX" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vreinterpret_f16_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_s64", + "arguments": [ + "int64x1_t a" + ], 
+ "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_f32", + 
"arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vreinterpret_p64_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_f64", + "arguments": [ + 
"float64x1_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_f64", + 
"arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vreinterpret_s32_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vreinterpret_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vreinterpret_s8_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vreinterpret_u16_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s64", + "arguments": [ + "int64x1_t a" + ], + 
"return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_s64", + 
"arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vreinterpret_u8_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" 
+ ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_f16", + "arguments": [ + "float16x8_t a" + ], + 
"return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": 
[ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vreinterpretq_f64_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "poly128_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vreinterpretq_p128_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vreinterpretq_p16_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p16_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "poly16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vreinterpretq_p8_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" 
+ ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + 
"value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_u16", + "arguments": [ + "uint16x8_t a" 
+ ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_s8", + 
"arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vreinterpretq_s8_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_u8", + "arguments": [ + "uint8x16_t a" + ], + 
"return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + 
"A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_u64", 
+ "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vreinterpretq_u64_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_p64", + "arguments": [ + "poly64x2_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_s64", + "arguments": [ + "int64x2_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev16_p8", + "arguments": [ + "poly8x8_t vec" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV16" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev16_s8", + "arguments": [ + "int8x8_t vec" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV16" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev16_u8", + "arguments": [ + "uint8x8_t vec" + ], + "return_type": { + "value": 
"uint8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV16" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev16q_p8", + "arguments": [ + "poly8x16_t vec" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV16" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev16q_s8", + "arguments": [ + "int8x16_t vec" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV16" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev16q_u8", + "arguments": [ + "uint8x16_t vec" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV16" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32_p16", + "arguments": [ + "poly16x4_t vec" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32_p8", + "arguments": [ + "poly8x8_t vec" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32_s16", + "arguments": [ + "int16x4_t vec" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32_s8", + "arguments": [ + "int8x8_t vec" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32_u16", + "arguments": [ + "uint16x4_t vec" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32_u8", + "arguments": [ + "uint8x8_t vec" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32q_p16", + "arguments": [ + "poly16x8_t vec" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32q_p8", + "arguments": [ + "poly8x16_t vec" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32q_s16", + "arguments": [ + "int16x8_t vec" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32q_s8", + "arguments": [ + "int8x16_t vec" + ], + "return_type": { + 
"value": "int8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32q_u16", + "arguments": [ + "uint16x8_t vec" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev32q_u8", + "arguments": [ + "uint8x16_t vec" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV32" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_f16", + "arguments": [ + "float16x4_t vec" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_f32", + "arguments": [ + "float32x2_t vec" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_p16", + "arguments": [ + "poly16x4_t vec" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_p8", + "arguments": [ + "poly8x8_t vec" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], 
+ "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_s16", + "arguments": [ + "int16x4_t vec" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_s32", + "arguments": [ + "int32x2_t vec" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_s8", + "arguments": [ + "int8x8_t vec" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_u16", + "arguments": [ + "uint16x4_t vec" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_u32", + "arguments": [ + "uint32x2_t vec" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_u8", + "arguments": [ + "uint8x8_t vec" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_f16", + "arguments": [ + "float16x8_t vec" + ], + "return_type": { + 
"value": "float16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_f32", + "arguments": [ + "float32x4_t vec" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_p16", + "arguments": [ + "poly16x8_t vec" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_p8", + "arguments": [ + "poly8x16_t vec" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_s16", + "arguments": [ + "int16x8_t vec" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_s32", + "arguments": [ + "int32x4_t vec" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_s8", + "arguments": [ + "int8x16_t vec" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], 
+ "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_u16", + "arguments": [ + "uint16x8_t vec" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_u32", + "arguments": [ + "uint32x4_t vec" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_u8", + "arguments": [ + "uint8x16_t vec" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhadd_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhadd_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhadd_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "SRHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhadd_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhadd_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhadd_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhaddq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhaddq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhaddq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhaddq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhaddq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrhaddq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URHADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32x_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32x_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32xq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": 
"float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32xq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32z_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32z_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32zq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd32zq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT32Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64x_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64x_f64", + "arguments": [ + "float64x1_t a" + 
], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64xq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64xq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64X" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64z_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64z_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64zq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd64zq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINT64Z" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd_f16", + 
"arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnd_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnda_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnda_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrnda_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndah_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vrndaq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndaq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndaq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndi_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndi_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndi_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + 
"A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndih_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndiq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndiq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndiq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndm_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndm_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndm_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": 
{ + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndmh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndmq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndmq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndmq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndn_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndn_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndn_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + 
"value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndnh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndnq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndnq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndnq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndns_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndp_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vrndp_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndp_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndph_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndpq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndpq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndpq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + 
[ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndx_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndx_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndx_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndxh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndxq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + 
"Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndxq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndxq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_u16", + "arguments": [ + "uint16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_u32", + "arguments": [ + "uint32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_u64", + "arguments": [ + "uint64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshl_u8", + "arguments": [ + "uint8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshld_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshld_u64", + "arguments": [ + "uint64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_u16", + "arguments": [ + "uint16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vrshlq_u32", + "arguments": [ + "uint32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_u64", + "arguments": [ + "uint64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshlq_u8", + "arguments": [ + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + 
"minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_s8", + "arguments": [ + "int8x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_u16", + "arguments": [ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_u64", + "arguments": [ + "uint64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshr_n_u8", + "arguments": [ + "uint8x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vrshrd_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrd_n_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_high_n_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_high_n_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_high_n_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_high_n_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "const int 
n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_high_n_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_high_n_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, 
+ "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrn_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" 
+ ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_s8", + "arguments": [ + "int8x16_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrshrq_n_u8", + "arguments": [ + "uint8x16_t a", + "const int n" + ], 
+ "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrte_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrte_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrte_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrte_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrted_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteq_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtes_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrts_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrts_f32", + "arguments": [ + "float32x2_t a", + 
"float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrts_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsd_f64", + "arguments": [ + "float64_t a", + "float64_t b" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtss_f32", + "arguments": [ + "float32_t a", + "float32_t b" + ], + "return_type": { + "value": "float32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + }, + "b": { + "register": "Sm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_s8", + 
"arguments": [ + "int8x8_t a", + "int8x8_t b", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsra_n_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsrad_n_s64", + "arguments": [ + "int64_t a", + "int64_t b", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsrad_n_u64", + "arguments": [ + "uint64_t a", + "uint64_t b", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsraq_n_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "URSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_high_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_high_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_high_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_high_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_high_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_high_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "RSUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "RSUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsubhn_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "RSUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_f16", + "arguments": [ + "float16_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "VnH" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_f32", + "arguments": [ + "float32_t a", + "float32x2_t v", + "const int lane" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_f64", + "arguments": [ + "float64_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_p16", + "arguments": [ + "poly16_t a", + "poly16x4_t v", + "const int lane" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_p64", + "arguments": [ + "poly64_t a", + "poly64x1_t v", + "const int lane" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_p8", + "arguments": [ + "poly8_t a", + "poly8x8_t v", + "const int lane" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s16", + "arguments": [ + "int16_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s32", + "arguments": [ + "int32_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, 
+ "v": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s64", + "arguments": [ + "int64_t a", + "int64x1_t v", + "const int lane" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s8", + "arguments": [ + "int8_t a", + "int8x8_t v", + "const int lane" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_u16", + "arguments": [ + "uint16_t a", + "uint16x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_u32", + "arguments": [ + "uint32_t a", + "uint32x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_u64", + "arguments": [ + "uint64_t a", + "uint64x1_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x1_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_u8", + "arguments": [ + "uint8_t a", + "uint8x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_f16", + "arguments": [ + "float16_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "VnH" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_f32", + "arguments": [ + "float32_t a", + "float32x4_t v", + "const int lane" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_f64", + "arguments": [ + "float64_t a", + "float64x2_t v", + "const int lane" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_p16", + "arguments": [ + 
"poly16_t a", + "poly16x8_t v", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_p64", + "arguments": [ + "poly64_t a", + "poly64x2_t v", + "const int lane" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_p8", + "arguments": [ + "poly8_t a", + "poly8x16_t v", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 15 + }, + "v": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_s16", + "arguments": [ + "int16_t a", + "int16x8_t v", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_s32", + "arguments": [ + "int32_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_s64", + "arguments": [ + "int64_t a", + "int64x2_t v", + "const int lane" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_s8", + "arguments": [ + "int8_t a", + "int8x16_t v", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 15 + }, + "v": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_u16", + "arguments": [ + "uint16_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_u32", + "arguments": [ + "uint32_t a", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_u64", + "arguments": [ + "uint64_t a", + "uint64x2_t v", + "const int lane" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + 
"maximum": 1 + }, + "v": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsetq_lane_u8", + "arguments": [ + "uint8_t a", + "uint8x16_t v", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 15 + }, + "v": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha1cq_u32", + "arguments": [ + "uint32x4_t hash_abcd", + "uint32_t hash_e", + "uint32x4_t wk" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "hash_abcd": { + "register": "Qd" + }, + "hash_e": { + "register": "Sn" + }, + "wk": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA1C" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha1h_u32", + "arguments": [ + "uint32_t hash_e" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "hash_e": { + "register": "Sn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA1H" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha1mq_u32", + "arguments": [ + "uint32x4_t hash_abcd", + "uint32_t hash_e", + "uint32x4_t wk" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "hash_abcd": { + "register": "Qd" + }, + "hash_e": { + "register": "Sn" + }, + "wk": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA1M" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha1pq_u32", + "arguments": [ + "uint32x4_t hash_abcd", + "uint32_t hash_e", + "uint32x4_t wk" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "hash_abcd": { + "register": "Qd" + }, + "hash_e": { + 
"register": "Sn" + }, + "wk": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA1P" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha1su0q_u32", + "arguments": [ + "uint32x4_t w0_3", + "uint32x4_t w4_7", + "uint32x4_t w8_11" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "w0_3": { + "register": "Vd.4S" + }, + "w4_7": { + "register": "Vn.4S" + }, + "w8_11": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA1SU0" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha1su1q_u32", + "arguments": [ + "uint32x4_t tw0_3", + "uint32x4_t w12_15" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "tw0_3": { + "register": "Vd.4S" + }, + "w12_15": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA1SU1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha256h2q_u32", + "arguments": [ + "uint32x4_t hash_efgh", + "uint32x4_t hash_abcd", + "uint32x4_t wk" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "hash_abcd": { + "register": "Qn" + }, + "hash_efgh": { + "register": "Qd" + }, + "wk": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA256H2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha256hq_u32", + "arguments": [ + "uint32x4_t hash_abcd", + "uint32x4_t hash_efgh", + "uint32x4_t wk" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "hash_abcd": { + "register": "Qd" + }, + "hash_efgh": { + "register": "Qn" + }, + "wk": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA256H" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha256su0q_u32", + "arguments": [ + "uint32x4_t w0_3", + "uint32x4_t w4_7" + ], + "return_type": { + "value": "uint32x4_t" + }, + 
"Arguments_Preparation": { + "w0_3": { + "register": "Vd.4S" + }, + "w4_7": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA256SU0" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha256su1q_u32", + "arguments": [ + "uint32x4_t tw0_3", + "uint32x4_t w8_11", + "uint32x4_t w12_15" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "tw0_3": { + "register": "Vd.4S" + }, + "w12_15": { + "register": "Vm.4S" + }, + "w8_11": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SHA256SU1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha512h2q_u64", + "arguments": [ + "uint64x2_t sum_ab", + "uint64x2_t hash_c_", + "uint64x2_t hash_ab" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "hash_ab": {}, + "hash_c_": { + "register": "Qn" + }, + "sum_ab": { + "register": "Qd" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHA512H2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha512hq_u64", + "arguments": [ + "uint64x2_t hash_ed", + "uint64x2_t hash_gf", + "uint64x2_t kwh_kwh2" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "hash_ed": { + "register": "Qd" + }, + "hash_gf": { + "register": "Qn" + }, + "kwh_kwh2": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHA512H" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha512su0q_u64", + "arguments": [ + "uint64x2_t w0_1", + "uint64x2_t w2_" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "w0_1": { + "register": "Vd.2D" + }, + "w2_": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHA512SU0" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsha512su1q_u64", + "arguments": [ + "uint64x2_t s01_s02", + "uint64x2_t w14_15", + "uint64x2_t w9_10" + ], + "return_type": { + "value": 
"uint64x2_t" + }, + "Arguments_Preparation": { + "s01_s02": { + "register": "Vd.2D" + }, + "w14_15": { + "register": "Vn.2D" + }, + "w9_10": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHA512SU1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_s8", + "arguments": [ + "int8x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_u16", + "arguments": [ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], 
+ "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_u64", + "arguments": [ + "uint64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_n_u8", + "arguments": [ + "uint8x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_u16", + "arguments": [ + "uint16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_u32", + "arguments": [ + "uint32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_u64", + "arguments": [ + "uint64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshl_u8", + "arguments": [ + "uint8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshld_n_s64", 
+ "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshld_n_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshld_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshld_u64", + "arguments": [ + "uint64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_high_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_high_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHLL2" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vshll_high_n_s8", + "arguments": [ + "int8x16_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_high_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_high_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 32 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_high_n_u8", + "arguments": [ + "uint8x16_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 8 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHLL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_n_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_n_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + 
}, + "n": { + "minimum": 0, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_n_s8", + "arguments": [ + "int8x8_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_n_u16", + "arguments": [ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_n_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshll_n_u8", + "arguments": [ + "uint8x8_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHLL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vshlq_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_s8", + "arguments": [ + "int8x16_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_n_u8", + "arguments": [ + "uint8x16_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_u16", + 
"arguments": [ + "uint16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_u32", + "arguments": [ + "uint32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_u64", + "arguments": [ + "uint64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshlq_u8", + "arguments": [ + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + 
}, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_s8", + "arguments": [ + "int8x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_u16", + "arguments": [ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_u64", + "arguments": [ + "uint64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshr_n_u8", + "arguments": [ + "uint8x8_t a", 
+ "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrd_n_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrd_n_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_high_n_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_high_n_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_high_n_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_high_n_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_high_n_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_high_n_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SHRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + 
}, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrn_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SHRN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_s16", + "arguments": [ + "int16x8_t a", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vshrq_n_s32", + "arguments": [ + "int32x4_t a", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_s64", + "arguments": [ + "int64x2_t a", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_s8", + "arguments": [ + "int8x16_t a", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_u16", + "arguments": [ + "uint16x8_t a", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_u32", + "arguments": [ + "uint32x4_t a", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_u64", + "arguments": [ + "uint64x2_t a", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vshrq_n_u8", + "arguments": [ + "uint8x16_t a", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USHR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b", + "const int n" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b", + "const int n" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b", + "const int n" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "const int n" + ], + 
"return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsli_n_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vslid_n_s64", + "arguments": [ + "int64_t a", + "int64_t b", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vslid_n_u64", + "arguments": [ + "uint64_t a", + "uint64_t b", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t 
b", + "const int n" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b", + "const int n" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b", + "const int n" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vsliq_n_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 0, + "maximum": 31 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 0, + "maximum": 63 + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsliq_n_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SLI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm3partw1q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3PARTW1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm3partw2q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3PARTW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm3ss1q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": {}, + "c": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3SS1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm3tt1aq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c", + "const int imm2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {}, + "c": {}, + "imm2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3TT1A" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vsm3tt1bq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c", + "const int imm2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {}, + "c": {}, + "imm2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3TT1B" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm3tt2aq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c", + "const int imm2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {}, + "c": {}, + "imm2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3TT2A" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm3tt2bq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c", + "const int imm2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {}, + "c": {}, + "imm2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM3TT2B" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm4ekeyq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM4EKEY" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsm4eq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": {} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SM4E" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqadd_u16", + "arguments": [ + "uint16x4_t a", + "int16x4_t b" + ], + 
"return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqadd_u32", + "arguments": [ + "uint32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqadd_u64", + "arguments": [ + "uint64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqadd_u8", + "arguments": [ + "uint8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddb_u8", + "arguments": [ + "uint8_t a", + "int8_t b" + ], + "return_type": { + "value": "uint8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bd" + }, + "b": { + "register": "Bn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddd_u64", + "arguments": [ + "uint64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddh_u16", + "arguments": [ + "uint16_t a", + "int16_t b" 
+ ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddq_u16", + "arguments": [ + "uint16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddq_u32", + "arguments": [ + "uint32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddq_u64", + "arguments": [ + "uint64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqaddq_u8", + "arguments": [ + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqadds_u32", + "arguments": [ + "uint32_t a", + "int32_t b" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrt_f16", + "arguments": [ + 
"float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrt_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrt_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrth_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrtq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrtq_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrtq_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vsra_n_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsra_n_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsrad_n_s64", + "arguments": [ + "int64_t a", + "int64_t b", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsrad_n_u64", + "arguments": [ + "uint64_t a", + "uint64_t b", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + 
"register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsraq_n_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USRA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b", + "const int n" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b", + "const int n" + ], + "return_type": { + 
"value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b", + "const int n" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_s8", + "arguments": [ + "int8x8_t a", + 
"int8x8_t b", + "const int n" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsri_n_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int n" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vsrid_n_s64", + "arguments": [ + "int64_t a", + "int64_t b", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsrid_n_u64", + "arguments": [ + "uint64_t a", + "uint64_t b", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b", + "const int n" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b", + "const int n" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b", + "const int n" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ 
+ [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b", + "const int n" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b", + "const int n" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b", + "const int n" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b", + "const int n" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "const int n" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + 
"maximum": 16 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "const int n" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "const int n" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsriq_n_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "const int n" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "n": { + "minimum": 1, + "maximum": 8 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SRI" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16", + "arguments": [ + "float16_t * ptr", + "float16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16_x2", + "arguments": [ + "float16_t * ptr", + "float16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, 
+ "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16_x3", + "arguments": [ + "float16_t * ptr", + "float16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16_x4", + "arguments": [ + "float16_t * ptr", + "float16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f32", + "arguments": [ + "float32_t * ptr", + "float32x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f32_x2", + "arguments": [ + "float32_t * ptr", + "float32x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f32_x3", + "arguments": [ + "float32_t * ptr", + "float32x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f32_x4", + "arguments": [ + "float32_t * ptr", + 
"float32x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f64", + "arguments": [ + "float64_t * ptr", + "float64x1_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f64_x2", + "arguments": [ + "float64_t * ptr", + "float64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f64_x3", + "arguments": [ + "float64_t * ptr", + "float64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f64_x4", + "arguments": [ + "float64_t * ptr", + "float64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x1_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 
7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x1_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4_t val", + "const int 
lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p16_x2", + "arguments": [ 
+ "poly16_t * ptr", + "poly16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p16_x3", + "arguments": [ + "poly16_t * ptr", + "poly16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p16_x4", + "arguments": [ + "poly16_t * ptr", + "poly16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p64_x2", + "arguments": [ + "poly64_t * ptr", + "poly64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p64_x3", + "arguments": [ + "poly64_t * ptr", + "poly64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A32", + 
"A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p64_x4", + "arguments": [ + "poly64_t * ptr", + "poly64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p8_x2", + "arguments": [ + "poly8_t * ptr", + "poly8x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p8_x3", + "arguments": [ + "poly8_t * ptr", + "poly8x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_p8_x4", + "arguments": [ + "poly8_t * ptr", + "poly8x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s16", + "arguments": [ + "int16_t * ptr", + "int16x4_t val" + ], + "return_type": { + "value": "void" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s16_x2", + "arguments": [ + "int16_t * ptr", + "int16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s16_x3", + "arguments": [ + "int16_t * ptr", + "int16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s16_x4", + "arguments": [ + "int16_t * ptr", + "int16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s32", + "arguments": [ + "int32_t * ptr", + "int32x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s32_x2", + "arguments": [ + "int32_t * ptr", + "int32x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vst1_s32_x3", + "arguments": [ + "int32_t * ptr", + "int32x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s32_x4", + "arguments": [ + "int32_t * ptr", + "int32x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s64", + "arguments": [ + "int64_t * ptr", + "int64x1_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s64_x2", + "arguments": [ + "int64_t * ptr", + "int64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s64_x3", + "arguments": [ + "int64_t * ptr", + "int64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s64_x4", + "arguments": [ + "int64_t * ptr", + "int64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + 
"register": "Vt4.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s8", + "arguments": [ + "int8_t * ptr", + "int8x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s8_x2", + "arguments": [ + "int8_t * ptr", + "int8x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s8_x3", + "arguments": [ + "int8_t * ptr", + "int8x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_s8_x4", + "arguments": [ + "int8_t * ptr", + "int8x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u16_x2", + "arguments": [ + "uint16_t * ptr", + "uint16x4x2_t val" + 
], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u16_x3", + "arguments": [ + "uint16_t * ptr", + "uint16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u16_x4", + "arguments": [ + "uint16_t * ptr", + "uint16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u32_x2", + "arguments": [ + "uint32_t * ptr", + "uint32x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u32_x3", + "arguments": [ + "uint32_t * ptr", + "uint32x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u32_x4", + "arguments": [ + "uint32_t * ptr", + "uint32x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u64_x2", + "arguments": [ + "uint64_t * ptr", + "uint64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u64_x3", + "arguments": [ + "uint64_t * ptr", + "uint64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u64_x4", + "arguments": [ + "uint64_t * ptr", + "uint64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8_t val" + ], + "return_type": { + "value": "void" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u8_x2", + "arguments": [ + "uint8_t * ptr", + "uint8x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u8_x3", + "arguments": [ + "uint8_t * ptr", + "uint8x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_u8_x4", + "arguments": [ + "uint8_t * ptr", + "uint8x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16_x2", + "arguments": [ + "float16_t * ptr", + "float16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vst1q_f16_x3", + "arguments": [ + "float16_t * ptr", + "float16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16_x4", + "arguments": [ + "float16_t * ptr", + "float16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f32", + "arguments": [ + "float32_t * ptr", + "float32x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f32_x2", + "arguments": [ + "float32_t * ptr", + "float32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f32_x3", + "arguments": [ + "float32_t * ptr", + "float32x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f32_x4", + "arguments": [ + "float32_t * ptr", + "float32x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + 
"register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f64", + "arguments": [ + "float64_t * ptr", + "float64x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f64_x2", + "arguments": [ + "float64_t * ptr", + "float64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f64_x3", + "arguments": [ + "float64_t * ptr", + "float64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f64_x4", + "arguments": [ + "float64_t * ptr", + "float64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vst1q_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.16B" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x16_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { 
+ "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p16_x2", + "arguments": [ + "poly16_t * ptr", + "poly16x8x2_t val" + ], + "return_type": { + 
"value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p16_x3", + "arguments": [ + "poly16_t * ptr", + "poly16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p16_x4", + "arguments": [ + "poly16_t * ptr", + "poly16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p64_x2", + "arguments": [ + "poly64_t * ptr", + "poly64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p64_x3", + "arguments": [ + "poly64_t * ptr", + "poly64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p64_x4", + "arguments": [ + "poly64_t * ptr", + "poly64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p8_x2", + "arguments": [ + "poly8_t * ptr", + "poly8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p8_x3", + "arguments": [ + "poly8_t * ptr", + "poly8x16x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_p8_x4", + "arguments": [ + "poly8_t * ptr", + "poly8x16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s16", + "arguments": [ + "int16_t * ptr", + "int16x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, 
+ "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s16_x2", + "arguments": [ + "int16_t * ptr", + "int16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s16_x3", + "arguments": [ + "int16_t * ptr", + "int16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s16_x4", + "arguments": [ + "int16_t * ptr", + "int16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s32", + "arguments": [ + "int32_t * ptr", + "int32x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s32_x2", + "arguments": [ + "int32_t * ptr", + "int32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s32_x3", + "arguments": [ + 
"int32_t * ptr", + "int32x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s32_x4", + "arguments": [ + "int32_t * ptr", + "int32x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s64", + "arguments": [ + "int64_t * ptr", + "int64x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s64_x2", + "arguments": [ + "int64_t * ptr", + "int64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s64_x3", + "arguments": [ + "int64_t * ptr", + "int64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s64_x4", + "arguments": [ + "int64_t * ptr", + "int64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s8", + "arguments": [ + "int8_t * ptr", + "int8x16_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s8_x2", + "arguments": [ + "int8_t * ptr", + "int8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s8_x3", + "arguments": [ + "int8_t * ptr", + "int8x16x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_s8_x4", + "arguments": [ + "int8_t * ptr", + "int8x16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u16_x2", + "arguments": [ + "uint16_t * ptr", + "uint16x8x2_t val" + ], + "return_type": { + "value": 
"void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u16_x3", + "arguments": [ + "uint16_t * ptr", + "uint16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u16_x4", + "arguments": [ + "uint16_t * ptr", + "uint16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u32_x2", + "arguments": [ + "uint32_t * ptr", + "uint32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u32_x3", + "arguments": [ + "uint32_t * ptr", + "uint32x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] 
+ ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u32_x4", + "arguments": [ + "uint32_t * ptr", + "uint32x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u64_x2", + "arguments": [ + "uint64_t * ptr", + "uint64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u64_x3", + "arguments": [ + "uint64_t * ptr", + "uint64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u64_x4", + "arguments": [ + "uint64_t * ptr", + "uint64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + 
"register": "Xn" + }, + "val": { + "register": "Vt.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u8_x2", + "arguments": [ + "uint8_t * ptr", + "uint8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u8_x3", + "arguments": [ + "uint8_t * ptr", + "uint8x16x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_u8_x4", + "arguments": [ + "uint8_t * ptr", + "uint8x16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_f32", + "arguments": [ + "float32_t * ptr", + "float32x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vst2_f64", + "arguments": [ + "float64_t * ptr", + "float64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x1x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x1x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": 
{ + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + 
"Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_s16", + "arguments": [ + "int16_t * ptr", + "int16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_s32", + "arguments": [ + "int32_t * ptr", + "int32x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_s64", + "arguments": [ + "int64_t * ptr", + "int64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_s8", + "arguments": [ + "int8_t * ptr", + "int8x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": 
"Xn" + }, + "val": { + "register": "Vt2.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_f32", + "arguments": [ + "float32_t * ptr", + "float32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_f64", + "arguments": [ + "float64_t * ptr", + "float64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + 
"v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 2 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + 
"ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x16x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4x2_t val", + "const int lane" + ], + 
"return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + 
"ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s16", + "arguments": [ + "int16_t * ptr", + "int16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s32", + "arguments": [ + "int32_t * ptr", + "int32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s64", + "arguments": [ + "int64_t * ptr", + "int64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s8", + "arguments": [ + "int8_t * ptr", + "int8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_u32", + "arguments": [ + 
"uint32_t * ptr", + "uint32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_f32", + "arguments": [ + "float32_t * ptr", + "float32x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_f64", + "arguments": [ + "float64_t * ptr", + "float64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + 
"instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x1x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + 
}, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x1x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x8x3_t val", + "const int lane" + ], + "return_type": { 
+ "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vst3_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_s16", + "arguments": [ + "int16_t * ptr", + "int16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_s32", + "arguments": [ + "int32_t * ptr", + "int32x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_s64", + "arguments": [ + "int64_t * ptr", + "int64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_s8", + "arguments": [ + "int8_t * ptr", + "int8x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x3_t val" + ], + "return_type": { 
+ "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_f32", + "arguments": [ + "float32_t * ptr", + "float32x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_f64", + "arguments": [ + "float64_t * ptr", + "float64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": 
{ + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_s32", + "arguments": [ + "int32_t * ptr", + 
"int32x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x16x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_s16", + 
"arguments": [ + "int16_t * ptr", + "int16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_s32", + "arguments": [ + "int32_t * ptr", + "int32x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_s64", + "arguments": [ + "int64_t * ptr", + "int64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_s8", + "arguments": [ + "int8_t * ptr", + "int8x16x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4S" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_f32", + "arguments": [ + "float32_t * ptr", + "float32x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_f64", + "arguments": [ + "float64_t * ptr", + "float64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + 
"Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x1x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_p8", + 
"arguments": [ + "poly8_t * ptr", + "poly8x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x1x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 0 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + 
"register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_s16", + "arguments": [ + "int16_t * ptr", + "int16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_s32", + "arguments": [ + "int32_t * ptr", + "int32x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_s64", + "arguments": [ + "int64_t * ptr", + "int64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_s8", + "arguments": [ + "int8_t * ptr", + "int8x8x4_t val" + ], + 
"return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x1x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + 
[ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_f32", + "arguments": [ + "float32_t * ptr", + "float32x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_f64", + "arguments": [ + "float64_t * ptr", + "float64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_f32", + "arguments": [ + "float32_t * ptr", + "float32x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_f64", + "arguments": [ + "float64_t * ptr", + "float64x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_s16", + "arguments": [ + "int16_t * ptr", + "int16x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_s32", + "arguments": [ + "int32_t * ptr", + "int32x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + 
"register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_s64", + "arguments": [ + "int64_t * ptr", + "int64x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_s8", + "arguments": [ + "int8_t * ptr", + "int8x16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + 
"Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 1 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 15 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_p16", + "arguments": [ + "poly16_t * ptr", + "poly16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_p64", + "arguments": [ + "poly64_t * ptr", + "poly64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_s16", + "arguments": [ + "int16_t * ptr", + "int16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_s32", + "arguments": [ + "int32_t * ptr", + "int32x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_s64", + "arguments": [ + "int64_t * ptr", + "int64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_s8", + "arguments": [ + "int8_t * ptr", + "int8x16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_u16", + "arguments": [ + "uint16_t * ptr", + "uint16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_u32", + "arguments": [ + "uint32_t * ptr", + "uint32x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_u64", + "arguments": [ + "uint64_t * ptr", + "uint64x2x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { 
+ "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_u8", + "arguments": [ + "uint8_t * ptr", + "uint8x16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vstrq_p128", + "arguments": [ + "poly128_t * ptr", + "poly128_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Qt" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "STR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_f64", + "arguments": [ + "float64x1_t a", + "float64x1_t b" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + 
], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_s64", + "arguments": [ + "int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vsub_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsub_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_high_s16", + "arguments": [ + "int8x8_t r", + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + 
}, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_high_s32", + "arguments": [ + "int16x4_t r", + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_high_s64", + "arguments": [ + "int32x2_t r", + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_high_u16", + "arguments": [ + "uint8x8_t r", + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + }, + "r": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_high_u32", + "arguments": [ + "uint16x4_t r", + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_high_u64", + "arguments": [ + "uint32x2_t r", + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + }, + "r": { + "register": "Vd.2S" + } + }, 
+ "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUBHN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubhn_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": 
"uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUBHN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_high_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSUBL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_high_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSUBL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_high_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSUBL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_high_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USUBL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_high_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USUBL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_high_u8", + 
"arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USUBL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSUBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSUBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSUBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USUBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "USUBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubl_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USUBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vsubq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_high_s16", + "arguments": [ + "int32x4_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSUBW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_high_s32", + "arguments": [ + "int64x2_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSUBW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_high_s8", + "arguments": [ + "int16x8_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SSUBW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_high_u16", + "arguments": [ + "uint32x4_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USUBW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_high_u32", + "arguments": [ + "uint64x2_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + 
], + "instructions": [ + [ + "USUBW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_high_u8", + "arguments": [ + "uint16x8_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USUBW2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_s16", + "arguments": [ + "int32x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSUBW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_s32", + "arguments": [ + "int64x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSUBW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_s8", + "arguments": [ + "int16x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SSUBW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_u16", + "arguments": [ + "uint32x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USUBW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_u32", + "arguments": [ + "uint64x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USUBW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsubw_u8", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "USUBW" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsudot_lane_s32", + "arguments": [ + "int32x2_t r", + "int8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SUDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsudot_laneq_s32", + "arguments": [ + "int32x2_t r", + "int8x8_t a", + "uint8x16_t b", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsudotq_lane_s32", + "arguments": [ + "int32x4_t r", + "int8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SUDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsudotq_laneq_s32", + 
"arguments": [ + "int32x4_t r", + "int8x16_t a", + "uint8x16_t b", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl1_p8", + "arguments": [ + "poly8x8_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl1_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl1_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl2_p8", + "arguments": [ + "poly8x8x2_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl2_s8", + "arguments": [ + "int8x8x2_t a", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + 
"A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl2_u8", + "arguments": [ + "uint8x8x2_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl3_p8", + "arguments": [ + "poly8x8x3_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl3_s8", + "arguments": [ + "int8x8x3_t a", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl3_u8", + "arguments": [ + "uint8x8x3_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl4_p8", + "arguments": [ + "poly8x8x4_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl4_s8", + "arguments": [ + "int8x8x4_t a", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", 
+ "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbl4_u8", + "arguments": [ + "uint8x8x4_t a", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx1_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOVI", + "CMHS", + "TBL", + "BIF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx1_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOVI", + "CMHS", + "TBL", + "BIF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx1_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOVI", + "CMHS", + "TBL", + "BIF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx2_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8x2_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx2_s8", + "arguments": [ + "int8x8_t a", + 
"int8x8x2_t b", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx2_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8x2_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx3_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8x3_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOVI", + "CMHS", + "TBL", + "BIF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx3_s8", + "arguments": [ + "int8x8_t a", + "int8x8x3_t b", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOVI", + "CMHS", + "TBL", + "BIF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx3_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8x3_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOVI", + "CMHS", + "TBL", + "BIF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx4_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8x4_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": 
{ + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx4_s8", + "arguments": [ + "int8x8_t a", + "int8x8x4_t b", + "int8x8_t idx" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtbx4_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8x4_t b", + "uint8x8_t idx" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": { + "register": "Vn.16B" + }, + "idx": {} + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TBX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t 
b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1_u8", + "arguments": [ + "uint8x8_t 
a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + 
"name": "vtrn1q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + 
], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + 
}, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + 
"a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_p16", + "arguments": [ + "poly16x4_t a", + 
"poly16x4_t b" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_p8", + 
"arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" 
+ }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_p64", + "arguments": [ + "poly64x1_t a", + "poly64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_s64", + "arguments": [ + 
"int64x1_t a", + "int64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_u64", + "arguments": [ + "uint64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtst_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstd_s64", + "arguments": [ + "int64_t a", + "int64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstd_u64", + "arguments": [ + "uint64_t a", + "uint64_t b" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "b": { + "register": "Dm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vtstq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CMTST" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqadd_s16", + "arguments": [ + "int16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqadd_s32", + "arguments": [ + "int32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqadd_s64", + "arguments": [ + "int64x1_t a", + "uint64x1_t b" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqadd_s8", + "arguments": [ + "int8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddb_s8", + "arguments": [ + "int8_t a", + "uint8_t b" + ], + "return_type": { + "value": "int8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Bd" + }, + "b": { + "register": "Bn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddd_s64", + "arguments": [ + "int64_t a", + "uint64_t b" + ], + "return_type": { 
+ "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dd" + }, + "b": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddh_s16", + "arguments": [ + "int16_t a", + "uint16_t b" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddq_s16", + "arguments": [ + "int16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddq_s32", + "arguments": [ + "int32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddq_s64", + "arguments": [ + "int64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqaddq_s8", + "arguments": [ + "int8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuqadds_s32", + "arguments": [ + "int32_t a", + "uint32_t b" 
+ ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sd" + }, + "b": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SUQADD" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusdot_lane_s32", + "arguments": [ + "int32x2_t r", + "uint8x8_t a", + "int8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "USDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusdot_laneq_s32", + "arguments": [ + "int32x2_t r", + "uint8x8_t a", + "int8x16_t b", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusdot_s32", + "arguments": [ + "int32x2_t r", + "uint8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "USDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusdotq_lane_s32", + "arguments": [ + "int32x4_t r", + "uint8x16_t a", + "int8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "USDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusdotq_laneq_s32", + "arguments": [ + "int32x4_t r", + "uint8x16_t a", + "int8x16_t b", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusdotq_s32", + "arguments": [ + "int32x4_t r", + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "USDOT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vusmmlaq_s32", + "arguments": [ + "int32x4_t r", + "uint8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "USMMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + 
{ + "SIMD_ISA": "Neon", + "name": "vuzp1_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + 
"UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + 
"register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { 
+ "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + 
"return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_s8", + "arguments": [ + 
"int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + 
}, + { + "SIMD_ISA": "Neon", + "name": "vuzp_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8x2_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + 
"instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + 
"return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vxarq_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b", + "const int imm6" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": {}, + "imm6": { + "minimum": 0, + "maximum": 63 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "XAR" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": 
"Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + 
"return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_u16", + "arguments": [ + "uint16x4_t a", + 
"uint16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vzip2q_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_p64", + "arguments": [ + "poly64x2_t a", + "poly64x2_t b" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_s64", + "arguments": [ + "int64x2_t a", + "int64x2_t b" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { 
+ "SIMD_ISA": "Neon", + "name": "vzip2q_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_u64", + "arguments": [ + "uint64x2_t a", + "uint64x2_t b" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + 
"A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_p16", + "arguments": [ + "poly16x4_t a", + "poly16x4_t b" + ], + "return_type": { + "value": "poly16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_p8", + "arguments": [ + "poly8x8_t a", + "poly8x8_t b" + ], + "return_type": { + "value": "poly8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_s16", + "arguments": [ + "int16x4_t a", + "int16x4_t b" + ], + "return_type": { + "value": "int16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_s32", + "arguments": [ + "int32x2_t a", + "int32x2_t b" + ], + "return_type": { + "value": "int32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_s8", + "arguments": [ + "int8x8_t a", + "int8x8_t b" + ], + 
"return_type": { + "value": "int8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_u16", + "arguments": [ + "uint16x4_t a", + "uint16x4_t b" + ], + "return_type": { + "value": "uint16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_u32", + "arguments": [ + "uint32x2_t a", + "uint32x2_t b" + ], + "return_type": { + "value": "uint32x2x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzip_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b" + ], + "return_type": { + "value": "uint8x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_p16", + "arguments": [ + "poly16x8_t a", + "poly16x8_t b" + ], + "return_type": { + "value": "poly16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_p8", + "arguments": [ + "poly8x16_t a", + "poly8x16_t b" + ], + "return_type": { + "value": "poly8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_s16", + "arguments": [ + "int16x8_t a", + "int16x8_t b" + ], + "return_type": { + "value": "int16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t b" + ], + "return_type": { + "value": "int32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_s8", + "arguments": [ + "int8x16_t a", + "int8x16_t b" + ], + "return_type": { + "value": "int8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_u16", + 
"arguments": [ + "uint16x8_t a", + "uint16x8_t b" + ], + "return_type": { + "value": "uint16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b" + ], + "return_type": { + "value": "uint32x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b" + ], + "return_type": { + "value": "uint8x16x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vamin_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FAMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaminq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FAMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vaminq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } 
+ }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FAMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vamax_f32", + "arguments": [ + "float32x2_t a", + "float32x2_t b" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FAMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vamaxq_f32", + "arguments": [ + "float32x4_t a", + "float32x4_t b" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FAMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vamaxq_f64", + "arguments": [ + "float64x2_t a", + "float64x2_t b" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FAMAX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_u8", + "arguments": [ + "uint8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_s8", + "arguments": [ + "int8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_s8", + "arguments": [ + "int8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_p8", + "arguments": [ + "poly8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_p8", + "arguments": [ + "poly8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_u16", + "arguments": [ + "uint16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_s16", + "arguments": [ + "int16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_s16", + "arguments": [ + "int16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_p16", + "arguments": [ + "poly16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], 
+ "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_p16", + "arguments": [ + "poly16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_u8", + "arguments": [ + "uint8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_u8", + "arguments": [ + "uint8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_s8", + "arguments": [ + "int8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_s8", + "arguments": [ + "int8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + 
"return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_p8", + "arguments": [ + "poly8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_p8", + "arguments": [ + "poly8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_u16_x2", + "arguments": [ + "uint16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_u16_x2", + "arguments": [ + "uint16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + 
"minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_s16_x2", + "arguments": [ + "int16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_s16_x2", + "arguments": [ + "int16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_f16_x2", + "arguments": [ + "float16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_f16_x2", + "arguments": [ + "float16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { 
+ "SIMD_ISA": "Neon", + "name": "vluti4q_lane_p16_x2", + "arguments": [ + "poly16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_p16_x2", + "arguments": [ + "poly16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + } +] diff --git a/library/stdarch/rustfmt.toml b/library/stdarch/rustfmt.toml new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/library/stdarch/triagebot.toml b/library/stdarch/triagebot.toml new file mode 100644 index 000000000000..75eb642e9963 --- /dev/null +++ b/library/stdarch/triagebot.toml @@ -0,0 +1,62 @@ +[assign] + +[assign.owners] +"*" = ["@Amanieu"] + +[ping.windows] +message = """\ +Hey Windows Group! This issue could use some guidance on how it can be resolved +on Windows platforms. +Could one of you weigh in please? In case it's useful, here are some +[instructions] for tackling these sorts of bugs. +Thanks! + +[instructions]: https://rustc-dev-guide.rust-lang.org/notification-groups/windows.html +""" + +[ping.arm] +message = """\ +Hey Arm-interested people! This issue could use some guidance on how it can be +resolved on Arm platforms. +Could one of you weigh in please? In case it's useful, here are some +[instructions] for tackling these sorts of bugs. +Thanks! 
+ +[instructions]: https://rustc-dev-guide.rust-lang.org/notification-groups/arm.html +""" + +[ping.risc-v] +message = """\ +Hey RISC-V Group! This issue could use some guidance on how it can be resolved +on RISC-V platforms. +Could one of you weigh in please? In case it's useful, here are some +[instructions] for tackling these sorts of bugs. +Thanks! + +[instructions]: https://rustc-dev-guide.rust-lang.org/notification-groups/risc-v.html +""" + +[ping.fuchsia] +message = """\ +Hey friends of Fuchsia! This issue could use some guidance on how this should be +resolved/implemented on Fuchsia. Could one of you weigh in please? +Thanks! +""" + +[ping.apple] +alias = ["macos", "ios", "tvos", "watchos", "visionos"] +message = """\ +Hey Apple Group! This issue or PR could use some Darwin-specific guidance. Could +one of you weigh in please? +Thanks! +""" + +# Canonicalize issue numbers to avoid closing the wrong issue +# when commits are included in subtrees, as well as warning links in commits. 
+# Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html +[issue-links] +check-commits = false + +# Prevents mentions in commits to avoid users being spammed +# Documentation at: https://forge.rust-lang.org/triagebot/no-mentions.html +[no-mentions] diff --git a/library/stdarch/vendor.yml b/library/stdarch/vendor.yml new file mode 100644 index 000000000000..fd2bfecba733 --- /dev/null +++ b/library/stdarch/vendor.yml @@ -0,0 +1,3 @@ +- crates/stdarch-verify/x86-intel.xml +- crates/stdarch-verify/mips-msa.h +- intrinsics_data/arm_intrinsics.json diff --git a/library/sysroot/Cargo.toml b/library/sysroot/Cargo.toml index c149d513c32b..3adc02249716 100644 --- a/library/sysroot/Cargo.toml +++ b/library/sysroot/Cargo.toml @@ -26,7 +26,7 @@ debug_typeid = ["std/debug_typeid"] llvm-libunwind = ["std/llvm-libunwind"] system-llvm-libunwind = ["std/system-llvm-libunwind"] optimize_for_size = ["std/optimize_for_size"] -panic-unwind = ["std/panic_unwind"] +panic-unwind = ["std/panic-unwind"] panic_immediate_abort = ["std/panic_immediate_abort"] profiler = ["dep:profiler_builtins"] std_detect_file_io = ["std/std_detect_file_io"] diff --git a/src/bootstrap/build.rs b/src/bootstrap/build.rs index e0e32d313535..d9810e899a04 100644 --- a/src/bootstrap/build.rs +++ b/src/bootstrap/build.rs @@ -1,6 +1,7 @@ use std::env; fn main() { + // this is needed because `HOST` is only available to build scripts. 
let host = env::var("HOST").unwrap(); println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rustc-env=BUILD_TRIPLE={host}"); diff --git a/src/bootstrap/configure.py b/src/bootstrap/configure.py index 0d4d6e0ff54c..c077555b9069 100755 --- a/src/bootstrap/configure.py +++ b/src/bootstrap/configure.py @@ -6,6 +6,7 @@ from __future__ import absolute_import, division, print_function import shlex import sys import os +import re rust_dir = os.path.dirname(os.path.abspath(__file__)) rust_dir = os.path.dirname(rust_dir) @@ -585,16 +586,31 @@ def parse_example_config(known_args, config): section_order = [None] targets = {} top_level_keys = [] + comment_lines = [] with open(rust_dir + "/bootstrap.example.toml") as example_config: example_lines = example_config.read().split("\n") for line in example_lines: - if cur_section is None: - if line.count("=") == 1: - top_level_key = line.split("=")[0] - top_level_key = top_level_key.strip(" #") - top_level_keys.append(top_level_key) - if line.startswith("["): + if line.count("=") >= 1 and not line.startswith("# "): + key = line.split("=")[0] + key = key.strip(" #") + parts = key.split(".") + if len(parts) > 1: + cur_section = parts[0] + if cur_section not in sections: + sections[cur_section] = ["[" + cur_section + "]"] + section_order.append(cur_section) + elif cur_section is None: + top_level_keys.append(key) + # put the comment lines within the start of + # a new section, not outside it. + sections[cur_section] += comment_lines + comment_lines = [] + # remove just the `section.` part from the line, if present. 
+ sections[cur_section].append( + re.sub("(#?)([a-zA-Z_-]+\\.)?(.*)", "\\1\\3", line) + ) + elif line.startswith("["): cur_section = line[1:-1] if cur_section.startswith("target"): cur_section = "target" @@ -605,8 +621,9 @@ def parse_example_config(known_args, config): sections[cur_section] = [line] section_order.append(cur_section) else: - sections[cur_section].append(line) + comment_lines.append(line) + sections[cur_section] += comment_lines # Fill out the `targets` array by giving all configured targets a copy of the # `target` section we just loaded from the example config configured_targets = [build(known_args)] diff --git a/src/bootstrap/defaults/bootstrap.library.toml b/src/bootstrap/defaults/bootstrap.library.toml index 895e50b9a20a..6a867093b781 100644 --- a/src/bootstrap/defaults/bootstrap.library.toml +++ b/src/bootstrap/defaults/bootstrap.library.toml @@ -1,7 +1,6 @@ # These defaults are meant for contributors to the standard library and documentation. [build] bench-stage = 1 -build-stage = 1 check-stage = 1 test-stage = 1 diff --git a/src/bootstrap/src/bin/rustc.rs b/src/bootstrap/src/bin/rustc.rs index 0671a8467e81..0364c664ba51 100644 --- a/src/bootstrap/src/bin/rustc.rs +++ b/src/bootstrap/src/bin/rustc.rs @@ -151,18 +151,6 @@ fn main() { cmd.arg("--sysroot").arg(&sysroot); } - // If we're compiling specifically the `panic_abort` crate then we pass - // the `-C panic=abort` option. Note that we do not do this for any - // other crate intentionally as this is the only crate for now that we - // ship with panic=abort. - // - // This... is a bit of a hack how we detect this. Ideally this - // information should be encoded in the crate I guess? Would likely - // require an RFC amendment to RFC 1513, however. 
- if crate_name == Some("panic_abort") { - cmd.arg("-C").arg("panic=abort"); - } - let crate_type = parse_value_from_args(&orig_args, "--crate-type"); // `-Ztls-model=initial-exec` must not be applied to proc-macros, see // issue https://github.com/rust-lang/rust/issues/100530 diff --git a/src/bootstrap/src/core/build_steps/check.rs b/src/bootstrap/src/core/build_steps/check.rs index fcd4f4078adb..567416d079b1 100644 --- a/src/bootstrap/src/core/build_steps/check.rs +++ b/src/bootstrap/src/core/build_steps/check.rs @@ -1,6 +1,5 @@ //! Implementation of compiling the compiler and standard library, in "check"-based modes. -use crate::core::build_steps::compile; use crate::core::build_steps::compile::{ add_to_sysroot, run_cargo, rustc_cargo, rustc_cargo_env, std_cargo, std_crates_for_run_make, }; @@ -67,8 +66,6 @@ impl Step for Std { return; } - builder.require_submodule("library/stdarch", None); - let stage = self.custom_stage.unwrap_or(builder.top_stage); let target = self.target; @@ -89,7 +86,7 @@ impl Step for Std { } // Reuse the stage0 libstd - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); return; } @@ -223,8 +220,8 @@ impl Step for Rustc { // the sysroot for the compiler to find. Otherwise, we're going to // fail when building crates that need to generate code (e.g., build // scripts and their dependencies). - builder.ensure(crate::core::build_steps::compile::Std::new(compiler, compiler.host)); - builder.ensure(crate::core::build_steps::compile::Std::new(compiler, target)); + builder.std(compiler, compiler.host); + builder.std(compiler, target); } else { builder.ensure(Std::new(target)); } diff --git a/src/bootstrap/src/core/build_steps/clippy.rs b/src/bootstrap/src/core/build_steps/clippy.rs index ebf0caccfbc9..1e44b5b67a44 100644 --- a/src/bootstrap/src/core/build_steps/clippy.rs +++ b/src/bootstrap/src/core/build_steps/clippy.rs @@ -1,8 +1,8 @@ //! 
Implementation of running clippy on the compiler, standard library and various tools. +use super::check; use super::compile::{run_cargo, rustc_cargo, std_cargo}; use super::tool::{SourceType, prepare_tool_cargo}; -use super::{check, compile}; use crate::builder::{Builder, ShouldRun}; use crate::core::build_steps::compile::std_crates_for_run_make; use crate::core::builder; @@ -141,8 +141,6 @@ impl Step for Std { } fn run(self, builder: &Builder<'_>) { - builder.require_submodule("library/stdarch", None); - let target = self.target; let compiler = builder.compiler(builder.top_stage, builder.config.host_target); @@ -214,8 +212,8 @@ impl Step for Rustc { // the sysroot for the compiler to find. Otherwise, we're going to // fail when building crates that need to generate code (e.g., build // scripts and their dependencies). - builder.ensure(compile::Std::new(compiler, compiler.host)); - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, compiler.host); + builder.std(compiler, target); } else { builder.ensure(check::Std::new(target)); } diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs index f6efb23e8d86..8200e1541692 100644 --- a/src/bootstrap/src/core/build_steps/compile.rs +++ b/src/bootstrap/src/core/build_steps/compile.rs @@ -197,8 +197,6 @@ impl Step for Std { return; } - builder.require_submodule("library/stdarch", None); - let mut target_deps = builder.ensure(StartupObjects { compiler, target }); let compiler_to_use = builder.compiler_for(compiler.stage, compiler.host, target); @@ -213,7 +211,7 @@ impl Step for Std { { trace!(?compiler_to_use, ?compiler, "compiler != compiler_to_use, uplifting library"); - builder.ensure(Std::new(compiler_to_use, target)); + builder.std(compiler_to_use, target); let msg = if compiler_to_use.host == target { format!( "Uplifting library (stage{} -> stage{})", @@ -690,7 +688,7 @@ pub fn std_cargo(builder: &Builder<'_>, target: TargetSelection, stage: 
u32, car } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -struct StdLink { +pub struct StdLink { pub compiler: Compiler, pub target_compiler: Compiler, pub target: TargetSelection, @@ -701,7 +699,7 @@ struct StdLink { } impl StdLink { - fn from_std(std: Std, host_compiler: Compiler) -> Self { + pub fn from_std(std: Std, host_compiler: Compiler) -> Self { Self { compiler: host_compiler, target_compiler: std.compiler, @@ -1022,6 +1020,12 @@ impl Step for Rustc { } fn make_run(run: RunConfig<'_>) { + // If only `compiler` was passed, do not run this step. + // Instead the `Assemble` step will take care of compiling Rustc. + if run.builder.paths == vec![PathBuf::from("compiler")] { + return; + } + let crates = run.cargo_crates_in_set(); run.builder.ensure(Rustc { build_compiler: run @@ -1067,7 +1071,7 @@ impl Step for Rustc { // Build a standard library for `target` using the `build_compiler`. // This will be the standard library that the rustc which we build *links to*. - builder.ensure(Std::new(build_compiler, target)); + builder.std(build_compiler, target); if builder.config.keep_stage.contains(&build_compiler.stage) { trace!(stage = build_compiler.stage, "`keep-stage` requested"); @@ -1108,10 +1112,10 @@ impl Step for Rustc { // build scripts and proc macros. // If we are not cross-compiling, the Std build above will be the same one as the one we // prepare here. 
- builder.ensure(Std::new( + builder.std( builder.compiler(self.build_compiler.stage, builder.config.host_target), builder.config.host_target, - )); + ); let mut cargo = builder::Cargo::new( builder, @@ -2079,7 +2083,7 @@ impl Step for Assemble { if builder.download_rustc() { trace!("`download-rustc` requested, reusing CI compiler for stage > 0"); - builder.ensure(Std::new(target_compiler, target_compiler.host)); + builder.std(target_compiler, target_compiler.host); let sysroot = builder.ensure(Sysroot { compiler: target_compiler, force_recompile: false }); // Ensure that `libLLVM.so` ends up in the newly created target directory, @@ -2087,7 +2091,7 @@ impl Step for Assemble { dist::maybe_install_llvm_target(builder, target_compiler.host, &sysroot); // Lower stages use `ci-rustc-sysroot`, not stageN if target_compiler.stage == builder.top_stage { - builder.info(&format!("Creating a sysroot for stage{stage} compiler (use `rustup toolchain link 'name' build/host/stage{stage}`)", stage=target_compiler.stage)); + builder.info(&format!("Creating a sysroot for stage{stage} compiler (use `rustup toolchain link 'name' build/host/stage{stage}`)", stage = target_compiler.stage)); } let mut precompiled_compiler = target_compiler; diff --git a/src/bootstrap/src/core/build_steps/dist.rs b/src/bootstrap/src/core/build_steps/dist.rs index e0f632eda0e2..25b7e5a1b5d1 100644 --- a/src/bootstrap/src/core/build_steps/dist.rs +++ b/src/bootstrap/src/core/build_steps/dist.rs @@ -23,7 +23,7 @@ use crate::core::build_steps::doc::DocumentationFormat; use crate::core::build_steps::tool::{self, Tool}; use crate::core::build_steps::vendor::{VENDOR_DIR, Vendor}; use crate::core::build_steps::{compile, llvm}; -use crate::core::builder::{Builder, Kind, RunConfig, ShouldRun, Step}; +use crate::core::builder::{Builder, Kind, RunConfig, ShouldRun, Step, StepMetadata}; use crate::core::config::TargetSelection; use crate::utils::build_stamp::{self, BuildStamp}; use crate::utils::channel::{self, 
Info}; @@ -84,6 +84,10 @@ impl Step for Docs { tarball.add_file(builder.src.join("src/doc/robots.txt"), dest, FileType::Regular); Some(tarball.generate()) } + + fn metadata(&self) -> Option { + Some(StepMetadata::dist("docs", self.host)) + } } #[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)] @@ -354,6 +358,10 @@ impl Step for Mingw { Some(tarball.generate()) } + + fn metadata(&self) -> Option { + Some(StepMetadata::dist("mingw", self.host)) + } } #[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)] @@ -540,6 +548,10 @@ impl Step for Rustc { } } } + + fn metadata(&self) -> Option { + Some(StepMetadata::dist("rustc", self.compiler.host)) + } } #[derive(Debug, Clone, Hash, PartialEq, Eq)] @@ -711,7 +723,7 @@ impl Step for Std { return None; } - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); let mut tarball = Tarball::new(builder, "rust-std", &target.triple); tarball.include_target_in_component_name(true); @@ -723,6 +735,10 @@ impl Step for Std { Some(tarball.generate()) } + + fn metadata(&self) -> Option { + Some(StepMetadata::dist("std", self.target).built_by(self.compiler)) + } } /// Tarball containing the compiler that gets downloaded and used by @@ -1002,6 +1018,10 @@ impl Step for Src { tarball.generate() } + + fn metadata(&self) -> Option { + Some(StepMetadata::dist("src", TargetSelection::default())) + } } #[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)] @@ -1036,18 +1056,18 @@ impl Step for PlainSourceTarball { let src_files = [ // tidy-alphabetical-start ".gitmodules", - "bootstrap.example.toml", - "Cargo.lock", - "Cargo.toml", - "configure", "CONTRIBUTING.md", "COPYRIGHT", + "Cargo.lock", + "Cargo.toml", "LICENSE-APACHE", - "license-metadata.json", "LICENSE-MIT", "README.md", "RELEASES.md", "REUSE.toml", + "bootstrap.example.toml", + "configure", + "license-metadata.json", "x", "x.ps1", "x.py", diff --git a/src/bootstrap/src/core/build_steps/doc.rs 
b/src/bootstrap/src/core/build_steps/doc.rs index 215c155651a2..f7c4c5ad0bbd 100644 --- a/src/bootstrap/src/core/build_steps/doc.rs +++ b/src/bootstrap/src/core/build_steps/doc.rs @@ -14,7 +14,8 @@ use std::{env, fs, mem}; use crate::core::build_steps::compile; use crate::core::build_steps::tool::{self, SourceType, Tool, prepare_tool_cargo}; use crate::core::builder::{ - self, Alias, Builder, Compiler, Kind, RunConfig, ShouldRun, Step, crate_description, + self, Alias, Builder, Compiler, Kind, RunConfig, ShouldRun, Step, StepMetadata, + crate_description, }; use crate::core::config::{Config, TargetSelection}; use crate::helpers::{submodule_path_of, symlink_dir, t, up_to_date}; @@ -662,6 +663,10 @@ impl Step for Std { } } } + + fn metadata(&self) -> Option { + Some(StepMetadata::doc("std", self.target).stage(self.stage)) + } } /// Name of the crates that are visible to consumers of the standard library. @@ -804,7 +809,7 @@ impl Step for Rustc { // Build the standard library, so that proc-macros can use it. // (Normally, only the metadata would be necessary, but proc-macros are special since they run at compile-time.) let compiler = builder.compiler(stage, builder.config.host_target); - builder.ensure(compile::Std::new(compiler, builder.config.host_target)); + builder.std(compiler, builder.config.host_target); let _guard = builder.msg_sysroot_tool( Kind::Doc, @@ -947,7 +952,7 @@ macro_rules! tool_doc { t!(fs::create_dir_all(&out)); let compiler = builder.compiler(stage, builder.config.host_target); - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); if true $(&& $rustc_tool)? { // Build rustc docs so that we generate relative links. @@ -1195,7 +1200,7 @@ impl Step for RustcBook { let rustc = builder.rustc(self.compiler); // The tool runs `rustc` for extracting output examples, so it needs a // functional sysroot. 
- builder.ensure(compile::Std::new(self.compiler, self.target)); + builder.std(self.compiler, self.target); let mut cmd = builder.tool_cmd(Tool::LintDocs); cmd.arg("--src"); cmd.arg(builder.src.join("compiler")); @@ -1272,7 +1277,7 @@ impl Step for Reference { // This is needed for generating links to the standard library using // the mdbook-spec plugin. - builder.ensure(compile::Std::new(self.compiler, builder.config.host_target)); + builder.std(self.compiler, builder.config.host_target); // Run rustbook/mdbook to generate the HTML pages. builder.ensure(RustbookSrc { diff --git a/src/bootstrap/src/core/build_steps/perf.rs b/src/bootstrap/src/core/build_steps/perf.rs index c43043b48f4d..4d61b38c876d 100644 --- a/src/bootstrap/src/core/build_steps/perf.rs +++ b/src/bootstrap/src/core/build_steps/perf.rs @@ -1,7 +1,7 @@ use std::env::consts::EXE_EXTENSION; use std::fmt::{Display, Formatter}; -use crate::core::build_steps::compile::{Std, Sysroot}; +use crate::core::build_steps::compile::Sysroot; use crate::core::build_steps::tool::{RustcPerf, Rustdoc}; use crate::core::builder::Builder; use crate::core::config::DebuginfoLevel; @@ -152,7 +152,7 @@ Consider setting `rust.debuginfo-level = 1` in `bootstrap.toml`."#); } let compiler = builder.compiler(builder.top_stage, builder.config.host_target); - builder.ensure(Std::new(compiler, builder.config.host_target)); + builder.std(compiler, builder.config.host_target); if let Some(opts) = args.cmd.shared_opts() && opts.profiles.contains(&Profile::Doc) diff --git a/src/bootstrap/src/core/build_steps/setup.rs b/src/bootstrap/src/core/build_steps/setup.rs index 86b7456d7b4e..37fc85518e0e 100644 --- a/src/bootstrap/src/core/build_steps/setup.rs +++ b/src/bootstrap/src/core/build_steps/setup.rs @@ -272,7 +272,7 @@ fn rustup_installed(builder: &Builder<'_>) -> bool { let mut rustup = command("rustup"); rustup.arg("--version"); - rustup.allow_failure().run_always().run_capture_stdout(builder).is_success() + 
rustup.allow_failure().run_in_dry_run().run_capture_stdout(builder).is_success() } fn stage_dir_exists(stage_path: &str) -> bool { @@ -531,7 +531,7 @@ enum EditorKind { impl EditorKind { // Used in `./tests.rs`. - #[allow(dead_code)] + #[cfg(test)] pub const ALL: &[EditorKind] = &[ EditorKind::Emacs, EditorKind::Helix, diff --git a/src/bootstrap/src/core/build_steps/test.rs b/src/bootstrap/src/core/build_steps/test.rs index 419839067f98..01b181f55de6 100644 --- a/src/bootstrap/src/core/build_steps/test.rs +++ b/src/bootstrap/src/core/build_steps/test.rs @@ -10,7 +10,7 @@ use std::{env, fs, iter}; use clap_complete::shells; -use crate::core::build_steps::compile::run_cargo; +use crate::core::build_steps::compile::{Std, run_cargo}; use crate::core::build_steps::doc::DocumentationFormat; use crate::core::build_steps::gcc::{Gcc, add_cg_gcc_cargo_flags}; use crate::core::build_steps::llvm::get_llvm_version; @@ -19,7 +19,8 @@ use crate::core::build_steps::tool::{self, COMPILETEST_ALLOW_FEATURES, SourceTyp use crate::core::build_steps::toolstate::ToolState; use crate::core::build_steps::{compile, dist, llvm}; use crate::core::builder::{ - self, Alias, Builder, Compiler, Kind, RunConfig, ShouldRun, Step, crate_description, + self, Alias, Builder, Compiler, Kind, RunConfig, ShouldRun, Step, StepMetadata, + crate_description, }; use crate::core::config::TargetSelection; use crate::core::config::flags::{Subcommand, get_completion}; @@ -544,7 +545,7 @@ impl Step for Miri { // We also need sysroots, for Miri and for the host (the latter for build scripts). // This is for the tests so everything is done with the target compiler. let miri_sysroot = Miri::build_miri_sysroot(builder, target_compiler, target); - builder.ensure(compile::Std::new(target_compiler, host)); + builder.std(target_compiler, host); let host_sysroot = builder.sysroot(target_compiler); // Miri has its own "target dir" for ui test dependencies. 
Make sure it gets cleared when @@ -709,7 +710,7 @@ impl Step for CompiletestTest { // We need `ToolStd` for the locally-built sysroot because // compiletest uses unstable features of the `test` crate. - builder.ensure(compile::Std::new(compiler, host)); + builder.std(compiler, host); let mut cargo = tool::prepare_tool_cargo( builder, compiler, @@ -1009,7 +1010,7 @@ impl Step for RustdocGUI { } fn run(self, builder: &Builder<'_>) { - builder.ensure(compile::Std::new(self.compiler, self.target)); + builder.std(self.compiler, self.target); let mut cmd = builder.tool_cmd(Tool::RustdocGUITest); @@ -1174,6 +1175,10 @@ HELP: to skip test's attempt to check tidiness, pass `--skip src/tools/tidy` to fn make_run(run: RunConfig<'_>) { run.builder.ensure(Tidy); } + + fn metadata(&self) -> Option { + Some(StepMetadata::test("tidy", TargetSelection::default())) + } } fn testdir(builder: &Builder<'_>, host: TargetSelection) -> PathBuf { @@ -1236,6 +1241,12 @@ macro_rules! test { }), }) } + + fn metadata(&self) -> Option { + Some( + StepMetadata::test(stringify!($name), self.target) + ) + } } }; } @@ -1634,7 +1645,7 @@ NOTE: if you're sure you want to do this, please open an issue as to why. In the if suite == "mir-opt" { builder.ensure(compile::Std::new(compiler, compiler.host).is_for_mir_opt_tests(true)); } else { - builder.ensure(compile::Std::new(compiler, compiler.host)); + builder.std(compiler, compiler.host); } let mut cmd = builder.tool_cmd(Tool::Compiletest); @@ -1642,7 +1653,7 @@ NOTE: if you're sure you want to do this, please open an issue as to why. 
In the if suite == "mir-opt" { builder.ensure(compile::Std::new(compiler, target).is_for_mir_opt_tests(true)); } else { - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); } builder.ensure(RemoteCopyLibs { compiler, target }); @@ -2177,7 +2188,7 @@ impl BookTest { fn run_ext_doc(self, builder: &Builder<'_>) { let compiler = self.compiler; - builder.ensure(compile::Std::new(compiler, compiler.host)); + builder.std(compiler, compiler.host); // mdbook just executes a binary named "rustdoc", so we need to update // PATH so that it points to our rustdoc. @@ -2263,7 +2274,7 @@ impl BookTest { let compiler = self.compiler; let host = self.compiler.host; - builder.ensure(compile::Std::new(compiler, host)); + builder.std(compiler, host); let _guard = builder.msg(Kind::Test, compiler.stage, format!("book {}", self.name), host, host); @@ -2410,7 +2421,7 @@ impl Step for ErrorIndex { drop(guard); // The tests themselves need to link to std, so make sure it is // available. - builder.ensure(compile::Std::new(compiler, compiler.host)); + builder.std(compiler, compiler.host); markdown_test(builder, compiler, &output); } } @@ -2473,7 +2484,7 @@ impl Step for CrateLibrustc { } fn run(self, builder: &Builder<'_>) { - builder.ensure(compile::Std::new(self.compiler, self.target)); + builder.std(self.compiler, self.target); // To actually run the tests, delegate to a copy of the `Crate` step. builder.ensure(Crate { @@ -2483,6 +2494,10 @@ impl Step for CrateLibrustc { crates: self.crates, }); } + + fn metadata(&self) -> Option { + Some(StepMetadata::test("CrateLibrustc", self.target)) + } } /// Given a `cargo test` subcommand, add the appropriate flags and run it. @@ -2641,7 +2656,7 @@ impl Step for Crate { // Prepare sysroot // See [field@compile::Std::force_recompile]. 
- builder.ensure(compile::Std::new(compiler, compiler.host).force_recompile(true)); + builder.ensure(Std::new(compiler, compiler.host).force_recompile(true)); // If we're not doing a full bootstrap but we're testing a stage2 // version of libstd, then what we're actually testing is the libstd @@ -2767,7 +2782,7 @@ impl Step for CrateRustdoc { // using `download-rustc`, the rustc_private artifacts may be in a *different sysroot* from // the target rustdoc (`ci-rustc-sysroot` vs `stage2`). In that case, we need to ensure this // explicitly to make sure it ends up in the stage2 sysroot. - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); builder.ensure(compile::Rustc::new(compiler, target)); let mut cargo = tool::prepare_tool_cargo( @@ -2911,7 +2926,7 @@ impl Step for RemoteCopyLibs { return; } - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); builder.info(&format!("REMOTE copy libs to emulator ({target})")); @@ -3101,7 +3116,7 @@ impl Step for TierCheck { /// Tests the Platform Support page in the rustc book. 
fn run(self, builder: &Builder<'_>) { - builder.ensure(compile::Std::new(self.compiler, self.compiler.host)); + builder.std(self.compiler, self.compiler.host); let mut cargo = tool::prepare_tool_cargo( builder, self.compiler, @@ -3334,7 +3349,7 @@ impl Step for CodegenCranelift { let compiler = self.compiler; let target = self.target; - builder.ensure(compile::Std::new(compiler, target)); + builder.std(compiler, target); // If we're not doing a full bootstrap but we're testing a stage2 // version of libstd, then what we're actually testing is the libstd diff --git a/src/bootstrap/src/core/build_steps/tool.rs b/src/bootstrap/src/core/build_steps/tool.rs index 0088e851d397..83c0525d7c42 100644 --- a/src/bootstrap/src/core/build_steps/tool.rs +++ b/src/bootstrap/src/core/build_steps/tool.rs @@ -20,7 +20,7 @@ use crate::core::build_steps::toolstate::ToolState; use crate::core::build_steps::{compile, llvm}; use crate::core::builder; use crate::core::builder::{ - Builder, Cargo as CargoCommand, RunConfig, ShouldRun, Step, cargo_profile_var, + Builder, Cargo as CargoCommand, RunConfig, ShouldRun, Step, StepMetadata, cargo_profile_var, }; use crate::core::config::{DebuginfoLevel, RustcLto, TargetSelection}; use crate::utils::exec::{BootstrapCommand, command}; @@ -122,14 +122,14 @@ impl Step for ToolBuild { Mode::ToolRustc => { // If compiler was forced, its artifacts should be prepared earlier. if !self.compiler.is_forced_compiler() { - builder.ensure(compile::Std::new(self.compiler, self.compiler.host)); + builder.std(self.compiler, self.compiler.host); builder.ensure(compile::Rustc::new(self.compiler, target)); } } Mode::ToolStd => { // If compiler was forced, its artifacts should be prepared earlier. if !self.compiler.is_forced_compiler() { - builder.ensure(compile::Std::new(self.compiler, target)) + builder.std(self.compiler, target) } } Mode::ToolBootstrap => {} // uses downloaded stage0 compiler libs @@ -390,7 +390,6 @@ macro_rules! 
bootstrap_tool { ; )+) => { #[derive(PartialEq, Eq, Clone)] - #[allow(dead_code)] pub enum Tool { $( $name, @@ -480,6 +479,13 @@ macro_rules! bootstrap_tool { } }) } + + fn metadata(&self) -> Option { + Some( + StepMetadata::build(stringify!($name), self.target) + .built_by(self.compiler) + ) + } } )+ } @@ -716,7 +722,7 @@ impl Step for Rustdoc { && target_compiler.stage > 0 && builder.rust_info().is_managed_git_subrepository() { - let files_to_track = &["src/librustdoc", "src/tools/rustdoc"]; + let files_to_track = &["src/librustdoc", "src/tools/rustdoc", "src/rustdoc-json-types"]; // Check if unchanged if !builder.config.has_changes_from_upstream(files_to_track) { @@ -780,6 +786,16 @@ impl Step for Rustdoc { ToolBuildResult { tool_path, build_compiler, target_compiler } } } + + fn metadata(&self) -> Option { + Some( + StepMetadata::build("rustdoc", self.compiler.host) + // rustdoc is ToolRustc, so stage N rustdoc is built by stage N-1 rustc + // FIXME: make this stage deduction automatic somehow + // FIXME: log the compiler that actually built ToolRustc steps + .stage(self.compiler.stage.saturating_sub(1)), + ) + } } #[derive(Debug, Clone, Hash, PartialEq, Eq)] @@ -1129,6 +1145,7 @@ macro_rules! tool_extended { tool_name: $tool_name:expr, stable: $stable:expr $( , add_bins_to_sysroot: $add_bins_to_sysroot:expr )? + $( , add_features: $add_features:expr )? $( , )? } ) => { @@ -1168,6 +1185,17 @@ macro_rules! 
tool_extended { $tool_name, $path, None $( .or(Some(&$add_bins_to_sysroot)) )?, + None $( .or(Some($add_features)) )?, + ) + } + + fn metadata(&self) -> Option { + // FIXME: refactor extended tool steps to make the build_compiler explicit, + // it is offset by one now for rustc tools + Some( + StepMetadata::build($tool_name, self.target) + .built_by(self.compiler.with_stage(self.compiler.stage.saturating_sub(1))) + .stage(self.compiler.stage) ) } } @@ -1205,7 +1233,13 @@ fn run_tool_build_step( tool_name: &'static str, path: &'static str, add_bins_to_sysroot: Option<&[&str]>, + add_features: Option, TargetSelection, &mut Vec)>, ) -> ToolBuildResult { + let mut extra_features = Vec::new(); + if let Some(func) = add_features { + func(builder, target, &mut extra_features); + } + let ToolBuildResult { tool_path, build_compiler, target_compiler } = builder.ensure(ToolBuild { compiler, @@ -1213,7 +1247,7 @@ fn run_tool_build_step( tool: tool_name, mode: Mode::ToolRustc, path, - extra_features: vec![], + extra_features, source_type: SourceType::InTree, allow_features: "", cargo_args: vec![], @@ -1256,7 +1290,12 @@ tool_extended!(Clippy { path: "src/tools/clippy", tool_name: "clippy-driver", stable: true, - add_bins_to_sysroot: ["clippy-driver"] + add_bins_to_sysroot: ["clippy-driver"], + add_features: |builder, target, features| { + if builder.config.jemalloc(target) { + features.push("jemalloc".to_string()); + } + } }); tool_extended!(Miri { path: "src/tools/miri", diff --git a/src/bootstrap/src/core/builder/cargo.rs b/src/bootstrap/src/core/builder/cargo.rs index 0e3c3aaee0ff..deb7106f185c 100644 --- a/src/bootstrap/src/core/builder/cargo.rs +++ b/src/bootstrap/src/core/builder/cargo.rs @@ -3,8 +3,8 @@ use std::ffi::{OsStr, OsString}; use std::path::{Path, PathBuf}; use super::{Builder, Kind}; +use crate::core::build_steps::test; use crate::core::build_steps::tool::SourceType; -use crate::core::build_steps::{compile, test}; use crate::core::config::SplitDebuginfo; use 
crate::core::config::flags::Color; use crate::utils::build_stamp; @@ -131,7 +131,10 @@ impl Cargo { } pub fn into_cmd(self) -> BootstrapCommand { - self.into() + let mut cmd: BootstrapCommand = self.into(); + // Disable caching for commands originating from Cargo-related operations. + cmd.do_not_cache(); + cmd } /// Same as [`Cargo::new`] except this one doesn't configure the linker with @@ -683,6 +686,7 @@ impl Builder<'_> { .arg("--print=file-names") .arg("--crate-type=proc-macro") .arg("-") + .stdin(std::process::Stdio::null()) .run_capture(self) .stderr(); @@ -842,7 +846,7 @@ impl Builder<'_> { // If this is for `miri-test`, prepare the sysroots. if cmd_kind == Kind::MiriTest { - self.ensure(compile::Std::new(compiler, compiler.host)); + self.std(compiler, compiler.host); let host_sysroot = self.sysroot(compiler); let miri_sysroot = test::Miri::build_miri_sysroot(self, compiler, target); cargo.env("MIRI_SYSROOT", &miri_sysroot); diff --git a/src/bootstrap/src/core/builder/mod.rs b/src/bootstrap/src/core/builder/mod.rs index b26f47a3171a..8e9e8b496de7 100644 --- a/src/bootstrap/src/core/builder/mod.rs +++ b/src/bootstrap/src/core/builder/mod.rs @@ -15,14 +15,14 @@ use tracing::instrument; pub use self::cargo::{Cargo, cargo_profile_var}; pub use crate::Compiler; +use crate::core::build_steps::compile::{Std, StdLink}; use crate::core::build_steps::{ check, clean, clippy, compile, dist, doc, gcc, install, llvm, run, setup, test, tool, vendor, }; use crate::core::config::flags::Subcommand; use crate::core::config::{DryRun, TargetSelection}; use crate::utils::cache::Cache; -use crate::utils::exec::{BootstrapCommand, command}; -use crate::utils::execution_context::ExecutionContext; +use crate::utils::exec::{BootstrapCommand, ExecutionContext, command}; use crate::utils::helpers::{self, LldThreads, add_dylib_path, exe, libdir, linker_args, t}; use crate::{Build, Crate, trace}; @@ -139,7 +139,7 @@ pub trait Step: 'static + Clone + Debug + PartialEq + Eq + Hash { /// 
Metadata that describes an executed step, mostly for testing and tracing. #[allow(unused)] -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub struct StepMetadata { name: &'static str, kind: Kind, @@ -150,7 +150,23 @@ pub struct StepMetadata { impl StepMetadata { pub fn build(name: &'static str, target: TargetSelection) -> Self { - Self { name, kind: Kind::Build, target, built_by: None, stage: None } + Self::new(name, target, Kind::Build) + } + + pub fn doc(name: &'static str, target: TargetSelection) -> Self { + Self::new(name, target, Kind::Doc) + } + + pub fn dist(name: &'static str, target: TargetSelection) -> Self { + Self::new(name, target, Kind::Dist) + } + + pub fn test(name: &'static str, target: TargetSelection) -> Self { + Self::new(name, target, Kind::Test) + } + + fn new(name: &'static str, target: TargetSelection, kind: Kind) -> Self { + Self { name, kind, target, built_by: None, stage: None } } pub fn built_by(mut self, compiler: Compiler) -> Self { @@ -1350,6 +1366,49 @@ impl<'a> Builder<'a> { resolved_compiler } + /// Obtain a standard library for the given target that will be built by the passed compiler. + /// The standard library will be linked to the sysroot of the passed compiler. + /// + /// Prefer using this method rather than manually invoking `Std::new`. + #[cfg_attr( + feature = "tracing", + instrument( + level = "trace", + name = "Builder::std", + target = "STD", + skip_all, + fields( + compiler = ?compiler, + target = ?target, + ), + ), + )] + pub fn std(&self, compiler: Compiler, target: TargetSelection) { + // FIXME: make the `Std` step return some type-level "proof" that std was indeed built, + // and then require passing that to all Cargo invocations that we do. + + // The "stage 0" std is always precompiled and comes with the stage0 compiler, so we have + // special logic for it, to avoid creating needless and confusing Std steps that don't + // actually build anything. 
+ if compiler.stage == 0 { + if target != compiler.host { + panic!( + r"It is not possible to build the standard library for `{target}` using the stage0 compiler. +You have to build a stage1 compiler for `{}` first, and then use it to build a standard library for `{target}`. +", + compiler.host + ) + } + + // We still need to link the prebuilt standard library into the ephemeral stage0 sysroot + self.ensure(StdLink::from_std(Std::new(compiler, target), compiler)); + } else { + // This step both compiles the std and links it into the compiler's sysroot. + // Yes, it's quite magical and side-effecty.. would be nice to refactor later. + self.ensure(Std::new(compiler, target)); + } + } + pub fn sysroot(&self, compiler: Compiler) -> PathBuf { self.ensure(compile::Sysroot::new(compiler)) } diff --git a/src/bootstrap/src/core/builder/tests.rs b/src/bootstrap/src/core/builder/tests.rs index 6268a2b59d6c..8adf93ea5288 100644 --- a/src/bootstrap/src/core/builder/tests.rs +++ b/src/bootstrap/src/core/builder/tests.rs @@ -9,6 +9,8 @@ use crate::Flags; use crate::core::build_steps::doc::DocumentationFormat; use crate::core::config::Config; use crate::utils::cache::ExecutedStep; +use crate::utils::helpers::get_host_target; +use crate::utils::tests::ConfigBuilder; use crate::utils::tests::git::{GitCtx, git_test}; static TEST_TRIPLE_1: &str = "i686-unknown-haiku"; @@ -191,58 +193,6 @@ fn check_missing_paths_for_x_test_tests() { } } -#[test] -fn test_exclude() { - let mut config = configure("test", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]); - config.skip = vec!["src/tools/tidy".into()]; - let cache = run_build(&[], config); - - // Ensure we have really excluded tidy - assert!(!cache.contains::()); - - // Ensure other tests are not affected. 
- assert!(cache.contains::()); -} - -#[test] -fn test_exclude_kind() { - let path = PathBuf::from("compiler/rustc_data_structures"); - - let mut config = configure("test", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]); - // Ensure our test is valid, and `test::Rustc` would be run without the exclude. - assert!(run_build(&[], config.clone()).contains::()); - // Ensure tests for rustc are not skipped. - config.skip = vec![path.clone()]; - assert!(run_build(&[], config.clone()).contains::()); - // Ensure builds for rustc are not skipped. - assert!(run_build(&[], config).contains::()); -} - -/// Ensure that if someone passes both a single crate and `library`, all library crates get built. -#[test] -fn alias_and_path_for_library() { - let mut cache = run_build( - &["library".into(), "core".into()], - configure("build", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]), - ); - assert_eq!( - first(cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1) - ] - ); - - let mut cache = run_build( - &["library".into(), "core".into()], - configure("doc", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]), - ); - assert_eq!( - first(cache.all::()), - &[doc_std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1)] - ); -} - #[test] fn ci_rustc_if_unchanged_invalidate_on_compiler_changes() { git_test(|ctx| { @@ -314,101 +264,6 @@ mod defaults { use crate::Config; use crate::core::builder::*; - #[test] - fn build_default() { - let mut cache = run_build(&[], configure("build", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1])); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - assert_eq!( - first(cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - ] - ); - assert!(!cache.all::().is_empty()); - // Make sure rustdoc is only built once. - assert_eq!( - first(cache.all::()), - // Recall that rustdoc stages are off-by-one - // - this is the compiler it's _linked_ to, not built with. 
- &[tool::Rustdoc { compiler: Compiler::new(1, a) }], - ); - assert_eq!( - first(cache.all::()), - &[rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0)], - ); - } - - #[test] - fn build_stage_0() { - let config = Config { stage: 0, ..configure("build", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]) }; - let mut cache = run_build(&[], config); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - assert_eq!( - first(cache.all::()), - &[std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0)] - ); - assert!(!cache.all::().is_empty()); - assert_eq!( - first(cache.all::()), - // This is the beta rustdoc. - // Add an assert here to make sure this is the only rustdoc built. - &[tool::Rustdoc { compiler: Compiler::new(0, a) }], - ); - assert!(cache.all::().is_empty()); - } - - #[test] - fn build_cross_compile() { - let config = Config { - stage: 1, - ..configure("build", &[TEST_TRIPLE_1, TEST_TRIPLE_2], &[TEST_TRIPLE_1, TEST_TRIPLE_2]) - }; - let mut cache = run_build(&[], config); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - let b = TargetSelection::from_user(TEST_TRIPLE_2); - - // Ideally, this build wouldn't actually have `target: a` - // rustdoc/rustcc/std here (the user only requested a host=B build, so - // there's not really a need for us to build for target A in this case - // (since we're producing stage 1 libraries/binaries). But currently - // bootstrap is just a bit buggy here; this should be fixed though. 
- assert_eq!( - first(cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 1), - ] - ); - assert_eq!( - first(cache.all::()), - &[ - compile::Assemble { target_compiler: Compiler::new(0, a) }, - compile::Assemble { target_compiler: Compiler::new(1, a) }, - compile::Assemble { target_compiler: Compiler::new(1, b) }, - ] - ); - assert_eq!( - first(cache.all::()), - &[ - tool::Rustdoc { compiler: Compiler::new(1, a) }, - tool::Rustdoc { compiler: Compiler::new(1, b) }, - ], - ); - assert_eq!( - first(cache.all::()), - &[ - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 0), - ] - ); - } - #[test] fn doc_default() { let mut config = configure("doc", &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]); @@ -445,326 +300,6 @@ mod dist { Config { stage: 2, ..super::configure("dist", host, target) } } - #[test] - fn dist_baseline() { - let mut cache = run_build(&[], configure(&[TEST_TRIPLE_1], &[TEST_TRIPLE_1])); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - - assert_eq!(first(cache.all::()), &[dist::Docs { host: a },]); - assert_eq!(first(cache.all::()), &[dist::Mingw { host: a },]); - assert_eq!( - first(cache.all::()), - &[dist::Rustc { compiler: Compiler::new(2, a) },] - ); - assert_eq!( - first(cache.all::()), - &[dist::Std { compiler: Compiler::new(1, a), target: a },] - ); - assert_eq!(first(cache.all::()), &[dist::Src]); - // Make sure rustdoc is only built once. 
- assert_eq!( - first(cache.all::()), - &[tool::Rustdoc { compiler: Compiler::new(2, a) },] - ); - } - - #[test] - fn dist_with_targets() { - let mut cache = - run_build(&[], configure(&[TEST_TRIPLE_1], &[TEST_TRIPLE_1, TEST_TRIPLE_2])); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - let b = TargetSelection::from_user(TEST_TRIPLE_2); - - assert_eq!( - first(cache.all::()), - &[dist::Docs { host: a }, dist::Docs { host: b },] - ); - assert_eq!( - first(cache.all::()), - &[dist::Mingw { host: a }, dist::Mingw { host: b },] - ); - assert_eq!( - first(cache.all::()), - &[dist::Rustc { compiler: Compiler::new(2, a) },] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Std { compiler: Compiler::new(1, a), target: a }, - dist::Std { compiler: Compiler::new(2, a), target: b }, - ] - ); - assert_eq!(first(cache.all::()), &[dist::Src]); - } - - #[test] - fn dist_with_hosts() { - let mut cache = run_build( - &[], - configure(&[TEST_TRIPLE_1, TEST_TRIPLE_2], &[TEST_TRIPLE_1, TEST_TRIPLE_2]), - ); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - let b = TargetSelection::from_user(TEST_TRIPLE_2); - - assert_eq!( - first(cache.all::()), - &[dist::Docs { host: a }, dist::Docs { host: b },] - ); - assert_eq!( - first(cache.all::()), - &[dist::Mingw { host: a }, dist::Mingw { host: b },] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Rustc { compiler: Compiler::new(2, a) }, - dist::Rustc { compiler: Compiler::new(2, b) }, - ] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Std { compiler: Compiler::new(1, a), target: a }, - dist::Std { compiler: Compiler::new(1, a), target: b }, - ] - ); - assert_eq!( - first(cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 2), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 2), - ], - ); - assert_eq!(first(cache.all::()), &[dist::Src]); - } 
- - #[test] - fn dist_only_cross_host() { - let b = TargetSelection::from_user(TEST_TRIPLE_2); - let mut config = - configure(&[TEST_TRIPLE_1, TEST_TRIPLE_2], &[TEST_TRIPLE_1, TEST_TRIPLE_2]); - config.docs = false; - config.extended = true; - config.hosts = vec![b]; - let mut cache = run_build(&[], config); - - assert_eq!( - first(cache.all::()), - &[dist::Rustc { compiler: Compiler::new(2, b) },] - ); - assert_eq!( - first(cache.all::()), - &[ - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 1), - ] - ); - } - - #[test] - fn dist_with_targets_and_hosts() { - let mut cache = run_build( - &[], - configure( - &[TEST_TRIPLE_1, TEST_TRIPLE_2], - &[TEST_TRIPLE_1, TEST_TRIPLE_2, TEST_TRIPLE_3], - ), - ); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - let b = TargetSelection::from_user(TEST_TRIPLE_2); - let c = TargetSelection::from_user(TEST_TRIPLE_3); - - assert_eq!( - first(cache.all::()), - &[dist::Docs { host: a }, dist::Docs { host: b }, dist::Docs { host: c },] - ); - assert_eq!( - first(cache.all::()), - &[dist::Mingw { host: a }, dist::Mingw { host: b }, dist::Mingw { host: c },] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Rustc { compiler: Compiler::new(2, a) }, - dist::Rustc { compiler: Compiler::new(2, b) }, - ] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Std { compiler: Compiler::new(1, a), target: a }, - dist::Std { compiler: Compiler::new(1, a), target: b }, - dist::Std { compiler: Compiler::new(2, a), target: c }, - ] - ); - assert_eq!(first(cache.all::()), &[dist::Src]); - } - - #[test] - fn dist_with_empty_host() { - let config = configure(&[], &[TEST_TRIPLE_3]); - let mut cache = run_build(&[], config); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - let c = TargetSelection::from_user(TEST_TRIPLE_3); - - assert_eq!(first(cache.all::()), &[dist::Docs { host: c },]); - assert_eq!(first(cache.all::()), &[dist::Mingw { host: c },]); - assert_eq!( - 
first(cache.all::()), - &[dist::Std { compiler: Compiler::new(2, a), target: c },] - ); - } - - #[test] - fn dist_with_same_targets_and_hosts() { - let mut cache = run_build( - &[], - configure(&[TEST_TRIPLE_1, TEST_TRIPLE_2], &[TEST_TRIPLE_1, TEST_TRIPLE_2]), - ); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - let b = TargetSelection::from_user(TEST_TRIPLE_2); - - assert_eq!( - first(cache.all::()), - &[dist::Docs { host: a }, dist::Docs { host: b },] - ); - assert_eq!( - first(cache.all::()), - &[dist::Mingw { host: a }, dist::Mingw { host: b },] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Rustc { compiler: Compiler::new(2, a) }, - dist::Rustc { compiler: Compiler::new(2, b) }, - ] - ); - assert_eq!( - first(cache.all::()), - &[ - dist::Std { compiler: Compiler::new(1, a), target: a }, - dist::Std { compiler: Compiler::new(1, a), target: b }, - ] - ); - assert_eq!(first(cache.all::()), &[dist::Src]); - assert_eq!( - first(cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 2), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 2), - ] - ); - assert_eq!( - first(cache.all::()), - &[ - compile::Assemble { target_compiler: Compiler::new(0, a) }, - compile::Assemble { target_compiler: Compiler::new(1, a) }, - compile::Assemble { target_compiler: Compiler::new(2, a) }, - compile::Assemble { target_compiler: Compiler::new(2, b) }, - ] - ); - } - - /// This also serves as an important regression test for - /// and . 
- #[test] - fn dist_all_cross() { - let cmd_args = - &["dist", "--stage", "2", "--dry-run", "--config=/does/not/exist"].map(str::to_owned); - let config_str = r#" - [rust] - channel = "nightly" - - [build] - extended = true - - build = "i686-unknown-haiku" - host = ["i686-unknown-netbsd"] - target = ["i686-unknown-netbsd"] - "#; - let config = Config::parse_inner(Flags::parse(cmd_args), |&_| toml::from_str(config_str)); - let mut cache = run_build(&[], config); - - // Stage 2 `compile::Rustc` should **NEVER** be cached here. - assert_eq!( - first(cache.all::()), - &[ - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_3, stage = 1), - ] - ); - } - - #[test] - fn build_all() { - let build = Build::new(configure( - &[TEST_TRIPLE_1, TEST_TRIPLE_2], - &[TEST_TRIPLE_1, TEST_TRIPLE_2, TEST_TRIPLE_3], - )); - let mut builder = Builder::new(&build); - builder.run_step_descriptions( - &Builder::get_step_descriptions(Kind::Build), - &["compiler/rustc".into(), "library".into()], - ); - - assert_eq!(builder.config.stage, 2); - - // `compile::Rustc` includes one-stage-off compiler information as the target compiler - // artifacts get copied from there to the target stage sysroot. - // For example, `stage2/bin/rustc` gets copied from the `stage1-rustc` build directory. 
- assert_eq!( - first(builder.cache.all::()), - &[ - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 1), - ] - ); - - assert_eq!( - first(builder.cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 2), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_2, stage = 2), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_3, stage = 2), - ] - ); - - assert_eq!( - first(builder.cache.all::()), - &[ - compile::Assemble { - target_compiler: Compiler::new(0, TargetSelection::from_user(TEST_TRIPLE_1),) - }, - compile::Assemble { - target_compiler: Compiler::new(1, TargetSelection::from_user(TEST_TRIPLE_1),) - }, - compile::Assemble { - target_compiler: Compiler::new(2, TargetSelection::from_user(TEST_TRIPLE_1),) - }, - compile::Assemble { - target_compiler: Compiler::new(2, TargetSelection::from_user(TEST_TRIPLE_2),) - }, - ] - ); - } - #[test] fn llvm_out_behaviour() { let mut config = configure(&[TEST_TRIPLE_1], &[TEST_TRIPLE_2]); @@ -782,85 +317,6 @@ mod dist { assert!(build.llvm_out(target).ends_with("llvm")); } - #[test] - fn build_with_empty_host() { - let config = configure(&[], &[TEST_TRIPLE_3]); - let build = Build::new(config); - let mut builder = Builder::new(&build); - builder.run_step_descriptions(&Builder::get_step_descriptions(Kind::Build), &[]); - - let a = TargetSelection::from_user(TEST_TRIPLE_1); - - assert_eq!( - first(builder.cache.all::()), - &[ - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - std!(TEST_TRIPLE_1 => TEST_TRIPLE_3, stage = 2), - ] - ); - assert_eq!( - first(builder.cache.all::()), - &[ - compile::Assemble { target_compiler: Compiler::new(0, a) }, - compile::Assemble { target_compiler: Compiler::new(1, a) }, - compile::Assemble { target_compiler: 
Compiler::new(2, a) }, - ] - ); - assert_eq!( - first(builder.cache.all::()), - &[ - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 0), - rustc!(TEST_TRIPLE_1 => TEST_TRIPLE_1, stage = 1), - ] - ); - } - - #[test] - fn test_with_no_doc_stage0() { - let mut config = configure(&[TEST_TRIPLE_1], &[TEST_TRIPLE_1]); - config.stage = 0; - config.paths = vec!["library/std".into()]; - config.cmd = Subcommand::Test { - test_args: vec![], - compiletest_rustc_args: vec![], - no_fail_fast: false, - no_doc: true, - doc: false, - bless: false, - force_rerun: false, - compare_mode: None, - rustfix_coverage: false, - pass: None, - run: None, - only_modified: false, - extra_checks: None, - no_capture: false, - }; - - let build = Build::new(config); - let mut builder = Builder::new(&build); - - let host = TargetSelection::from_user(TEST_TRIPLE_1); - - builder.run_step_descriptions( - &[StepDescription::from::(Kind::Test)], - &["library/std".into()], - ); - - // Ensure we don't build any compiler artifacts. - assert!(!builder.cache.contains::()); - assert_eq!( - first(builder.cache.all::()), - &[test::Crate { - compiler: Compiler::new(0, host), - target: host, - mode: crate::Mode::Std, - crates: vec!["std".to_owned()], - },] - ); - } - #[test] fn doc_ci() { let mut config = configure(&[TEST_TRIPLE_1], &[TEST_TRIPLE_1]); @@ -888,65 +344,6 @@ mod dist { &[tool::Rustdoc { compiler: Compiler::new(2, a) },] ); } - - #[test] - fn test_docs() { - // Behavior of `x.py test` doing various documentation tests. - let mut config = configure(&[TEST_TRIPLE_1], &[TEST_TRIPLE_1]); - config.cmd = Subcommand::Test { - test_args: vec![], - compiletest_rustc_args: vec![], - no_fail_fast: false, - doc: true, - no_doc: false, - bless: false, - force_rerun: false, - compare_mode: None, - rustfix_coverage: false, - pass: None, - run: None, - only_modified: false, - extra_checks: None, - no_capture: false, - }; - // Make sure rustfmt binary not being found isn't an error. 
- config.channel = "beta".to_string(); - let build = Build::new(config); - let mut builder = Builder::new(&build); - - builder.run_step_descriptions(&Builder::get_step_descriptions(Kind::Test), &[]); - let a = TargetSelection::from_user(TEST_TRIPLE_1); - - // error_index_generator uses stage 1 to share rustdoc artifacts with the - // rustdoc tool. - assert_eq!( - first(builder.cache.all::()), - &[doc::ErrorIndex { target: a },] - ); - assert_eq!( - first(builder.cache.all::()), - &[tool::ErrorIndex { compiler: Compiler::new(1, a) }] - ); - // Unfortunately rustdoc is built twice. Once from stage1 for compiletest - // (and other things), and once from stage0 for std crates. Ideally it - // would only be built once. If someone wants to fix this, it might be - // worth investigating if it would be possible to test std from stage1. - // Note that the stages here are +1 than what they actually are because - // Rustdoc::run swaps out the compiler with stage minus 1 if --stage is - // not 0. - // - // The stage 0 copy is the one downloaded for bootstrapping. It is - // (currently) needed to run "cargo test" on the linkchecker, and - // should be relatively "free". - assert_eq!( - first(builder.cache.all::()), - &[ - tool::Rustdoc { compiler: Compiler::new(0, a) }, - tool::Rustdoc { compiler: Compiler::new(1, a) }, - tool::Rustdoc { compiler: Compiler::new(2, a) }, - ] - ); - } } mod sysroot_target_dirs { @@ -1233,32 +630,739 @@ fn any_debug() { assert_eq!(x.downcast_ref::(), Some(&MyStruct { x: 7 })); } -/// The staging tests use insta for snapshot testing. +/// These tests use insta for snapshot testing. /// See bootstrap's README on how to bless the snapshots. 
-mod staging { +mod snapshot { + use std::path::PathBuf; + + use crate::core::build_steps::{compile, dist, doc, test, tool}; use crate::core::builder::tests::{ - TEST_TRIPLE_1, configure, configure_with_args, render_steps, run_build, + TEST_TRIPLE_1, TEST_TRIPLE_2, TEST_TRIPLE_3, configure, configure_with_args, first, + host_target, render_steps, run_build, }; + use crate::core::builder::{Builder, Kind, StepDescription, StepMetadata}; + use crate::core::config::TargetSelection; + use crate::utils::cache::Cache; + use crate::utils::helpers::get_host_target; + use crate::utils::tests::{ConfigBuilder, TestCtx}; + use crate::{Build, Compiler, Config, Flags, Subcommand}; + + #[test] + fn build_default() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustdoc 0 + "); + } + + #[test] + fn build_cross_compile() { + let ctx = TestCtx::new(); + + insta::assert_snapshot!( + ctx.config("build") + // Cross-compilation fails on stage 1, as we don't have a stage0 std available + // for non-host targets. 
+ .stage(2) + .hosts(&[&host_target(), TEST_TRIPLE_1]) + .targets(&[&host_target(), TEST_TRIPLE_1]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustc 2 -> std 2 + [build] rustc 1 -> std 1 + [build] rustc 2 -> std 2 + [build] rustdoc 1 + [build] llvm + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + "); + } + + #[test] + fn build_with_empty_host() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("build") + .hosts(&[]) + .targets(&[TEST_TRIPLE_1]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + " + ); + } + + #[test] + fn build_compiler_no_explicit_stage() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("compiler") + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + "); + + insta::assert_snapshot!( + ctx.config("build") + .path("rustc") + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + "); + } + + #[test] + #[should_panic] + fn build_compiler_stage_0() { + let ctx = TestCtx::new(); + ctx.config("build").path("compiler").stage(0).run(); + } #[test] fn build_compiler_stage_1() { - let mut cache = run_build( - &["compiler".into()], - configure_with_args(&["build", "--stage", "1"], &[TEST_TRIPLE_1], &[TEST_TRIPLE_1]), - ); - let steps = cache.into_executed_steps(); - insta::assert_snapshot!(render_steps(&steps), @r" - [build] rustc 0 -> std 0 - [build] llvm - [build] rustc 0 -> rustc 1 - [build] rustc 0 -> rustc 1 + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("compiler") + .stage(1) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 "); } + + #[test] + fn build_compiler_stage_2() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("compiler") + .stage(2) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + 
[build] rustc 1 -> rustc 2 + "); + } + + #[test] + fn build_library_no_explicit_stage() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("library") + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + "); + } + + #[test] + #[should_panic] + fn build_library_stage_0() { + let ctx = TestCtx::new(); + ctx.config("build").path("library").stage(0).run(); + } + + #[test] + fn build_library_stage_1() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("library") + .stage(1) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + "); + } + + #[test] + fn build_library_stage_2() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("library") + .stage(2) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustc 2 -> std 2 + "); + } + + #[test] + fn build_miri_no_explicit_stage() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("miri") + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 0 -> miri 1 + "); + } + + #[test] + #[should_panic] + fn build_miri_stage_0() { + let ctx = TestCtx::new(); + ctx.config("build").path("miri").stage(0).run(); + } + + #[test] + fn build_miri_stage_1() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("miri") + .stage(1) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 0 -> miri 1 + "); + } + + #[test] + fn build_miri_stage_2() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("miri") + .stage(2) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustc 1 -> miri 2 + "); + } + + #[test] + fn 
build_bootstrap_tool_no_explicit_stage() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("opt-dist") + .render_steps(), @"[build] rustc 0 -> OptimizedDist "); + } + + #[test] + #[should_panic] + fn build_bootstrap_tool_stage_0() { + let ctx = TestCtx::new(); + ctx.config("build").path("opt-dist").stage(0).run(); + } + + #[test] + fn build_bootstrap_tool_stage_1() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("opt-dist") + .stage(1) + .render_steps(), @"[build] rustc 0 -> OptimizedDist "); + } + + #[test] + fn build_bootstrap_tool_stage_2() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .path("opt-dist") + .stage(2) + .render_steps(), @"[build] rustc 0 -> OptimizedDist "); + } + + #[test] + fn build_default_stage() { + let ctx = TestCtx::new(); + assert_eq!(ctx.config("build").path("compiler").create_config().stage, 1); + } + + /// Ensure that if someone passes both a single crate and `library`, all + /// library crates get built. 
+ #[test] + fn alias_and_path_for_library() { + let ctx = TestCtx::new(); + insta::assert_snapshot!(ctx.config("build") + .paths(&["library", "core"]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + "); + + insta::assert_snapshot!(ctx.config("build") + .paths(&["std"]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + "); + + insta::assert_snapshot!(ctx.config("build") + .paths(&["core"]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + "); + + insta::assert_snapshot!(ctx.config("build") + .paths(&["alloc"]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + "); + + insta::assert_snapshot!(ctx.config("doc") + .paths(&["library", "core"]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustdoc 0 + [doc] std 1 + "); + } + + #[test] + fn build_all() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx.config("build") + .stage(2) + .paths(&["compiler/rustc", "library"]) + .hosts(&[&host_target(), TEST_TRIPLE_1]) + .targets(&[&host_target(), TEST_TRIPLE_1, TEST_TRIPLE_2]) + .render_steps(), @r" + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] llvm + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustc 2 -> std 2 + [build] rustc 2 -> std 2 + [build] rustc 2 -> std 2 + "); + } + + #[test] + fn dist_default_stage() { + let ctx = TestCtx::new(); + assert_eq!(ctx.config("dist").path("compiler").create_config().stage, 2); + } + + #[test] + fn dist_baseline() { + let ctx = TestCtx::new(); + // Note that stdlib is uplifted, that is why `[dist] rustc 1 -> std ` is in + // the output. 
+ insta::assert_snapshot!( + ctx + .config("dist") + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 0 -> LintDocs + [build] rustc 0 -> RustInstaller + [dist] docs + [doc] std 2 + [dist] mingw + [build] rustc 0 -> GenerateCopyright + [dist] rustc + [dist] rustc 1 -> std + [dist] src <> + " + ); + } + + #[test] + fn dist_extended() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("dist") + .args(&["--set", "build.extended=true"]) + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 0 -> WasmComponentLd + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustc 1 -> WasmComponentLd + [build] rustdoc 1 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 0 -> LintDocs + [build] rustc 0 -> RustInstaller + [dist] docs + [doc] std 2 + [dist] mingw + [build] rustc 0 -> GenerateCopyright + [dist] rustc + [dist] rustc 1 -> std + [dist] src <> + [build] rustc 0 -> rustfmt 1 + [build] rustc 0 -> cargo-fmt 1 + [build] rustc 0 -> clippy-driver 1 + [build] rustc 0 -> cargo-clippy 1 + [build] rustc 0 -> miri 1 + [build] rustc 0 -> cargo-miri 1 + "); + } + + #[test] + fn dist_with_targets() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("dist") + .hosts(&[&host_target()]) + .targets(&[&host_target(), TEST_TRIPLE_1]) + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [doc] std 2 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 0 -> LintDocs + [build] rustc 0 -> RustInstaller + [dist] docs + [dist] docs + [doc] std 2 + [doc] std 2 + [dist] 
mingw + [dist] mingw + [build] rustc 0 -> GenerateCopyright + [dist] rustc + [dist] rustc 1 -> std + [build] rustc 2 -> std 2 + [dist] rustc 2 -> std + [dist] src <> + " + ); + } + + #[test] + fn dist_with_hosts() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("dist") + .hosts(&[&host_target(), TEST_TRIPLE_1]) + .targets(&[&host_target()]) + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 0 -> LintDocs + [build] rustc 1 -> std 1 + [build] rustc 2 -> std 2 + [build] rustc 0 -> RustInstaller + [dist] docs + [doc] std 2 + [dist] mingw + [build] rustc 0 -> GenerateCopyright + [dist] rustc + [build] llvm + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [dist] rustc + [dist] rustc 1 -> std + [dist] src <> + " + ); + } + + #[test] + fn dist_with_targets_and_hosts() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("dist") + .hosts(&[&host_target(), TEST_TRIPLE_1]) + .targets(&[&host_target(), TEST_TRIPLE_1]) + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [doc] std 2 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 0 -> LintDocs + [build] rustc 1 -> std 1 + [build] rustc 2 -> std 2 + [build] rustc 0 -> RustInstaller + [dist] docs + [dist] docs + [doc] std 2 + [doc] std 2 + [dist] mingw + [dist] mingw + [build] rustc 0 -> GenerateCopyright + [dist] rustc + [build] llvm + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [dist] rustc + [dist] rustc 1 -> std + [dist] rustc 1 -> std + [dist] src <> + " + ); + } + + #[test] + fn dist_with_empty_host() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("dist") + .hosts(&[]) + 
.targets(&[TEST_TRIPLE_1]) + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustdoc 1 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 0 -> RustInstaller + [dist] docs + [doc] std 2 + [dist] mingw + [build] rustc 2 -> std 2 + [dist] rustc 2 -> std + "); + } + + /// This also serves as an important regression test for + /// and . + #[test] + fn dist_all_cross() { + let ctx = TestCtx::new(); + insta::assert_snapshot!( + ctx + .config("dist") + .hosts(&[TEST_TRIPLE_1]) + .targets(&[TEST_TRIPLE_1]) + .args(&["--set", "rust.channel=nightly", "--set", "build.extended=true"]) + .render_steps(), @r" + [build] rustc 0 -> UnstableBookGen + [build] rustc 0 -> Rustbook + [build] llvm + [build] rustc 0 -> rustc 1 + [build] rustc 0 -> WasmComponentLd + [build] rustc 1 -> std 1 + [build] rustc 1 -> rustc 2 + [build] rustc 1 -> WasmComponentLd + [build] rustdoc 1 + [doc] std 2 + [build] rustc 2 -> std 2 + [build] rustc 1 -> std 1 + [build] rustc 2 -> std 2 + [build] rustc 0 -> LintDocs + [build] rustc 0 -> RustInstaller + [dist] docs + [doc] std 2 + [dist] mingw + [build] llvm + [build] rustc 1 -> rustc 2 + [build] rustc 1 -> WasmComponentLd + [build] rustdoc 1 + [build] rustc 0 -> GenerateCopyright + [dist] rustc + [dist] rustc 1 -> std + [dist] src <> + [build] rustc 0 -> rustfmt 1 + [build] rustc 0 -> cargo-fmt 1 + [build] rustc 0 -> clippy-driver 1 + [build] rustc 0 -> cargo-clippy 1 + [build] rustc 0 -> miri 1 + [build] rustc 0 -> cargo-miri 1 + "); + } + + #[test] + fn test_exclude() { + let ctx = TestCtx::new(); + let steps = ctx.config("test").args(&["--skip", "src/tools/tidy"]).get_steps(); + + let host = TargetSelection::from_user(&host_target()); + steps.assert_contains(StepMetadata::test("RustdocUi", host)); + steps.assert_not_contains(test::Tidy); + } + + #[test] + fn test_exclude_kind() { + let ctx = 
TestCtx::new(); + let host = TargetSelection::from_user(&host_target()); + + let get_steps = |args: &[&str]| ctx.config("test").args(args).get_steps(); + + // Ensure our test is valid, and `test::Rustc` would be run without the exclude. + get_steps(&[]).assert_contains(StepMetadata::test("CrateLibrustc", host)); + + let steps = get_steps(&["--skip", "compiler/rustc_data_structures"]); + + // Ensure tests for rustc are not skipped. + steps.assert_contains(StepMetadata::test("CrateLibrustc", host)); + steps.assert_contains_fuzzy(StepMetadata::build("rustc", host)); + } +} + +struct ExecutedSteps { + steps: Vec, +} + +impl ExecutedSteps { + fn render(&self) -> String { + render_steps(&self.steps) + } + + #[track_caller] + fn assert_contains>(&self, metadata: M) { + let metadata = metadata.into(); + if !self.contains(&metadata) { + panic!( + "Metadata `{}` ({metadata:?}) not found in executed steps:\n{}", + render_metadata(&metadata), + self.render() + ); + } + } + + /// Try to match metadata by similarity, it does not need to match exactly. + /// Stages (and built_by compiler) do not need to match, but name, target and + /// kind has to match. 
+ #[track_caller] + fn assert_contains_fuzzy>(&self, metadata: M) { + let metadata = metadata.into(); + if !self.contains_fuzzy(&metadata) { + panic!( + "Metadata `{}` ({metadata:?}) not found in executed steps:\n{}", + render_metadata(&metadata), + self.render() + ); + } + } + + #[track_caller] + fn assert_not_contains>(&self, metadata: M) { + let metadata = metadata.into(); + if self.contains(&metadata) { + panic!( + "Metadata `{}` ({metadata:?}) found in executed steps (it should not be there):\n{}", + render_metadata(&metadata), + self.render() + ); + } + } + + fn contains(&self, metadata: &StepMetadata) -> bool { + self.steps + .iter() + .filter_map(|s| s.metadata.as_ref()) + .any(|executed_metadata| executed_metadata == metadata) + } + + fn contains_fuzzy(&self, metadata: &StepMetadata) -> bool { + self.steps + .iter() + .filter_map(|s| s.metadata.as_ref()) + .any(|executed_metadata| fuzzy_metadata_eq(executed_metadata, metadata)) + } +} + +fn fuzzy_metadata_eq(executed: &StepMetadata, to_match: &StepMetadata) -> bool { + let StepMetadata { name, kind, target, built_by: _, stage: _ } = executed; + *name == to_match.name && *kind == to_match.kind && *target == to_match.target +} + +impl From for StepMetadata { + fn from(step: S) -> Self { + step.metadata().expect("step has no metadata") + } +} + +impl ConfigBuilder { + fn run(self) -> Cache { + let config = self.create_config(); + + let kind = config.cmd.kind(); + let build = Build::new(config); + let builder = Builder::new(&build); + builder.run_step_descriptions(&Builder::get_step_descriptions(kind), &builder.paths); + builder.cache + } + + fn get_steps(self) -> ExecutedSteps { + let cache = self.run(); + ExecutedSteps { steps: cache.into_executed_steps() } + } + + fn render_steps(self) -> String { + self.get_steps().render() + } } /// Renders the executed bootstrap steps for usage in snapshot tests with insta. /// Only renders certain important steps. 
/// Each value in `steps` should be a tuple of (Step, step output). +/// +/// The arrow in the rendered output (`X -> Y`) means `X builds Y`. +/// This is similar to the output printed by bootstrap to stdout, but here it is +/// generated purely for the purpose of tests. fn render_steps(steps: &[ExecutedStep]) -> String { steps .iter() @@ -1269,24 +1373,34 @@ fn render_steps(steps: &[ExecutedStep]) -> String { return None; }; - let mut record = format!("[{}] ", metadata.kind.as_str()); - if let Some(compiler) = metadata.built_by { - write!(record, "{} -> ", render_compiler(compiler)); - } - let stage = - if let Some(stage) = metadata.stage { format!("{stage} ") } else { "".to_string() }; - write!(record, "{} {stage}<{}>", metadata.name, metadata.target); - Some(record) - }) - .map(|line| { - line.replace(TEST_TRIPLE_1, "target1") - .replace(TEST_TRIPLE_2, "target2") - .replace(TEST_TRIPLE_3, "target3") + Some(render_metadata(&metadata)) }) .collect::>() .join("\n") } -fn render_compiler(compiler: Compiler) -> String { - format!("rustc {} <{}>", compiler.stage, compiler.host) +fn render_metadata(metadata: &StepMetadata) -> String { + let mut record = format!("[{}] ", metadata.kind.as_str()); + if let Some(compiler) = metadata.built_by { + write!(record, "{} -> ", render_compiler(compiler)); + } + let stage = if let Some(stage) = metadata.stage { format!("{stage} ") } else { "".to_string() }; + write!(record, "{} {stage}<{}>", metadata.name, normalize_target(metadata.target)); + record +} + +fn normalize_target(target: TargetSelection) -> String { + target + .to_string() + .replace(&host_target(), "host") + .replace(TEST_TRIPLE_1, "target1") + .replace(TEST_TRIPLE_2, "target2") +} + +fn render_compiler(compiler: Compiler) -> String { + format!("rustc {} <{}>", compiler.stage, normalize_target(compiler.host)) +} + +fn host_target() -> String { + get_host_target().to_string() } diff --git a/src/bootstrap/src/core/config/config.rs 
b/src/bootstrap/src/core/config/config.rs index ff0fda2d2e69..d1ffdf24acd0 100644 --- a/src/bootstrap/src/core/config/config.rs +++ b/src/bootstrap/src/core/config/config.rs @@ -47,9 +47,8 @@ use crate::core::config::{ }; use crate::core::download::is_download_ci_available; use crate::utils::channel; -use crate::utils::exec::command; -use crate::utils::execution_context::ExecutionContext; -use crate::utils::helpers::exe; +use crate::utils::exec::{ExecutionContext, command}; +use crate::utils::helpers::{exe, get_host_target}; use crate::{GitInfo, OnceLock, TargetSelection, check_ci_llvm, helpers, t}; /// Each path in this list is considered "allowed" in the `download-rustc="if-unchanged"` logic. @@ -349,7 +348,7 @@ impl Config { stderr_is_tty: std::io::stderr().is_terminal(), // set by build.rs - host_target: TargetSelection::from_user(env!("BUILD_TRIPLE")), + host_target: get_host_target(), src: { let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); @@ -809,7 +808,7 @@ impl Config { config.initial_sysroot = t!(PathBuf::from_str( command(&config.initial_rustc) .args(["--print", "sysroot"]) - .run_always() + .run_in_dry_run() .run_capture_stdout(&config) .stdout() .trim() @@ -1023,7 +1022,7 @@ impl Config { || install_stage.is_some() || check_stage.is_some() || bench_stage.is_some(); - // See https://github.com/rust-lang/compiler-team/issues/326 + config.stage = match config.cmd { Subcommand::Check { .. } => flags_stage.or(check_stage).unwrap_or(0), Subcommand::Clippy { .. } | Subcommand::Fix => flags_stage.or(check_stage).unwrap_or(1), @@ -1051,6 +1050,12 @@ impl Config { | Subcommand::Vendor { .. } => flags_stage.unwrap_or(0), }; + // Now check that the selected stage makes sense, and if not, print a warning and end + if let (0, Subcommand::Build) = (config.stage, &config.cmd) { + eprintln!("WARNING: cannot build anything on stage 0. 
Use at least stage 1."); + exit!(1); + } + // CI should always run stage 2 builds, unless it specifically states otherwise #[cfg(not(test))] if flags_stage.is_none() && config.is_running_on_ci { @@ -1385,11 +1390,11 @@ impl Config { // all the git commands below are actually executed, because some follow-up code // in bootstrap might depend on the submodules being checked out. Furthermore, not all // the command executions below work with an empty output (produced during dry run). - // Therefore, all commands below are marked with `run_always()`, so that they also run in + // Therefore, all commands below are marked with `run_in_dry_run()`, so that they also run in // dry run mode. let submodule_git = || { let mut cmd = helpers::git(Some(&absolute_path)); - cmd.run_always(); + cmd.run_in_dry_run(); cmd }; @@ -1399,7 +1404,7 @@ impl Config { let checked_out_hash = checked_out_hash.trim_end(); // Determine commit that the submodule *should* have. let recorded = helpers::git(Some(&self.src)) - .run_always() + .run_in_dry_run() .args(["ls-tree", "HEAD"]) .arg(relative_path) .run_capture_stdout(self) @@ -1419,7 +1424,7 @@ impl Config { helpers::git(Some(&self.src)) .allow_failure() - .run_always() + .run_in_dry_run() .args(["submodule", "-q", "sync"]) .arg(relative_path) .run(self); @@ -1430,12 +1435,12 @@ impl Config { // even though that has no relation to the upstream for the submodule. let current_branch = helpers::git(Some(&self.src)) .allow_failure() - .run_always() + .run_in_dry_run() .args(["symbolic-ref", "--short", "HEAD"]) .run_capture(self); let mut git = helpers::git(Some(&self.src)).allow_failure(); - git.run_always(); + git.run_in_dry_run(); if current_branch.is_success() { // If there is a tag named after the current branch, git will try to disambiguate by prepending `heads/` to the branch name. // This syntax isn't accepted by `branch.{branch}`. Strip it. 
diff --git a/src/bootstrap/src/core/metadata.rs b/src/bootstrap/src/core/metadata.rs index 2706aba5ffc8..c79fbbeb55cc 100644 --- a/src/bootstrap/src/core/metadata.rs +++ b/src/bootstrap/src/core/metadata.rs @@ -88,7 +88,7 @@ fn workspace_members(build: &Build) -> Vec { .arg("--no-deps") .arg("--manifest-path") .arg(build.src.join(manifest_path)); - let metadata_output = cargo.run_always().run_capture_stdout(build).stdout(); + let metadata_output = cargo.run_in_dry_run().run_capture_stdout(build).stdout(); let Output { packages, .. } = t!(serde_json::from_str(&metadata_output)); packages }; diff --git a/src/bootstrap/src/core/sanity.rs b/src/bootstrap/src/core/sanity.rs index 493f73b21fe1..958058d982ba 100644 --- a/src/bootstrap/src/core/sanity.rs +++ b/src/bootstrap/src/core/sanity.rs @@ -202,7 +202,7 @@ than building it. let stage0_supported_target_list: HashSet = command(&build.config.initial_rustc) .args(["--print", "target-list"]) - .run_always() + .run_in_dry_run() .run_capture_stdout(&build) .stdout() .lines() @@ -366,7 +366,7 @@ than building it. // Cygwin. The Cygwin build does not have generators for Visual // Studio, so detect that here and error. 
let out = - command("cmake").arg("--help").run_always().run_capture_stdout(&build).stdout(); + command("cmake").arg("--help").run_in_dry_run().run_capture_stdout(&build).stdout(); if !out.contains("Visual Studio") { panic!( " diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index f44fe4548a1d..ef5c28272b8e 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -31,12 +31,12 @@ use cc::Tool; use termcolor::{ColorChoice, StandardStream, WriteColor}; use utils::build_stamp::BuildStamp; use utils::channel::GitInfo; -use utils::execution_context::ExecutionContext; +use utils::exec::ExecutionContext; use crate::core::builder; use crate::core::builder::Kind; use crate::core::config::{DryRun, LldMode, LlvmLibunwind, TargetSelection, flags}; -use crate::utils::exec::{BehaviorOnFailure, BootstrapCommand, CommandOutput, OutputMode, command}; +use crate::utils::exec::{BootstrapCommand, command}; use crate::utils::helpers::{ self, dir_is_empty, exe, libdir, set_file_times, split_debuginfo, symlink_dir, }; @@ -383,7 +383,7 @@ impl Build { let in_tree_gcc_info = config.in_tree_gcc_info.clone(); let initial_target_libdir = command(&config.initial_rustc) - .run_always() + .run_in_dry_run() .args(["--print", "target-libdir"]) .run_capture_stdout(&config) .stdout() @@ -490,7 +490,7 @@ impl Build { // If local-rust is the same major.minor as the current version, then force a // local-rebuild let local_version_verbose = command(&build.initial_rustc) - .run_always() + .run_in_dry_run() .args(["--version", "--verbose"]) .run_capture_stdout(&build) .stdout(); @@ -518,7 +518,7 @@ impl Build { // Make sure we update these before gathering metadata so we don't get an error about missing // Cargo.toml files. 
- let rust_submodules = ["library/backtrace", "library/stdarch"]; + let rust_submodules = ["library/backtrace"]; for s in rust_submodules { build.require_submodule( s, @@ -949,7 +949,7 @@ impl Build { static SYSROOT_CACHE: OnceLock = OnceLock::new(); SYSROOT_CACHE.get_or_init(|| { command(&self.initial_rustc) - .run_always() + .run_in_dry_run() .args(["--print", "sysroot"]) .run_capture_stdout(self) .stdout() @@ -1512,7 +1512,7 @@ impl Build { "refs/remotes/origin/{}..HEAD", self.config.stage0_metadata.config.nightly_branch )) - .run_always() + .run_in_dry_run() .run_capture(self) .stdout() }); diff --git a/src/bootstrap/src/utils/change_tracker.rs b/src/bootstrap/src/utils/change_tracker.rs index 93e01a58077e..7c588cfea8c2 100644 --- a/src/bootstrap/src/utils/change_tracker.rs +++ b/src/bootstrap/src/utils/change_tracker.rs @@ -426,4 +426,9 @@ pub const CONFIG_CHANGE_HISTORY: &[ChangeInfo] = &[ severity: ChangeSeverity::Info, summary: "Added new option `tool.TOOL_NAME.features` to specify the features to compile a tool with", }, + ChangeInfo { + change_id: 142581, + severity: ChangeSeverity::Warning, + summary: "It is no longer possible to `x build` with stage 0. 
All build commands have to be on stage 1+.", + }, ]; diff --git a/src/bootstrap/src/utils/channel.rs b/src/bootstrap/src/utils/channel.rs index b28ab5737740..21b4257e54d0 100644 --- a/src/bootstrap/src/utils/channel.rs +++ b/src/bootstrap/src/utils/channel.rs @@ -8,7 +8,7 @@ use std::fs; use std::path::Path; -use super::execution_context::ExecutionContext; +use super::exec::ExecutionContext; use super::helpers; use crate::Build; use crate::utils::helpers::t; @@ -66,19 +66,22 @@ impl GitInfo { .arg("-1") .arg("--date=short") .arg("--pretty=format:%cd") - .run_always() + .run_in_dry_run() .start_capture_stdout(&exec_ctx); let mut git_hash_cmd = helpers::git(Some(dir)); - let ver_hash = - git_hash_cmd.arg("rev-parse").arg("HEAD").run_always().start_capture_stdout(&exec_ctx); + let ver_hash = git_hash_cmd + .arg("rev-parse") + .arg("HEAD") + .run_in_dry_run() + .start_capture_stdout(&exec_ctx); let mut git_short_hash_cmd = helpers::git(Some(dir)); let short_ver_hash = git_short_hash_cmd .arg("rev-parse") .arg("--short=9") .arg("HEAD") - .run_always() + .run_in_dry_run() .start_capture_stdout(&exec_ctx); GitInfo::Present(Some(Info { diff --git a/src/bootstrap/src/utils/exec.rs b/src/bootstrap/src/utils/exec.rs index eb9802bf2e1b..d092765ef762 100644 --- a/src/bootstrap/src/utils/exec.rs +++ b/src/bootstrap/src/utils/exec.rs @@ -1,16 +1,29 @@ //! Command Execution Module //! -//! This module provides a structured way to execute and manage commands efficiently, -//! ensuring controlled failure handling and output management. -use std::ffi::OsStr; +//! Provides a structured interface for executing and managing commands during bootstrap, +//! with support for controlled failure handling and output management. +//! +//! This module defines the [`ExecutionContext`] type, which encapsulates global configuration +//! relevant to command execution in the bootstrap process. This includes settings such as +//! dry-run mode, verbosity level, and failure behavior. 
+ +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; use std::fmt::{Debug, Formatter}; +use std::hash::Hash; +use std::panic::Location; use std::path::Path; -use std::process::{Command, CommandArgs, CommandEnvs, ExitStatus, Output, Stdio}; +use std::process::{Child, Command, CommandArgs, CommandEnvs, ExitStatus, Output, Stdio}; +use std::sync::{Arc, Mutex}; use build_helper::ci::CiEnv; use build_helper::drop_bomb::DropBomb; +use build_helper::exit; -use super::execution_context::{DeferredCommand, ExecutionContext}; +use crate::PathBuf; +use crate::core::config::DryRun; +#[cfg(feature = "tracing")] +use crate::trace_cmd; /// What should be done when the command fails. #[derive(Debug, Copy, Clone)] @@ -49,6 +62,14 @@ impl OutputMode { } } +#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] +pub struct CommandCacheKey { + program: OsString, + args: Vec, + envs: Vec<(OsString, Option)>, + cwd: Option, +} + /// Wrapper around `std::process::Command`. /// /// By default, the command will exit bootstrap if it fails. @@ -60,16 +81,20 @@ impl OutputMode { /// /// Bootstrap will print a debug log to stdout if the command fails and failure is not allowed. /// +/// By default, command executions are cached based on their workdir, program, arguments, and environment variables. +/// This avoids re-running identical commands unnecessarily, unless caching is explicitly disabled. +/// /// [allow_failure]: BootstrapCommand::allow_failure /// [delay_failure]: BootstrapCommand::delay_failure pub struct BootstrapCommand { command: Command, pub failure_behavior: BehaviorOnFailure, // Run the command even during dry run - pub run_always: bool, + pub run_in_dry_run: bool, // This field makes sure that each command is executed (or disarmed) before it is dropped, // to avoid forgetting to execute a command. 
drop_bomb: DropBomb, + should_cache: bool, } impl<'a> BootstrapCommand { @@ -77,12 +102,16 @@ impl<'a> BootstrapCommand { pub fn new>(program: S) -> Self { Command::new(program).into() } - pub fn arg>(&mut self, arg: S) -> &mut Self { self.command.arg(arg.as_ref()); self } + pub fn do_not_cache(&mut self) -> &mut Self { + self.should_cache = false; + self + } + pub fn args(&mut self, args: I) -> &mut Self where I: IntoIterator, @@ -119,6 +148,11 @@ impl<'a> BootstrapCommand { self } + pub fn stdin(&mut self, stdin: std::process::Stdio) -> &mut Self { + self.command.stdin(stdin); + self + } + #[must_use] pub fn delay_failure(self) -> Self { Self { failure_behavior: BehaviorOnFailure::DelayFail, ..self } @@ -133,8 +167,8 @@ impl<'a> BootstrapCommand { Self { failure_behavior: BehaviorOnFailure::Ignore, ..self } } - pub fn run_always(&mut self) -> &mut Self { - self.run_always = true; + pub fn run_in_dry_run(&mut self) -> &mut Self { + self.run_in_dry_run = true; self } @@ -178,9 +212,11 @@ impl<'a> BootstrapCommand { /// Provides access to the stdlib Command inside. /// FIXME: This function should be eventually removed from bootstrap. pub fn as_command_mut(&mut self) -> &mut Command { - // We don't know what will happen with the returned command, so we need to mark this - // command as executed proactively. + // We proactively mark this command as executed since we can't be certain how the returned + // command will be handled. Caching must also be avoided here, as the inner command could be + // modified externally without us being aware. 
self.mark_as_executed(); + self.do_not_cache(); &mut self.command } @@ -206,6 +242,22 @@ impl<'a> BootstrapCommand { self.env("TERM", "xterm").args(["--color", "always"]); } } + + pub fn cache_key(&self) -> Option { + if !self.should_cache { + return None; + } + let command = &self.command; + Some(CommandCacheKey { + program: command.get_program().into(), + args: command.get_args().map(OsStr::to_os_string).collect(), + envs: command + .get_envs() + .map(|(k, v)| (k.to_os_string(), v.map(|val| val.to_os_string()))) + .collect(), + cwd: command.get_current_dir().map(Path::to_path_buf), + }) + } } impl Debug for BootstrapCommand { @@ -219,17 +271,18 @@ impl From for BootstrapCommand { #[track_caller] fn from(command: Command) -> Self { let program = command.get_program().to_owned(); - Self { + should_cache: true, command, failure_behavior: BehaviorOnFailure::Exit, - run_always: false, + run_in_dry_run: false, drop_bomb: DropBomb::arm(program), } } } /// Represents the current status of `BootstrapCommand`. +#[derive(Clone, PartialEq)] enum CommandStatus { /// The command has started and finished with some status. Finished(ExitStatus), @@ -246,6 +299,7 @@ pub fn command>(program: S) -> BootstrapCommand { } /// Represents the output of an executed process. 
+#[derive(Clone, PartialEq)] pub struct CommandOutput { status: CommandStatus, stdout: Option>, @@ -368,3 +422,329 @@ impl FormatShortCmd for Command { line.join(" ") } } + +#[derive(Clone, Default)] +pub struct ExecutionContext { + dry_run: DryRun, + verbose: u8, + pub fail_fast: bool, + delayed_failures: Arc>>, + command_cache: Arc, +} + +#[derive(Default)] +pub struct CommandCache { + cache: Mutex>, +} + +enum CommandState<'a> { + Cached(CommandOutput), + Deferred { + process: Option>, + command: &'a mut BootstrapCommand, + stdout: OutputMode, + stderr: OutputMode, + executed_at: &'a Location<'a>, + cache_key: Option, + }, +} + +#[must_use] +pub struct DeferredCommand<'a> { + state: CommandState<'a>, +} + +impl CommandCache { + pub fn get(&self, key: &CommandCacheKey) -> Option { + self.cache.lock().unwrap().get(key).cloned() + } + + pub fn insert(&self, key: CommandCacheKey, output: CommandOutput) { + self.cache.lock().unwrap().insert(key, output); + } +} + +impl ExecutionContext { + pub fn new() -> Self { + ExecutionContext::default() + } + + pub fn dry_run(&self) -> bool { + match self.dry_run { + DryRun::Disabled => false, + DryRun::SelfCheck | DryRun::UserSelected => true, + } + } + + pub fn get_dry_run(&self) -> &DryRun { + &self.dry_run + } + + pub fn verbose(&self, f: impl Fn()) { + if self.is_verbose() { + f() + } + } + + pub fn is_verbose(&self) -> bool { + self.verbose > 0 + } + + pub fn fail_fast(&self) -> bool { + self.fail_fast + } + + pub fn set_dry_run(&mut self, value: DryRun) { + self.dry_run = value; + } + + pub fn set_verbose(&mut self, value: u8) { + self.verbose = value; + } + + pub fn set_fail_fast(&mut self, value: bool) { + self.fail_fast = value; + } + + pub fn add_to_delay_failure(&self, message: String) { + self.delayed_failures.lock().unwrap().push(message); + } + + pub fn report_failures_and_exit(&self) { + let failures = self.delayed_failures.lock().unwrap(); + if failures.is_empty() { + return; + } + eprintln!("\n{} command(s) did 
not execute successfully:\n", failures.len()); + for failure in &*failures { + eprintln!(" - {failure}"); + } + exit!(1); + } + + /// Execute a command and return its output. + /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to + /// execute commands. They internally call this method. + #[track_caller] + pub fn start<'a>( + &self, + command: &'a mut BootstrapCommand, + stdout: OutputMode, + stderr: OutputMode, + ) -> DeferredCommand<'a> { + let cache_key = command.cache_key(); + + if let Some(cached_output) = cache_key.as_ref().and_then(|key| self.command_cache.get(key)) + { + command.mark_as_executed(); + self.verbose(|| println!("Cache hit: {command:?}")); + return DeferredCommand { state: CommandState::Cached(cached_output) }; + } + + let created_at = command.get_created_location(); + let executed_at = std::panic::Location::caller(); + + if self.dry_run() && !command.run_in_dry_run { + return DeferredCommand { + state: CommandState::Deferred { + process: None, + command, + stdout, + stderr, + executed_at, + cache_key, + }, + }; + } + + #[cfg(feature = "tracing")] + let _run_span = trace_cmd!(command); + + self.verbose(|| { + println!("running: {command:?} (created at {created_at}, executed at {executed_at})") + }); + + let cmd = &mut command.command; + cmd.stdout(stdout.stdio()); + cmd.stderr(stderr.stdio()); + + let child = cmd.spawn(); + + DeferredCommand { + state: CommandState::Deferred { + process: Some(child), + command, + stdout, + stderr, + executed_at, + cache_key, + }, + } + } + + /// Execute a command and return its output. + /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to + /// execute commands. They internally call this method. 
+ #[track_caller] + pub fn run( + &self, + command: &mut BootstrapCommand, + stdout: OutputMode, + stderr: OutputMode, + ) -> CommandOutput { + self.start(command, stdout, stderr).wait_for_output(self) + } + + fn fail(&self, message: &str, output: CommandOutput) -> ! { + if self.is_verbose() { + println!("{message}"); + } else { + let (stdout, stderr) = (output.stdout_if_present(), output.stderr_if_present()); + // If the command captures output, the user would not see any indication that + // it has failed. In this case, print a more verbose error, since to provide more + // context. + if stdout.is_some() || stderr.is_some() { + if let Some(stdout) = output.stdout_if_present().take_if(|s| !s.trim().is_empty()) { + println!("STDOUT:\n{stdout}\n"); + } + if let Some(stderr) = output.stderr_if_present().take_if(|s| !s.trim().is_empty()) { + println!("STDERR:\n{stderr}\n"); + } + println!("Command has failed. Rerun with -v to see more details."); + } else { + println!("Command has failed. Rerun with -v to see more details."); + } + } + exit!(1); + } +} + +impl AsRef for ExecutionContext { + fn as_ref(&self) -> &ExecutionContext { + self + } +} + +impl<'a> DeferredCommand<'a> { + pub fn wait_for_output(self, exec_ctx: impl AsRef) -> CommandOutput { + match self.state { + CommandState::Cached(output) => output, + CommandState::Deferred { process, command, stdout, stderr, executed_at, cache_key } => { + let exec_ctx = exec_ctx.as_ref(); + + let output = + Self::finish_process(process, command, stdout, stderr, executed_at, exec_ctx); + + if (!exec_ctx.dry_run() || command.run_in_dry_run) + && let (Some(cache_key), Some(_)) = (&cache_key, output.status()) + { + exec_ctx.command_cache.insert(cache_key.clone(), output.clone()); + } + + output + } + } + } + + pub fn finish_process( + mut process: Option>, + command: &mut BootstrapCommand, + stdout: OutputMode, + stderr: OutputMode, + executed_at: &'a std::panic::Location<'a>, + exec_ctx: &ExecutionContext, + ) -> 
CommandOutput { + command.mark_as_executed(); + + let process = match process.take() { + Some(p) => p, + None => return CommandOutput::default(), + }; + + let created_at = command.get_created_location(); + + let mut message = String::new(); + + let output = match process { + Ok(child) => match child.wait_with_output() { + Ok(result) if result.status.success() => { + // Successful execution + CommandOutput::from_output(result, stdout, stderr) + } + Ok(result) => { + // Command ran but failed + use std::fmt::Write; + + writeln!( + message, + r#" +Command {command:?} did not execute successfully. +Expected success, got {} +Created at: {created_at} +Executed at: {executed_at}"#, + result.status, + ) + .unwrap(); + + let output = CommandOutput::from_output(result, stdout, stderr); + + if stdout.captures() { + writeln!(message, "\nSTDOUT ----\n{}", output.stdout().trim()).unwrap(); + } + if stderr.captures() { + writeln!(message, "\nSTDERR ----\n{}", output.stderr().trim()).unwrap(); + } + + output + } + Err(e) => { + // Failed to wait for output + use std::fmt::Write; + + writeln!( + message, + "\n\nCommand {command:?} did not execute successfully.\ + \nIt was not possible to execute the command: {e:?}" + ) + .unwrap(); + + CommandOutput::did_not_start(stdout, stderr) + } + }, + Err(e) => { + // Failed to spawn the command + use std::fmt::Write; + + writeln!( + message, + "\n\nCommand {command:?} did not execute successfully.\ + \nIt was not possible to execute the command: {e:?}" + ) + .unwrap(); + + CommandOutput::did_not_start(stdout, stderr) + } + }; + + if !output.is_success() { + match command.failure_behavior { + BehaviorOnFailure::DelayFail => { + if exec_ctx.fail_fast { + exec_ctx.fail(&message, output); + } + exec_ctx.add_to_delay_failure(message); + } + BehaviorOnFailure::Exit => { + exec_ctx.fail(&message, output); + } + BehaviorOnFailure::Ignore => { + // If failures are allowed, either the error has been printed already + // (OutputMode::Print) or the user 
used a capture output mode and wants to + // handle the error output on their own. + } + } + } + + output + } +} diff --git a/src/bootstrap/src/utils/execution_context.rs b/src/bootstrap/src/utils/execution_context.rs deleted file mode 100644 index 5b9fef3f8248..000000000000 --- a/src/bootstrap/src/utils/execution_context.rs +++ /dev/null @@ -1,265 +0,0 @@ -//! Shared execution context for running bootstrap commands. -//! -//! This module provides the [`ExecutionContext`] type, which holds global configuration -//! relevant during the execution of commands in bootstrap. This includes dry-run -//! mode, verbosity level, and behavior on failure. -use std::panic::Location; -use std::process::Child; -use std::sync::{Arc, Mutex}; - -use crate::core::config::DryRun; -#[cfg(feature = "tracing")] -use crate::trace_cmd; -use crate::{BehaviorOnFailure, BootstrapCommand, CommandOutput, OutputMode, exit}; - -#[derive(Clone, Default)] -pub struct ExecutionContext { - dry_run: DryRun, - verbose: u8, - pub fail_fast: bool, - delayed_failures: Arc>>, -} - -impl ExecutionContext { - pub fn new() -> Self { - ExecutionContext::default() - } - - pub fn dry_run(&self) -> bool { - match self.dry_run { - DryRun::Disabled => false, - DryRun::SelfCheck | DryRun::UserSelected => true, - } - } - - pub fn get_dry_run(&self) -> &DryRun { - &self.dry_run - } - - pub fn verbose(&self, f: impl Fn()) { - if self.is_verbose() { - f() - } - } - - pub fn is_verbose(&self) -> bool { - self.verbose > 0 - } - - pub fn fail_fast(&self) -> bool { - self.fail_fast - } - - pub fn set_dry_run(&mut self, value: DryRun) { - self.dry_run = value; - } - - pub fn set_verbose(&mut self, value: u8) { - self.verbose = value; - } - - pub fn set_fail_fast(&mut self, value: bool) { - self.fail_fast = value; - } - - pub fn add_to_delay_failure(&self, message: String) { - self.delayed_failures.lock().unwrap().push(message); - } - - pub fn report_failures_and_exit(&self) { - let failures = 
self.delayed_failures.lock().unwrap(); - if failures.is_empty() { - return; - } - eprintln!("\n{} command(s) did not execute successfully:\n", failures.len()); - for failure in &*failures { - eprintln!(" - {failure}"); - } - exit!(1); - } - - /// Execute a command and return its output. - /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to - /// execute commands. They internally call this method. - #[track_caller] - pub fn start<'a>( - &self, - command: &'a mut BootstrapCommand, - stdout: OutputMode, - stderr: OutputMode, - ) -> DeferredCommand<'a> { - command.mark_as_executed(); - - let created_at = command.get_created_location(); - let executed_at = std::panic::Location::caller(); - - if self.dry_run() && !command.run_always { - return DeferredCommand { process: None, stdout, stderr, command, executed_at }; - } - - #[cfg(feature = "tracing")] - let _run_span = trace_cmd!(command); - - self.verbose(|| { - println!("running: {command:?} (created at {created_at}, executed at {executed_at})") - }); - - let cmd = command.as_command_mut(); - cmd.stdout(stdout.stdio()); - cmd.stderr(stderr.stdio()); - - let child = cmd.spawn(); - - DeferredCommand { process: Some(child), stdout, stderr, command, executed_at } - } - - /// Execute a command and return its output. - /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to - /// execute commands. They internally call this method. - #[track_caller] - pub fn run( - &self, - command: &mut BootstrapCommand, - stdout: OutputMode, - stderr: OutputMode, - ) -> CommandOutput { - self.start(command, stdout, stderr).wait_for_output(self) - } - - fn fail(&self, message: &str, output: CommandOutput) -> ! { - if self.is_verbose() { - println!("{message}"); - } else { - let (stdout, stderr) = (output.stdout_if_present(), output.stderr_if_present()); - // If the command captures output, the user would not see any indication that - // it has failed. 
In this case, print a more verbose error, since to provide more - // context. - if stdout.is_some() || stderr.is_some() { - if let Some(stdout) = output.stdout_if_present().take_if(|s| !s.trim().is_empty()) { - println!("STDOUT:\n{stdout}\n"); - } - if let Some(stderr) = output.stderr_if_present().take_if(|s| !s.trim().is_empty()) { - println!("STDERR:\n{stderr}\n"); - } - println!("Command has failed. Rerun with -v to see more details."); - } else { - println!("Command has failed. Rerun with -v to see more details."); - } - } - exit!(1); - } -} - -impl AsRef for ExecutionContext { - fn as_ref(&self) -> &ExecutionContext { - self - } -} - -pub struct DeferredCommand<'a> { - process: Option>, - command: &'a mut BootstrapCommand, - stdout: OutputMode, - stderr: OutputMode, - executed_at: &'a Location<'a>, -} - -impl<'a> DeferredCommand<'a> { - pub fn wait_for_output(mut self, exec_ctx: impl AsRef) -> CommandOutput { - let exec_ctx = exec_ctx.as_ref(); - - let process = match self.process.take() { - Some(p) => p, - None => return CommandOutput::default(), - }; - - let created_at = self.command.get_created_location(); - let executed_at = self.executed_at; - - let mut message = String::new(); - - let output = match process { - Ok(child) => match child.wait_with_output() { - Ok(result) if result.status.success() => { - // Successful execution - CommandOutput::from_output(result, self.stdout, self.stderr) - } - Ok(result) => { - // Command ran but failed - use std::fmt::Write; - - writeln!( - message, - r#" -Command {:?} did not execute successfully. 
-Expected success, got {} -Created at: {created_at} -Executed at: {executed_at}"#, - self.command, result.status, - ) - .unwrap(); - - let output = CommandOutput::from_output(result, self.stdout, self.stderr); - - if self.stdout.captures() { - writeln!(message, "\nSTDOUT ----\n{}", output.stdout().trim()).unwrap(); - } - if self.stderr.captures() { - writeln!(message, "\nSTDERR ----\n{}", output.stderr().trim()).unwrap(); - } - - output - } - Err(e) => { - // Failed to wait for output - use std::fmt::Write; - - writeln!( - message, - "\n\nCommand {:?} did not execute successfully.\ - \nIt was not possible to execute the command: {e:?}", - self.command - ) - .unwrap(); - - CommandOutput::did_not_start(self.stdout, self.stderr) - } - }, - Err(e) => { - // Failed to spawn the command - use std::fmt::Write; - - writeln!( - message, - "\n\nCommand {:?} did not execute successfully.\ - \nIt was not possible to execute the command: {e:?}", - self.command - ) - .unwrap(); - - CommandOutput::did_not_start(self.stdout, self.stderr) - } - }; - - if !output.is_success() { - match self.command.failure_behavior { - BehaviorOnFailure::DelayFail => { - if exec_ctx.fail_fast { - exec_ctx.fail(&message, output); - } - exec_ctx.add_to_delay_failure(message); - } - BehaviorOnFailure::Exit => { - exec_ctx.fail(&message, output); - } - BehaviorOnFailure::Ignore => { - // If failures are allowed, either the error has been printed already - // (OutputMode::Print) or the user used a capture output mode and wants to - // handle the error output on their own. - } - } - } - - output - } -} diff --git a/src/bootstrap/src/utils/helpers.rs b/src/bootstrap/src/utils/helpers.rs index 2f18fb603182..3c5f612daa7d 100644 --- a/src/bootstrap/src/utils/helpers.rs +++ b/src/bootstrap/src/utils/helpers.rs @@ -178,6 +178,11 @@ pub fn symlink_dir(config: &Config, original: &Path, link: &Path) -> io::Result< } } +/// Return the host target on which we are currently running. 
+pub fn get_host_target() -> TargetSelection { + TargetSelection::from_user(env!("BUILD_TRIPLE")) +} + /// Rename a file if from and to are in the same filesystem or /// copy and remove the file otherwise pub fn move_file, Q: AsRef>(from: P, to: Q) -> io::Result<()> { diff --git a/src/bootstrap/src/utils/mod.rs b/src/bootstrap/src/utils/mod.rs index 5a0b90801e73..169fcec303e9 100644 --- a/src/bootstrap/src/utils/mod.rs +++ b/src/bootstrap/src/utils/mod.rs @@ -8,7 +8,6 @@ pub(crate) mod cc_detect; pub(crate) mod change_tracker; pub(crate) mod channel; pub(crate) mod exec; -pub(crate) mod execution_context; pub(crate) mod helpers; pub(crate) mod job; pub(crate) mod render_tests; diff --git a/src/bootstrap/src/utils/render_tests.rs b/src/bootstrap/src/utils/render_tests.rs index 77e645a9e3cb..051d7dd9fd4d 100644 --- a/src/bootstrap/src/utils/render_tests.rs +++ b/src/bootstrap/src/utils/render_tests.rs @@ -202,7 +202,9 @@ impl<'a> Renderer<'a> { } fn render_test_outcome_terse(&mut self, outcome: Outcome<'_>, test: &TestOutcome) { - if self.terse_tests_in_line != 0 && self.terse_tests_in_line % TERSE_TESTS_PER_LINE == 0 { + if self.terse_tests_in_line != 0 + && self.terse_tests_in_line.is_multiple_of(TERSE_TESTS_PER_LINE) + { if let Some(total) = self.tests_count { let total = total.to_string(); let executed = format!("{:>width$}", self.executed_tests - 1, width = total.len()); diff --git a/src/bootstrap/src/utils/tests/mod.rs b/src/bootstrap/src/utils/tests/mod.rs index 73c500f6e369..b8984d1f3aaf 100644 --- a/src/bootstrap/src/utils/tests/mod.rs +++ b/src/bootstrap/src/utils/tests/mod.rs @@ -1,3 +1,100 @@ //! This module contains shared utilities for bootstrap tests. +use std::path::{Path, PathBuf}; +use std::thread; + +use tempfile::TempDir; + +use crate::core::builder::Builder; +use crate::core::config::DryRun; +use crate::{Build, Config, Flags, t}; + pub mod git; + +/// Holds temporary state of a bootstrap test. 
+/// Right now it is only used to redirect the build directory of the bootstrap +/// invocation, in the future it would be great if we could actually execute +/// the whole test with this directory set as the workdir. +pub struct TestCtx { + directory: TempDir, +} + +impl TestCtx { + pub fn new() -> Self { + let directory = TempDir::new().expect("cannot create temporary directory"); + eprintln!("Running test in {}", directory.path().display()); + Self { directory } + } + + /// Starts a new invocation of bootstrap that executes `kind` as its top level command + /// (i.e. `x `). Returns a builder that configures the created config through CLI flags. + pub fn config(&self, kind: &str) -> ConfigBuilder { + ConfigBuilder::from_args(&[kind], self.directory.path().to_owned()) + } +} + +/// Used to configure an invocation of bootstrap. +/// Currently runs in the rustc checkout, long-term it should be switched +/// to run in a (cache-primed) temporary directory instead. +pub struct ConfigBuilder { + args: Vec, + directory: PathBuf, +} + +impl ConfigBuilder { + fn from_args(args: &[&str], directory: PathBuf) -> Self { + Self { args: args.iter().copied().map(String::from).collect(), directory } + } + + pub fn path(mut self, path: &str) -> Self { + self.args.push(path.to_string()); + self + } + + pub fn paths(mut self, paths: &[&str]) -> Self { + for path in paths { + self = self.path(path); + } + self + } + + pub fn hosts(mut self, targets: &[&str]) -> Self { + self.args.push("--host".to_string()); + self.args.push(targets.join(",")); + self + } + + pub fn targets(mut self, targets: &[&str]) -> Self { + self.args.push("--target".to_string()); + self.args.push(targets.join(",")); + self + } + + pub fn stage(mut self, stage: u32) -> Self { + self.args.push("--stage".to_string()); + self.args.push(stage.to_string()); + self + } + + pub fn args(mut self, args: &[&str]) -> Self { + for arg in args { + self.args.push(arg.to_string()); + } + self + } + + pub fn create_config(mut 
self) -> Config { + // Run in dry-check, otherwise the test would be too slow + self.args.push("--dry-run".to_string()); + + // Ignore submodules + self.args.push("--set".to_string()); + self.args.push("build.submodules=false".to_string()); + + // Do not mess with the local rustc checkout build directory + self.args.push("--build-dir".to_string()); + self.args.push(self.directory.join("build").display().to_string()); + + Config::parse(Flags::parse(&self.args)) + } +} diff --git a/src/build_helper/src/ci.rs b/src/build_helper/src/ci.rs index 60f319129a0b..9d114c70a671 100644 --- a/src/build_helper/src/ci.rs +++ b/src/build_helper/src/ci.rs @@ -17,7 +17,11 @@ impl CiEnv { } pub fn is_ci() -> bool { - Self::current() != CiEnv::None + Self::current().is_running_in_ci() + } + + pub fn is_running_in_ci(self) -> bool { + self != CiEnv::None } /// Checks if running in rust-lang/rust managed CI job. diff --git a/src/build_helper/src/git.rs b/src/build_helper/src/git.rs index 438cd14389c1..9d1195aadf84 100644 --- a/src/build_helper/src/git.rs +++ b/src/build_helper/src/git.rs @@ -198,7 +198,7 @@ fn get_latest_upstream_commit_that_modified_files( /// author. /// /// If we are in CI, we simply return our first parent. -fn get_closest_upstream_commit( +pub fn get_closest_upstream_commit( git_dir: Option<&Path>, config: &GitConfig<'_>, env: CiEnv, diff --git a/src/ci/docker/host-x86_64/mingw-check-1/Dockerfile b/src/ci/docker/host-x86_64/mingw-check-1/Dockerfile index a877de1f7b24..c46a2471e75b 100644 --- a/src/ci/docker/host-x86_64/mingw-check-1/Dockerfile +++ b/src/ci/docker/host-x86_64/mingw-check-1/Dockerfile @@ -39,20 +39,18 @@ RUN pip3 install --no-deps --no-cache-dir --require-hashes -r /tmp/reuse-require COPY host-x86_64/mingw-check-1/check-default-config-profiles.sh /scripts/ COPY host-x86_64/mingw-check-1/validate-toolstate.sh /scripts/ -COPY host-x86_64/mingw-check-1/validate-error-codes.sh /scripts/ # Check library crates on all tier 1 targets. 
# We disable optimized compiler built-ins because that requires a C toolchain for the target. # We also skip the x86_64-unknown-linux-gnu target as it is well-tested by other jobs. ENV SCRIPT \ /scripts/check-default-config-profiles.sh && \ - python3 ../x.py build --stage 0 src/tools/build-manifest && \ + python3 ../x.py build --stage 1 src/tools/build-manifest && \ python3 ../x.py test --stage 0 src/tools/compiletest && \ python3 ../x.py check compiletest --set build.compiletest-use-stage0-libtest=true && \ python3 ../x.py check --stage 1 --target=i686-pc-windows-gnu --host=i686-pc-windows-gnu && \ python3 ../x.py check --stage 1 --set build.optimized-compiler-builtins=false core alloc std --target=aarch64-unknown-linux-gnu,i686-pc-windows-msvc,i686-unknown-linux-gnu,x86_64-apple-darwin,x86_64-pc-windows-gnu,x86_64-pc-windows-msvc && \ /scripts/validate-toolstate.sh && \ - /scripts/validate-error-codes.sh && \ reuse --include-submodules lint && \ python3 ../x.py test collect-license-metadata && \ # Runs checks to ensure that there are no issues in our JS code. diff --git a/src/ci/docker/host-x86_64/mingw-check-1/validate-error-codes.sh b/src/ci/docker/host-x86_64/mingw-check-1/validate-error-codes.sh deleted file mode 100755 index e9aa948eb877..000000000000 --- a/src/ci/docker/host-x86_64/mingw-check-1/validate-error-codes.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Checks that no error code explanation is removed. - -set -eo pipefail - -if [[ -z "$BASE_COMMIT" ]]; then - echo "not checking error code explanations removal" - exit 0 -fi - -echo "Check if an error code explanation was removed..." - -if (git diff "$BASE_COMMIT" --name-status | grep '^D' \ - | grep --quiet "compiler/rustc_error_codes/src/error_codes/"); then - echo "Error code explanations should never be removed!" - echo "Take a look at E0001 to see how to handle it." - exit 1 -fi - -echo "No error code explanation was removed!" 
diff --git a/src/ci/docker/host-x86_64/mingw-check-tidy/Dockerfile b/src/ci/docker/host-x86_64/mingw-check-tidy/Dockerfile index 8d2c5e004e47..62cd8a312129 100644 --- a/src/ci/docker/host-x86_64/mingw-check-tidy/Dockerfile +++ b/src/ci/docker/host-x86_64/mingw-check-tidy/Dockerfile @@ -39,7 +39,6 @@ RUN pip3 install --no-deps --no-cache-dir --require-hashes -r /tmp/reuse-require && pip3 install virtualenv COPY host-x86_64/mingw-check-1/validate-toolstate.sh /scripts/ -COPY host-x86_64/mingw-check-1/validate-error-codes.sh /scripts/ RUN bash -c 'npm install -g eslint@$(cat /tmp/eslint.version)' diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-tools/browser-ui-test.version b/src/ci/docker/host-x86_64/x86_64-gnu-tools/browser-ui-test.version index e15121e0f316..f8d54d445576 100644 --- a/src/ci/docker/host-x86_64/x86_64-gnu-tools/browser-ui-test.version +++ b/src/ci/docker/host-x86_64/x86_64-gnu-tools/browser-ui-test.version @@ -1 +1 @@ -0.20.6 \ No newline at end of file +0.20.7 \ No newline at end of file diff --git a/src/ci/docker/scripts/rfl-build.sh b/src/ci/docker/scripts/rfl-build.sh index c59928913989..8acc5040a2fc 100755 --- a/src/ci/docker/scripts/rfl-build.sh +++ b/src/ci/docker/scripts/rfl-build.sh @@ -6,7 +6,7 @@ LINUX_VERSION=v6.16-rc1 # Build rustc, rustdoc, cargo, clippy-driver and rustfmt ../x.py build --stage 2 library rustdoc clippy rustfmt -../x.py build --stage 0 cargo +../x.py build --stage 1 cargo BUILD_DIR=$(realpath ./build/x86_64-unknown-linux-gnu) diff --git a/src/doc/book b/src/doc/book index 4433c9f0cad8..8a6d44e45b7b 160000 --- a/src/doc/book +++ b/src/doc/book @@ -1 +1 @@ -Subproject commit 4433c9f0cad8460bee05ede040587f8a1fa3f1de +Subproject commit 8a6d44e45b7b564eeb6bae30507e1fbac439d72d diff --git a/src/doc/reference b/src/doc/reference index d4c66b346f4b..50fc1628f365 160000 --- a/src/doc/reference +++ b/src/doc/reference @@ -1 +1 @@ -Subproject commit d4c66b346f4b72d29e70390a3fa3ea7d4e064db1 +Subproject commit 
50fc1628f36563958399123829c73755fa7a8421 diff --git a/src/doc/rust-by-example b/src/doc/rust-by-example index 9baa9e863116..05c7d8bae65f 160000 --- a/src/doc/rust-by-example +++ b/src/doc/rust-by-example @@ -1 +1 @@ -Subproject commit 9baa9e863116cb9524a177d5a5c475baac18928a +Subproject commit 05c7d8bae65f23a1837430c5a19be129d414f5ec diff --git a/src/doc/rustc-dev-guide/rust-version b/src/doc/rustc-dev-guide/rust-version index 86d35b314983..30ba3070e1f4 100644 --- a/src/doc/rustc-dev-guide/rust-version +++ b/src/doc/rustc-dev-guide/rust-version @@ -1 +1 @@ -14346303d760027e53214e705109a62c0f00b214 +d1d8e386c5e84c4ba857f56c3291f73c27e2d62a diff --git a/src/doc/rustc-dev-guide/src/SUMMARY.md b/src/doc/rustc-dev-guide/src/SUMMARY.md index cba8eac617d6..7f2f32c62ffb 100644 --- a/src/doc/rustc-dev-guide/src/SUMMARY.md +++ b/src/doc/rustc-dev-guide/src/SUMMARY.md @@ -101,6 +101,8 @@ - [The `rustdoc` test suite](./rustdoc-internals/rustdoc-test-suite.md) - [The `rustdoc-gui` test suite](./rustdoc-internals/rustdoc-gui-test-suite.md) - [The `rustdoc-json` test suite](./rustdoc-internals/rustdoc-json-test-suite.md) +- [GPU offload internals](./offload/internals.md) + - [Installation](./offload/installation.md) - [Autodiff internals](./autodiff/internals.md) - [Installation](./autodiff/installation.md) - [How to debug](./autodiff/debugging.md) @@ -121,8 +123,9 @@ - [Feature gate checking](./feature-gate-ck.md) - [Lang Items](./lang-items.md) - [The HIR (High-level IR)](./hir.md) - - [Lowering AST to HIR](./ast-lowering.md) - - [Debugging](./hir-debugging.md) + - [Lowering AST to HIR](./hir/lowering.md) + - [Ambig/Unambig Types and Consts](./hir/ambig-unambig-ty-and-consts.md) + - [Debugging](./hir/debugging.md) - [The THIR (Typed High-level IR)](./thir.md) - [The MIR (Mid-level IR)](./mir/index.md) - [MIR construction](./mir/construction.md) @@ -181,7 +184,7 @@ - [Significant changes and quirks](./solve/significant-changes.md) - [`Unsize` and `CoerceUnsized` 
traits](./traits/unsize.md) - [Type checking](./type-checking.md) - - [Method Lookup](./method-lookup.md) + - [Method lookup](./method-lookup.md) - [Variance](./variance.md) - [Coherence checking](./coherence.md) - [Opaque types](./opaque-types-type-alias-impl-trait.md) @@ -189,7 +192,7 @@ - [Return Position Impl Trait In Trait](./return-position-impl-trait-in-trait.md) - [Region inference restrictions][opaque-infer] - [Const condition checking](./effects.md) -- [Pattern and Exhaustiveness Checking](./pat-exhaustive-checking.md) +- [Pattern and exhaustiveness checking](./pat-exhaustive-checking.md) - [Unsafety checking](./unsafety-checking.md) - [MIR dataflow](./mir/dataflow.md) - [Drop elaboration](./mir/drop-elaboration.md) @@ -209,7 +212,7 @@ - [Closure capture inference](./closure.md) - [Async closures/"coroutine-closures"](coroutine-closures.md) -# MIR to Binaries +# MIR to binaries - [Prologue](./part-5-intro.md) - [MIR optimizations](./mir/optimizations.md) @@ -218,15 +221,15 @@ - [Interpreter](./const-eval/interpret.md) - [Monomorphization](./backend/monomorph.md) - [Lowering MIR](./backend/lowering-mir.md) -- [Code Generation](./backend/codegen.md) +- [Code generation](./backend/codegen.md) - [Updating LLVM](./backend/updating-llvm.md) - [Debugging LLVM](./backend/debugging.md) - [Backend Agnostic Codegen](./backend/backend-agnostic.md) - - [Implicit Caller Location](./backend/implicit-caller-location.md) -- [Libraries and Metadata](./backend/libs-and-metadata.md) -- [Profile-guided Optimization](./profile-guided-optimization.md) -- [LLVM Source-Based Code Coverage](./llvm-coverage-instrumentation.md) -- [Sanitizers Support](./sanitizers.md) + - [Implicit caller location](./backend/implicit-caller-location.md) +- [Libraries and metadata](./backend/libs-and-metadata.md) +- [Profile-guided optimization](./profile-guided-optimization.md) +- [LLVM source-based code coverage](./llvm-coverage-instrumentation.md) +- [Sanitizers support](./sanitizers.md) - 
[Debugging support in the Rust compiler](./debugging-support-in-rustc.md) --- diff --git a/src/doc/rustc-dev-guide/src/autodiff/flags.md b/src/doc/rustc-dev-guide/src/autodiff/flags.md index 65287d9ba4c1..efbb9ea3497c 100644 --- a/src/doc/rustc-dev-guide/src/autodiff/flags.md +++ b/src/doc/rustc-dev-guide/src/autodiff/flags.md @@ -6,6 +6,7 @@ To support you while debugging or profiling, we have added support for an experi ```text PrintTA // Print TypeAnalysis information +PrintTAFn // Print TypeAnalysis information for a specific function PrintAA // Print ActivityAnalysis information Print // Print differentiated functions while they are being generated and optimized PrintPerf // Print AD related Performance warnings diff --git a/src/doc/rustc-dev-guide/src/backend/implicit-caller-location.md b/src/doc/rustc-dev-guide/src/backend/implicit-caller-location.md index 17158497d592..c5ee00813a34 100644 --- a/src/doc/rustc-dev-guide/src/backend/implicit-caller-location.md +++ b/src/doc/rustc-dev-guide/src/backend/implicit-caller-location.md @@ -1,4 +1,4 @@ -# Implicit Caller Location +# Implicit caller location @@ -8,7 +8,7 @@ adds the [`#[track_caller]`][attr-reference] attribute for functions, the [`caller_location`][intrinsic] intrinsic, and the stabilization-friendly [`core::panic::Location::caller`][wrapper] wrapper. -## Motivating Example +## Motivating example Take this example program: @@ -39,7 +39,7 @@ These error messages are achieved through a combination of changes to `panic!` i of `core::panic::Location::caller` and a number of `#[track_caller]` annotations in the standard library which propagate caller information. -## Reading Caller Location +## Reading caller location Previously, `panic!` made use of the `file!()`, `line!()`, and `column!()` macros to construct a [`Location`] pointing to where the panic occurred. These macros couldn't be given an overridden @@ -51,7 +51,7 @@ was expanded. 
This function is itself annotated with `#[track_caller]` and wraps [`caller_location`][intrinsic] compiler intrinsic implemented by rustc. This intrinsic is easiest explained in terms of how it works in a `const` context. -## Caller Location in `const` +## Caller location in `const` There are two main phases to returning the caller location in a const context: walking up the stack to find the right location and allocating a const value to return. @@ -138,7 +138,7 @@ fn main() { } ``` -### Dynamic Dispatch +### Dynamic dispatch In codegen contexts we have to modify the callee ABI to pass this information down the stack, but the attribute expressly does *not* modify the type of the function. The ABI change must be @@ -156,7 +156,7 @@ probably the best we can do without modifying fully-stabilized type signatures. > whether we'll be called in a const context (safe to ignore shim) or in a codegen context (unsafe > to ignore shim). Even if we did know, the results from const and codegen contexts must agree. -## The Attribute +## The attribute The `#[track_caller]` attribute is checked alongside other codegen attributes to ensure the function: diff --git a/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md b/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md index eeb2af5e6bc8..aa1d644703a0 100644 --- a/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md +++ b/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md @@ -1,4 +1,4 @@ -# Libraries and Metadata +# Libraries and metadata When the compiler sees a reference to an external crate, it needs to load some information about that crate. 
This chapter gives an overview of that process, diff --git a/src/doc/rustc-dev-guide/src/building/new-target.md b/src/doc/rustc-dev-guide/src/building/new-target.md index 8d323ba9646d..e11a2cd8ee57 100644 --- a/src/doc/rustc-dev-guide/src/building/new-target.md +++ b/src/doc/rustc-dev-guide/src/building/new-target.md @@ -174,8 +174,8 @@ compiler, you can use it instead of the JSON file for both arguments. ## Promoting a target from tier 2 (target) to tier 2 (host) There are two levels of tier 2 targets: - a) Targets that are only cross-compiled (`rustup target add`) - b) Targets that [have a native toolchain][tier2-native] (`rustup toolchain install`) +- Targets that are only cross-compiled (`rustup target add`) +- Targets that [have a native toolchain][tier2-native] (`rustup toolchain install`) [tier2-native]: https://doc.rust-lang.org/nightly/rustc/target-tier-policy.html#tier-2-with-host-tools diff --git a/src/doc/rustc-dev-guide/src/contributing.md b/src/doc/rustc-dev-guide/src/contributing.md index 0575de642eeb..46d74b967342 100644 --- a/src/doc/rustc-dev-guide/src/contributing.md +++ b/src/doc/rustc-dev-guide/src/contributing.md @@ -364,7 +364,7 @@ To find documentation-related issues, use the [A-docs label]. You can find documentation style guidelines in [RFC 1574]. -To build the standard library documentation, use `x doc --stage 0 library --open`. +To build the standard library documentation, use `x doc --stage 1 library --open`. To build the documentation for a book (e.g. the unstable book), use `x doc src/doc/unstable-book.` Results should appear in `build/host/doc`, as well as automatically open in your default browser. 
See [Building Documentation](./building/compiler-documenting.md#building-documentation) for more diff --git a/src/doc/rustc-dev-guide/src/diagnostics.md b/src/doc/rustc-dev-guide/src/diagnostics.md index 01e59c91904d..33f5441d36e4 100644 --- a/src/doc/rustc-dev-guide/src/diagnostics.md +++ b/src/doc/rustc-dev-guide/src/diagnostics.md @@ -553,7 +553,7 @@ compiler](#linting-early-in-the-compiler). [AST nodes]: the-parser.md -[AST lowering]: ast-lowering.md +[AST lowering]: ./hir/lowering.md [HIR nodes]: hir.md [MIR nodes]: mir/index.md [macro expansion]: macro-expansion.md diff --git a/src/doc/rustc-dev-guide/src/hir.md b/src/doc/rustc-dev-guide/src/hir.md index 0c1c9941572d..72fb10701574 100644 --- a/src/doc/rustc-dev-guide/src/hir.md +++ b/src/doc/rustc-dev-guide/src/hir.md @@ -5,7 +5,7 @@ The HIR – "High-Level Intermediate Representation" – is the primary IR used in most of rustc. It is a compiler-friendly representation of the abstract syntax tree (AST) that is generated after parsing, macro expansion, and name -resolution (see [Lowering](./ast-lowering.html) for how the HIR is created). +resolution (see [Lowering](./hir/lowering.md) for how the HIR is created). Many parts of HIR resemble Rust surface syntax quite closely, with the exception that some of Rust's expression forms have been desugared away. For example, `for` loops are converted into a `loop` and do not appear in diff --git a/src/doc/rustc-dev-guide/src/hir/ambig-unambig-ty-and-consts.md b/src/doc/rustc-dev-guide/src/hir/ambig-unambig-ty-and-consts.md new file mode 100644 index 000000000000..709027883aed --- /dev/null +++ b/src/doc/rustc-dev-guide/src/hir/ambig-unambig-ty-and-consts.md @@ -0,0 +1,63 @@ +# Ambig/Unambig Types and Consts + +Types and Consts args in the HIR can be in two kinds of positions ambiguous (ambig) or unambiguous (unambig). Ambig positions are where +it would be valid to parse either a type or a const, unambig positions are where only one kind would be valid to +parse. 
+ +```rust +fn func(arg: T) { + // ^ Unambig type position + let a: _ = arg; + // ^ Unambig type position + + func::(arg); + // ^ ^ + // ^^^^ Ambig position + + let _: [u8; 10]; + // ^^ ^^ Unambig const position + // ^^ Unambig type position +} + +``` + +Most types/consts in ambig positions are able to be disambiguated as either a type or const during parsing. Single segment paths are always represented as types in the AST but may get resolved to a const parameter during name resolution, then lowered to a const argument during ast-lowering. The only generic arguments which remain ambiguous after lowering are inferred generic arguments (`_`) in path segments. For example, in `Foo<_>` it is not clear whether the `_` argument is an inferred type argument, or an inferred const argument. + +In unambig positions, inferred arguments are represented with [`hir::TyKind::Infer`][ty_infer] or [`hir::ConstArgKind::Infer`][const_infer] depending on whether it is a type or const position respectively. +In ambig positions, inferred arguments are represented with `hir::GenericArg::Infer`. + +A naive implementation of this would result in there being potentially 5 places where you might think an inferred type/const could be found in the HIR from looking at the structure of the HIR: +1. In unambig type position as a `hir::TyKind::Infer` +2. In unambig const arg position as a `hir::ConstArgKind::Infer` +3. In an ambig position as a [`GenericArg::Type(TyKind::Infer)`][generic_arg_ty] +4. In an ambig position as a [`GenericArg::Const(ConstArgKind::Infer)`][generic_arg_const] +5. In an ambig position as a [`GenericArg::Infer`][generic_arg_infer] + +Note that places 3 and 4 would never actually be possible to encounter as we always lower to `GenericArg::Infer` in generic arg position. 
+ +This has a few failure modes: +- People may write visitors which check for `GenericArg::Infer` but forget to check for `hir::TyKind/ConstArgKind::Infer`, only handling infers in ambig positions by accident. +- People may write visitors which check for `hir::TyKind/ConstArgKind::Infer` but forget to check for `GenericArg::Infer`, only handling infers in unambig positions by accident. +- People may write visitors which check for `GenericArg::Type/Const(TyKind/ConstArgKind::Infer)` and `GenericArg::Infer`, not realising that we never represent inferred types/consts in ambig positions as a `GenericArg::Type/Const`. +- People may write visitors which check for *only* `TyKind::Infer` and not `ConstArgKind::Infer` forgetting that there are also inferred const arguments (and vice versa). + +To make writing HIR visitors less error prone when caring about inferred types/consts we have a relatively complex system: + +1. We have different types in the compiler for when a type or const is in an unambig or ambig position, `hir::Ty` and `hir::Ty<()>`. [`AmbigArg`][ambig_arg] is an uninhabited type which we use in the `Infer` variant of `TyKind` and `ConstArgKind` to selectively "disable" it if we are in an ambig position. + +2. The [`visit_ty`][visit_ty] and [`visit_const_arg`][visit_const_arg] methods on HIR visitors only accept the ambig position versions of types/consts. Unambig types/consts are implicitly converted to ambig types/consts during the visiting process, with the `Infer` variant handled by a dedicated [`visit_infer`][visit_infer] method.
+ +This has a number of benefits: +- It's clear that `GenericArg::Type/Const` cannot represent inferred type/const arguments +- Implementors of `visit_ty` and `visit_const_arg` will never encounter inferred types/consts making it impossible to write a visitor that seems to work right but handles edge cases wrong +- The `visit_infer` method handles *all* cases of inferred type/consts in the HIR making it easy for visitors to handle inferred type/consts in one dedicated place and not forget cases + +[ty_infer]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.TyKind.html#variant.Infer +[const_infer]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.ConstArgKind.html#variant.Infer +[generic_arg_ty]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.GenericArg.html#variant.Type +[generic_arg_const]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.GenericArg.html#variant.Const +[generic_arg_infer]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.GenericArg.html#variant.Infer +[ambig_arg]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.AmbigArg.html +[visit_ty]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/intravisit/trait.Visitor.html#method.visit_ty +[visit_const_arg]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/intravisit/trait.Visitor.html#method.visit_const_arg +[visit_infer]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/intravisit/trait.Visitor.html#method.visit_infer \ No newline at end of file diff --git a/src/doc/rustc-dev-guide/src/hir-debugging.md b/src/doc/rustc-dev-guide/src/hir/debugging.md similarity index 100% rename from src/doc/rustc-dev-guide/src/hir-debugging.md rename to src/doc/rustc-dev-guide/src/hir/debugging.md diff --git a/src/doc/rustc-dev-guide/src/ast-lowering.md b/src/doc/rustc-dev-guide/src/hir/lowering.md similarity index 97% rename from src/doc/rustc-dev-guide/src/ast-lowering.md rename to 
src/doc/rustc-dev-guide/src/hir/lowering.md index 033fd4b76f28..02c69b8609f1 100644 --- a/src/doc/rustc-dev-guide/src/ast-lowering.md +++ b/src/doc/rustc-dev-guide/src/hir/lowering.md @@ -1,6 +1,6 @@ # AST lowering -The AST lowering step converts AST to [HIR](hir.html). +The AST lowering step converts AST to [HIR](../hir.md). This means many structures are removed if they are irrelevant for type analysis or similar syntax agnostic analyses. Examples of such structures include but are not limited to diff --git a/src/doc/rustc-dev-guide/src/llvm-coverage-instrumentation.md b/src/doc/rustc-dev-guide/src/llvm-coverage-instrumentation.md index 6bc21b6deeb8..28e0e7a908d6 100644 --- a/src/doc/rustc-dev-guide/src/llvm-coverage-instrumentation.md +++ b/src/doc/rustc-dev-guide/src/llvm-coverage-instrumentation.md @@ -1,4 +1,4 @@ -# LLVM Source-Based Code Coverage +# LLVM source-based code coverage diff --git a/src/doc/rustc-dev-guide/src/offload/installation.md b/src/doc/rustc-dev-guide/src/offload/installation.md new file mode 100644 index 000000000000..2536af09a236 --- /dev/null +++ b/src/doc/rustc-dev-guide/src/offload/installation.md @@ -0,0 +1,71 @@ +# Installation + +In the future, `std::offload` should become available in nightly builds for users. For now, everyone still needs to build rustc from source. 
+ +## Build instructions + +First you need to clone and configure the Rust repository: +```bash +git clone --depth=1 git@github.com:rust-lang/rust.git +cd rust +./configure --enable-llvm-link-shared --release-channel=nightly --enable-llvm-assertions --enable-offload --enable-enzyme --enable-clang --enable-lld --enable-option-checking --enable-ninja --disable-docs +``` + +Afterwards you can build rustc using: +```bash +./x.py build --stage 1 library +``` + +Afterwards `rustup toolchain link` will allow you to use it through cargo: +``` +rustup toolchain link offload build/host/stage1 +rustup toolchain install nightly # enables -Z unstable-options +``` + + + +## Build instructions for LLVM itself +```bash +git clone --depth=1 git@github.com:llvm/llvm-project.git +cd llvm-project +mkdir build +cd build +cmake -G Ninja ../llvm -DLLVM_TARGETS_TO_BUILD="host,AMDGPU,NVPTX" -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_ENABLE_PROJECTS="clang;lld" -DLLVM_ENABLE_RUNTIMES="offload,openmp" -DLLVM_ENABLE_PLUGINS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. +ninja +ninja install +``` +This gives you a working LLVM build. + + +## Testing +run +``` +./x.py test --stage 1 tests/codegen/gpu_offload +``` + +## Usage +It is important to use a clang compiler built on the same llvm as rustc. Just calling clang without the full path will likely use your system clang, which probably will be incompatible. +``` +/absolute/path/to/rust/build/x86_64-unknown-linux-gnu/stage1/bin/rustc --edition=2024 --crate-type cdylib src/main.rs --emit=llvm-ir -O -C lto=fat -Cpanic=abort -Zoffload=Enable +/absolute/path/to/rust/build/x86_64-unknown-linux-gnu/llvm/bin/clang++ -fopenmp --offload-arch=native -g -O3 main.ll -o main -save-temps +LIBOMPTARGET_INFO=-1 ./main +``` +The first step will generate a `main.ll` file, which has enough instructions to cause the offload runtime to move data to and from a gpu.
+The second step will use clang as the compilation driver to compile our IR file down to a working binary. Only a very small Rust subset will work out of the box here, unless +you use features like build-std, which are not covered by this guide. Look at the codegen test to get a feeling for how to write a working example. +In the last step you can run your binary, if all went well you will see a data transfer being reported: +``` +omptarget device 0 info: Entering OpenMP data region with being_mapper at unknown:0:0 with 1 arguments: +omptarget device 0 info: tofrom(unknown)[1024] +omptarget device 0 info: Creating new map entry with HstPtrBase=0x00007fffffff9540, HstPtrBegin=0x00007fffffff9540, TgtAllocBegin=0x0000155547200000, TgtPtrBegin=0x0000155547200000, Size=1024, DynRefCount=1, HoldRefCount=0, Name=unknown +omptarget device 0 info: Copying data from host to device, HstPtr=0x00007fffffff9540, TgtPtr=0x0000155547200000, Size=1024, Name=unknown +omptarget device 0 info: OpenMP Host-Device pointer mappings after block at unknown:0:0: +omptarget device 0 info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration +omptarget device 0 info: 0x00007fffffff9540 0x0000155547200000 1024 1 0 unknown at unknown:0:0 +// some other output +omptarget device 0 info: Exiting OpenMP data region with end_mapper at unknown:0:0 with 1 arguments: +omptarget device 0 info: tofrom(unknown)[1024] +omptarget device 0 info: Mapping exists with HstPtrBegin=0x00007fffffff9540, TgtPtrBegin=0x0000155547200000, Size=1024, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +omptarget device 0 info: Copying data from device to host, TgtPtr=0x0000155547200000, HstPtr=0x00007fffffff9540, Size=1024, Name=unknown +omptarget device 0 info: Removing map entry with HstPtrBegin=0x00007fffffff9540, TgtPtrBegin=0x0000155547200000, Size=1024, Name=unknown +``` diff --git a/src/doc/rustc-dev-guide/src/offload/internals.md b/src/doc/rustc-dev-guide/src/offload/internals.md new file 
mode 100644 index 000000000000..28857a6e78bf --- /dev/null +++ b/src/doc/rustc-dev-guide/src/offload/internals.md @@ -0,0 +1,9 @@ +# std::offload + +This module is under active development. Once upstream, it should allow Rust developers to run Rust code on GPUs. +We aim to develop a `rusty` GPU programming interface, which is safe, convenient and sufficiently fast by default. +This includes automatic data movement to and from the GPU, in an efficient way. We will (later) +also offer more advanced, possibly unsafe, interfaces which allow a higher degree of control. + +The implementation is based on LLVM's "offload" project, which is already used by OpenMP to run Fortran or C++ code on GPUs. +While the project is under development, users will need to call other compilers like clang to finish the compilation process. diff --git a/src/doc/rustc-dev-guide/src/overview.md b/src/doc/rustc-dev-guide/src/overview.md index 92d0c7b0c38c..8a1a22fad660 100644 --- a/src/doc/rustc-dev-guide/src/overview.md +++ b/src/doc/rustc-dev-guide/src/overview.md @@ -410,7 +410,7 @@ For more details on bootstrapping, see - Guide: [The HIR](hir.md) - Guide: [Identifiers in the HIR](hir.md#identifiers-in-the-hir) - Guide: [The `HIR` Map](hir.md#the-hir-map) - - Guide: [Lowering `AST` to `HIR`](ast-lowering.md) + - Guide: [Lowering `AST` to `HIR`](./hir/lowering.md) - How to view `HIR` representation for your code `cargo rustc -- -Z unpretty=hir-tree` - Rustc `HIR` definition: [`rustc_hir`](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/index.html) - Main entry point: **TODO** diff --git a/src/doc/rustc-dev-guide/src/part-5-intro.md b/src/doc/rustc-dev-guide/src/part-5-intro.md index f32508d27744..a44fff1e1430 100644 --- a/src/doc/rustc-dev-guide/src/part-5-intro.md +++ b/src/doc/rustc-dev-guide/src/part-5-intro.md @@ -1,4 +1,4 @@ -# From MIR to Binaries +# From MIR to binaries All of the preceding chapters of this guide have one thing in common: we never generated any executable
machine code at all! diff --git a/src/doc/rustc-dev-guide/src/pat-exhaustive-checking.md b/src/doc/rustc-dev-guide/src/pat-exhaustive-checking.md index 4a796ac9500e..e953931aa78c 100644 --- a/src/doc/rustc-dev-guide/src/pat-exhaustive-checking.md +++ b/src/doc/rustc-dev-guide/src/pat-exhaustive-checking.md @@ -1,4 +1,4 @@ -# Pattern and Exhaustiveness Checking +# Pattern and exhaustiveness checking In Rust, pattern matching and bindings have a few very helpful properties. The compiler will check that bindings are irrefutable when made and that match arms diff --git a/src/doc/rustc-dev-guide/src/profile-guided-optimization.md b/src/doc/rustc-dev-guide/src/profile-guided-optimization.md index 39bc8b5e8624..d279786ac45e 100644 --- a/src/doc/rustc-dev-guide/src/profile-guided-optimization.md +++ b/src/doc/rustc-dev-guide/src/profile-guided-optimization.md @@ -1,4 +1,4 @@ -# Profile Guided Optimization +# Profile-guided optimization @@ -6,7 +6,7 @@ This chapter describes what PGO is and how the support for it is implemented in `rustc`. -## What Is Profiled-Guided Optimization? +## What is profile-guided optimization? The basic concept of PGO is to collect data about the typical execution of a program (e.g. which branches it is likely to take) and then use this data @@ -52,7 +52,7 @@ instrumentation, via the experimental option [`-C instrument-coverage`](./llvm-coverage-instrumentation.md), but using these coverage results for PGO has not been attempted at this time. -### Overall Workflow +### Overall workflow Generating a PGO-optimized program involves the following four steps: @@ -62,12 +62,12 @@ Generating a PGO-optimized program involves the following four steps: 4. Compile the program again, this time making use of the profiling data (e.g.
`rustc -C profile-use=merged.profdata main.rs`) -### Compile-Time Aspects +### Compile-time aspects Depending on which step in the above workflow we are in, two different things can happen at compile time: -#### Create Binaries with Instrumentation +#### Create binaries with instrumentation As mentioned above, the profiling instrumentation is added by LLVM. `rustc` instructs LLVM to do so [by setting the appropriate][pgo-gen-passmanager] @@ -88,7 +88,7 @@ runtime are not removed [by marking the with the right export level][pgo-gen-sym [pgo-gen-symbols]:https://github.com/rust-lang/rust/blob/1.34.1/src/librustc_codegen_ssa/back/symbol_export.rs#L212-L225 -#### Compile Binaries Where Optimizations Make Use Of Profiling Data +#### Compile binaries where optimizations make use of profiling data In the final step of the workflow described above, the program is compiled again, with the compiler using the gathered profiling data in order to drive @@ -106,7 +106,7 @@ LLVM does the rest (e.g. setting branch weights, marking functions with `cold` or `inlinehint`, etc). -### Runtime Aspects +### Runtime aspects Instrumentation-based approaches always also have a runtime component, i.e. once we have an instrumented program, that program needs to be run in order @@ -134,7 +134,7 @@ instrumentation artifacts show up in LLVM IR. [rmake-tests]: https://github.com/rust-lang/rust/tree/master/tests/run-make [codegen-test]: https://github.com/rust-lang/rust/blob/master/tests/codegen/pgo-instrumentation.rs -## Additional Information +## Additional information Clang's documentation contains a good overview on [PGO in LLVM][llvm-pgo]. 
diff --git a/src/doc/rustc-dev-guide/src/profiling/with_perf.md b/src/doc/rustc-dev-guide/src/profiling/with_perf.md index 742ea1c41a6c..0d4f23bcd9ad 100644 --- a/src/doc/rustc-dev-guide/src/profiling/with_perf.md +++ b/src/doc/rustc-dev-guide/src/profiling/with_perf.md @@ -7,8 +7,8 @@ This is a guide for how to profile rustc with [perf](https://perf.wiki.kernel.or - Get a clean checkout of rust-lang/master, or whatever it is you want to profile. - Set the following settings in your `bootstrap.toml`: - - `debuginfo-level = 1` - enables line debuginfo - - `jemalloc = false` - lets you do memory use profiling with valgrind + - `rust.debuginfo-level = 1` - enables line debuginfo + - `rust.jemalloc = false` - lets you do memory use profiling with valgrind - leave everything else the defaults - Run `./x build` to get a full build - Make a rustup toolchain pointing to that result diff --git a/src/doc/rustc-dev-guide/src/queries/incremental-compilation-in-detail.md b/src/doc/rustc-dev-guide/src/queries/incremental-compilation-in-detail.md index 03c822d4feed..18e0e25c5315 100644 --- a/src/doc/rustc-dev-guide/src/queries/incremental-compilation-in-detail.md +++ b/src/doc/rustc-dev-guide/src/queries/incremental-compilation-in-detail.md @@ -1,4 +1,4 @@ -# Incremental Compilation in detail +# Incremental compilation in detail @@ -66,7 +66,7 @@ because it reads the up-to-date version of `Hir(bar)`. Also, we re-run `type_check_item(bar)` because result of `type_of(bar)` might have changed. -## The Problem With The Basic Algorithm: False Positives +## The problem with the basic algorithm: false positives If you read the previous paragraph carefully you'll notice that it says that `type_of(bar)` *might* have changed because one of its inputs has changed. @@ -93,7 +93,7 @@ of examples like this and small changes to the input often potentially affect very large parts of the output binaries. As a consequence, we had to make the change detection system smarter and more accurate. 
-## Improving Accuracy: The red-green Algorithm +## Improving accuracy: the red-green algorithm The "false positives" problem can be solved by interleaving change detection and query re-evaluation. Instead of walking the graph all the way to the @@ -191,7 +191,7 @@ then itself involve recursively invoking more queries, which can mean we come ba to the `try_mark_green()` algorithm for the dependencies recursively. -## The Real World: How Persistence Makes Everything Complicated +## The real world: how persistence makes everything complicated The sections above described the underlying algorithm for incremental compilation but because the compiler process exits after being finished and @@ -258,7 +258,7 @@ the `LocalId`s within it are still the same. -### Checking Query Results For Changes: HashStable And Fingerprints +### Checking query results for changes: `HashStable` and `Fingerprint`s In order to do red-green-marking we often need to check if the result of a query has changed compared to the result it had during the previous @@ -306,7 +306,7 @@ This approach works rather well but it's not without flaws: their stable equivalents while doing the hashing. -### A Tale Of Two DepGraphs: The Old And The New +### A tale of two `DepGraph`s: the old and the new The initial description of dependency tracking glosses over a few details that quickly become a head scratcher when actually trying to implement things. @@ -344,7 +344,7 @@ new graph is serialized out to disk, alongside the query result cache, and can act as the previous dep-graph in a subsequent compilation session. -### Didn't You Forget Something?: Cache Promotion +### Didn't you forget something?: cache promotion The system described so far has a somewhat subtle property: If all inputs of a dep-node are green then the dep-node itself can be marked as green without @@ -374,7 +374,7 @@ the result cache doesn't unnecessarily shrink again. 
-# Incremental Compilation and the Compiler Backend +# Incremental compilation and the compiler backend The compiler backend, the part involving LLVM, is using the query system but it is not implemented in terms of queries itself. As a consequence it does not @@ -406,7 +406,7 @@ would save. -## Query Modifiers +## Query modifiers The query system allows for applying [modifiers][mod] to queries. These modifiers affect certain aspects of how the system treats the query with @@ -472,7 +472,7 @@ respect to incremental compilation: [mod]: ../query.html#adding-a-new-kind-of-query -## The Projection Query Pattern +## The projection query pattern It's interesting to note that `eval_always` and `no_hash` can be used together in the so-called "projection query" pattern. It is often the case that there is @@ -516,7 +516,7 @@ because we have the projections to take care of keeping things green as much as possible. -# Shortcomings of the Current System +# Shortcomings of the current system There are many things that still can be improved. diff --git a/src/doc/rustc-dev-guide/src/query.md b/src/doc/rustc-dev-guide/src/query.md index 782c5b4b3c02..0ca1b360a701 100644 --- a/src/doc/rustc-dev-guide/src/query.md +++ b/src/doc/rustc-dev-guide/src/query.md @@ -2,7 +2,7 @@ -As described in [the high-level overview of the compiler][hl], the Rust compiler +As described in [Overview of the compiler], the Rust compiler is still (as of July 2021) transitioning from a traditional "pass-based" setup to a "demand-driven" system. The compiler query system is the key to rustc's demand-driven organization. @@ -13,7 +13,7 @@ there is a query called `type_of` that, given the [`DefId`] of some item, will compute the type of that item and return it to you. [`DefId`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/def_id/struct.DefId.html -[hl]: ./compiler-src.md +[Overview of the compiler]: overview.md#queries Query execution is *memoized*. 
The first time you invoke a query, it will go do the computation, but the next time, the result is @@ -37,12 +37,15 @@ will in turn demand information about that crate, starting from the actual parsing. Although this vision is not fully realized, large sections of the -compiler (for example, generating [MIR](./mir/index.md)) currently work exactly like this. +compiler (for example, generating [MIR]) currently work exactly like this. -[^incr-comp-detail]: The ["Incremental Compilation in Detail](queries/incremental-compilation-in-detail.md) chapter gives a more +[^incr-comp-detail]: The [Incremental compilation in detail] chapter gives a more in-depth description of what queries are and how they work. If you intend to write a query of your own, this is a good read. +[Incremental compilation in detail]: queries/incremental-compilation-in-detail.md +[MIR]: mir/index.md + ## Invoking queries Invoking a query is simple. The [`TyCtxt`] ("type context") struct offers a method @@ -67,9 +70,15 @@ are cheaply cloneable; insert an `Rc` if necessary). ### Providers If, however, the query is *not* in the cache, then the compiler will -try to find a suitable **provider**. A provider is a function that has -been defined and linked into the compiler somewhere that contains the -code to compute the result of the query. +call the corresponding **provider** function. A provider is a function +implemented in a specific module and **manually registered** into the +[`Providers`][providers_struct] struct during compiler initialization. +The macro system generates the [`Providers`][providers_struct] struct, +which acts as a function table for all query implementations, where each +field is a function pointer to the actual provider. + +**Note:** The `Providers` struct is generated by macros and acts as a function table for all query implementations. +It is **not** a Rust trait, but a plain struct with function pointer fields. 
**Providers are defined per-crate.** The compiler maintains, internally, a table of providers for every crate, at least @@ -97,62 +106,6 @@ fn provider<'tcx>( Providers take two arguments: the `tcx` and the query key. They return the result of the query. -### How providers are setup - -When the tcx is created, it is given the providers by its creator using -the [`Providers`][providers_struct] struct. This struct is generated by -the macros here, but it is basically a big list of function pointers: - -[providers_struct]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/query/struct.Providers.html - -```rust,ignore -struct Providers { - type_of: for<'tcx> fn(TyCtxt<'tcx>, DefId) -> Ty<'tcx>, - ... -} -``` - -At present, we have one copy of the struct for local crates, and one -for external crates, though the plan is that we may eventually have -one per crate. - -These `Providers` structs are ultimately created and populated by -`rustc_driver`, but it does this by distributing the work -throughout the other `rustc_*` crates. This is done by invoking -various [`provide`][provide_fn] functions. These functions tend to look -something like this: - -[provide_fn]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/hir/fn.provide.html - -```rust,ignore -pub fn provide(providers: &mut Providers) { - *providers = Providers { - type_of, - ..*providers - }; -} -``` - -That is, they take an `&mut Providers` and mutate it in place. Usually -we use the formulation above just because it looks nice, but you could -as well do `providers.type_of = type_of`, which would be equivalent. -(Here, `type_of` would be a top-level function, defined as we saw -before.) 
So, if we want to add a provider for some other query, -let's call it `fubar`, into the crate above, we might modify the `provide()` -function like so: - -```rust,ignore -pub fn provide(providers: &mut Providers) { - *providers = Providers { - type_of, - fubar, - ..*providers - }; -} - -fn fubar<'tcx>(tcx: TyCtxt<'tcx>, key: DefId) -> Fubar<'tcx> { ... } -``` - N.B. Most of the `rustc_*` crates only provide **local providers**. Almost all **extern providers** wind up going through the [`rustc_metadata` crate][rustc_metadata], which loads the information @@ -164,6 +117,63 @@ they define both a `provide` and a `provide_extern` function, through [rustc_metadata]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/index.html [wasm_import_module_map]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/back/symbol_export/fn.wasm_import_module_map.html +### How providers are set up + +When the tcx is created, it is given the providers by its creator using +the [`Providers`][providers_struct] struct. This struct is generated by +the macros here, but it is basically a big list of function pointers: + +[providers_struct]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/query/struct.Providers.html + +```rust,ignore +struct Providers { + type_of: for<'tcx> fn(TyCtxt<'tcx>, DefId) -> Ty<'tcx>, + // ... one field for each query +} +``` + +#### How are providers registered? + +The `Providers` struct is filled in during compiler initialization, mainly by the `rustc_driver` crate. +But the actual provider functions are implemented in various `rustc_*` crates (like `rustc_middle`, `rustc_hir_analysis`, etc). + +To register providers, each crate exposes a [`provide`][provide_fn] function that looks like this: + +[provide_fn]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/hir/fn.provide.html + +```rust,ignore +pub fn provide(providers: &mut Providers) { + *providers = Providers { + type_of, + // ... 
add more providers here + ..*providers + }; +} +``` + +- This function takes a mutable reference to the `Providers` struct and sets the fields to point to the correct provider functions. +- You can also assign fields individually, e.g. `providers.type_of = type_of;`. + +#### Adding a new provider + +Suppose you want to add a new query called `fubar`. You would: + +1. Implement the provider function: + ```rust,ignore + fn fubar<'tcx>(tcx: TyCtxt<'tcx>, key: DefId) -> Fubar<'tcx> { ... } + ``` +2. Register it in the `provide` function: + ```rust,ignore + pub fn provide(providers: &mut Providers) { + *providers = Providers { + fubar, + ..*providers + }; + } + ``` + +--- + ## Adding a new query How do you add a new query? diff --git a/src/doc/rustc-dev-guide/src/sanitizers.md b/src/doc/rustc-dev-guide/src/sanitizers.md index b1654b15e081..664b4feac4f0 100644 --- a/src/doc/rustc-dev-guide/src/sanitizers.md +++ b/src/doc/rustc-dev-guide/src/sanitizers.md @@ -1,4 +1,4 @@ -# Sanitizers Support +# Sanitizers support The rustc compiler contains support for following sanitizers: diff --git a/src/doc/rustc-dev-guide/src/tests/ui.md b/src/doc/rustc-dev-guide/src/tests/ui.md index 8f4467a5551e..09dc476d68ee 100644 --- a/src/doc/rustc-dev-guide/src/tests/ui.md +++ b/src/doc/rustc-dev-guide/src/tests/ui.md @@ -59,6 +59,11 @@ The output is normalized to ignore unwanted differences, see the [Normalization](#normalization) section. If the file is missing, then compiletest expects the corresponding output to be empty. +A common reason to use normalization, revisions, and most of the other following tools, +is to account for platform differences. Consider alternatives to these tools, like +e.g. using the `extern "rust-invalid"` ABI that is invalid on every platform +instead of fixing the test to use cross-compilation and testing every possibly-invalid ABI. + There can be multiple stdout/stderr files. 
The general form is: ```text diff --git a/src/doc/rustc-dev-guide/src/ty.md b/src/doc/rustc-dev-guide/src/ty.md index ce6cffec1adb..767ac3fdba21 100644 --- a/src/doc/rustc-dev-guide/src/ty.md +++ b/src/doc/rustc-dev-guide/src/ty.md @@ -62,8 +62,8 @@ Here is a summary: | Describe the *syntax* of a type: what the user wrote (with some desugaring). | Describe the *semantics* of a type: the meaning of what the user wrote. | | Each `rustc_hir::Ty` has its own spans corresponding to the appropriate place in the program. | Doesn’t correspond to a single place in the user’s program. | | `rustc_hir::Ty` has generics and lifetimes; however, some of those lifetimes are special markers like [`LifetimeKind::Implicit`][implicit]. | `ty::Ty` has the full type, including generics and lifetimes, even if the user left them out | -| `fn foo(x: u32) → u32 { }` - Two `rustc_hir::Ty` representing each usage of `u32`, each has its own `Span`s, and `rustc_hir::Ty` doesn’t tell us that both are the same type | `fn foo(x: u32) → u32 { }` - One `ty::Ty` for all instances of `u32` throughout the program, and `ty::Ty` tells us that both usages of `u32` mean the same type. | -| `fn foo(x: &u32) -> &u32)` - Two `rustc_hir::Ty` again. Lifetimes for the references show up in the `rustc_hir::Ty`s using a special marker, [`LifetimeKind::Implicit`][implicit]. | `fn foo(x: &u32) -> &u32)`- A single `ty::Ty`. The `ty::Ty` has the hidden lifetime param. | +| `fn foo(x: u32) -> u32 { }` - Two `rustc_hir::Ty` representing each usage of `u32`, each has its own `Span`s, and `rustc_hir::Ty` doesn’t tell us that both are the same type | `fn foo(x: u32) -> u32 { }` - One `ty::Ty` for all instances of `u32` throughout the program, and `ty::Ty` tells us that both usages of `u32` mean the same type. | +| `fn foo(x: &u32) -> &u32 { }` - Two `rustc_hir::Ty` again. Lifetimes for the references show up in the `rustc_hir::Ty`s using a special marker, [`LifetimeKind::Implicit`][implicit]. 
| `fn foo(x: &u32) -> &u32 { }`- A single `ty::Ty`. The `ty::Ty` has the hidden lifetime param. | [implicit]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/enum.LifetimeKind.html#variant.Implicit diff --git a/src/doc/unstable-book/src/compiler-flags/autodiff.md b/src/doc/unstable-book/src/compiler-flags/autodiff.md index 95c188d1f3b2..28d2ece1468f 100644 --- a/src/doc/unstable-book/src/compiler-flags/autodiff.md +++ b/src/doc/unstable-book/src/compiler-flags/autodiff.md @@ -10,6 +10,7 @@ Multiple options can be separated with a comma. Valid options are: `Enable` - Required flag to enable autodiff `PrintTA` - print Type Analysis Information +`PrintTAFn` - print Type Analysis Information for a specific function `PrintAA` - print Activity Analysis Information `PrintPerf` - print Performance Warnings from Enzyme `PrintSteps` - prints all intermediate transformations diff --git a/src/doc/unstable-book/src/compiler-flags/macro-stats.md b/src/doc/unstable-book/src/compiler-flags/macro-stats.md index b2622cff0570..f3fa69058a71 100644 --- a/src/doc/unstable-book/src/compiler-flags/macro-stats.md +++ b/src/doc/unstable-book/src/compiler-flags/macro-stats.md @@ -10,12 +10,12 @@ generated code is normally invisible to the programmer. This flag helps identify such cases. When enabled, the compiler measures the effect on code size of all used macros and prints a table summarizing that -effect. For each distinct macro, it counts how many times it is used, and the -net effect on code size (in terms of lines of code, and bytes of code). The +effect. For each distinct macro, it counts how many times it is used, and how +much code it produces when expanded (in lines of code, and bytes of code). The code size evaluation uses the compiler's internal pretty-printing, and so will be independent of the formatting in the original code. -Note that the net effect of a macro may be negative. E.g. the `cfg!` and +Note that the output size of a macro may be zero. E.g. 
the `cfg!` and `#[test]` macros often strip out code. If a macro is identified as causing a large increase in code size, it is worth diff --git a/src/doc/unstable-book/src/compiler-flags/min-function-alignment.md b/src/doc/unstable-book/src/compiler-flags/min-function-alignment.md index b7a3aa71fc4c..03e576e3e300 100644 --- a/src/doc/unstable-book/src/compiler-flags/min-function-alignment.md +++ b/src/doc/unstable-book/src/compiler-flags/min-function-alignment.md @@ -15,7 +15,7 @@ This flag is equivalent to: - `-fmin-function-alignment` for [GCC](https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-fmin-function-alignment_003dn) - `-falign-functions` for [Clang](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang1-falign-functions) -The specified alignment is a minimum. A higher alignment can be specified for specific functions by using the [`repr(align(...))`](https://github.com/rust-lang/rust/issues/82232) feature and annotating the function with a `#[repr(align())]` attribute. The attribute's value is ignored when it is lower than the value passed to `min-function-alignment`. +The specified alignment is a minimum. A higher alignment can be specified for specific functions by using the [`align(...)`](https://github.com/rust-lang/rust/issues/82232) feature and annotating the function with a `#[align()]` attribute. The attribute's value is ignored when it is lower than the value passed to `min-function-alignment`. 
There are two additional edge cases for this flag: diff --git a/src/doc/unstable-book/src/language-features/abi-c-cmse-nonsecure-call.md b/src/doc/unstable-book/src/language-features/abi-cmse-nonsecure-call.md similarity index 79% rename from src/doc/unstable-book/src/language-features/abi-c-cmse-nonsecure-call.md rename to src/doc/unstable-book/src/language-features/abi-cmse-nonsecure-call.md index 79a177cb28b1..6b653a194fe0 100644 --- a/src/doc/unstable-book/src/language-features/abi-c-cmse-nonsecure-call.md +++ b/src/doc/unstable-book/src/language-features/abi-cmse-nonsecure-call.md @@ -1,4 +1,4 @@ -# `abi_c_cmse_nonsecure_call` +# `abi_cmse_nonsecure_call` The tracking issue for this feature is: [#81391] @@ -14,10 +14,9 @@ LLVM, the Rust compiler and the linker are providing [support](https://developer.arm.com/documentation/ecm0359818/latest/) for the TrustZone-M feature. -One of the things provided, with this unstable feature, is the -`C-cmse-nonsecure-call` function ABI. This ABI is used on function pointers to -non-secure code to mark a non-secure function call (see [section -5.5](https://developer.arm.com/documentation/ecm0359818/latest/) for details). +One of the things provided with this unstable feature is the "cmse-nonsecure-call" function ABI. +This ABI is used on function pointers to non-secure code to mark a non-secure function call +(see [section 5.5](https://developer.arm.com/documentation/ecm0359818/latest/) for details). With this ABI, the compiler will do the following to perform the call: * save registers needed after the call to Secure memory @@ -28,19 +27,16 @@ With this ABI, the compiler will do the following to perform the call: To avoid using the non-secure stack, the compiler will constrain the number and type of parameters/return value. -The `extern "C-cmse-nonsecure-call"` ABI is otherwise equivalent to the -`extern "C"` ABI. 
- ``` rust,ignore #![no_std] -#![feature(abi_c_cmse_nonsecure_call)] +#![feature(abi_cmse_nonsecure_call)] #[no_mangle] pub fn call_nonsecure_function(addr: usize) -> u32 { let non_secure_function = - unsafe { core::mem::transmute:: u32>(addr) }; + unsafe { core::mem::transmute:: u32>(addr) }; non_secure_function() } ``` diff --git a/src/doc/unstable-book/src/language-features/cmse-nonsecure-entry.md b/src/doc/unstable-book/src/language-features/cmse-nonsecure-entry.md index ca95ccf33ac2..1d76d6516598 100644 --- a/src/doc/unstable-book/src/language-features/cmse-nonsecure-entry.md +++ b/src/doc/unstable-book/src/language-features/cmse-nonsecure-entry.md @@ -14,10 +14,9 @@ LLVM, the Rust compiler and the linker are providing [support](https://developer.arm.com/documentation/ecm0359818/latest/) for the TrustZone-M feature. -One of the things provided, with this unstable feature, is the -`C-cmse-nonsecure-entry` ABI. This ABI marks a Secure function as an -entry function (see [section -5.4](https://developer.arm.com/documentation/ecm0359818/latest/) for details). +One of the things provided with this unstable feature is the "cmse-nonsecure-entry" ABI. +This ABI marks a Secure function as an entry function (see +[section 5.4](https://developer.arm.com/documentation/ecm0359818/latest/) for details). With this ABI, the compiler will do the following: * add a special symbol on the function which is the `__acle_se_` prefix and the standard function name @@ -28,9 +27,7 @@ With this ABI, the compiler will do the following: Because the stack can not be used to pass parameters, there will be compilation errors if: -* the total size of all parameters is too big (for example more than four 32 - bits integers) -* the entry function is not using a C ABI +* the total size of all parameters is too big (for example, more than four 32-bit integers) The special symbol `__acle_se_` will be used by the linker to generate a secure gateway veneer. @@ -42,7 +39,7 @@ gateway veneer. 
#![feature(cmse_nonsecure_entry)] #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { +pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { input + 6 } ``` diff --git a/src/doc/unstable-book/src/language-features/loop-match.md b/src/doc/unstable-book/src/language-features/loop-match.md new file mode 100644 index 000000000000..4cc763d34344 --- /dev/null +++ b/src/doc/unstable-book/src/language-features/loop-match.md @@ -0,0 +1,52 @@ +# `loop_match` + +The tracking issue for this feature is: [#132306] + +[#132306]: https://github.com/rust-lang/rust/issues/132306 + +------ + +The `#[loop_match]` and `#[const_continue]` attributes can be used to improve the code +generation of logic that fits this shape: + +```ignore (pseudo-rust) +loop { + state = 'blk: { + match state { + State::A => { + break 'blk State::B + } + State::B => { /* ... */ } + /* ... */ + } + } +} +``` + +Here the loop itself can be annotated with `#[loop_match]`, and any `break 'blk` with +`#[const_continue]` if the value is know at compile time: + +```ignore (pseudo-rust) +#[loop_match] +loop { + state = 'blk: { + match state { + State::A => { + #[const_continue] + break 'blk State::B + } + State::B => { /* ... */ } + /* ... */ + } + } +} +``` + +The observable behavior of this loop is exactly the same as without the extra attributes. +The difference is in the generated output: normally, when the state is `A`, control flow +moves from the `A` branch, back to the top of the loop, then to the `B` branch. With the +attributes, The `A` branch will immediately jump to the `B` branch. + +Removing the indirection can be beneficial for stack usage and branch prediction, and +enables other optimizations by clearly splitting out the control flow paths that your +program will actually use. 
diff --git a/src/doc/unstable-book/src/language-features/macro-metavar-expr-concat.md b/src/doc/unstable-book/src/language-features/macro-metavar-expr-concat.md index b6dbdb144077..7eb5dca532fd 100644 --- a/src/doc/unstable-book/src/language-features/macro-metavar-expr-concat.md +++ b/src/doc/unstable-book/src/language-features/macro-metavar-expr-concat.md @@ -8,7 +8,8 @@ In stable Rust, there is no way to create new identifiers by joining identifiers `#![feature(macro_metavar_expr_concat)]` introduces a way to do this, using the concat metavariable expression. > This feature uses the syntax from [`macro_metavar_expr`] but is otherwise -> independent. It replaces the old unstable feature [`concat_idents`]. +> independent. It replaces the since-removed unstable feature +> [`concat_idents`]. > This is an experimental feature; it and its syntax will require a RFC before stabilization. @@ -126,8 +127,7 @@ test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; fini [`paste`]: https://crates.io/crates/paste [RFC 3086]: https://rust-lang.github.io/rfcs/3086-macro-metavar-expr.html -[`concat_idents!`]: https://doc.rust-lang.org/nightly/std/macro.concat_idents.html [`macro_metavar_expr`]: ../language-features/macro-metavar-expr.md -[`concat_idents`]: ../library-features/concat-idents.md +[`concat_idents`]: https://github.com/rust-lang/rust/issues/29599 [#124225]: https://github.com/rust-lang/rust/issues/124225 [declarative macros]: https://doc.rust-lang.org/stable/reference/macros-by-example.html diff --git a/src/doc/unstable-book/src/library-features/concat-idents.md b/src/doc/unstable-book/src/library-features/concat-idents.md deleted file mode 100644 index 8a38d155e3db..000000000000 --- a/src/doc/unstable-book/src/library-features/concat-idents.md +++ /dev/null @@ -1,27 +0,0 @@ -# `concat_idents` - -The tracking issue for this feature is: [#29599] - -This feature is deprecated, to be replaced by [`macro_metavar_expr_concat`]. 
- -[#29599]: https://github.com/rust-lang/rust/issues/29599 -[`macro_metavar_expr_concat`]: https://github.com/rust-lang/rust/issues/124225 - ------------------------- - -> This feature is expected to be superseded by [`macro_metavar_expr_concat`](../language-features/macro-metavar-expr-concat.md). - -The `concat_idents` feature adds a macro for concatenating multiple identifiers -into one identifier. - -## Examples - -```rust -#![feature(concat_idents)] - -fn main() { - fn foobar() -> u32 { 23 } - let f = concat_idents!(foo, bar); - assert_eq!(f(), 23); -} -``` diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index d77bdf09d010..3d027db2622d 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -227,7 +227,7 @@ fn clean_generic_bound<'tcx>( Some(match bound { hir::GenericBound::Outlives(lt) => GenericBound::Outlives(clean_lifetime(lt, cx)), hir::GenericBound::Trait(t) => { - // `T: ~const Destruct` is hidden because `T: Destruct` is a no-op. + // `T: [const] Destruct` is hidden because `T: Destruct` is a no-op. if let hir::BoundConstness::Maybe(_) = t.modifiers.constness && cx.tcx.lang_items().destruct_trait() == Some(t.trait_ref.trait_def_id().unwrap()) { @@ -395,7 +395,7 @@ pub(crate) fn clean_predicate<'tcx>( ty::ClauseKind::ConstEvaluatable(..) | ty::ClauseKind::WellFormed(..) | ty::ClauseKind::ConstArgHasType(..) - // FIXME(const_trait_impl): We can probably use this `HostEffect` pred to render `~const`. + // FIXME(const_trait_impl): We can probably use this `HostEffect` pred to render `[const]`. | ty::ClauseKind::HostEffect(_) => None, } } @@ -404,7 +404,7 @@ fn clean_poly_trait_predicate<'tcx>( pred: ty::PolyTraitPredicate<'tcx>, cx: &mut DocContext<'tcx>, ) -> Option { - // `T: ~const Destruct` is hidden because `T: Destruct` is a no-op. + // `T: [const] Destruct` is hidden because `T: Destruct` is a no-op. 
// FIXME(const_trait_impl) check constness if Some(pred.skip_binder().def_id()) == cx.tcx.lang_items().destruct_trait() { return None; diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index 2d9670a3d10c..3cac55493f00 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, OnceLock as OnceCell}; use std::{fmt, iter}; use arrayvec::ArrayVec; +use itertools::Either; use rustc_abi::{ExternAbi, VariantIdx}; use rustc_attr_data_structures::{ AttributeKind, ConstStability, Deprecation, Stability, StableSince, @@ -199,49 +200,49 @@ impl ExternalCrate { .unwrap_or(Unknown) // Well, at least we tried. } - pub(crate) fn keywords(&self, tcx: TyCtxt<'_>) -> ThinVec<(DefId, Symbol)> { + fn mapped_root_modules( + &self, + tcx: TyCtxt<'_>, + f: impl Fn(DefId, TyCtxt<'_>) -> Option<(DefId, T)>, + ) -> impl Iterator { let root = self.def_id(); - let as_keyword = |res: Res| { - if let Res::Def(DefKind::Mod, def_id) = res { - let mut keyword = None; - let meta_items = tcx - .get_attrs(def_id, sym::doc) - .flat_map(|attr| attr.meta_item_list().unwrap_or_default()); - for meta in meta_items { - if meta.has_name(sym::keyword) - && let Some(v) = meta.value_str() - { - keyword = Some(v); - break; - } - } - return keyword.map(|p| (def_id, p)); - } - None - }; if root.is_local() { - tcx.hir_root_module() - .item_ids - .iter() - .filter_map(|&id| { - let item = tcx.hir_item(id); - match item.kind { - hir::ItemKind::Mod(..) 
=> { - as_keyword(Res::Def(DefKind::Mod, id.owner_id.to_def_id())) - } - _ => None, - } - }) - .collect() + Either::Left( + tcx.hir_root_module() + .item_ids + .iter() + .filter(move |&&id| matches!(tcx.hir_item(id).kind, hir::ItemKind::Mod(..))) + .filter_map(move |&id| f(id.owner_id.into(), tcx)), + ) } else { - tcx.module_children(root).iter().map(|item| item.res).filter_map(as_keyword).collect() + Either::Right( + tcx.module_children(root) + .iter() + .filter_map(|item| { + if let Res::Def(DefKind::Mod, did) = item.res { Some(did) } else { None } + }) + .filter_map(move |did| f(did, tcx)), + ) } } - pub(crate) fn primitives(&self, tcx: TyCtxt<'_>) -> ThinVec<(DefId, PrimitiveType)> { - let root = self.def_id(); + pub(crate) fn keywords(&self, tcx: TyCtxt<'_>) -> impl Iterator { + fn as_keyword(did: DefId, tcx: TyCtxt<'_>) -> Option<(DefId, Symbol)> { + tcx.get_attrs(did, sym::doc) + .flat_map(|attr| attr.meta_item_list().unwrap_or_default()) + .filter(|meta| meta.has_name(sym::keyword)) + .find_map(|meta| meta.value_str()) + .map(|value| (did, value)) + } + self.mapped_root_modules(tcx, as_keyword) + } + + pub(crate) fn primitives( + &self, + tcx: TyCtxt<'_>, + ) -> impl Iterator { // Collect all inner modules which are tagged as implementations of // primitives. // @@ -259,40 +260,21 @@ impl ExternalCrate { // Also note that this does not attempt to deal with modules tagged // duplicately for the same primitive. This is handled later on when // rendering by delegating everything to a hash map. 
- let as_primitive = |res: Res| { - let Res::Def(DefKind::Mod, def_id) = res else { return None }; - tcx.get_attrs(def_id, sym::rustc_doc_primitive) - .map(|attr| { - let attr_value = attr.value_str().expect("syntax should already be validated"); - let Some(prim) = PrimitiveType::from_symbol(attr_value) else { - span_bug!( - attr.span(), - "primitive `{attr_value}` is not a member of `PrimitiveType`" - ); - }; + fn as_primitive(def_id: DefId, tcx: TyCtxt<'_>) -> Option<(DefId, PrimitiveType)> { + tcx.get_attrs(def_id, sym::rustc_doc_primitive).next().map(|attr| { + let attr_value = attr.value_str().expect("syntax should already be validated"); + let Some(prim) = PrimitiveType::from_symbol(attr_value) else { + span_bug!( + attr.span(), + "primitive `{attr_value}` is not a member of `PrimitiveType`" + ); + }; - (def_id, prim) - }) - .next() - }; - - if root.is_local() { - tcx.hir_root_module() - .item_ids - .iter() - .filter_map(|&id| { - let item = tcx.hir_item(id); - match item.kind { - hir::ItemKind::Mod(..) => { - as_primitive(Res::Def(DefKind::Mod, id.owner_id.to_def_id())) - } - _ => None, - } - }) - .collect() - } else { - tcx.module_children(root).iter().map(|item| item.res).filter_map(as_primitive).collect() + (def_id, prim) + }) } + + self.mapped_root_modules(tcx, as_primitive) } } @@ -764,15 +746,21 @@ impl Item { Some(tcx.visibility(def_id)) } - pub(crate) fn attributes_without_repr(&self, tcx: TyCtxt<'_>, is_json: bool) -> Vec { + fn attributes_without_repr(&self, tcx: TyCtxt<'_>, is_json: bool) -> Vec { const ALLOWED_ATTRIBUTES: &[Symbol] = &[sym::export_name, sym::link_section, sym::no_mangle, sym::non_exhaustive]; - self.attrs .other_attrs .iter() .filter_map(|attr| { - if is_json { + // NoMangle is special cased, as it appears in HTML output, and we want to show it in source form, not HIR printing. + // It is also used by cargo-semver-checks. 
+ if let hir::Attribute::Parsed(AttributeKind::NoMangle(..)) = attr { + Some("#[no_mangle]".to_string()) + } else if let hir::Attribute::Parsed(AttributeKind::ExportName { name, .. }) = attr + { + Some(format!("#[export_name = \"{name}\"]")) + } else if is_json { match attr { // rustdoc-json stores this in `Item::deprecation`, so we // don't want it it `Item::attrs`. @@ -785,26 +773,22 @@ impl Item { s }), } - } else if attr.has_any_name(ALLOWED_ATTRIBUTES) { + } else { + if !attr.has_any_name(ALLOWED_ATTRIBUTES) { + return None; + } Some( rustc_hir_pretty::attribute_to_string(&tcx, attr) .replace("\\\n", "") .replace('\n', "") .replace(" ", " "), ) - } else { - None } }) .collect() } - pub(crate) fn attributes_and_repr( - &self, - tcx: TyCtxt<'_>, - cache: &Cache, - is_json: bool, - ) -> Vec { + pub(crate) fn attributes(&self, tcx: TyCtxt<'_>, cache: &Cache, is_json: bool) -> Vec { let mut attrs = self.attributes_without_repr(tcx, is_json); if let Some(repr_attr) = self.repr(tcx, cache, is_json) { @@ -1966,7 +1950,7 @@ impl PrimitiveType { let e = ExternalCrate { crate_num }; let crate_name = e.name(tcx); debug!(?crate_num, ?crate_name); - for &(def_id, prim) in &e.primitives(tcx) { + for (def_id, prim) in e.primitives(tcx) { // HACK: try to link to std instead where possible if crate_name == sym::core && primitive_locations.contains_key(&prim) { continue; @@ -2450,20 +2434,6 @@ pub(crate) enum ConstantKind { Infer, } -impl Constant { - pub(crate) fn expr(&self, tcx: TyCtxt<'_>) -> String { - self.kind.expr(tcx) - } - - pub(crate) fn value(&self, tcx: TyCtxt<'_>) -> Option { - self.kind.value(tcx) - } - - pub(crate) fn is_literal(&self, tcx: TyCtxt<'_>) -> bool { - self.kind.is_literal(tcx) - } -} - impl ConstantKind { pub(crate) fn expr(&self, tcx: TyCtxt<'_>) -> String { match *self { diff --git a/src/librustdoc/clean/types/tests.rs b/src/librustdoc/clean/types/tests.rs index 7ff5026150b1..9499507b2c0f 100644 --- a/src/librustdoc/clean/types/tests.rs +++ 
b/src/librustdoc/clean/types/tests.rs @@ -10,6 +10,7 @@ fn create_doc_fragment(s: &str) -> Vec { doc: Symbol::intern(s), kind: DocFragmentKind::SugaredDoc, indent: 0, + from_expansion: false, }] } diff --git a/src/librustdoc/clean/utils.rs b/src/librustdoc/clean/utils.rs index c58b07a5b673..2c9878636abf 100644 --- a/src/librustdoc/clean/utils.rs +++ b/src/librustdoc/clean/utils.rs @@ -61,7 +61,7 @@ pub(crate) fn krate(cx: &mut DocContext<'_>) -> Crate { let keywords = local_crate.keywords(cx.tcx); { let ItemKind::ModuleItem(m) = &mut module.inner.kind else { unreachable!() }; - m.items.extend(primitives.iter().map(|&(def_id, prim)| { + m.items.extend(primitives.map(|(def_id, prim)| { Item::from_def_id_and_parts( def_id, Some(prim.as_sym()), @@ -69,7 +69,7 @@ pub(crate) fn krate(cx: &mut DocContext<'_>) -> Crate { cx, ) })); - m.items.extend(keywords.into_iter().map(|(def_id, kw)| { + m.items.extend(keywords.map(|(def_id, kw)| { Item::from_def_id_and_parts(def_id, Some(kw), ItemKind::KeywordItem, cx) })); } diff --git a/src/librustdoc/config.rs b/src/librustdoc/config.rs index f93aa8ffd0de..986390dbaa08 100644 --- a/src/librustdoc/config.rs +++ b/src/librustdoc/config.rs @@ -9,7 +9,7 @@ use rustc_data_structures::fx::FxIndexMap; use rustc_errors::DiagCtxtHandle; use rustc_session::config::{ self, CodegenOptions, CrateType, ErrorOutputType, Externs, Input, JsonUnusedExterns, - OptionsTargetModifiers, UnstableOptions, get_cmd_lint_options, nightly_options, + OptionsTargetModifiers, Sysroot, UnstableOptions, get_cmd_lint_options, nightly_options, parse_crate_types_from_list, parse_externs, parse_target_triple, }; use rustc_session::lint::Level; @@ -103,9 +103,7 @@ pub(crate) struct Options { /// compiling doctests from the crate. pub(crate) edition: Edition, /// The path to the sysroot. Used during the compilation process. - pub(crate) sysroot: PathBuf, - /// Has the same value as `sysroot` except is `None` when the user didn't pass `---sysroot`. 
- pub(crate) maybe_sysroot: Option, + pub(crate) sysroot: Sysroot, /// Lint information passed over the command-line. pub(crate) lint_opts: Vec<(String, Level)>, /// Whether to ask rustc to describe the lints it knows. @@ -201,7 +199,6 @@ impl fmt::Debug for Options { .field("target", &self.target) .field("edition", &self.edition) .field("sysroot", &self.sysroot) - .field("maybe_sysroot", &self.maybe_sysroot) .field("lint_opts", &self.lint_opts) .field("describe_lints", &self.describe_lints) .field("lint_cap", &self.lint_cap) @@ -725,16 +722,14 @@ impl Options { } let target = parse_target_triple(early_dcx, matches); - let maybe_sysroot = matches.opt_str("sysroot").map(PathBuf::from); - - let sysroot = rustc_session::filesearch::materialize_sysroot(maybe_sysroot.clone()); + let sysroot = Sysroot::new(matches.opt_str("sysroot").map(PathBuf::from)); let libs = matches .opt_strs("L") .iter() .map(|s| { SearchPath::from_cli_opt( - &sysroot, + sysroot.path(), &target, early_dcx, s, @@ -827,7 +822,6 @@ impl Options { target, edition, sysroot, - maybe_sysroot, lint_opts, describe_lints, lint_cap, diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index 204f8decffcc..cf3c4ac97af6 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -149,15 +149,12 @@ pub(crate) fn new_dcx( diagnostic_width: Option, unstable_opts: &UnstableOptions, ) -> rustc_errors::DiagCtxt { - let fallback_bundle = rustc_errors::fallback_fluent_bundle( - rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(), - false, - ); + let translator = rustc_driver::default_translator(); let emitter: Box = match error_format { ErrorOutputType::HumanReadable { kind, color_config } => { let short = kind.short(); Box::new( - HumanEmitter::new(stderr_destination(color_config), fallback_bundle) + HumanEmitter::new(stderr_destination(color_config), translator) .sm(source_map.map(|sm| sm as _)) .short_message(short) .diagnostic_width(diagnostic_width) @@ -178,7 +175,7 @@ pub(crate) fn new_dcx( 
JsonEmitter::new( Box::new(io::BufWriter::new(io::stderr())), Some(source_map), - fallback_bundle, + translator, pretty, json_rendered, color_config, @@ -387,8 +384,6 @@ pub(crate) fn run_global_ctxt( ctxt.external_traits.insert(sized_trait_did, sized_trait); } - debug!("crate: {:?}", tcx.hir_crate(())); - let mut krate = tcx.sess.time("clean_crate", || clean::krate(&mut ctxt)); if krate.module.doc_value().is_empty() { diff --git a/src/librustdoc/doctest.rs b/src/librustdoc/doctest.rs index 130fdff1afe2..1b5c9fd46641 100644 --- a/src/librustdoc/doctest.rs +++ b/src/librustdoc/doctest.rs @@ -514,8 +514,9 @@ fn run_test( compiler_args.push(format!("@{}", doctest.global_opts.args_file.display())); - if let Some(sysroot) = &rustdoc_options.maybe_sysroot { - compiler_args.push(format!("--sysroot={}", sysroot.display())); + let sysroot = &rustdoc_options.sysroot; + if let Some(explicit_sysroot) = &sysroot.explicit { + compiler_args.push(format!("--sysroot={}", explicit_sysroot.display())); } compiler_args.extend_from_slice(&["--edition".to_owned(), doctest.edition.to_string()]); @@ -574,7 +575,7 @@ fn run_test( let rustc_binary = rustdoc_options .test_builder .as_deref() - .unwrap_or_else(|| rustc_interface::util::rustc_path().expect("found rustc")); + .unwrap_or_else(|| rustc_interface::util::rustc_path(sysroot).expect("found rustc")); let mut compiler = wrapped_rustc_command(&rustdoc_options.test_builder_wrappers, rustc_binary); compiler.args(&compiler_args); diff --git a/src/librustdoc/doctest/make.rs b/src/librustdoc/doctest/make.rs index 3ff6828e52f9..f229f77c9784 100644 --- a/src/librustdoc/doctest/make.rs +++ b/src/librustdoc/doctest/make.rs @@ -456,16 +456,13 @@ fn parse_source( let filename = FileName::anon_source_code(&wrapped_source); let sm = Arc::new(SourceMap::new(FilePathMapping::empty())); - let fallback_bundle = rustc_errors::fallback_fluent_bundle( - rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(), - false, - ); + let translator = 
rustc_driver::default_translator(); info.supports_color = - HumanEmitter::new(stderr_destination(ColorConfig::Auto), fallback_bundle.clone()) + HumanEmitter::new(stderr_destination(ColorConfig::Auto), translator.clone()) .supports_color(); // Any errors in parsing should also appear when the doctest is compiled for real, so just // send all the errors that the parser emits directly into a `Sink` instead of stderr. - let emitter = HumanEmitter::new(Box::new(io::sink()), fallback_bundle); + let emitter = HumanEmitter::new(Box::new(io::sink()), translator); // FIXME(misdreavus): pass `-Z treat-err-as-bug` to the doctest parser let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings(); diff --git a/src/librustdoc/formats/renderer.rs b/src/librustdoc/formats/renderer.rs index 48626171404f..79ff1fa38c38 100644 --- a/src/librustdoc/formats/renderer.rs +++ b/src/librustdoc/formats/renderer.rs @@ -68,8 +68,6 @@ pub(crate) trait FormatRenderer<'tcx>: Sized { /// Post processing hook for cleanup and dumping output to files. fn after_krate(self) -> Result<(), Error>; - - fn cache(&self) -> &Cache; } fn run_format_inner<'tcx, T: FormatRenderer<'tcx>>( diff --git a/src/librustdoc/html/format.rs b/src/librustdoc/html/format.rs index 6ab1520386d8..bcb3e57c8442 100644 --- a/src/librustdoc/html/format.rs +++ b/src/librustdoc/html/format.rs @@ -268,7 +268,7 @@ impl clean::GenericBound { fmt::from_fn(move |f| match self { clean::GenericBound::Outlives(lt) => write!(f, "{}", lt.print()), clean::GenericBound::TraitBound(ty, modifiers) => { - // `const` and `~const` trait bounds are experimental; don't render them. + // `const` and `[const]` trait bounds are experimental; don't render them. 
let hir::TraitBoundModifiers { polarity, constness: _ } = modifiers; f.write_str(match polarity { hir::BoundPolarity::Positive => "", diff --git a/src/librustdoc/html/render/context.rs b/src/librustdoc/html/render/context.rs index 382144516575..3b4dae841ee7 100644 --- a/src/librustdoc/html/render/context.rs +++ b/src/librustdoc/html/render/context.rs @@ -875,8 +875,4 @@ impl<'tcx> FormatRenderer<'tcx> for Context<'tcx> { Ok(()) } - - fn cache(&self) -> &Cache { - &self.shared.cache - } } diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 66d5aafa3c1e..ed58bae70bd4 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -1194,7 +1194,7 @@ fn render_assoc_item( // a whitespace prefix and newline. fn render_attributes_in_pre(it: &clean::Item, prefix: &str, cx: &Context<'_>) -> impl fmt::Display { fmt::from_fn(move |f| { - for a in it.attributes_and_repr(cx.tcx(), cx.cache(), false) { + for a in it.attributes(cx.tcx(), cx.cache(), false) { writeln!(f, "{prefix}{a}")?; } Ok(()) @@ -1210,7 +1210,7 @@ fn render_code_attribute(code_attr: CodeAttribute, w: &mut impl fmt::Write) { // When an attribute is rendered inside a tag, it is formatted using // a div to produce a newline after it. fn render_attributes_in_code(w: &mut impl fmt::Write, it: &clean::Item, cx: &Context<'_>) { - for attr in it.attributes_and_repr(cx.tcx(), cx.cache(), false) { + for attr in it.attributes(cx.tcx(), cx.cache(), false) { render_code_attribute(CodeAttribute(attr), w); } } diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs index a75088d27ccd..e16acc9622fc 100644 --- a/src/librustdoc/html/render/print_item.rs +++ b/src/librustdoc/html/render/print_item.rs @@ -469,7 +469,8 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i let unsafety_flag = match myitem.kind { clean::FunctionItem(_) | clean::ForeignFunctionItem(..) 
- if myitem.fn_header(tcx).unwrap().is_unsafe() => + if myitem.fn_header(tcx).unwrap().safety + == hir::HeaderSafety::Normal(hir::Safety::Unsafe) => { "" } @@ -1491,7 +1492,7 @@ impl<'a, 'cx: 'a> ItemUnion<'a, 'cx> { writeln!(f, "{repr}")?; }; } else { - for a in self.it.attributes_and_repr(self.cx.tcx(), self.cx.cache(), false) { + for a in self.it.attributes(self.cx.tcx(), self.cx.cache(), false) { writeln!(f, "{a}")?; } } diff --git a/src/librustdoc/html/render/write_shared.rs b/src/librustdoc/html/render/write_shared.rs index fb2b45802a67..606a91139087 100644 --- a/src/librustdoc/html/render/write_shared.rs +++ b/src/librustdoc/html/render/write_shared.rs @@ -439,24 +439,20 @@ impl CratesIndexPart { let content = format!("

List of all crates

    {DELIMITER}
"); let template = layout::render(layout, &page, "", content, style_files); - match SortedTemplate::from_template(&template, DELIMITER) { - Ok(template) => template, - Err(e) => panic!( - "Object Replacement Character (U+FFFC) should not appear in the --index-page: {e}" - ), - } + SortedTemplate::from_template(&template, DELIMITER) + .expect("Object Replacement Character (U+FFFC) should not appear in the --index-page") } /// Might return parts that are duplicate with ones in prexisting index.html fn get(crate_name: &str, external_crates: &[String]) -> Result, Error> { let mut ret = PartsAndLocations::default(); - let path = PathBuf::from("index.html"); + let path = Path::new("index.html"); for crate_name in external_crates.iter().map(|s| s.as_str()).chain(once(crate_name)) { let part = format!( "
  • {crate_name}
  • ", trailing_slash = ensure_trailing_slash(crate_name), ); - ret.push(path.clone(), part); + ret.push(path.to_path_buf(), part); } Ok(ret) } @@ -737,7 +733,7 @@ impl TraitAliasPart { }, }; - let implementors = imps + let mut implementors = imps .iter() .filter_map(|imp| { // If the trait and implementation are in the same crate, then @@ -759,12 +755,12 @@ impl TraitAliasPart { }) } }) - .collect::>(); + .peekable(); // Only create a js file if we have impls to add to it. If the trait is // documented locally though we always create the file to avoid dead // links. - if implementors.is_empty() && !cache.paths.contains_key(&did) { + if implementors.peek().is_none() && !cache.paths.contains_key(&did) { continue; } @@ -775,11 +771,7 @@ impl TraitAliasPart { path.push(format!("{remote_item_type}.{}.js", remote_path[remote_path.len() - 1])); let part = OrderedJson::array_sorted( - implementors - .iter() - .map(OrderedJson::serialize) - .collect::, _>>() - .unwrap(), + implementors.map(|implementor| OrderedJson::serialize(implementor).unwrap()), ); path_parts.push(path, OrderedJson::array_unsorted([crate_name_json, &part])); } @@ -874,9 +866,8 @@ impl<'item> DocVisitor<'item> for TypeImplCollector<'_, '_, 'item> { let impl_ = cache .impls .get(&target_did) - .map(|v| &v[..]) - .unwrap_or_default() - .iter() + .into_iter() + .flatten() .map(|impl_| { (impl_.impl_item.item_id, AliasedTypeImpl { impl_, type_aliases: Vec::new() }) }) @@ -891,14 +882,8 @@ impl<'item> DocVisitor<'item> for TypeImplCollector<'_, '_, 'item> { // Exclude impls that are directly on this type. They're already in the HTML. // Some inlining scenarios can cause there to be two versions of the same // impl: one on the type alias and one on the underlying target type. 
- let mut seen_impls: FxHashSet = cache - .impls - .get(&self_did) - .map(|s| &s[..]) - .unwrap_or_default() - .iter() - .map(|i| i.impl_item.item_id) - .collect(); + let mut seen_impls: FxHashSet = + cache.impls.get(&self_did).into_iter().flatten().map(|i| i.impl_item.item_id).collect(); for (impl_item_id, aliased_type_impl) in &mut aliased_type.impl_ { // Only include this impl if it actually unifies with this alias. // Synthetic impls are not included; those are also included in the HTML. diff --git a/src/librustdoc/html/sources.rs b/src/librustdoc/html/sources.rs index 1fa6b5a60f3a..c34b31542697 100644 --- a/src/librustdoc/html/sources.rs +++ b/src/librustdoc/html/sources.rs @@ -353,7 +353,7 @@ pub(crate) fn print_src( ); Ok(()) }); - let max_nb_digits = if lines > 0 { lines.ilog(10) + 1 } else { 1 }; + let max_nb_digits = if lines > 0 { lines.ilog10() + 1 } else { 1 }; match source_context { SourceContext::Standalone { file_path } => Source { code_html: code, diff --git a/src/librustdoc/html/static/js/main.js b/src/librustdoc/html/static/js/main.js index 7b1a61a3ffa4..2de8f836da3b 100644 --- a/src/librustdoc/html/static/js/main.js +++ b/src/librustdoc/html/static/js/main.js @@ -1,6 +1,6 @@ // Local js definitions: /* global addClass, getSettingValue, hasClass, updateLocalStorage */ -/* global onEachLazy, removeClass, getVar */ +/* global onEachLazy, removeClass, getVar, nonnull */ "use strict"; @@ -2138,3 +2138,31 @@ function preLoadCss(cssUrl) { elem.addEventListener("click", showHideCodeExampleButtons); }); }()); + +// This section is a bugfix for firefox: when copying text with `user-select: none`, it adds +// extra backline characters. 
+// +// Rustdoc issue: Workaround for https://github.com/rust-lang/rust/issues/141464 +// Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1273836 +(function() { + document.body.addEventListener("copy", event => { + let target = nonnull(event.target); + let isInsideCode = false; + while (target && target !== document.body) { + // @ts-expect-error + if (target.tagName === "CODE") { + isInsideCode = true; + break; + } + // @ts-expect-error + target = target.parentElement; + } + if (!isInsideCode) { + return; + } + const selection = document.getSelection(); + // @ts-expect-error + nonnull(event.clipboardData).setData("text/plain", selection.toString()); + event.preventDefault(); + }); +}()); diff --git a/src/librustdoc/html/static/js/rustdoc.d.ts b/src/librustdoc/html/static/js/rustdoc.d.ts index 6af16441de88..bbcd96040bec 100644 --- a/src/librustdoc/html/static/js/rustdoc.d.ts +++ b/src/librustdoc/html/static/js/rustdoc.d.ts @@ -4,6 +4,8 @@ /* eslint-disable */ declare global { + /** Search engine data used by main.js and search.js */ + declare var searchState: rustdoc.SearchState; /** Defined and documented in `storage.js` */ declare function nonnull(x: T|null, msg: string|undefined); /** Defined and documented in `storage.js` */ @@ -17,8 +19,6 @@ declare global { RUSTDOC_TOOLTIP_HOVER_MS: number; /** Used by the popover tooltip code. 
*/ RUSTDOC_TOOLTIP_HOVER_EXIT_MS: number; - /** Search engine data used by main.js and search.js */ - searchState: rustdoc.SearchState; /** Global option, with a long list of "../"'s */ rootPath: string|null; /** @@ -102,20 +102,22 @@ declare namespace rustdoc { currentTab: number; focusedByTab: [number|null, number|null, number|null]; clearInputTimeout: function; - outputElement: function(): HTMLElement|null; - focus: function(); - defocus: function(); - showResults: function(HTMLElement|null|undefined); - removeQueryParameters: function(); - hideResults: function(); - getQueryStringParams: function(): Object.; + outputElement(): HTMLElement|null; + focus(); + defocus(); + // note: an optional param is not the same as + // a nullable/undef-able param. + showResults(elem?: HTMLElement|null); + removeQueryParameters(); + hideResults(); + getQueryStringParams(): Object.; origPlaceholder: string; setup: function(); - setLoadingSearch: function(); + setLoadingSearch(); descShards: Map; loadDesc: function({descShard: SearchDescShard, descIndex: number}): Promise; - loadedDescShard: function(string, number, string); - isDisplayed: function(): boolean, + loadedDescShard(string, number, string); + isDisplayed(): boolean, } interface SearchDescShard { @@ -237,7 +239,7 @@ declare namespace rustdoc { query: ParsedQuery, } - type Results = Map; + type Results = { max_dist?: number } & Map /** * An annotated `Row`, used in the viewmodel. 
diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index b611a3e501dc..15cad31f555a 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -2515,13 +2515,17 @@ class DocSearch { * * @param {rustdoc.ParsedQuery} origParsedQuery * - The parsed user query - * @param {Object} [filterCrates] - Crate to search in if defined - * @param {Object} [currentCrate] - Current crate, to rank results from this crate higher + * @param {Object} filterCrates - Crate to search in if defined + * @param {string} currentCrate - Current crate, to rank results from this crate higher * * @return {Promise} */ async execQuery(origParsedQuery, filterCrates, currentCrate) { - const results_others = new Map(), results_in_args = new Map(), + /** @type {rustdoc.Results} */ + const results_others = new Map(), + /** @type {rustdoc.Results} */ + results_in_args = new Map(), + /** @type {rustdoc.Results} */ results_returned = new Map(); /** @type {rustdoc.ParsedQuery} */ @@ -4365,7 +4369,7 @@ class DocSearch { * * The `results` map contains information which will be used to sort the search results: * - * * `fullId` is a `string`` used as the key of the object we use for the `results` map. + * * `fullId` is an `integer`` used as the key of the object we use for the `results` map. * * `id` is the index in the `searchIndex` array for this element. * * `index` is an `integer`` used to sort by the position of the word in the item's name. * * `dist` is the main metric used to sort the search results. @@ -4373,19 +4377,18 @@ class DocSearch { * distance computed for everything other than the last path component. 
* * @param {rustdoc.Results} results - * @param {string} fullId + * @param {number} fullId * @param {number} id * @param {number} index * @param {number} dist * @param {number} path_dist + * @param {number} maxEditDistance */ - // @ts-expect-error function addIntoResults(results, fullId, id, index, dist, path_dist, maxEditDistance) { if (dist <= maxEditDistance || index !== -1) { if (results.has(fullId)) { const result = results.get(fullId); - // @ts-expect-error - if (result.dontValidate || result.dist <= dist) { + if (result === undefined || result.dontValidate || result.dist <= dist) { return; } } @@ -4452,9 +4455,8 @@ class DocSearch { return; } - // @ts-expect-error results.max_dist = Math.max(results.max_dist || 0, tfpDist); - addIntoResults(results, row.id.toString(), pos, 0, tfpDist, 0, Number.MAX_VALUE); + addIntoResults(results, row.id, pos, 0, tfpDist, 0, Number.MAX_VALUE); } /** @@ -4495,7 +4497,7 @@ class DocSearch { if (parsedQuery.foundElems === 1 && !parsedQuery.hasReturnArrow) { const elem = parsedQuery.elems[0]; // use arrow functions to preserve `this`. 
- // @ts-expect-error + /** @type {function(number): void} */ const handleNameSearch = id => { const row = this.searchIndex[id]; if (!typePassesFilter(elem.typeFilter, row.ty) || @@ -4505,22 +4507,21 @@ class DocSearch { let pathDist = 0; if (elem.fullPath.length > 1) { - // @ts-expect-error - pathDist = checkPath(elem.pathWithoutLast, row); - if (pathDist === null) { + + const maybePathDist = checkPath(elem.pathWithoutLast, row); + if (maybePathDist === null) { return; } + pathDist = maybePathDist; } if (parsedQuery.literalSearch) { if (row.word === elem.pathLast) { - // @ts-expect-error - addIntoResults(results_others, row.id, id, 0, 0, pathDist); + addIntoResults(results_others, row.id, id, 0, 0, pathDist, 0); } } else { addIntoResults( results_others, - // @ts-expect-error row.id, id, row.normalizedName.indexOf(elem.normalizedPathLast), @@ -4561,31 +4562,23 @@ class DocSearch { const returned = row.type && row.type.output && checkIfInList(row.type.output, elem, row.type.where_clause, null, 0); if (in_args) { - // @ts-expect-error results_in_args.max_dist = Math.max( - // @ts-expect-error results_in_args.max_dist || 0, tfpDist, ); const maxDist = results_in_args.size < MAX_RESULTS ? (tfpDist + 1) : - // @ts-expect-error results_in_args.max_dist; - // @ts-expect-error addIntoResults(results_in_args, row.id, i, -1, tfpDist, 0, maxDist); } if (returned) { - // @ts-expect-error results_returned.max_dist = Math.max( - // @ts-expect-error results_returned.max_dist || 0, tfpDist, ); const maxDist = results_returned.size < MAX_RESULTS ? (tfpDist + 1) : - // @ts-expect-error results_returned.max_dist; - // @ts-expect-error addIntoResults(results_returned, row.id, i, -1, tfpDist, 0, maxDist); } } @@ -4595,18 +4588,17 @@ class DocSearch { // types with generic parameters go last. // That's because of the way unification is structured: it eats off // the end, and hits a fast path if the last item is a simple atom. 
- // @ts-expect-error + /** @type {function(rustdoc.QueryElement, rustdoc.QueryElement): number} */ const sortQ = (a, b) => { const ag = a.generics.length === 0 && a.bindings.size === 0; const bg = b.generics.length === 0 && b.bindings.size === 0; if (ag !== bg) { - // @ts-expect-error - return ag - bg; + // unary `+` converts booleans into integers. + return +ag - +bg; } - const ai = a.id > 0; - const bi = b.id > 0; - // @ts-expect-error - return ai - bi; + const ai = a.id !== null && a.id > 0; + const bi = b.id !== null && b.id > 0; + return +ai - +bi; }; parsedQuery.elems.sort(sortQ); parsedQuery.returned.sort(sortQ); @@ -4622,9 +4614,7 @@ class DocSearch { const isType = parsedQuery.foundElems !== 1 || parsedQuery.hasReturnArrow; const [sorted_in_args, sorted_returned, sorted_others] = await Promise.all([ - // @ts-expect-error sortResults(results_in_args, "elems", currentCrate), - // @ts-expect-error sortResults(results_returned, "returned", currentCrate), // @ts-expect-error sortResults(results_others, (isType ? "query" : null), currentCrate), @@ -4724,7 +4714,6 @@ function printTab(nb) { iter += 1; }); if (foundCurrentTab && foundCurrentResultSet) { - // @ts-expect-error searchState.currentTab = nb; // Corrections only kick in on type-based searches. const correctionsElem = document.getElementsByClassName("search-corrections"); @@ -4777,7 +4766,6 @@ function getFilterCrates() { // @ts-expect-error function nextTab(direction) { - // @ts-expect-error const next = (searchState.currentTab + direction + 3) % searchState.focusedByTab.length; // @ts-expect-error searchState.focusedByTab[searchState.currentTab] = document.activeElement; @@ -4788,14 +4776,12 @@ function nextTab(direction) { // Focus the first search result on the active tab, or the result that // was focused last time this tab was active. 
function focusSearchResult() { - // @ts-expect-error const target = searchState.focusedByTab[searchState.currentTab] || document.querySelectorAll(".search-results.active a").item(0) || - // @ts-expect-error document.querySelectorAll("#search-tabs button").item(searchState.currentTab); - // @ts-expect-error searchState.focusedByTab[searchState.currentTab] = null; if (target) { + // @ts-expect-error target.focus(); } } @@ -4947,7 +4933,6 @@ function makeTabHeader(tabNb, text, nbElems) { const fmtNbElems = nbElems < 10 ? `\u{2007}(${nbElems})\u{2007}\u{2007}` : nbElems < 100 ? `\u{2007}(${nbElems})\u{2007}` : `\u{2007}(${nbElems})`; - // @ts-expect-error if (searchState.currentTab === tabNb) { return ""; @@ -4961,7 +4946,6 @@ function makeTabHeader(tabNb, text, nbElems) { * @param {string} filterCrates */ async function showResults(results, go_to_first, filterCrates) { - // @ts-expect-error const search = searchState.outputElement(); if (go_to_first || (results.others.length === 1 && getSettingValue("go-to-only-result") === "true") @@ -4979,7 +4963,6 @@ async function showResults(results, go_to_first, filterCrates) { // will be used, starting search again since the search input is not empty, leading you // back to the previous page again. window.onunload = () => { }; - // @ts-expect-error searchState.removeQueryParameters(); const elem = document.createElement("a"); elem.href = results.others[0].href; @@ -4999,7 +4982,6 @@ async function showResults(results, go_to_first, filterCrates) { // Navigate to the relevant tab if the current tab is empty, like in case users search // for "-> String". If they had selected another tab previously, they have to click on // it again. 
- // @ts-expect-error let currentTab = searchState.currentTab; if ((currentTab === 0 && results.others.length === 0) || (currentTab === 1 && results.in_args.length === 0) || @@ -5087,8 +5069,8 @@ async function showResults(results, go_to_first, filterCrates) { resultsElem.appendChild(ret_in_args); resultsElem.appendChild(ret_returned); - search.innerHTML = output; // @ts-expect-error + search.innerHTML = output; if (searchState.rustdocToolbar) { // @ts-expect-error search.querySelector(".main-heading").appendChild(searchState.rustdocToolbar); @@ -5097,9 +5079,9 @@ async function showResults(results, go_to_first, filterCrates) { if (crateSearch) { crateSearch.addEventListener("input", updateCrate); } + // @ts-expect-error search.appendChild(resultsElem); // Reset focused elements. - // @ts-expect-error searchState.showResults(search); // @ts-expect-error const elems = document.getElementById("search-tabs").childNodes; @@ -5110,7 +5092,6 @@ async function showResults(results, go_to_first, filterCrates) { const j = i; // @ts-expect-error elem.onclick = () => printTab(j); - // @ts-expect-error searchState.focusedByTab.push(null); i += 1; } @@ -5122,7 +5103,6 @@ function updateSearchHistory(url) { if (!browserSupportsHistoryApi()) { return; } - // @ts-expect-error const params = searchState.getQueryStringParams(); if (!history.state && !params.search) { history.pushState(null, "", url); @@ -5149,10 +5129,8 @@ async function search(forced) { return; } - // @ts-expect-error searchState.setLoadingSearch(); - // @ts-expect-error const params = searchState.getQueryStringParams(); // In case we have no information about the saved crate and there is a URL query parameter, @@ -5162,7 +5140,6 @@ async function search(forced) { } // Update document title to maintain a meaningful browser history - // @ts-expect-error searchState.title = "\"" + query.userQuery + "\" Search - Rust"; // Because searching is incremental by character, only the most @@ -5184,33 +5161,28 @@ async 
function search(forced) { function onSearchSubmit(e) { // @ts-expect-error e.preventDefault(); - // @ts-expect-error searchState.clearInputTimeout(); search(); } function putBackSearch() { - // @ts-expect-error const search_input = searchState.input; - // @ts-expect-error if (!searchState.input) { return; } // @ts-expect-error if (search_input.value !== "" && !searchState.isDisplayed()) { - // @ts-expect-error searchState.showResults(); if (browserSupportsHistoryApi()) { history.replaceState(null, "", + // @ts-expect-error buildUrl(search_input.value, getFilterCrates())); } - // @ts-expect-error document.title = searchState.title; } } function registerSearchEvents() { - // @ts-expect-error const params = searchState.getQueryStringParams(); // Populate search bar with query string search term when provided, @@ -5224,14 +5196,11 @@ function registerSearchEvents() { } const searchAfter500ms = () => { - // @ts-expect-error searchState.clearInputTimeout(); // @ts-expect-error if (searchState.input.value.length === 0) { - // @ts-expect-error searchState.hideResults(); } else { - // @ts-expect-error searchState.timeout = setTimeout(search, 500); } }; @@ -5248,7 +5217,6 @@ function registerSearchEvents() { return; } // Do NOT e.preventDefault() here. It will prevent pasting. - // @ts-expect-error searchState.clearInputTimeout(); // zero-timeout necessary here because at the time of event handler execution the // pasted content is not in the input field yet. Shouldn’t make any difference for @@ -5274,7 +5242,6 @@ function registerSearchEvents() { // @ts-expect-error previous.focus(); } else { - // @ts-expect-error searchState.focus(); } e.preventDefault(); @@ -5327,7 +5294,6 @@ function registerSearchEvents() { const previousTitle = document.title; window.addEventListener("popstate", e => { - // @ts-expect-error const params = searchState.getQueryStringParams(); // Revert to the previous title manually since the History // API ignores the title parameter. 
@@ -5355,7 +5321,6 @@ function registerSearchEvents() { searchState.input.value = ""; // When browsing back from search results the main page // visibility must be reset. - // @ts-expect-error searchState.hideResults(); } }); @@ -5368,7 +5333,6 @@ function registerSearchEvents() { // that try to sync state between the URL and the search input. To work around it, // do a small amount of re-init on page show. window.onpageshow = () => { - // @ts-expect-error const qSearch = searchState.getQueryStringParams().search; // @ts-expect-error if (searchState.input.value === "" && qSearch) { @@ -5394,43 +5358,6 @@ function updateCrate(ev) { search(true); } -// @ts-expect-error -function initSearch(searchIndx) { - rawSearchIndex = searchIndx; - if (typeof window !== "undefined") { - // @ts-expect-error - docSearch = new DocSearch(rawSearchIndex, ROOT_PATH, searchState); - registerSearchEvents(); - // If there's a search term in the URL, execute the search now. - if (window.searchState.getQueryStringParams().search) { - search(); - } - } else if (typeof exports !== "undefined") { - // @ts-expect-error - docSearch = new DocSearch(rawSearchIndex, ROOT_PATH, searchState); - exports.docSearch = docSearch; - exports.parseQuery = DocSearch.parseQuery; - } -} - -if (typeof exports !== "undefined") { - exports.initSearch = initSearch; -} - -if (typeof window !== "undefined") { - // @ts-expect-error - window.initSearch = initSearch; - // @ts-expect-error - if (window.searchIndex !== undefined) { - // @ts-expect-error - initSearch(window.searchIndex); - } -} else { - // Running in Node, not a browser. Run initSearch just to produce the - // exports. - initSearch(new Map()); -} - // Parts of this code are based on Lucene, which is licensed under the // Apache/2.0 license. 
// More information found here: @@ -5909,3 +5836,44 @@ Lev1TParametricDescription.prototype.toStates3 = /*3 bits per value */ new Int32 Lev1TParametricDescription.prototype.offsetIncrs3 = /*2 bits per value */ new Int32Array([ 0xa0fc0000,0x5555ba08,0x55555555, ]); + +// ==================== +// WARNING: Nothing should be added below this comment: we need the `initSearch` function to +// be called ONLY when the whole file has been parsed and loaded. + +// @ts-expect-error +function initSearch(searchIndx) { + rawSearchIndex = searchIndx; + if (typeof window !== "undefined") { + // @ts-expect-error + docSearch = new DocSearch(rawSearchIndex, ROOT_PATH, searchState); + registerSearchEvents(); + // If there's a search term in the URL, execute the search now. + if (window.searchState.getQueryStringParams().search) { + search(); + } + } else if (typeof exports !== "undefined") { + // @ts-expect-error + docSearch = new DocSearch(rawSearchIndex, ROOT_PATH, searchState); + exports.docSearch = docSearch; + exports.parseQuery = DocSearch.parseQuery; + } +} + +if (typeof exports !== "undefined") { + exports.initSearch = initSearch; +} + +if (typeof window !== "undefined") { + // @ts-expect-error + window.initSearch = initSearch; + // @ts-expect-error + if (window.searchIndex !== undefined) { + // @ts-expect-error + initSearch(window.searchIndex); + } +} else { + // Running in Node, not a browser. Run initSearch just to produce the + // exports. 
+ initSearch(new Map()); +} diff --git a/src/librustdoc/json/conversions.rs b/src/librustdoc/json/conversions.rs index 6bdf3b5fe387..f51b35097f66 100644 --- a/src/librustdoc/json/conversions.rs +++ b/src/librustdoc/json/conversions.rs @@ -11,12 +11,11 @@ use rustc_hir::def::CtorKind; use rustc_hir::def_id::DefId; use rustc_metadata::rendered_const; use rustc_middle::{bug, ty}; -use rustc_span::{Pos, Symbol, kw}; +use rustc_span::{Pos, kw, sym}; use rustdoc_json_types::*; use thin_vec::ThinVec; use crate::clean::{self, ItemId}; -use crate::formats::FormatRenderer; use crate::formats::item_type::ItemType; use crate::json::JsonRenderer; use crate::passes::collect_intra_doc_links::UrlFragment; @@ -41,7 +40,7 @@ impl JsonRenderer<'_> { }) .collect(); let docs = item.opt_doc_value(); - let attrs = item.attributes_and_repr(self.tcx, self.cache(), true); + let attrs = item.attributes(self.tcx, &self.cache, true); let span = item.span(self.tcx); let visibility = item.visibility(self.tcx); let clean::ItemInner { name, item_id, .. 
} = *item.inner; @@ -67,47 +66,16 @@ impl JsonRenderer<'_> { id, crate_id: item_id.krate().as_u32(), name: name.map(|sym| sym.to_string()), - span: span.and_then(|span| self.convert_span(span)), - visibility: self.convert_visibility(visibility), + span: span.and_then(|span| span.into_json(self)), + visibility: visibility.into_json(self), docs, attrs, - deprecation: deprecation.map(from_deprecation), + deprecation: deprecation.into_json(self), inner, links, }) } - fn convert_span(&self, span: clean::Span) -> Option { - match span.filename(self.sess()) { - rustc_span::FileName::Real(name) => { - if let Some(local_path) = name.into_local_path() { - let hi = span.hi(self.sess()); - let lo = span.lo(self.sess()); - Some(Span { - filename: local_path, - begin: (lo.line, lo.col.to_usize() + 1), - end: (hi.line, hi.col.to_usize() + 1), - }) - } else { - None - } - } - _ => None, - } - } - - fn convert_visibility(&self, v: Option>) -> Visibility { - match v { - None => Visibility::Default, - Some(ty::Visibility::Public) => Visibility::Public, - Some(ty::Visibility::Restricted(did)) if did.is_crate_root() => Visibility::Crate, - Some(ty::Visibility::Restricted(did)) => Visibility::Restricted { - parent: self.id_from_item_default(did.into()), - path: self.tcx.def_path(did).to_string_no_crate_verbose(), - }, - } - } - fn ids(&self, items: &[clean::Item]) -> Vec { items .iter() @@ -141,11 +109,29 @@ where } } +impl FromClean> for U +where + U: FromClean, +{ + fn from_clean(opt: &Box, renderer: &JsonRenderer<'_>) -> Self { + opt.as_ref().into_json(renderer) + } +} + +impl FromClean> for Option +where + U: FromClean, +{ + fn from_clean(opt: &Option, renderer: &JsonRenderer<'_>) -> Self { + opt.as_ref().map(|x| x.into_json(renderer)) + } +} + impl FromClean> for Vec where U: FromClean, { - fn from_clean(items: &Vec, renderer: &JsonRenderer<'_>) -> Vec { + fn from_clean(items: &Vec, renderer: &JsonRenderer<'_>) -> Self { items.iter().map(|i| i.into_json(renderer)).collect() } } @@ 
-154,35 +140,78 @@ impl FromClean> for Vec where U: FromClean, { - fn from_clean(items: &ThinVec, renderer: &JsonRenderer<'_>) -> Vec { + fn from_clean(items: &ThinVec, renderer: &JsonRenderer<'_>) -> Self { items.iter().map(|i| i.into_json(renderer)).collect() } } -pub(crate) fn from_deprecation(deprecation: attrs::Deprecation) -> Deprecation { - let attrs::Deprecation { since, note, suggestion: _ } = deprecation; - let since = match since { - DeprecatedSince::RustcVersion(version) => Some(version.to_string()), - DeprecatedSince::Future => Some("TBD".to_owned()), - DeprecatedSince::NonStandard(since) => Some(since.to_string()), - DeprecatedSince::Unspecified | DeprecatedSince::Err => None, - }; - Deprecation { since, note: note.map(|s| s.to_string()) } +impl FromClean for Option { + fn from_clean(span: &clean::Span, renderer: &JsonRenderer<'_>) -> Self { + match span.filename(renderer.sess()) { + rustc_span::FileName::Real(name) => { + if let Some(local_path) = name.into_local_path() { + let hi = span.hi(renderer.sess()); + let lo = span.lo(renderer.sess()); + Some(Span { + filename: local_path, + begin: (lo.line, lo.col.to_usize() + 1), + end: (hi.line, hi.col.to_usize() + 1), + }) + } else { + None + } + } + _ => None, + } + } } -impl FromClean for GenericArgs { - fn from_clean(args: &clean::GenericArgs, renderer: &JsonRenderer<'_>) -> Self { +impl FromClean>> for Visibility { + fn from_clean(v: &Option>, renderer: &JsonRenderer<'_>) -> Self { + match v { + None => Visibility::Default, + Some(ty::Visibility::Public) => Visibility::Public, + Some(ty::Visibility::Restricted(did)) if did.is_crate_root() => Visibility::Crate, + Some(ty::Visibility::Restricted(did)) => Visibility::Restricted { + parent: renderer.id_from_item_default((*did).into()), + path: renderer.tcx.def_path(*did).to_string_no_crate_verbose(), + }, + } + } +} + +impl FromClean for Deprecation { + fn from_clean(deprecation: &attrs::Deprecation, _renderer: &JsonRenderer<'_>) -> Self { + let 
attrs::Deprecation { since, note, suggestion: _ } = deprecation; + let since = match since { + DeprecatedSince::RustcVersion(version) => Some(version.to_string()), + DeprecatedSince::Future => Some("TBD".to_string()), + DeprecatedSince::NonStandard(since) => Some(since.to_string()), + DeprecatedSince::Unspecified | DeprecatedSince::Err => None, + }; + Deprecation { since, note: note.map(|sym| sym.to_string()) } + } +} + +impl FromClean for Option> { + fn from_clean(generic_args: &clean::GenericArgs, renderer: &JsonRenderer<'_>) -> Self { use clean::GenericArgs::*; - match args { - AngleBracketed { args, constraints } => GenericArgs::AngleBracketed { - args: args.into_json(renderer), - constraints: constraints.into_json(renderer), - }, - Parenthesized { inputs, output } => GenericArgs::Parenthesized { + match generic_args { + AngleBracketed { args, constraints } => { + if generic_args.is_empty() { + None + } else { + Some(Box::new(GenericArgs::AngleBracketed { + args: args.into_json(renderer), + constraints: constraints.into_json(renderer), + })) + } + } + Parenthesized { inputs, output } => Some(Box::new(GenericArgs::Parenthesized { inputs: inputs.into_json(renderer), - output: output.as_ref().map(|a| a.as_ref().into_json(renderer)), - }, - ReturnTypeNotation => GenericArgs::ReturnTypeNotation, + output: output.into_json(renderer), + })), + ReturnTypeNotation => Some(Box::new(GenericArgs::ReturnTypeNotation)), } } } @@ -191,7 +220,7 @@ impl FromClean for GenericArg { fn from_clean(arg: &clean::GenericArg, renderer: &JsonRenderer<'_>) -> Self { use clean::GenericArg::*; match arg { - Lifetime(l) => GenericArg::Lifetime(convert_lifetime(l)), + Lifetime(l) => GenericArg::Lifetime(l.into_json(renderer)), Type(t) => GenericArg::Type(t.into_json(renderer)), Const(box c) => GenericArg::Const(c.into_json(renderer)), Infer => GenericArg::Infer, @@ -199,17 +228,6 @@ impl FromClean for GenericArg { } } -impl FromClean for Constant { - // FIXME(generic_const_items): Add 
support for generic const items. - fn from_clean(constant: &clean::Constant, renderer: &JsonRenderer<'_>) -> Self { - let tcx = renderer.tcx; - let expr = constant.expr(tcx); - let value = constant.value(tcx); - let is_literal = constant.is_literal(tcx); - Constant { expr, value, is_literal } - } -} - impl FromClean for Constant { // FIXME(generic_const_items): Add support for generic const items. fn from_clean(constant: &clean::ConstantKind, renderer: &JsonRenderer<'_>) -> Self { @@ -257,21 +275,25 @@ fn from_clean_item(item: &clean::Item, renderer: &JsonRenderer<'_>) -> ItemEnum StructFieldItem(f) => ItemEnum::StructField(f.into_json(renderer)), EnumItem(e) => ItemEnum::Enum(e.into_json(renderer)), VariantItem(v) => ItemEnum::Variant(v.into_json(renderer)), - FunctionItem(f) => ItemEnum::Function(from_function(f, true, header.unwrap(), renderer)), + FunctionItem(f) => { + ItemEnum::Function(from_clean_function(f, true, header.unwrap(), renderer)) + } ForeignFunctionItem(f, _) => { - ItemEnum::Function(from_function(f, false, header.unwrap(), renderer)) + ItemEnum::Function(from_clean_function(f, false, header.unwrap(), renderer)) } - TraitItem(t) => ItemEnum::Trait(t.as_ref().into_json(renderer)), + TraitItem(t) => ItemEnum::Trait(t.into_json(renderer)), TraitAliasItem(t) => ItemEnum::TraitAlias(t.into_json(renderer)), - MethodItem(m, _) => ItemEnum::Function(from_function(m, true, header.unwrap(), renderer)), - RequiredMethodItem(m) => { - ItemEnum::Function(from_function(m, false, header.unwrap(), renderer)) + MethodItem(m, _) => { + ItemEnum::Function(from_clean_function(m, true, header.unwrap(), renderer)) } - ImplItem(i) => ItemEnum::Impl(i.as_ref().into_json(renderer)), - StaticItem(s) => ItemEnum::Static(convert_static(s, &rustc_hir::Safety::Safe, renderer)), - ForeignStaticItem(s, safety) => ItemEnum::Static(convert_static(s, safety, renderer)), + RequiredMethodItem(m) => { + ItemEnum::Function(from_clean_function(m, false, header.unwrap(), renderer)) + } 
+ ImplItem(i) => ItemEnum::Impl(i.into_json(renderer)), + StaticItem(s) => ItemEnum::Static(from_clean_static(s, rustc_hir::Safety::Safe, renderer)), + ForeignStaticItem(s, safety) => ItemEnum::Static(from_clean_static(s, *safety, renderer)), ForeignTypeItem => ItemEnum::ExternType, - TypeAliasItem(t) => ItemEnum::TypeAlias(t.as_ref().into_json(renderer)), + TypeAliasItem(t) => ItemEnum::TypeAlias(t.into_json(renderer)), // FIXME(generic_const_items): Add support for generic free consts ConstantItem(ci) => ItemEnum::Constant { type_: ci.type_.into_json(renderer), @@ -287,7 +309,7 @@ fn from_clean_item(item: &clean::Item, renderer: &JsonRenderer<'_>) -> ItemEnum } // FIXME(generic_const_items): Add support for generic associated consts. RequiredAssocConstItem(_generics, ty) => { - ItemEnum::AssocConst { type_: ty.as_ref().into_json(renderer), value: None } + ItemEnum::AssocConst { type_: ty.into_json(renderer), value: None } } // FIXME(generic_const_items): Add support for generic associated consts. 
ProvidedAssocConstItem(ci) | ImplAssocConstItem(ci) => ItemEnum::AssocConst { @@ -359,32 +381,38 @@ impl FromClean for Union { } } -pub(crate) fn from_fn_header(header: &rustc_hir::FnHeader) -> FunctionHeader { - FunctionHeader { - is_async: header.is_async(), - is_const: header.is_const(), - is_unsafe: header.is_unsafe(), - abi: convert_abi(header.abi), +impl FromClean for FunctionHeader { + fn from_clean(header: &rustc_hir::FnHeader, renderer: &JsonRenderer<'_>) -> Self { + FunctionHeader { + is_async: header.is_async(), + is_const: header.is_const(), + is_unsafe: header.is_unsafe(), + abi: header.abi.into_json(renderer), + } } } -fn convert_abi(a: ExternAbi) -> Abi { - match a { - ExternAbi::Rust => Abi::Rust, - ExternAbi::C { unwind } => Abi::C { unwind }, - ExternAbi::Cdecl { unwind } => Abi::Cdecl { unwind }, - ExternAbi::Stdcall { unwind } => Abi::Stdcall { unwind }, - ExternAbi::Fastcall { unwind } => Abi::Fastcall { unwind }, - ExternAbi::Aapcs { unwind } => Abi::Aapcs { unwind }, - ExternAbi::Win64 { unwind } => Abi::Win64 { unwind }, - ExternAbi::SysV64 { unwind } => Abi::SysV64 { unwind }, - ExternAbi::System { unwind } => Abi::System { unwind }, - _ => Abi::Other(a.to_string()), +impl FromClean for Abi { + fn from_clean(a: &ExternAbi, _renderer: &JsonRenderer<'_>) -> Self { + match *a { + ExternAbi::Rust => Abi::Rust, + ExternAbi::C { unwind } => Abi::C { unwind }, + ExternAbi::Cdecl { unwind } => Abi::Cdecl { unwind }, + ExternAbi::Stdcall { unwind } => Abi::Stdcall { unwind }, + ExternAbi::Fastcall { unwind } => Abi::Fastcall { unwind }, + ExternAbi::Aapcs { unwind } => Abi::Aapcs { unwind }, + ExternAbi::Win64 { unwind } => Abi::Win64 { unwind }, + ExternAbi::SysV64 { unwind } => Abi::SysV64 { unwind }, + ExternAbi::System { unwind } => Abi::System { unwind }, + _ => Abi::Other(a.to_string()), + } } } -fn convert_lifetime(l: &clean::Lifetime) -> String { - l.0.to_string() +impl FromClean for String { + fn from_clean(l: &clean::Lifetime, _renderer: 
&JsonRenderer<'_>) -> String { + l.0.to_string() + } } impl FromClean for Generics { @@ -409,16 +437,16 @@ impl FromClean for GenericParamDefKind { fn from_clean(kind: &clean::GenericParamDefKind, renderer: &JsonRenderer<'_>) -> Self { use clean::GenericParamDefKind::*; match kind { - Lifetime { outlives } => GenericParamDefKind::Lifetime { - outlives: outlives.into_iter().map(convert_lifetime).collect(), - }, + Lifetime { outlives } => { + GenericParamDefKind::Lifetime { outlives: outlives.into_json(renderer) } + } Type { bounds, default, synthetic } => GenericParamDefKind::Type { bounds: bounds.into_json(renderer), - default: default.as_ref().map(|x| x.as_ref().into_json(renderer)), + default: default.into_json(renderer), is_synthetic: *synthetic, }, Const { ty, default, synthetic: _ } => GenericParamDefKind::Const { - type_: ty.as_ref().into_json(renderer), + type_: ty.into_json(renderer), default: default.as_ref().map(|x| x.as_ref().clone()), }, } @@ -432,45 +460,14 @@ impl FromClean for WherePredicate { BoundPredicate { ty, bounds, bound_params } => WherePredicate::BoundPredicate { type_: ty.into_json(renderer), bounds: bounds.into_json(renderer), - generic_params: bound_params - .iter() - .map(|x| { - let name = x.name.to_string(); - let kind = match &x.kind { - clean::GenericParamDefKind::Lifetime { outlives } => { - GenericParamDefKind::Lifetime { - outlives: outlives.iter().map(|lt| lt.0.to_string()).collect(), - } - } - clean::GenericParamDefKind::Type { bounds, default, synthetic } => { - GenericParamDefKind::Type { - bounds: bounds - .into_iter() - .map(|bound| bound.into_json(renderer)) - .collect(), - default: default - .as_ref() - .map(|ty| ty.as_ref().into_json(renderer)), - is_synthetic: *synthetic, - } - } - clean::GenericParamDefKind::Const { ty, default, synthetic: _ } => { - GenericParamDefKind::Const { - type_: ty.as_ref().into_json(renderer), - default: default.as_ref().map(|d| d.as_ref().clone()), - } - } - }; - GenericParamDef { name, kind 
} - }) - .collect(), + generic_params: bound_params.into_json(renderer), }, RegionPredicate { lifetime, bounds } => WherePredicate::LifetimePredicate { - lifetime: convert_lifetime(lifetime), + lifetime: lifetime.into_json(renderer), outlives: bounds .iter() .map(|bound| match bound { - clean::GenericBound::Outlives(lt) => convert_lifetime(lt), + clean::GenericBound::Outlives(lt) => lt.into_json(renderer), _ => bug!("found non-outlives-bound on lifetime predicate"), }) .collect(), @@ -494,15 +491,15 @@ impl FromClean for GenericBound { GenericBound::TraitBound { trait_: trait_.into_json(renderer), generic_params: generic_params.into_json(renderer), - modifier: from_trait_bound_modifier(modifier), + modifier: modifier.into_json(renderer), } } - Outlives(lifetime) => GenericBound::Outlives(convert_lifetime(lifetime)), + Outlives(lifetime) => GenericBound::Outlives(lifetime.into_json(renderer)), Use(args) => GenericBound::Use( args.iter() .map(|arg| match arg { clean::PreciseCapturingArg::Lifetime(lt) => { - PreciseCapturingArg::Lifetime(convert_lifetime(lt)) + PreciseCapturingArg::Lifetime(lt.into_json(renderer)) } clean::PreciseCapturingArg::Param(param) => { PreciseCapturingArg::Param(param.to_string()) @@ -514,19 +511,22 @@ impl FromClean for GenericBound { } } -pub(crate) fn from_trait_bound_modifier( - modifiers: &rustc_hir::TraitBoundModifiers, -) -> TraitBoundModifier { - use rustc_hir as hir; - let hir::TraitBoundModifiers { constness, polarity } = modifiers; - match (constness, polarity) { - (hir::BoundConstness::Never, hir::BoundPolarity::Positive) => TraitBoundModifier::None, - (hir::BoundConstness::Never, hir::BoundPolarity::Maybe(_)) => TraitBoundModifier::Maybe, - (hir::BoundConstness::Maybe(_), hir::BoundPolarity::Positive) => { - TraitBoundModifier::MaybeConst +impl FromClean for TraitBoundModifier { + fn from_clean( + modifiers: &rustc_hir::TraitBoundModifiers, + _renderer: &JsonRenderer<'_>, + ) -> Self { + use rustc_hir as hir; + let 
hir::TraitBoundModifiers { constness, polarity } = modifiers; + match (constness, polarity) { + (hir::BoundConstness::Never, hir::BoundPolarity::Positive) => TraitBoundModifier::None, + (hir::BoundConstness::Never, hir::BoundPolarity::Maybe(_)) => TraitBoundModifier::Maybe, + (hir::BoundConstness::Maybe(_), hir::BoundPolarity::Positive) => { + TraitBoundModifier::MaybeConst + } + // FIXME: Fill out the rest of this matrix. + _ => TraitBoundModifier::None, } - // FIXME: Fill out the rest of this matrix. - _ => TraitBoundModifier::None, } } @@ -540,35 +540,35 @@ impl FromClean for Type { match ty { clean::Type::Path { path } => Type::ResolvedPath(path.into_json(renderer)), clean::Type::DynTrait(bounds, lt) => Type::DynTrait(DynTrait { - lifetime: lt.as_ref().map(convert_lifetime), + lifetime: lt.into_json(renderer), traits: bounds.into_json(renderer), }), Generic(s) => Type::Generic(s.to_string()), // FIXME: add dedicated variant to json Type? SelfTy => Type::Generic("Self".to_owned()), Primitive(p) => Type::Primitive(p.as_sym().to_string()), - BareFunction(f) => Type::FunctionPointer(Box::new(f.as_ref().into_json(renderer))), + BareFunction(f) => Type::FunctionPointer(Box::new(f.into_json(renderer))), Tuple(t) => Type::Tuple(t.into_json(renderer)), - Slice(t) => Type::Slice(Box::new(t.as_ref().into_json(renderer))), + Slice(t) => Type::Slice(Box::new(t.into_json(renderer))), Array(t, s) => { - Type::Array { type_: Box::new(t.as_ref().into_json(renderer)), len: s.to_string() } + Type::Array { type_: Box::new(t.into_json(renderer)), len: s.to_string() } } clean::Type::Pat(t, p) => Type::Pat { - type_: Box::new(t.as_ref().into_json(renderer)), + type_: Box::new(t.into_json(renderer)), __pat_unstable_do_not_use: p.to_string(), }, ImplTrait(g) => Type::ImplTrait(g.into_json(renderer)), Infer => Type::Infer, RawPointer(mutability, type_) => Type::RawPointer { is_mutable: *mutability == ast::Mutability::Mut, - type_: Box::new(type_.as_ref().into_json(renderer)), + type_: 
Box::new(type_.into_json(renderer)), }, BorrowedRef { lifetime, mutability, type_ } => Type::BorrowedRef { - lifetime: lifetime.as_ref().map(convert_lifetime), + lifetime: lifetime.into_json(renderer), is_mutable: *mutability == ast::Mutability::Mut, - type_: Box::new(type_.as_ref().into_json(renderer)), + type_: Box::new(type_.into_json(renderer)), }, - QPath(qpath) => qpath.as_ref().into_json(renderer), + QPath(qpath) => qpath.into_json(renderer), // FIXME(unsafe_binder): Implement rustdoc-json. UnsafeBinder(_) => todo!(), } @@ -576,11 +576,24 @@ impl FromClean for Type { } impl FromClean for Path { - fn from_clean(path: &clean::Path, renderer: &JsonRenderer<'_>) -> Path { + fn from_clean(path: &clean::Path, renderer: &JsonRenderer<'_>) -> Self { Path { path: path.whole_name(), id: renderer.id_from_item_default(path.def_id().into()), - args: path.segments.last().map(|args| Box::new(args.args.into_json(renderer))), + args: { + if let Some((final_seg, rest_segs)) = path.segments.split_last() { + // In general, `clean::Path` can hold things like + // `std::vec::Vec::::new`, where generic args appear + // in a middle segment. But for the places where `Path` is + // used by rustdoc-json-types, generic args can only be + // used in the final segment, e.g. `std::vec::Vec`. So + // check that the non-final segments have no generic args. 
+ assert!(rest_segs.iter().all(|seg| seg.args.is_empty())); + final_seg.args.into_json(renderer) + } else { + None // no generics on any segments because there are no segments + } + }, } } } @@ -591,15 +604,15 @@ impl FromClean for Type { Self::QualifiedPath { name: assoc.name.to_string(), - args: Box::new(assoc.args.into_json(renderer)), + args: assoc.args.into_json(renderer), self_type: Box::new(self_type.into_json(renderer)), - trait_: trait_.as_ref().map(|trait_| trait_.into_json(renderer)), + trait_: trait_.into_json(renderer), } } } impl FromClean for Term { - fn from_clean(term: &clean::Term, renderer: &JsonRenderer<'_>) -> Term { + fn from_clean(term: &clean::Term, renderer: &JsonRenderer<'_>) -> Self { match term { clean::Term::Type(ty) => Term::Type(ty.into_json(renderer)), clean::Term::Constant(c) => Term::Constant(c.into_json(renderer)), @@ -615,7 +628,7 @@ impl FromClean for FunctionPointer { is_unsafe: safety.is_unsafe(), is_const: false, is_async: false, - abi: convert_abi(*abi), + abi: abi.into_json(renderer), }, generic_params: generic_params.into_json(renderer), sig: decl.into_json(renderer), @@ -694,17 +707,17 @@ impl FromClean for Impl { .into_iter() .map(|x| x.to_string()) .collect(), - trait_: trait_.as_ref().map(|path| path.into_json(renderer)), + trait_: trait_.into_json(renderer), for_: for_.into_json(renderer), items: renderer.ids(&items), is_negative, is_synthetic, - blanket_impl: blanket_impl.map(|x| x.as_ref().into_json(renderer)), + blanket_impl: blanket_impl.map(|x| x.into_json(renderer)), } } } -pub(crate) fn from_function( +pub(crate) fn from_clean_function( clean::Function { decl, generics }: &clean::Function, has_body: bool, header: rustc_hir::FnHeader, @@ -713,7 +726,7 @@ pub(crate) fn from_function( Function { sig: decl.into_json(renderer), generics: generics.into_json(renderer), - header: from_fn_header(&header), + header: header.into_json(renderer), has_body, } } @@ -735,7 +748,7 @@ impl FromClean for Variant { fn 
from_clean(variant: &clean::Variant, renderer: &JsonRenderer<'_>) -> Self { use clean::VariantKind::*; - let discriminant = variant.discriminant.as_ref().map(|d| d.into_json(renderer)); + let discriminant = variant.discriminant.into_json(renderer); let kind = match &variant.kind { CLike => VariantKind::Plain, @@ -768,10 +781,7 @@ impl FromClean for Use { use clean::ImportKind::*; let (name, is_glob) = match import.kind { Simple(s) => (s.to_string(), false), - Glob => ( - import.source.path.last_opt().unwrap_or_else(|| Symbol::intern("*")).to_string(), - true, - ), + Glob => (import.source.path.last_opt().unwrap_or(sym::asterisk).to_string(), true), }; Use { source: import.source.path.whole_name(), @@ -783,20 +793,22 @@ impl FromClean for Use { } impl FromClean for ProcMacro { - fn from_clean(mac: &clean::ProcMacro, _renderer: &JsonRenderer<'_>) -> Self { + fn from_clean(mac: &clean::ProcMacro, renderer: &JsonRenderer<'_>) -> Self { ProcMacro { - kind: from_macro_kind(mac.kind), + kind: mac.kind.into_json(renderer), helpers: mac.helpers.iter().map(|x| x.to_string()).collect(), } } } -pub(crate) fn from_macro_kind(kind: rustc_span::hygiene::MacroKind) -> MacroKind { - use rustc_span::hygiene::MacroKind::*; - match kind { - Bang => MacroKind::Bang, - Attr => MacroKind::Attr, - Derive => MacroKind::Derive, +impl FromClean for MacroKind { + fn from_clean(kind: &rustc_span::hygiene::MacroKind, _renderer: &JsonRenderer<'_>) -> Self { + use rustc_span::hygiene::MacroKind::*; + match kind { + Bang => MacroKind::Bang, + Attr => MacroKind::Attr, + Derive => MacroKind::Derive, + } } } @@ -807,9 +819,9 @@ impl FromClean for TypeAlias { } } -fn convert_static( +fn from_clean_static( stat: &clean::Static, - safety: &rustc_hir::Safety, + safety: rustc_hir::Safety, renderer: &JsonRenderer<'_>, ) -> Static { let tcx = renderer.tcx; diff --git a/src/librustdoc/json/mod.rs b/src/librustdoc/json/mod.rs index 2feadce26d09..600a4b429f3c 100644 --- a/src/librustdoc/json/mod.rs +++ 
b/src/librustdoc/json/mod.rs @@ -18,7 +18,6 @@ use rustc_data_structures::fx::FxHashSet; use rustc_hir::def_id::{DefId, DefIdSet}; use rustc_middle::ty::TyCtxt; use rustc_session::Session; -use rustc_session::features::StabilityExt; use rustc_span::def_id::LOCAL_CRATE; use rustdoc_json_types as types; // It's important to use the FxHashMap from rustdoc_json_types here, instead of @@ -148,7 +147,7 @@ fn target(sess: &rustc_session::Session) -> types::Target { .copied() .filter(|(_, stability, _)| { // Describe only target features which the user can toggle - stability.is_toggle_permitted(sess).is_ok() + stability.toggle_allowed().is_ok() }) .map(|(name, stability, implied_features)| { types::TargetFeature { @@ -164,7 +163,7 @@ fn target(sess: &rustc_session::Session) -> types::Target { // Imply only target features which the user can toggle feature_stability .get(name) - .map(|stability| stability.is_toggle_permitted(sess).is_ok()) + .map(|stability| stability.toggle_allowed().is_ok()) .unwrap_or(false) }) .map(String::from) @@ -377,8 +376,34 @@ impl<'tcx> FormatRenderer<'tcx> for JsonRenderer<'tcx> { self.serialize_and_write(output_crate, BufWriter::new(stdout().lock()), "") } } - - fn cache(&self) -> &Cache { - &self.cache - } +} + +// Some nodes are used a lot. Make sure they don't unintentionally get bigger. +// +// These assertions are here, not in `src/rustdoc-json-types/lib.rs` where the types are defined, +// because we have access to `static_assert_size` here. 
+#[cfg(target_pointer_width = "64")] +mod size_asserts { + use rustc_data_structures::static_assert_size; + + use super::types::*; + // tidy-alphabetical-start + static_assert_size!(AssocItemConstraint, 112); + static_assert_size!(Crate, 184); + static_assert_size!(ExternalCrate, 48); + static_assert_size!(FunctionPointer, 168); + static_assert_size!(GenericArg, 80); + static_assert_size!(GenericArgs, 104); + static_assert_size!(GenericBound, 72); + static_assert_size!(GenericParamDef, 136); + static_assert_size!(Impl, 304); + // `Item` contains a `PathBuf`, which is different sizes on different OSes. + static_assert_size!(Item, 528 + size_of::()); + static_assert_size!(ItemSummary, 32); + static_assert_size!(PolyTrait, 64); + static_assert_size!(PreciseCapturingArg, 32); + static_assert_size!(TargetFeature, 80); + static_assert_size!(Type, 80); + static_assert_size!(WherePredicate, 160); + // tidy-alphabetical-end } diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index 1daaba3b86c5..ca6f67eb6dfd 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -1387,13 +1387,15 @@ impl LinkCollector<'_, '_> { ori_link: &MarkdownLinkRange, item: &Item, ) { - let span = source_span_for_markdown_range( + let span = match source_span_for_markdown_range( self.cx.tcx, dox, ori_link.inner_range(), &item.attrs.doc_strings, - ) - .unwrap_or_else(|| item.attr_span(self.cx.tcx)); + ) { + Some((sp, _)) => sp, + None => item.attr_span(self.cx.tcx), + }; rustc_session::parse::feature_err( self.cx.tcx.sess, sym::intra_doc_pointers, @@ -1836,7 +1838,7 @@ fn report_diagnostic( let mut md_range = md_range.clone(); let sp = source_span_for_markdown_range(tcx, dox, &md_range, &item.attrs.doc_strings) - .map(|mut sp| { + .map(|(mut sp, _)| { while dox.as_bytes().get(md_range.start) == Some(&b' ') || dox.as_bytes().get(md_range.start) == Some(&b'`') { @@ -1854,7 
+1856,8 @@ fn report_diagnostic( (sp, MarkdownLinkRange::Destination(md_range)) } MarkdownLinkRange::WholeLink(md_range) => ( - source_span_for_markdown_range(tcx, dox, md_range, &item.attrs.doc_strings), + source_span_for_markdown_range(tcx, dox, md_range, &item.attrs.doc_strings) + .map(|(sp, _)| sp), link_range.clone(), ), }; diff --git a/src/librustdoc/passes/collect_trait_impls.rs b/src/librustdoc/passes/collect_trait_impls.rs index f4e4cd924f7f..2339a6b69cd8 100644 --- a/src/librustdoc/passes/collect_trait_impls.rs +++ b/src/librustdoc/passes/collect_trait_impls.rs @@ -35,7 +35,7 @@ pub(crate) fn collect_trait_impls(mut krate: Crate, cx: &mut DocContext<'_>) -> }); let local_crate = ExternalCrate { crate_num: LOCAL_CRATE }; - let prims: FxHashSet = local_crate.primitives(tcx).iter().map(|p| p.1).collect(); + let prims: FxHashSet = local_crate.primitives(tcx).map(|(_, p)| p).collect(); let crate_items = { let mut coll = ItemAndAliasCollector::new(&cx.cache); diff --git a/src/librustdoc/passes/lint/bare_urls.rs b/src/librustdoc/passes/lint/bare_urls.rs index 3b3ce3e92202..f70bdf4e4fe3 100644 --- a/src/librustdoc/passes/lint/bare_urls.rs +++ b/src/librustdoc/passes/lint/bare_urls.rs @@ -18,7 +18,8 @@ use crate::html::markdown::main_body_opts; pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) { let report_diag = |cx: &DocContext<'_>, msg: &'static str, range: Range| { - let maybe_sp = source_span_for_markdown_range(cx.tcx, dox, &range, &item.attrs.doc_strings); + let maybe_sp = source_span_for_markdown_range(cx.tcx, dox, &range, &item.attrs.doc_strings) + .map(|(sp, _)| sp); let sp = maybe_sp.unwrap_or_else(|| item.attr_span(cx.tcx)); cx.tcx.node_span_lint(crate::lint::BARE_URLS, hir_id, sp, |lint| { lint.primary_message(msg) diff --git a/src/librustdoc/passes/lint/check_code_block_syntax.rs b/src/librustdoc/passes/lint/check_code_block_syntax.rs index 9662dd85d678..b08533317abe 100644 --- 
a/src/librustdoc/passes/lint/check_code_block_syntax.rs +++ b/src/librustdoc/passes/lint/check_code_block_syntax.rs @@ -6,8 +6,8 @@ use std::sync::Arc; use rustc_data_structures::sync::Lock; use rustc_errors::emitter::Emitter; use rustc_errors::registry::Registry; -use rustc_errors::translation::{Translate, to_fluent_args}; -use rustc_errors::{Applicability, DiagCtxt, DiagInner, LazyFallbackBundle}; +use rustc_errors::translation::{Translator, to_fluent_args}; +use rustc_errors::{Applicability, DiagCtxt, DiagInner}; use rustc_parse::{source_str_to_stream, unwrap_or_emit_fatal}; use rustc_resolve::rustdoc::source_span_for_markdown_range; use rustc_session::parse::ParseSess; @@ -36,11 +36,8 @@ fn check_rust_syntax( code_block: RustCodeBlock, ) { let buffer = Arc::new(Lock::new(Buffer::default())); - let fallback_bundle = rustc_errors::fallback_fluent_bundle( - rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(), - false, - ); - let emitter = BufferEmitter { buffer: Arc::clone(&buffer), fallback_bundle }; + let translator = rustc_driver::default_translator(); + let emitter = BufferEmitter { buffer: Arc::clone(&buffer), translator }; let sm = Arc::new(SourceMap::new(FilePathMapping::empty())); let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings(); @@ -90,7 +87,7 @@ fn check_rust_syntax( &code_block.range, &item.attrs.doc_strings, ) { - Some(sp) => (sp, true), + Some((sp, _)) => (sp, true), None => (item.attr_span(cx.tcx), false), }; @@ -149,17 +146,7 @@ struct Buffer { struct BufferEmitter { buffer: Arc>, - fallback_bundle: LazyFallbackBundle, -} - -impl Translate for BufferEmitter { - fn fluent_bundle(&self) -> Option<&rustc_errors::FluentBundle> { - None - } - - fn fallback_fluent_bundle(&self) -> &rustc_errors::FluentBundle { - &self.fallback_bundle - } + translator: Translator, } impl Emitter for BufferEmitter { @@ -168,6 +155,7 @@ impl Emitter for BufferEmitter { let fluent_args = to_fluent_args(diag.args.iter()); let translated_main_message = self + 
.translator .translate_message(&diag.messages[0].0, &fluent_args) .unwrap_or_else(|e| panic!("{e}")); @@ -180,4 +168,8 @@ impl Emitter for BufferEmitter { fn source_map(&self) -> Option<&SourceMap> { None } + + fn translator(&self) -> &Translator { + &self.translator + } } diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index b9739726c956..19cf15d40a3b 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -16,7 +16,7 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & let tcx = cx.tcx; let report_diag = |msg: String, range: &Range, is_open_tag: bool| { let sp = match source_span_for_markdown_range(tcx, dox, range, &item.attrs.doc_strings) { - Some(sp) => sp, + Some((sp, _)) => sp, None => item.attr_span(tcx), }; tcx.node_span_lint(crate::lint::INVALID_HTML_TAGS, hir_id, sp, |lint| { @@ -55,7 +55,7 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & &(generics_start..generics_end), &item.attrs.doc_strings, ) { - Some(sp) => sp, + Some((sp, _)) => sp, None => item.attr_span(tcx), }; // Sometimes, we only extract part of a path. 
For example, consider this: diff --git a/src/librustdoc/passes/lint/redundant_explicit_links.rs b/src/librustdoc/passes/lint/redundant_explicit_links.rs index 6bc4374c06b1..5757b6a97408 100644 --- a/src/librustdoc/passes/lint/redundant_explicit_links.rs +++ b/src/librustdoc/passes/lint/redundant_explicit_links.rs @@ -161,20 +161,36 @@ fn check_inline_or_reference_unknown_redundancy( if dest_res == display_res { let link_span = - source_span_for_markdown_range(cx.tcx, doc, &link_range, &item.attrs.doc_strings) - .unwrap_or(item.attr_span(cx.tcx)); - let explicit_span = source_span_for_markdown_range( + match source_span_for_markdown_range(cx.tcx, doc, &link_range, &item.attrs.doc_strings) + { + Some((sp, from_expansion)) => { + if from_expansion { + return None; + } + sp + } + None => item.attr_span(cx.tcx), + }; + let (explicit_span, false) = source_span_for_markdown_range( cx.tcx, doc, &offset_explicit_range(doc, link_range, open, close), &item.attrs.doc_strings, - )?; - let display_span = source_span_for_markdown_range( + )? + else { + // This `span` comes from macro expansion so skipping it. + return None; + }; + let (display_span, false) = source_span_for_markdown_range( cx.tcx, doc, resolvable_link_range, &item.attrs.doc_strings, - )?; + )? + else { + // This `span` comes from macro expansion so skipping it. 
+ return None; + }; cx.tcx.node_span_lint(crate::lint::REDUNDANT_EXPLICIT_LINKS, hir_id, explicit_span, |lint| { lint.primary_message("redundant explicit link target") @@ -206,21 +222,37 @@ fn check_reference_redundancy( if dest_res == display_res { let link_span = - source_span_for_markdown_range(cx.tcx, doc, &link_range, &item.attrs.doc_strings) - .unwrap_or(item.attr_span(cx.tcx)); - let explicit_span = source_span_for_markdown_range( + match source_span_for_markdown_range(cx.tcx, doc, &link_range, &item.attrs.doc_strings) + { + Some((sp, from_expansion)) => { + if from_expansion { + return None; + } + sp + } + None => item.attr_span(cx.tcx), + }; + let (explicit_span, false) = source_span_for_markdown_range( cx.tcx, doc, &offset_explicit_range(doc, link_range.clone(), b'[', b']'), &item.attrs.doc_strings, - )?; - let display_span = source_span_for_markdown_range( + )? + else { + // This `span` comes from macro expansion so skipping it. + return None; + }; + let (display_span, false) = source_span_for_markdown_range( cx.tcx, doc, resolvable_link_range, &item.attrs.doc_strings, - )?; - let def_span = source_span_for_markdown_range( + )? + else { + // This `span` comes from macro expansion so skipping it. + return None; + }; + let (def_span, _) = source_span_for_markdown_range( cx.tcx, doc, &offset_reference_def_range(doc, dest, link_range), diff --git a/src/librustdoc/passes/lint/unescaped_backticks.rs b/src/librustdoc/passes/lint/unescaped_backticks.rs index 88f4c3ac1cd7..7f5643f4ba81 100644 --- a/src/librustdoc/passes/lint/unescaped_backticks.rs +++ b/src/librustdoc/passes/lint/unescaped_backticks.rs @@ -42,13 +42,15 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, // use the span of the entire attribute as a fallback. 
- let span = source_span_for_markdown_range( + let span = match source_span_for_markdown_range( tcx, dox, &(backtick_index..backtick_index + 1), &item.attrs.doc_strings, - ) - .unwrap_or_else(|| item.attr_span(tcx)); + ) { + Some((sp, _)) => sp, + None => item.attr_span(tcx), + }; tcx.node_span_lint(crate::lint::UNESCAPED_BACKTICKS, hir_id, span, |lint| { lint.primary_message("unescaped backtick"); @@ -419,7 +421,7 @@ fn suggest_insertion( /// Maximum bytes of context to show around the insertion. const CONTEXT_MAX_LEN: usize = 80; - if let Some(span) = source_span_for_markdown_range( + if let Some((span, _)) = source_span_for_markdown_range( cx.tcx, dox, &(insert_index..insert_index), diff --git a/src/rustdoc-json-types/lib.rs b/src/rustdoc-json-types/lib.rs index 1f93895ae076..0e72ddd9db1e 100644 --- a/src/rustdoc-json-types/lib.rs +++ b/src/rustdoc-json-types/lib.rs @@ -37,8 +37,8 @@ pub type FxHashMap = HashMap; // re-export for use in src/librustdoc // will instead cause conflicts. See #94591 for more. (This paragraph and the "Latest feature" line // are deliberately not in a doc comment, because they need not be in public docs.) // -// Latest feature: Pretty printing of inline attributes changed -pub const FORMAT_VERSION: u32 = 48; +// Latest feature: Pretty printing of no_mangle attributes changed +pub const FORMAT_VERSION: u32 = 53; /// The root of the emitted JSON blob. /// @@ -277,8 +277,8 @@ pub struct PolyTrait { /// A set of generic arguments provided to a path segment, e.g. /// /// ```text -/// std::option::Option::::None -/// ^^^^^ +/// std::option::Option +/// ^^^^^ /// ``` #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] @@ -331,7 +331,7 @@ pub enum GenericArg { Const(Constant), /// A generic argument that's explicitly set to be inferred. 
/// ```text - /// std::vec::Vec::<_>::new() + /// std::vec::Vec::<_> /// ^ /// ``` Infer, @@ -362,7 +362,7 @@ pub struct AssocItemConstraint { /// The name of the associated type/constant. pub name: String, /// Arguments provided to the associated type/constant. - pub args: GenericArgs, + pub args: Option>, /// The kind of bound applied to the associated type/constant. pub binding: AssocItemConstraintKind, } @@ -1118,7 +1118,7 @@ pub enum Type { /// as BetterIterator>::Item<'static> /// // ^^^^^^^^^ /// ``` - args: Box, + args: Option>, /// The type with which this type is associated. /// /// ```ignore (incomplete expression) diff --git a/src/tools/cargo b/src/tools/cargo index 2251525ae503..409fed7dc155 160000 --- a/src/tools/cargo +++ b/src/tools/cargo @@ -1 +1 @@ -Subproject commit 2251525ae503fa196f6d7f9ce6d32eccb2d5f044 +Subproject commit 409fed7dc1553d49cb9a8c0637d12d65571346ce diff --git a/src/tools/clippy/.github/ISSUE_TEMPLATE/new_lint.yml b/src/tools/clippy/.github/ISSUE_TEMPLATE/new_lint.yml index b49493edce1b..464740640e0c 100644 --- a/src/tools/clippy/.github/ISSUE_TEMPLATE/new_lint.yml +++ b/src/tools/clippy/.github/ISSUE_TEMPLATE/new_lint.yml @@ -1,5 +1,7 @@ name: New lint suggestion -description: Suggest a new Clippy lint. +description: | + Suggest a new Clippy lint (currently not accepting new lints) + Check out the Clippy book for more information about the feature freeze. labels: ["A-lint"] body: - type: markdown diff --git a/src/tools/clippy/.github/PULL_REQUEST_TEMPLATE.md b/src/tools/clippy/.github/PULL_REQUEST_TEMPLATE.md index 9e49f60892d2..83bfd8e9c686 100644 --- a/src/tools/clippy/.github/PULL_REQUEST_TEMPLATE.md +++ b/src/tools/clippy/.github/PULL_REQUEST_TEMPLATE.md @@ -32,6 +32,10 @@ order to get feedback. Delete this line and everything above before opening your PR. +Note that we are currently not taking in new PRs that add new lints. We are in a +feature freeze. Check out the book for more information. 
If you open a +feature-adding pull request, its review will be delayed. + --- *Please write a short comment explaining your change (or "none" for internal only changes)* diff --git a/src/tools/clippy/.github/workflows/feature_freeze.yml b/src/tools/clippy/.github/workflows/feature_freeze.yml new file mode 100644 index 000000000000..a5f8d4bc145c --- /dev/null +++ b/src/tools/clippy/.github/workflows/feature_freeze.yml @@ -0,0 +1,25 @@ +name: Feature freeze check + +on: + pull_request: + paths: + - 'clippy_lints/src/declared_lints.rs' + +jobs: + auto-comment: + runs-on: ubuntu-latest + + steps: + - name: Check PR Changes + id: pr-changes + run: echo "::set-output name=changes::${{ toJson(github.event.pull_request.changed_files) }}" + + - name: Create Comment + if: steps.pr-changes.outputs.changes != '[]' + run: | + # Use GitHub API to create a comment on the PR + PR_NUMBER=${{ github.event.pull_request.number }} + COMMENT="**Seems that you are trying to add a new lint!**\nWe are currently in a [feature freeze](https://doc.rust-lang.org/nightly/clippy/development/feature_freeze.html), so we are delaying all lint-adding PRs to August 1st and focusing on bugfixes.\nThanks a lot for your contribution, and sorry for the inconvenience.\nWith ❤ from the Clippy team" + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} + COMMENT_URL="https://api.github.com/repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" + curl -s -H "Authorization: token ${GITHUB_TOKEN}" -X POST $COMMENT_URL -d "{\"body\":\"$COMMENT\"}" diff --git a/src/tools/clippy/CHANGELOG.md b/src/tools/clippy/CHANGELOG.md index 0cfe89ad3787..a92fbdc767bd 100644 --- a/src/tools/clippy/CHANGELOG.md +++ b/src/tools/clippy/CHANGELOG.md @@ -6,7 +6,94 @@ document. 
## Unreleased / Beta / In Rust Nightly -[1e5237f4...master](https://github.com/rust-lang/rust-clippy/compare/1e5237f4...master) +[03a5b6b9...master](https://github.com/rust-lang/rust-clippy/compare/03a5b6b9...master) + +## Rust 1.88 + +Current stable, released 2025-06-26 + +[View all 126 merged pull requests](https://github.com/rust-lang/rust-clippy/pulls?q=merged%3A2025-03-21T10%3A30%3A57Z..2025-05-01T08%3A03%3A26Z+base%3Amaster) + +### New Lints + +* Added [`swap_with_temporary`] to `complexity` [#14046](https://github.com/rust-lang/rust-clippy/pull/14046) +* Added [`redundant_test_prefix`] to `restriction` [#13710](https://github.com/rust-lang/rust-clippy/pull/13710) +* Added [`manual_dangling_ptr`] to `style` [#14107](https://github.com/rust-lang/rust-clippy/pull/14107) +* Added [`char_indices_as_byte_indices`] to `correctness` [#13435](https://github.com/rust-lang/rust-clippy/pull/13435) +* Added [`manual_abs_diff`] to `complexity` [#14482](https://github.com/rust-lang/rust-clippy/pull/14482) +* Added [`ignore_without_reason`] to `pedantic` [#13931](https://github.com/rust-lang/rust-clippy/pull/13931) + +### Moves and Deprecations + +* Moved [`uninlined_format_args`] to `style` (from `pedantic`) + [#14160](https://github.com/rust-lang/rust-clippy/pull/14160) +* [`match_on_vec_items`] deprecated in favor of [`indexing_slicing`] + [#14217](https://github.com/rust-lang/rust-clippy/pull/14217) +* Removed superseded lints: `transmute_float_to_int`, `transmute_int_to_char`, + `transmute_int_to_float`, `transmute_num_to_bytes` (now in rustc) + [#14703](https://github.com/rust-lang/rust-clippy/pull/14703) + +### Enhancements + +* Configuration renamed from `lint-inconsistent-struct-field-initializers` + to `check-inconsistent-struct-field-initializers` + [#14280](https://github.com/rust-lang/rust-clippy/pull/14280) +* Paths in `disallowed_*` configurations are now validated + [#14397](https://github.com/rust-lang/rust-clippy/pull/14397) +* [`borrow_as_ptr`] now lints 
implicit casts as well + [#14408](https://github.com/rust-lang/rust-clippy/pull/14408) +* [`iter_kv_map`] now recognizes references on maps + [#14596](https://github.com/rust-lang/rust-clippy/pull/14596) +* [`empty_enum_variants_with_brackets`] no longer lints reachable enums or enums used + as functions within same crate [#12971](https://github.com/rust-lang/rust-clippy/pull/12971) +* [`needless_lifetimes`] now checks for lifetime uses in closures + [#14608](https://github.com/rust-lang/rust-clippy/pull/14608) +* [`wildcard_imports`] now lints on `pub use` when `warn_on_all_wildcard_imports` is enabled + [#14182](https://github.com/rust-lang/rust-clippy/pull/14182) +* [`collapsible_if`] now recognizes the `let_chains` feature + [#14481](https://github.com/rust-lang/rust-clippy/pull/14481) +* [`match_single_binding`] now allows macros in scrutinee and patterns + [#14635](https://github.com/rust-lang/rust-clippy/pull/14635) +* [`needless_borrow`] does not contradict the compiler's + `dangerous_implicit_autorefs` lint even though the references + are not mandatory + [#14810](https://github.com/rust-lang/rust-clippy/pull/14810) + +### False Positive Fixes + +* [`double_ended_iterator_last`] and [`needless_collect`] fixed FP when iter has side effects + [#14490](https://github.com/rust-lang/rust-clippy/pull/14490) +* [`mut_from_ref`] fixed FP where lifetimes nested in types were not considered + [#14471](https://github.com/rust-lang/rust-clippy/pull/14471) +* [`redundant_clone`] fixed FP in overlapping lifetime + [#14237](https://github.com/rust-lang/rust-clippy/pull/14237) +* [`map_entry`] fixed FP where lint would trigger without insert calls present + [#14568](https://github.com/rust-lang/rust-clippy/pull/14568) +* [`iter_cloned_collect`] fixed FP with custom `From`/`IntoIterator` impl + [#14473](https://github.com/rust-lang/rust-clippy/pull/14473) +* [`shadow_unrelated`] fixed FP in destructuring assignments + 
[#14381](https://github.com/rust-lang/rust-clippy/pull/14381) +* [`redundant_clone`] fixed FP on enum cast + [#14395](https://github.com/rust-lang/rust-clippy/pull/14395) +* [`collapsible_if`] fixed FP on block stmt before expr + [#14730](https://github.com/rust-lang/rust-clippy/pull/14730) + +### ICE Fixes + +* [`missing_const_for_fn`] fix ICE with `-Z validate-mir` compilation option + [#14776](https://github.com/rust-lang/rust-clippy/pull/14776) + +### Documentation Improvements + +* [`missing_asserts_for_indexing`] improved documentation and examples + [#14108](https://github.com/rust-lang/rust-clippy/pull/14108) + +### Others + +* We're testing with edition 2024 now + [#14602](https://github.com/rust-lang/rust-clippy/pull/14602) +* Don't warn about unloaded crates in `clippy.toml` disallowed paths + [#14733](https://github.com/rust-lang/rust-clippy/pull/14733) ## Rust 1.87 @@ -5729,6 +5816,7 @@ Released 2018-09-13 [`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type [`disallowed_types`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types [`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression +[`doc_broken_link`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_broken_link [`doc_comment_double_space_linebreaks`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_comment_double_space_linebreaks [`doc_include_without_cfg`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_include_without_cfg [`doc_lazy_continuation`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_lazy_continuation @@ -5967,6 +6055,7 @@ Released 2018-09-13 [`manual_is_ascii_check`]: https://rust-lang.github.io/rust-clippy/master/index.html#manual_is_ascii_check [`manual_is_finite`]: https://rust-lang.github.io/rust-clippy/master/index.html#manual_is_finite [`manual_is_infinite`]: 
https://rust-lang.github.io/rust-clippy/master/index.html#manual_is_infinite +[`manual_is_multiple_of`]: https://rust-lang.github.io/rust-clippy/master/index.html#manual_is_multiple_of [`manual_is_power_of_two`]: https://rust-lang.github.io/rust-clippy/master/index.html#manual_is_power_of_two [`manual_is_variant_and`]: https://rust-lang.github.io/rust-clippy/master/index.html#manual_is_variant_and [`manual_let_else`]: https://rust-lang.github.io/rust-clippy/master/index.html#manual_let_else diff --git a/src/tools/clippy/Cargo.toml b/src/tools/clippy/Cargo.toml index 3a76c61489e2..1278427b5a76 100644 --- a/src/tools/clippy/Cargo.toml +++ b/src/tools/clippy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "clippy" -version = "0.1.89" +version = "0.1.90" description = "A bunch of helpful lints to avoid common pitfalls in Rust" repository = "https://github.com/rust-lang/rust-clippy" readme = "README.md" @@ -24,6 +24,7 @@ path = "src/driver.rs" clippy_config = { path = "clippy_config" } clippy_lints = { path = "clippy_lints" } clippy_utils = { path = "clippy_utils" } +declare_clippy_lint = { path = "declare_clippy_lint" } rustc_tools_util = { path = "rustc_tools_util", version = "0.4.2" } clippy_lints_internal = { path = "clippy_lints_internal", optional = true } tempfile = { version = "3.20", optional = true } @@ -58,6 +59,7 @@ rustc_tools_util = { path = "rustc_tools_util", version = "0.4.2" } [features] integration = ["dep:tempfile"] internal = ["dep:clippy_lints_internal", "dep:tempfile"] +jemalloc = [] [package.metadata.rust-analyzer] # This package uses #[feature(rustc_private)] diff --git a/src/tools/clippy/book/src/README.md b/src/tools/clippy/book/src/README.md index 5d2c3972b060..db73b49ecc24 100644 --- a/src/tools/clippy/book/src/README.md +++ b/src/tools/clippy/book/src/README.md @@ -1,5 +1,9 @@ # Clippy +[### IMPORTANT NOTE FOR CONTRIBUTORS ================](development/feature_freeze.md) + +---- + [![License: MIT OR 
Apache-2.0](https://img.shields.io/crates/l/clippy.svg)](https://github.com/rust-lang/rust-clippy#license) A collection of lints to catch common mistakes and improve your diff --git a/src/tools/clippy/book/src/SUMMARY.md b/src/tools/clippy/book/src/SUMMARY.md index 39fe7358ed87..b66c3481e493 100644 --- a/src/tools/clippy/book/src/SUMMARY.md +++ b/src/tools/clippy/book/src/SUMMARY.md @@ -13,6 +13,7 @@ - [GitLab CI](continuous_integration/gitlab.md) - [Travis CI](continuous_integration/travis.md) - [Development](development/README.md) + - [IMPORTANT: FEATURE FREEZE](development/feature_freeze.md) - [Basics](development/basics.md) - [Adding Lints](development/adding_lints.md) - [Defining Lints](development/defining_lints.md) diff --git a/src/tools/clippy/book/src/development/adding_lints.md b/src/tools/clippy/book/src/development/adding_lints.md index 2b89e94cf8f4..a42a29837446 100644 --- a/src/tools/clippy/book/src/development/adding_lints.md +++ b/src/tools/clippy/book/src/development/adding_lints.md @@ -1,5 +1,8 @@ # Adding a new lint +[### IMPORTANT NOTE FOR CONTRIBUTORS ================](feature_freeze.md) + + You are probably here because you want to add a new lint to Clippy. If this is the first time you're contributing to Clippy, this document guides you through creating an example lint from scratch. diff --git a/src/tools/clippy/book/src/development/feature_freeze.md b/src/tools/clippy/book/src/development/feature_freeze.md new file mode 100644 index 000000000000..260cb136cc07 --- /dev/null +++ b/src/tools/clippy/book/src/development/feature_freeze.md @@ -0,0 +1,55 @@ +# IMPORTANT: FEATURE FREEZE + +This is a temporary notice. + +From the 26th of June until the 18th of September we will perform a feature freeze. Only bugfix PRs will be reviewed +except already open ones. Every feature-adding PR opened in between those dates will be moved into a +milestone to be reviewed separately at another time. 
+ +We do this because of the long backlog of bugs that need to be addressed +in order to continue being the state-of-the-art linter that Clippy has become known for being. + +## For contributors + +If you are a contributor or are planning to become one, **please do not open a lint-adding PR**, we have lots of open +bugs of all levels of difficulty that you can address instead! + +We currently have about 800 lints, each one posing a maintainability challenge that needs to account to every possible +use case of the whole ecosystem. Bugs are natural in every software, but the Clippy team considers that Clippy needs a +refinement period. + +If you open a PR at this time, we will not review it but push it into a milestone until the refinement period ends, +adding additional load into our reviewing schedules. + +## I want to help, what can I do + +Thanks a lot to everyone who wants to help Clippy become better software in this feature freeze period! +If you'd like to help, making a bugfix, making sure that it works, and opening a PR is a great step! + +To find things to fix, go to the [tracking issue][tracking_issue], find an issue that you like, go there and claim that +issue with `@rustbot claim`. + +As a general metric and always taking into account your skill and knowledge level, you can use this guide: + +- 🟥 [ICEs][search_ice], these are compiler errors that causes Clippy to panic and crash. Usually involves high-level +debugging, sometimes interacting directly with the upstream compiler. Difficult to fix but a great challenge that +improves a lot developer workflows! + +- 🟧 [Suggestion causes bug][sugg_causes_bug], Clippy suggested code that changed logic in some silent way. +Unacceptable, as this may have disastrous consequences. Easier to fix than ICEs + +- 🟨 [Suggestion causes error][sugg_causes_error], Clippy suggested code snippet that caused a compiler error +when applied. 
We need to make sure that Clippy doesn't suggest using a variable twice at the same time or similar +easy-to-happen occurrences. + +- 🟩 [False positives][false_positive], a lint should not have fired, the easiest of them all, as this is "just" +identifying the root of a false positive and making an exception for those cases. + +Note that false negatives do not have priority unless the case is very clear, as they are a feature-request in a +trench coat. + +[search_ice]: https://github.com/rust-lang/rust-clippy/issues?q=sort%3Aupdated-desc+state%3Aopen+label%3A%22I-ICE%22 +[sugg_causes_bug]: https://github.com/rust-lang/rust-clippy/issues?q=sort%3Aupdated-desc%20state%3Aopen%20label%3AI-suggestion-causes-bug +[sugg_causes_error]: https://github.com/rust-lang/rust-clippy/issues?q=sort%3Aupdated-desc%20state%3Aopen%20label%3AI-suggestion-causes-error%20 +[false_positive]: https://github.com/rust-lang/rust-clippy/issues?q=sort%3Aupdated-desc%20state%3Aopen%20label%3AI-false-positive +[tracking_issue]: https://github.com/rust-lang/rust-clippy/issues/15086 diff --git a/src/tools/clippy/book/src/lint_configuration.md b/src/tools/clippy/book/src/lint_configuration.md index 7c850b4b023a..e9b7f42a1831 100644 --- a/src/tools/clippy/book/src/lint_configuration.md +++ b/src/tools/clippy/book/src/lint_configuration.md @@ -488,6 +488,13 @@ The maximum cognitive complexity a function can have ## `disallowed-macros` The list of disallowed macros, written as fully qualified paths. +**Fields:** +- `path` (required): the fully qualified path to the macro that should be disallowed +- `reason` (optional): explanation why this macro is disallowed +- `replacement` (optional): suggested alternative macro +- `allow-invalid` (optional, `false` by default): when set to `true`, it will ignore this entry + if the path doesn't exist, instead of emitting an error + **Default Value:** `[]` --- @@ -498,6 +505,13 @@ The list of disallowed macros, written as fully qualified paths. 
## `disallowed-methods` The list of disallowed methods, written as fully qualified paths. +**Fields:** +- `path` (required): the fully qualified path to the method that should be disallowed +- `reason` (optional): explanation why this method is disallowed +- `replacement` (optional): suggested alternative method +- `allow-invalid` (optional, `false` by default): when set to `true`, it will ignore this entry + if the path doesn't exist, instead of emitting an error + **Default Value:** `[]` --- @@ -520,6 +534,13 @@ default configuration of Clippy. By default, any configuration will replace the ## `disallowed-types` The list of disallowed types, written as fully qualified paths. +**Fields:** +- `path` (required): the fully qualified path to the type that should be disallowed +- `reason` (optional): explanation why this type is disallowed +- `replacement` (optional): suggested alternative type +- `allow-invalid` (optional, `false` by default): when set to `true`, it will ignore this entry + if the path doesn't exist, instead of emitting an error + **Default Value:** `[]` --- @@ -651,13 +672,14 @@ The maximum size of the `Err`-variant in a `Result` returned from a function ## `lint-commented-code` -Whether collapsible `if` chains are linted if they contain comments inside the parts +Whether collapsible `if` and `else if` chains are linted if they contain comments inside the parts that would be collapsed. 
**Default Value:** `false` --- **Affected lints:** +* [`collapsible_else_if`](https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_else_if) * [`collapsible_if`](https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if) diff --git a/src/tools/clippy/clippy_config/Cargo.toml b/src/tools/clippy/clippy_config/Cargo.toml index 0606245f990c..858366c8a5c4 100644 --- a/src/tools/clippy/clippy_config/Cargo.toml +++ b/src/tools/clippy/clippy_config/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "clippy_config" -version = "0.1.89" +version = "0.1.90" edition = "2024" publish = false diff --git a/src/tools/clippy/clippy_config/src/conf.rs b/src/tools/clippy/clippy_config/src/conf.rs index 87158cec42b2..841facdca06d 100644 --- a/src/tools/clippy/clippy_config/src/conf.rs +++ b/src/tools/clippy/clippy_config/src/conf.rs @@ -575,10 +575,24 @@ define_Conf! { #[conf_deprecated("Please use `cognitive-complexity-threshold` instead", cognitive_complexity_threshold)] cyclomatic_complexity_threshold: u64 = 25, /// The list of disallowed macros, written as fully qualified paths. + /// + /// **Fields:** + /// - `path` (required): the fully qualified path to the macro that should be disallowed + /// - `reason` (optional): explanation why this macro is disallowed + /// - `replacement` (optional): suggested alternative macro + /// - `allow-invalid` (optional, `false` by default): when set to `true`, it will ignore this entry + /// if the path doesn't exist, instead of emitting an error #[disallowed_paths_allow_replacements = true] #[lints(disallowed_macros)] disallowed_macros: Vec = Vec::new(), /// The list of disallowed methods, written as fully qualified paths. 
+ /// + /// **Fields:** + /// - `path` (required): the fully qualified path to the method that should be disallowed + /// - `reason` (optional): explanation why this method is disallowed + /// - `replacement` (optional): suggested alternative method + /// - `allow-invalid` (optional, `false` by default): when set to `true`, it will ignore this entry + /// if the path doesn't exist, instead of emitting an error #[disallowed_paths_allow_replacements = true] #[lints(disallowed_methods)] disallowed_methods: Vec = Vec::new(), @@ -588,6 +602,13 @@ define_Conf! { #[lints(disallowed_names)] disallowed_names: Vec = DEFAULT_DISALLOWED_NAMES.iter().map(ToString::to_string).collect(), /// The list of disallowed types, written as fully qualified paths. + /// + /// **Fields:** + /// - `path` (required): the fully qualified path to the type that should be disallowed + /// - `reason` (optional): explanation why this type is disallowed + /// - `replacement` (optional): suggested alternative type + /// - `allow-invalid` (optional, `false` by default): when set to `true`, it will ignore this entry + /// if the path doesn't exist, instead of emitting an error #[disallowed_paths_allow_replacements = true] #[lints(disallowed_types)] disallowed_types: Vec = Vec::new(), @@ -641,9 +662,9 @@ define_Conf! { /// The maximum size of the `Err`-variant in a `Result` returned from a function #[lints(result_large_err)] large_error_threshold: u64 = 128, - /// Whether collapsible `if` chains are linted if they contain comments inside the parts + /// Whether collapsible `if` and `else if` chains are linted if they contain comments inside the parts /// that would be collapsed. - #[lints(collapsible_if)] + #[lints(collapsible_else_if, collapsible_if)] lint_commented_code: bool = false, /// Whether to suggest reordering constructor fields when initializers are present. 
/// DEPRECATED CONFIGURATION: lint-inconsistent-struct-field-initializers diff --git a/src/tools/clippy/clippy_dev/src/lint.rs b/src/tools/clippy/clippy_dev/src/lint.rs index e0e036757d56..0d66f167a386 100644 --- a/src/tools/clippy/clippy_dev/src/lint.rs +++ b/src/tools/clippy/clippy_dev/src/lint.rs @@ -13,7 +13,7 @@ pub fn run<'a>(path: &str, edition: &str, args: impl Iterator if is_file { exit_if_err( - Command::new(env::var("CARGO").unwrap_or("cargo".into())) + Command::new(env::var("CARGO").unwrap_or_else(|_| "cargo".into())) .args(["run", "--bin", "clippy-driver", "--"]) .args(["-L", "./target/debug"]) .args(["-Z", "no-codegen"]) @@ -26,7 +26,7 @@ pub fn run<'a>(path: &str, edition: &str, args: impl Iterator ); } else { exit_if_err( - Command::new(env::var("CARGO").unwrap_or("cargo".into())) + Command::new(env::var("CARGO").unwrap_or_else(|_| "cargo".into())) .arg("build") .status(), ); diff --git a/src/tools/clippy/clippy_dev/src/release.rs b/src/tools/clippy/clippy_dev/src/release.rs index 62c1bee81850..15392dd1d292 100644 --- a/src/tools/clippy/clippy_dev/src/release.rs +++ b/src/tools/clippy/clippy_dev/src/release.rs @@ -5,6 +5,7 @@ static CARGO_TOML_FILES: &[&str] = &[ "clippy_config/Cargo.toml", "clippy_lints/Cargo.toml", "clippy_utils/Cargo.toml", + "declare_clippy_lint/Cargo.toml", "Cargo.toml", ]; diff --git a/src/tools/clippy/clippy_dev/src/serve.rs b/src/tools/clippy/clippy_dev/src/serve.rs index a2d1236629fd..498ffeba9d67 100644 --- a/src/tools/clippy/clippy_dev/src/serve.rs +++ b/src/tools/clippy/clippy_dev/src/serve.rs @@ -28,7 +28,7 @@ pub fn run(port: u16, lint: Option) -> ! 
{ .map(mtime); if times.iter().any(|&time| index_time < time) { - Command::new(env::var("CARGO").unwrap_or("cargo".into())) + Command::new(env::var("CARGO").unwrap_or_else(|_| "cargo".into())) .arg("collect-metadata") .spawn() .unwrap() diff --git a/src/tools/clippy/clippy_dev/src/update_lints.rs b/src/tools/clippy/clippy_dev/src/update_lints.rs index 08592f2521f7..5f6e874ffe25 100644 --- a/src/tools/clippy/clippy_dev/src/update_lints.rs +++ b/src/tools/clippy/clippy_dev/src/update_lints.rs @@ -4,8 +4,9 @@ use crate::utils::{ use itertools::Itertools; use std::collections::HashSet; use std::fmt::Write; +use std::fs; use std::ops::Range; -use std::path::{Path, PathBuf}; +use std::path::{self, Path, PathBuf}; use walkdir::{DirEntry, WalkDir}; const GENERATED_FILE_COMMENT: &str = "// This file was generated by `cargo dev update_lints`.\n\ @@ -36,123 +37,164 @@ pub fn generate_lint_files( deprecated: &[DeprecatedLint], renamed: &[RenamedLint], ) { - FileUpdater::default().update_files_checked( + let mut updater = FileUpdater::default(); + updater.update_file_checked( "cargo dev update_lints", update_mode, - &mut [ - ( - "README.md", - &mut update_text_region_fn("[There are over ", " lints included in this crate!]", |dst| { - write!(dst, "{}", round_to_fifty(lints.len())).unwrap(); - }), - ), - ( - "book/src/README.md", - &mut update_text_region_fn("[There are over ", " lints included in this crate!]", |dst| { - write!(dst, "{}", round_to_fifty(lints.len())).unwrap(); - }), - ), - ( - "CHANGELOG.md", - &mut update_text_region_fn( - "\n", - "", - |dst| { - for lint in lints - .iter() - .map(|l| &*l.name) - .chain(deprecated.iter().filter_map(|l| l.name.strip_prefix("clippy::"))) - .chain(renamed.iter().filter_map(|l| l.old_name.strip_prefix("clippy::"))) - .sorted() - { - writeln!(dst, "[`{lint}`]: {DOCS_LINK}#{lint}").unwrap(); - } - }, - ), - ), - ( - "clippy_lints/src/lib.rs", - &mut update_text_region_fn( - "// begin lints modules, do not remove this comment, it's 
used in `update_lints`\n", - "// end lints modules, do not remove this comment, it's used in `update_lints`", - |dst| { - for lint_mod in lints.iter().map(|l| &l.module).sorted().dedup() { - writeln!(dst, "mod {lint_mod};").unwrap(); - } - }, - ), - ), - ("clippy_lints/src/declared_lints.rs", &mut |_, src, dst| { - dst.push_str(GENERATED_FILE_COMMENT); - dst.push_str("pub static LINTS: &[&crate::LintInfo] = &[\n"); - for (module_name, lint_name) in lints.iter().map(|l| (&l.module, l.name.to_uppercase())).sorted() { - writeln!(dst, " crate::{module_name}::{lint_name}_INFO,").unwrap(); + "README.md", + &mut update_text_region_fn("[There are over ", " lints included in this crate!]", |dst| { + write!(dst, "{}", round_to_fifty(lints.len())).unwrap(); + }), + ); + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + "book/src/README.md", + &mut update_text_region_fn("[There are over ", " lints included in this crate!]", |dst| { + write!(dst, "{}", round_to_fifty(lints.len())).unwrap(); + }), + ); + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + "CHANGELOG.md", + &mut update_text_region_fn( + "\n", + "", + |dst| { + for lint in lints + .iter() + .map(|l| &*l.name) + .chain(deprecated.iter().filter_map(|l| l.name.strip_prefix("clippy::"))) + .chain(renamed.iter().filter_map(|l| l.old_name.strip_prefix("clippy::"))) + .sorted() + { + writeln!(dst, "[`{lint}`]: {DOCS_LINK}#{lint}").unwrap(); } - dst.push_str("];\n"); - UpdateStatus::from_changed(src != dst) - }), - ("clippy_lints/src/deprecated_lints.rs", &mut |_, src, dst| { - let mut searcher = RustSearcher::new(src); - assert!( - searcher.find_token(Token::Ident("declare_with_version")) - && searcher.find_token(Token::Ident("declare_with_version")), - "error reading deprecated lints" - ); - dst.push_str(&src[..searcher.pos() as usize]); - dst.push_str("! 
{ DEPRECATED(DEPRECATED_VERSION) = [\n"); - for lint in deprecated { - write!( - dst, - " #[clippy::version = \"{}\"]\n (\"{}\", \"{}\"),\n", - lint.version, lint.name, lint.reason, - ) - .unwrap(); - } - dst.push_str( - "]}\n\n\ + }, + ), + ); + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + "clippy_lints/src/deprecated_lints.rs", + &mut |_, src, dst| { + let mut searcher = RustSearcher::new(src); + assert!( + searcher.find_token(Token::Ident("declare_with_version")) + && searcher.find_token(Token::Ident("declare_with_version")), + "error reading deprecated lints" + ); + dst.push_str(&src[..searcher.pos() as usize]); + dst.push_str("! { DEPRECATED(DEPRECATED_VERSION) = [\n"); + for lint in deprecated { + write!( + dst, + " #[clippy::version = \"{}\"]\n (\"{}\", \"{}\"),\n", + lint.version, lint.name, lint.reason, + ) + .unwrap(); + } + dst.push_str( + "]}\n\n\ #[rustfmt::skip]\n\ declare_with_version! { RENAMED(RENAMED_VERSION) = [\n\ ", - ); - for lint in renamed { - write!( - dst, - " #[clippy::version = \"{}\"]\n (\"{}\", \"{}\"),\n", - lint.version, lint.old_name, lint.new_name, - ) - .unwrap(); - } - dst.push_str("]}\n"); - UpdateStatus::from_changed(src != dst) - }), - ("tests/ui/deprecated.rs", &mut |_, src, dst| { - dst.push_str(GENERATED_FILE_COMMENT); - for lint in deprecated { - writeln!(dst, "#![warn({})] //~ ERROR: lint `{}`", lint.name, lint.name).unwrap(); - } - dst.push_str("\nfn main() {}\n"); - UpdateStatus::from_changed(src != dst) - }), - ("tests/ui/rename.rs", &mut move |_, src, dst| { - let mut seen_lints = HashSet::new(); - dst.push_str(GENERATED_FILE_COMMENT); - dst.push_str("#![allow(clippy::duplicated_attributes)]\n"); - for lint in renamed { - if seen_lints.insert(&lint.new_name) { - writeln!(dst, "#![allow({})]", lint.new_name).unwrap(); - } - } - seen_lints.clear(); - for lint in renamed { - if seen_lints.insert(&lint.old_name) { - writeln!(dst, "#![warn({})] //~ ERROR: lint `{}`", lint.old_name, 
lint.old_name).unwrap(); - } - } - dst.push_str("\nfn main() {}\n"); - UpdateStatus::from_changed(src != dst) - }), - ], + ); + for lint in renamed { + write!( + dst, + " #[clippy::version = \"{}\"]\n (\"{}\", \"{}\"),\n", + lint.version, lint.old_name, lint.new_name, + ) + .unwrap(); + } + dst.push_str("]}\n"); + UpdateStatus::from_changed(src != dst) + }, ); + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + "tests/ui/deprecated.rs", + &mut |_, src, dst| { + dst.push_str(GENERATED_FILE_COMMENT); + for lint in deprecated { + writeln!(dst, "#![warn({})] //~ ERROR: lint `{}`", lint.name, lint.name).unwrap(); + } + dst.push_str("\nfn main() {}\n"); + UpdateStatus::from_changed(src != dst) + }, + ); + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + "tests/ui/rename.rs", + &mut move |_, src, dst| { + let mut seen_lints = HashSet::new(); + dst.push_str(GENERATED_FILE_COMMENT); + dst.push_str("#![allow(clippy::duplicated_attributes)]\n"); + for lint in renamed { + if seen_lints.insert(&lint.new_name) { + writeln!(dst, "#![allow({})]", lint.new_name).unwrap(); + } + } + seen_lints.clear(); + for lint in renamed { + if seen_lints.insert(&lint.old_name) { + writeln!(dst, "#![warn({})] //~ ERROR: lint `{}`", lint.old_name, lint.old_name).unwrap(); + } + } + dst.push_str("\nfn main() {}\n"); + UpdateStatus::from_changed(src != dst) + }, + ); + for (crate_name, lints) in lints.iter().into_group_map_by(|&l| { + let Some(path::Component::Normal(name)) = l.path.components().next() else { + // All paths should start with `{crate_name}/src` when parsed from `find_lint_decls` + panic!("internal error: can't read crate name from path `{}`", l.path.display()); + }; + name + }) { + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + Path::new(crate_name).join("src/lib.rs"), + &mut update_text_region_fn( + "// begin lints modules, do not remove this comment, it's used in `update_lints`\n", + "// end lints modules, 
do not remove this comment, it's used in `update_lints`", + |dst| { + for lint_mod in lints + .iter() + .filter(|l| !l.module.is_empty()) + .map(|l| l.module.split_once("::").map_or(&*l.module, |x| x.0)) + .sorted() + .dedup() + { + writeln!(dst, "mod {lint_mod};").unwrap(); + } + }, + ), + ); + updater.update_file_checked( + "cargo dev update_lints", + update_mode, + Path::new(crate_name).join("src/declared_lints.rs"), + &mut |_, src, dst| { + dst.push_str(GENERATED_FILE_COMMENT); + dst.push_str("pub static LINTS: &[&::declare_clippy_lint::LintInfo] = &[\n"); + for (module_path, lint_name) in lints.iter().map(|l| (&l.module, l.name.to_uppercase())).sorted() { + if module_path.is_empty() { + writeln!(dst, " crate::{lint_name}_INFO,").unwrap(); + } else { + writeln!(dst, " crate::{module_path}::{lint_name}_INFO,").unwrap(); + } + } + dst.push_str("];\n"); + UpdateStatus::from_changed(src != dst) + }, + ); + } } fn round_to_fifty(count: usize) -> usize { @@ -186,13 +228,25 @@ pub struct RenamedLint { pub fn find_lint_decls() -> Vec { let mut lints = Vec::with_capacity(1000); let mut contents = String::new(); - for (file, module) in read_src_with_module("clippy_lints/src".as_ref()) { - parse_clippy_lint_decls( - file.path(), - File::open_read_to_cleared_string(file.path(), &mut contents), - &module, - &mut lints, - ); + for e in expect_action(fs::read_dir("."), ErrAction::Read, ".") { + let e = expect_action(e, ErrAction::Read, "."); + if !expect_action(e.file_type(), ErrAction::Read, ".").is_dir() { + continue; + } + let Ok(mut name) = e.file_name().into_string() else { + continue; + }; + if name.starts_with("clippy_lints") && name != "clippy_lints_internal" { + name.push_str("/src"); + for (file, module) in read_src_with_module(name.as_ref()) { + parse_clippy_lint_decls( + file.path(), + File::open_read_to_cleared_string(file.path(), &mut contents), + &module, + &mut lints, + ); + } + } } lints.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name)); lints @@ -204,7 +258,7 @@ 
fn read_src_with_module(src_root: &Path) -> impl use<'_> + Iterator (Vec, Vec) { /// Removes the line splices and surrounding quotes from a string literal fn parse_str_lit(s: &str) -> String { - let (s, mode) = if let Some(s) = s.strip_prefix("r") { - (s.trim_matches('#'), rustc_literal_escaper::Mode::RawStr) - } else { - (s, rustc_literal_escaper::Mode::Str) - }; + let s = s.strip_prefix("r").unwrap_or(s).trim_matches('#'); let s = s .strip_prefix('"') .and_then(|s| s.strip_suffix('"')) .unwrap_or_else(|| panic!("expected quoted string, found `{s}`")); let mut res = String::with_capacity(s.len()); - rustc_literal_escaper::unescape_unicode(s, mode, &mut |_, ch| { + rustc_literal_escaper::unescape_str(s, &mut |_, ch| { if let Ok(ch) = ch { res.push(ch); } diff --git a/src/tools/clippy/clippy_dev/src/utils.rs b/src/tools/clippy/clippy_dev/src/utils.rs index c4808b7048b0..89962a110341 100644 --- a/src/tools/clippy/clippy_dev/src/utils.rs +++ b/src/tools/clippy/clippy_dev/src/utils.rs @@ -383,21 +383,6 @@ impl FileUpdater { self.update_file_checked_inner(tool, mode, path.as_ref(), update); } - #[expect(clippy::type_complexity)] - pub fn update_files_checked( - &mut self, - tool: &str, - mode: UpdateMode, - files: &mut [( - impl AsRef, - &mut dyn FnMut(&Path, &str, &mut String) -> UpdateStatus, - )], - ) { - for (path, update) in files { - self.update_file_checked_inner(tool, mode, path.as_ref(), update); - } - } - pub fn update_file( &mut self, path: impl AsRef, diff --git a/src/tools/clippy/clippy_lints/Cargo.toml b/src/tools/clippy/clippy_lints/Cargo.toml index 39e4e2e365ea..c03cc99b581f 100644 --- a/src/tools/clippy/clippy_lints/Cargo.toml +++ b/src/tools/clippy/clippy_lints/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "clippy_lints" -version = "0.1.89" +version = "0.1.90" description = "A bunch of helpful lints to avoid common pitfalls in Rust" repository = "https://github.com/rust-lang/rust-clippy" readme = "README.md" @@ -13,6 +13,7 @@ arrayvec = { version = 
"0.7", default-features = false } cargo_metadata = "0.18" clippy_config = { path = "../clippy_config" } clippy_utils = { path = "../clippy_utils" } +declare_clippy_lint = { path = "../declare_clippy_lint" } itertools = "0.12" quine-mc_cluskey = "0.2" regex-syntax = "0.8" diff --git a/src/tools/clippy/clippy_lints/src/attrs/inline_always.rs b/src/tools/clippy/clippy_lints/src/attrs/inline_always.rs index 58e51128a0dc..b8f93ee5e2c1 100644 --- a/src/tools/clippy/clippy_lints/src/attrs/inline_always.rs +++ b/src/tools/clippy/clippy_lints/src/attrs/inline_always.rs @@ -1,10 +1,10 @@ use super::INLINE_ALWAYS; use clippy_utils::diagnostics::span_lint; -use rustc_attr_data_structures::{find_attr, AttributeKind, InlineAttr}; +use rustc_attr_data_structures::{AttributeKind, InlineAttr, find_attr}; use rustc_hir::Attribute; use rustc_lint::LateContext; -use rustc_span::symbol::Symbol; use rustc_span::Span; +use rustc_span::symbol::Symbol; pub(super) fn check(cx: &LateContext<'_>, span: Span, name: Symbol, attrs: &[Attribute]) { if span.from_expansion() { diff --git a/src/tools/clippy/clippy_lints/src/attrs/mod.rs b/src/tools/clippy/clippy_lints/src/attrs/mod.rs index 9a1242980418..91c2dc7f3dc6 100644 --- a/src/tools/clippy/clippy_lints/src/attrs/mod.rs +++ b/src/tools/clippy/clippy_lints/src/attrs/mod.rs @@ -207,7 +207,7 @@ declare_clippy_lint! { declare_clippy_lint! { /// ### What it does /// Checks for usage of the `#[allow]` attribute and suggests replacing it with - /// the `#[expect]` (See [RFC 2383](https://rust-lang.github.io/rfcs/2383-lint-reasons.html)) + /// the `#[expect]` attribute (See [RFC 2383](https://rust-lang.github.io/rfcs/2383-lint-reasons.html)) /// /// This lint only warns outer attributes (`#[allow]`), as inner attributes /// (`#![allow]`) are usually used to enable or disable lints on a global scale. 
diff --git a/src/tools/clippy/clippy_lints/src/attrs/utils.rs b/src/tools/clippy/clippy_lints/src/attrs/utils.rs index a5ce2137bffe..7b66f91f6c07 100644 --- a/src/tools/clippy/clippy_lints/src/attrs/utils.rs +++ b/src/tools/clippy/clippy_lints/src/attrs/utils.rs @@ -46,11 +46,13 @@ pub(super) fn is_relevant_trait(cx: &LateContext<'_>, item: &TraitItem<'_>) -> b } fn is_relevant_block(cx: &LateContext<'_>, typeck_results: &ty::TypeckResults<'_>, block: &Block<'_>) -> bool { - block.stmts.first().map_or( - block - .expr - .as_ref() - .is_some_and(|e| is_relevant_expr(cx, typeck_results, e)), + block.stmts.first().map_or_else( + || { + block + .expr + .as_ref() + .is_some_and(|e| is_relevant_expr(cx, typeck_results, e)) + }, |stmt| match &stmt.kind { StmtKind::Let(_) => true, StmtKind::Expr(expr) | StmtKind::Semi(expr) => is_relevant_expr(cx, typeck_results, expr), diff --git a/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs b/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs index ae36bb76117d..8f95e44bf853 100644 --- a/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs +++ b/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs @@ -56,7 +56,7 @@ fn is_impl_not_trait_with_bool_out<'tcx>(cx: &LateContext<'tcx>, ty: Ty<'tcx>) - .and_then(|trait_id| { cx.tcx.associated_items(trait_id).find_by_ident_and_kind( cx.tcx, - Ident::from_str("Output"), + Ident::with_dummy_span(sym::Output), ty::AssocTag::Type, trait_id, ) diff --git a/src/tools/clippy/clippy_lints/src/borrow_deref_ref.rs b/src/tools/clippy/clippy_lints/src/borrow_deref_ref.rs index 7cde007a9b66..70c9c45a60c8 100644 --- a/src/tools/clippy/clippy_lints/src/borrow_deref_ref.rs +++ b/src/tools/clippy/clippy_lints/src/borrow_deref_ref.rs @@ -2,9 +2,9 @@ use crate::reference::DEREF_ADDROF; use clippy_utils::diagnostics::span_lint_and_then; use clippy_utils::source::SpanRangeExt; use clippy_utils::ty::implements_trait; -use clippy_utils::{get_parent_expr, is_from_proc_macro, 
is_lint_allowed, is_mutable}; +use clippy_utils::{get_parent_expr, is_expr_temporary_value, is_from_proc_macro, is_lint_allowed, is_mutable}; use rustc_errors::Applicability; -use rustc_hir::{BorrowKind, ExprKind, UnOp}; +use rustc_hir::{BorrowKind, Expr, ExprKind, Node, UnOp}; use rustc_lint::{LateContext, LateLintPass}; use rustc_middle::mir::Mutability; use rustc_middle::ty; @@ -48,7 +48,7 @@ declare_clippy_lint! { declare_lint_pass!(BorrowDerefRef => [BORROW_DEREF_REF]); impl<'tcx> LateLintPass<'tcx> for BorrowDerefRef { - fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &rustc_hir::Expr<'tcx>) { + fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &'tcx Expr<'tcx>) { if let ExprKind::AddrOf(BorrowKind::Ref, Mutability::Not, addrof_target) = e.kind && let ExprKind::Unary(UnOp::Deref, deref_target) = addrof_target.kind && !matches!(deref_target.kind, ExprKind::Unary(UnOp::Deref, ..)) @@ -76,6 +76,9 @@ impl<'tcx> LateLintPass<'tcx> for BorrowDerefRef { && let e_ty = cx.typeck_results().expr_ty_adjusted(e) // check if the reference is coercing to a mutable reference && (!matches!(e_ty.kind(), ty::Ref(_, _, Mutability::Mut)) || is_mutable(cx, deref_target)) + // If the new borrow might be itself borrowed mutably and the original reference is not a temporary + // value, do not propose to use it directly. + && (is_expr_temporary_value(cx, deref_target) || !potentially_bound_to_mutable_ref(cx, e)) && let Some(deref_text) = deref_target.span.get_source_text(cx) { span_lint_and_then( @@ -110,3 +113,9 @@ impl<'tcx> LateLintPass<'tcx> for BorrowDerefRef { } } } + +/// Checks if `expr` is used as part of a `let` statement containing a `ref mut` binding. 
+fn potentially_bound_to_mutable_ref<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>) -> bool { + matches!(cx.tcx.parent_hir_node(expr.hir_id), Node::LetStmt(let_stmt) + if let_stmt.pat.contains_explicit_ref_binding() == Some(Mutability::Mut)) +} diff --git a/src/tools/clippy/clippy_lints/src/casts/cast_sign_loss.rs b/src/tools/clippy/clippy_lints/src/casts/cast_sign_loss.rs index 9a1ad8a74738..a70bd8861919 100644 --- a/src/tools/clippy/clippy_lints/src/casts/cast_sign_loss.rs +++ b/src/tools/clippy/clippy_lints/src/casts/cast_sign_loss.rs @@ -168,7 +168,7 @@ fn pow_call_result_sign(cx: &LateContext<'_>, base: &Expr<'_>, exponent: &Expr<' // Rust's integer pow() functions take an unsigned exponent. let exponent_val = get_const_unsigned_int_eval(cx, exponent, None); - let exponent_is_even = exponent_val.map(|val| val % 2 == 0); + let exponent_is_even = exponent_val.map(|val| val.is_multiple_of(2)); match (base_sign, exponent_is_even) { // Non-negative bases always return non-negative results, ignoring overflow. 
diff --git a/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs b/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs index 61dfc0fc0425..d9e88d6a401c 100644 --- a/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs +++ b/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs @@ -1,6 +1,6 @@ use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::source::SpanRangeExt; -use clippy_utils::{expr_or_init, path_def_id, paths, std_or_core}; +use clippy_utils::{expr_or_init, is_path_diagnostic_item, std_or_core, sym}; use rustc_ast::LitKind; use rustc_errors::Applicability; use rustc_hir::{Expr, ExprKind, GenericArg, Mutability, QPath, Ty, TyKind}; @@ -53,8 +53,7 @@ fn is_expr_const_aligned(cx: &LateContext<'_>, expr: &Expr<'_>, to: &Ty<'_>) -> fn is_align_of_call(cx: &LateContext<'_>, fun: &Expr<'_>, to: &Ty<'_>) -> bool { if let ExprKind::Path(QPath::Resolved(_, path)) = fun.kind - && let Some(fun_id) = path_def_id(cx, fun) - && paths::ALIGN_OF.matches(cx, fun_id) + && is_path_diagnostic_item(cx, fun, sym::mem_align_of) && let Some(args) = path.segments.last().and_then(|seg| seg.args) && let [GenericArg::Type(generic_ty)] = args.args { diff --git a/src/tools/clippy/clippy_lints/src/collapsible_if.rs b/src/tools/clippy/clippy_lints/src/collapsible_if.rs index 7f6ecea99fb0..1854d86c53b2 100644 --- a/src/tools/clippy/clippy_lints/src/collapsible_if.rs +++ b/src/tools/clippy/clippy_lints/src/collapsible_if.rs @@ -1,13 +1,16 @@ use clippy_config::Conf; -use clippy_utils::diagnostics::{span_lint_and_sugg, span_lint_and_then}; +use clippy_utils::diagnostics::span_lint_and_then; use clippy_utils::msrvs::{self, Msrv}; -use clippy_utils::source::{IntoSpan as _, SpanRangeExt, snippet, snippet_block, snippet_block_with_applicability}; +use clippy_utils::source::{IntoSpan as _, SpanRangeExt, snippet, snippet_block_with_applicability}; +use clippy_utils::{span_contains_non_whitespace, tokenize_with_text}; use 
rustc_ast::BinOpKind; use rustc_errors::Applicability; use rustc_hir::{Block, Expr, ExprKind, Stmt, StmtKind}; +use rustc_lexer::TokenKind; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::impl_lint_pass; -use rustc_span::Span; +use rustc_span::source_map::SourceMap; +use rustc_span::{BytePos, Span}; declare_clippy_lint! { /// ### What it does @@ -90,35 +93,74 @@ impl CollapsibleIf { } } - fn check_collapsible_else_if(cx: &LateContext<'_>, then_span: Span, else_block: &Block<'_>) { - if !block_starts_with_comment(cx, else_block) - && let Some(else_) = expr_block(else_block) + fn check_collapsible_else_if(&self, cx: &LateContext<'_>, then_span: Span, else_block: &Block<'_>) { + if let Some(else_) = expr_block(else_block) && cx.tcx.hir_attrs(else_.hir_id).is_empty() && !else_.span.from_expansion() - && let ExprKind::If(..) = else_.kind + && let ExprKind::If(else_if_cond, ..) = else_.kind + && !block_starts_with_significant_tokens(cx, else_block, else_, self.lint_commented_code) { - // Prevent "elseif" - // Check that the "else" is followed by whitespace - let up_to_else = then_span.between(else_block.span); - let requires_space = if let Some(c) = snippet(cx, up_to_else, "..").chars().last() { - !c.is_whitespace() - } else { - false - }; - - let mut applicability = Applicability::MachineApplicable; - span_lint_and_sugg( + span_lint_and_then( cx, COLLAPSIBLE_ELSE_IF, else_block.span, "this `else { if .. 
}` block can be collapsed", - "collapse nested if block", - format!( - "{}{}", - if requires_space { " " } else { "" }, - snippet_block_with_applicability(cx, else_.span, "..", Some(else_block.span), &mut applicability) - ), - applicability, + |diag| { + let up_to_else = then_span.between(else_block.span); + let else_before_if = else_.span.shrink_to_lo().with_hi(else_if_cond.span.lo() - BytePos(1)); + if self.lint_commented_code + && let Some(else_keyword_span) = + span_extract_keyword(cx.tcx.sess.source_map(), up_to_else, "else") + && let Some(else_if_keyword_span) = + span_extract_keyword(cx.tcx.sess.source_map(), else_before_if, "if") + { + let else_keyword_span = else_keyword_span.with_leading_whitespace(cx).into_span(); + let else_open_bracket = else_block.span.split_at(1).0.with_leading_whitespace(cx).into_span(); + let else_closing_bracket = { + let end = else_block.span.shrink_to_hi(); + end.with_lo(end.lo() - BytePos(1)) + .with_leading_whitespace(cx) + .into_span() + }; + let sugg = vec![ + // Remove the outer else block `else` + (else_keyword_span, String::new()), + // Replace the inner `if` by `else if` + (else_if_keyword_span, String::from("else if")), + // Remove the outer else block `{` + (else_open_bracket, String::new()), + // Remove the outer else block '}' + (else_closing_bracket, String::new()), + ]; + diag.multipart_suggestion("collapse nested if block", sugg, Applicability::MachineApplicable); + return; + } + + // Prevent "elseif" + // Check that the "else" is followed by whitespace + let requires_space = if let Some(c) = snippet(cx, up_to_else, "..").chars().last() { + !c.is_whitespace() + } else { + false + }; + let mut applicability = Applicability::MachineApplicable; + diag.span_suggestion( + else_block.span, + "collapse nested if block", + format!( + "{}{}", + if requires_space { " " } else { "" }, + snippet_block_with_applicability( + cx, + else_.span, + "..", + Some(else_block.span), + &mut applicability + ) + ), + applicability, + ); + 
}, ); } } @@ -130,7 +172,7 @@ impl CollapsibleIf { && self.eligible_condition(cx, check_inner) && let ctxt = expr.span.ctxt() && inner.span.ctxt() == ctxt - && (self.lint_commented_code || !block_starts_with_comment(cx, then)) + && !block_starts_with_significant_tokens(cx, then, inner, self.lint_commented_code) { span_lint_and_then( cx, @@ -141,7 +183,7 @@ impl CollapsibleIf { let then_open_bracket = then.span.split_at(1).0.with_leading_whitespace(cx).into_span(); let then_closing_bracket = { let end = then.span.shrink_to_hi(); - end.with_lo(end.lo() - rustc_span::BytePos(1)) + end.with_lo(end.lo() - BytePos(1)) .with_leading_whitespace(cx) .into_span() }; @@ -179,7 +221,7 @@ impl LateLintPass<'_> for CollapsibleIf { if let Some(else_) = else_ && let ExprKind::Block(else_, None) = else_.kind { - Self::check_collapsible_else_if(cx, then.span, else_); + self.check_collapsible_else_if(cx, then.span, else_); } else if else_.is_none() && self.eligible_condition(cx, cond) && let ExprKind::Block(then, None) = then.kind @@ -190,12 +232,16 @@ impl LateLintPass<'_> for CollapsibleIf { } } -fn block_starts_with_comment(cx: &LateContext<'_>, block: &Block<'_>) -> bool { - // We trim all opening braces and whitespaces and then check if the next string is a comment. - let trimmed_block_text = snippet_block(cx, block.span, "..", None) - .trim_start_matches(|c: char| c.is_whitespace() || c == '{') - .to_owned(); - trimmed_block_text.starts_with("//") || trimmed_block_text.starts_with("/*") +// Check that nothing significant can be found but whitespaces between the initial `{` of `block` +// and the beginning of `stop_at`. 
+fn block_starts_with_significant_tokens( + cx: &LateContext<'_>, + block: &Block<'_>, + stop_at: &Expr<'_>, + lint_commented_code: bool, +) -> bool { + let span = block.span.split_at(1).1.until(stop_at.span); + span_contains_non_whitespace(cx, span, lint_commented_code) } /// If `block` is a block with either one expression or a statement containing an expression, @@ -226,3 +272,16 @@ fn parens_around(expr: &Expr<'_>) -> Vec<(Span, String)> { vec![] } } + +fn span_extract_keyword(sm: &SourceMap, span: Span, keyword: &str) -> Option { + let snippet = sm.span_to_snippet(span).ok()?; + tokenize_with_text(&snippet) + .filter(|(t, s, _)| matches!(t, TokenKind::Ident if *s == keyword)) + .map(|(_, _, inner)| { + span.split_at(u32::try_from(inner.start).unwrap()) + .1 + .split_at(u32::try_from(inner.end - inner.start).unwrap()) + .0 + }) + .next() +} diff --git a/src/tools/clippy/clippy_lints/src/copies.rs b/src/tools/clippy/clippy_lints/src/copies.rs index 5ef726638a56..27918698cd6b 100644 --- a/src/tools/clippy/clippy_lints/src/copies.rs +++ b/src/tools/clippy/clippy_lints/src/copies.rs @@ -11,7 +11,7 @@ use clippy_utils::{ use core::iter; use core::ops::ControlFlow; use rustc_errors::Applicability; -use rustc_hir::{BinOpKind, Block, Expr, ExprKind, HirId, HirIdSet, Stmt, StmtKind, intravisit}; +use rustc_hir::{BinOpKind, Block, Expr, ExprKind, HirId, HirIdSet, LetStmt, Node, Stmt, StmtKind, intravisit}; use rustc_lint::{LateContext, LateLintPass}; use rustc_middle::ty::TyCtxt; use rustc_session::impl_lint_pass; @@ -295,7 +295,7 @@ fn lint_branches_sharing_code<'tcx>( sugg, Applicability::Unspecified, ); - if !cx.typeck_results().expr_ty(expr).is_unit() { + if is_expr_parent_assignment(cx, expr) || !cx.typeck_results().expr_ty(expr).is_unit() { diag.note("the end suggestion probably needs some adjustments to use the expression result correctly"); } } @@ -660,3 +660,17 @@ fn lint_same_fns_in_if_cond(cx: &LateContext<'_>, conds: &[&Expr<'_>]) { ); } } + +fn 
is_expr_parent_assignment(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { + let parent = cx.tcx.parent_hir_node(expr.hir_id); + if let Node::LetStmt(LetStmt { init: Some(e), .. }) + | Node::Expr(Expr { + kind: ExprKind::Assign(_, e, _), + .. + }) = parent + { + return e.hir_id == expr.hir_id; + } + + false +} diff --git a/src/tools/clippy/clippy_lints/src/declare_clippy_lint.rs b/src/tools/clippy/clippy_lints/src/declare_clippy_lint.rs deleted file mode 100644 index 9f82f8767279..000000000000 --- a/src/tools/clippy/clippy_lints/src/declare_clippy_lint.rs +++ /dev/null @@ -1,168 +0,0 @@ -#[macro_export] -#[allow(clippy::crate_in_macro_def)] -macro_rules! declare_clippy_lint { - (@ - $(#[doc = $lit:literal])* - pub $lint_name:ident, - $level:ident, - $lintcategory:expr, - $desc:literal, - $version_expr:expr, - $version_lit:literal - $(, $eval_always: literal)? - ) => { - rustc_session::declare_tool_lint! { - $(#[doc = $lit])* - #[clippy::version = $version_lit] - pub clippy::$lint_name, - $level, - $desc, - report_in_external_macro:true - $(, @eval_always = $eval_always)? - } - - pub(crate) static ${concat($lint_name, _INFO)}: &'static crate::LintInfo = &crate::LintInfo { - lint: &$lint_name, - category: $lintcategory, - explanation: concat!($($lit,"\n",)*), - location: concat!(file!(), "#L", line!()), - version: $version_expr - }; - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - restriction, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Allow, crate::LintCategory::Restriction, $desc, - Some($version), $version - $(, $eval_always)? - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - style, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! 
{@ - $(#[doc = $lit])* - pub $lint_name, Warn, crate::LintCategory::Style, $desc, - Some($version), $version - $(, $eval_always)? - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - correctness, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Deny, crate::LintCategory::Correctness, $desc, - Some($version), $version - $(, $eval_always)? - - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - perf, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Warn, crate::LintCategory::Perf, $desc, - Some($version), $version - $(, $eval_always)? - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - complexity, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Warn, crate::LintCategory::Complexity, $desc, - Some($version), $version - $(, $eval_always)? - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - suspicious, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Warn, crate::LintCategory::Suspicious, $desc, - Some($version), $version - $(, $eval_always)? - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - nursery, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Allow, crate::LintCategory::Nursery, $desc, - Some($version), $version - $(, $eval_always)? 
- } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - pedantic, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Allow, crate::LintCategory::Pedantic, $desc, - Some($version), $version - $(, $eval_always)? - } - }; - ( - $(#[doc = $lit:literal])* - #[clippy::version = $version:literal] - pub $lint_name:ident, - cargo, - $desc:literal - $(, @eval_always = $eval_always: literal)? - ) => { - declare_clippy_lint! {@ - $(#[doc = $lit])* - pub $lint_name, Allow, crate::LintCategory::Cargo, $desc, - Some($version), $version - $(, $eval_always)? - } - }; -} diff --git a/src/tools/clippy/clippy_lints/src/declared_lints.rs b/src/tools/clippy/clippy_lints/src/declared_lints.rs index 1e3907d9ede0..c3f8e02b4c06 100644 --- a/src/tools/clippy/clippy_lints/src/declared_lints.rs +++ b/src/tools/clippy/clippy_lints/src/declared_lints.rs @@ -2,7 +2,7 @@ // Use that command to update this file and do not edit by hand. // Manual edits will be overwritten. 
-pub static LINTS: &[&crate::LintInfo] = &[ +pub static LINTS: &[&::declare_clippy_lint::LintInfo] = &[ crate::absolute_paths::ABSOLUTE_PATHS_INFO, crate::almost_complete_range::ALMOST_COMPLETE_RANGE_INFO, crate::approx_const::APPROX_CONSTANT_INFO, @@ -112,6 +112,7 @@ pub static LINTS: &[&crate::LintInfo] = &[ crate::disallowed_names::DISALLOWED_NAMES_INFO, crate::disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS_INFO, crate::disallowed_types::DISALLOWED_TYPES_INFO, + crate::doc::DOC_BROKEN_LINK_INFO, crate::doc::DOC_COMMENT_DOUBLE_SPACE_LINEBREAKS_INFO, crate::doc::DOC_INCLUDE_WITHOUT_CFG_INFO, crate::doc::DOC_LAZY_CONTINUATION_INFO, @@ -590,6 +591,7 @@ pub static LINTS: &[&crate::LintInfo] = &[ crate::operators::IMPOSSIBLE_COMPARISONS_INFO, crate::operators::INEFFECTIVE_BIT_MASK_INFO, crate::operators::INTEGER_DIVISION_INFO, + crate::operators::MANUAL_IS_MULTIPLE_OF_INFO, crate::operators::MANUAL_MIDPOINT_INFO, crate::operators::MISREFACTORED_ASSIGN_OP_INFO, crate::operators::MODULO_ARITHMETIC_INFO, diff --git a/src/tools/clippy/clippy_lints/src/disallowed_macros.rs b/src/tools/clippy/clippy_lints/src/disallowed_macros.rs index 9814d4fa84f9..d55aeae98ede 100644 --- a/src/tools/clippy/clippy_lints/src/disallowed_macros.rs +++ b/src/tools/clippy/clippy_lints/src/disallowed_macros.rs @@ -40,6 +40,9 @@ declare_clippy_lint! { /// # When using an inline table, can add a `reason` for why the macro /// # is disallowed. 
/// { path = "serde::Serialize", reason = "no serializing" }, + /// # This would normally error if the path is incorrect, but with `allow-invalid` = `true`, + /// # it will be silently ignored + /// { path = "std::invalid_macro", reason = "use alternative instead", allow-invalid = true } /// ] /// ``` /// ```no_run diff --git a/src/tools/clippy/clippy_lints/src/disallowed_methods.rs b/src/tools/clippy/clippy_lints/src/disallowed_methods.rs index fb970e17f38f..8c067432cb4e 100644 --- a/src/tools/clippy/clippy_lints/src/disallowed_methods.rs +++ b/src/tools/clippy/clippy_lints/src/disallowed_methods.rs @@ -34,6 +34,9 @@ declare_clippy_lint! { /// { path = "std::vec::Vec::leak", reason = "no leaking memory" }, /// # Can also add a `replacement` that will be offered as a suggestion. /// { path = "std::sync::Mutex::new", reason = "prefer faster & simpler non-poisonable mutex", replacement = "parking_lot::Mutex::new" }, + /// # This would normally error if the path is incorrect, but with `allow-invalid` = `true`, + /// # it will be silently ignored + /// { path = "std::fs::InvalidPath", reason = "use alternative instead", allow-invalid = true }, /// ] /// ``` /// diff --git a/src/tools/clippy/clippy_lints/src/disallowed_types.rs b/src/tools/clippy/clippy_lints/src/disallowed_types.rs index 7875cdd77e86..9a82327a0d7b 100644 --- a/src/tools/clippy/clippy_lints/src/disallowed_types.rs +++ b/src/tools/clippy/clippy_lints/src/disallowed_types.rs @@ -35,6 +35,9 @@ declare_clippy_lint! { /// { path = "std::net::Ipv4Addr", reason = "no IPv4 allowed" }, /// # Can also add a `replacement` that will be offered as a suggestion. 
/// { path = "std::sync::Mutex", reason = "prefer faster & simpler non-poisonable mutex", replacement = "parking_lot::Mutex" }, + /// # This would normally error if the path is incorrect, but with `allow-invalid` = `true`, + /// # it will be silently ignored + /// { path = "std::invalid::Type", reason = "use alternative instead", allow-invalid = true } /// ] /// ``` /// diff --git a/src/tools/clippy/clippy_lints/src/doc/broken_link.rs b/src/tools/clippy/clippy_lints/src/doc/broken_link.rs new file mode 100644 index 000000000000..4af10510023d --- /dev/null +++ b/src/tools/clippy/clippy_lints/src/doc/broken_link.rs @@ -0,0 +1,83 @@ +use clippy_utils::diagnostics::span_lint; +use pulldown_cmark::BrokenLink as PullDownBrokenLink; +use rustc_lint::LateContext; +use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range}; +use rustc_span::{BytePos, Pos, Span}; + +use super::DOC_BROKEN_LINK; + +/// Scan and report broken link on documents. +/// It ignores false positives detected by `pulldown_cmark`, and only +/// warns users when the broken link is consider a URL. +// NOTE: We don't check these other cases because +// rustdoc itself will check and warn about it: +// - When a link url is broken across multiple lines in the URL path part +// - When a link tag is missing the close parenthesis character at the end. +// - When a link has whitespace within the url link. 
+pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) { + warn_if_broken_link(cx, bl, doc, fragments); +} + +fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) { + if let Some((span, _)) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) { + let mut len = 0; + + // grab raw link data + let (_, raw_link) = doc.split_at(bl.span.start); + + // strip off link text part + let raw_link = match raw_link.split_once(']') { + None => return, + Some((prefix, suffix)) => { + len += prefix.len() + 1; + suffix + }, + }; + + let raw_link = match raw_link.split_once('(') { + None => return, + Some((prefix, suffix)) => { + if !prefix.is_empty() { + // there is text between ']' and '(' chars, so it is not a valid link + return; + } + len += prefix.len() + 1; + suffix + }, + }; + + if raw_link.starts_with("(http") { + // reduce chances of false positive reports + // by limiting this checking only to http/https links. 
+ return; + } + + for c in raw_link.chars() { + if c == ')' { + // it is a valid link + return; + } + + if c == '\n' { + report_broken_link(cx, span, len); + break; + } + + len += 1; + } + } +} + +fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize) { + let start = frag_span.lo(); + let end = start + BytePos::from_usize(offset); + + let span = Span::new(start, end, frag_span.ctxt(), frag_span.parent()); + + span_lint( + cx, + DOC_BROKEN_LINK, + span, + "possible broken doc link: broken across multiple lines", + ); +} diff --git a/src/tools/clippy/clippy_lints/src/doc/doc_suspicious_footnotes.rs b/src/tools/clippy/clippy_lints/src/doc/doc_suspicious_footnotes.rs index 289b6b915d46..3330cc5defd3 100644 --- a/src/tools/clippy/clippy_lints/src/doc/doc_suspicious_footnotes.rs +++ b/src/tools/clippy/clippy_lints/src/doc/doc_suspicious_footnotes.rs @@ -1,4 +1,5 @@ use clippy_utils::diagnostics::span_lint_and_then; +use rustc_ast::attr::AttributeExt as _; use rustc_ast::token::CommentKind; use rustc_errors::Applicability; use rustc_hir::{AttrStyle, Attribute}; @@ -43,13 +44,15 @@ pub fn check(cx: &LateContext<'_>, doc: &str, range: Range, fragments: &F "looks like a footnote ref, but has no matching footnote", |diag| { if this_fragment.kind == DocFragmentKind::SugaredDoc { - let (doc_attr, (_, doc_attr_comment_kind)) = attrs + let (doc_attr, (_, doc_attr_comment_kind), attr_style) = attrs .iter() .filter(|attr| attr.span().overlaps(this_fragment.span)) .rev() - .find_map(|attr| Some((attr, attr.doc_str_and_comment_kind()?))) + .find_map(|attr| { + Some((attr, attr.doc_str_and_comment_kind()?, attr.doc_resolution_scope()?)) + }) .unwrap(); - let (to_add, terminator) = match (doc_attr_comment_kind, doc_attr.style()) { + let (to_add, terminator) = match (doc_attr_comment_kind, attr_style) { (CommentKind::Line, AttrStyle::Outer) => ("\n///\n/// ", ""), (CommentKind::Line, AttrStyle::Inner) => ("\n//!\n//! 
", ""), (CommentKind::Block, AttrStyle::Outer) => ("\n/** ", " */"), diff --git a/src/tools/clippy/clippy_lints/src/doc/mod.rs b/src/tools/clippy/clippy_lints/src/doc/mod.rs index e0fc2fd93474..5ea55e102dfe 100644 --- a/src/tools/clippy/clippy_lints/src/doc/mod.rs +++ b/src/tools/clippy/clippy_lints/src/doc/mod.rs @@ -24,6 +24,7 @@ use rustc_span::edition::Edition; use std::ops::Range; use url::Url; +mod broken_link; mod doc_comment_double_space_linebreaks; mod doc_suspicious_footnotes; mod include_in_doc_without_cfg; @@ -292,6 +293,34 @@ declare_clippy_lint! { "possible typo for an intra-doc link" } +declare_clippy_lint! { + /// ### What it does + /// Checks the doc comments have unbroken links, mostly caused + /// by bad formatted links such as broken across multiple lines. + /// + /// ### Why is this bad? + /// Because documentation generated by rustdoc will be broken + /// since expected links won't be links and just text. + /// + /// ### Examples + /// This link is broken: + /// ```no_run + /// /// [example of a bad link](https:// + /// /// github.com/rust-lang/rust-clippy/) + /// pub fn do_something() {} + /// ``` + /// + /// It shouldn't be broken across multiple lines to work: + /// ```no_run + /// /// [example of a good link](https://github.com/rust-lang/rust-clippy/) + /// pub fn do_something() {} + /// ``` + #[clippy::version = "1.84.0"] + pub DOC_BROKEN_LINK, + pedantic, + "broken document link" +} + declare_clippy_lint! { /// ### What it does /// Checks for the doc comments of publicly visible @@ -656,6 +685,7 @@ impl Documentation { impl_lint_pass!(Documentation => [ DOC_LINK_CODE, DOC_LINK_WITH_QUOTES, + DOC_BROKEN_LINK, DOC_MARKDOWN, DOC_NESTED_REFDEFS, MISSING_SAFETY_DOC, @@ -765,8 +795,8 @@ impl Fragments<'_> { /// get the span for the markdown range. Note that this function is not cheap, use it with /// caution. 
#[must_use] - fn span(&self, cx: &LateContext<'_>, range: Range) -> Option { - source_span_for_markdown_range(cx.tcx, self.doc, &range, self.fragments) + fn span(self, cx: &LateContext<'_>, range: Range) -> Option { + source_span_for_markdown_range(cx.tcx, self.doc, &range, self.fragments).map(|(sp, _)| sp) } } @@ -786,9 +816,9 @@ struct DocHeaders { /// back in the various late lint pass methods if they need the final doc headers, like "Safety" or /// "Panics" sections. fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[Attribute]) -> Option { - /// We don't want the parser to choke on intra doc links. Since we don't - /// actually care about rendering them, just pretend that all broken links - /// point to a fake address. + // We don't want the parser to choke on intra doc links. Since we don't + // actually care about rendering them, just pretend that all broken links + // point to a fake address. #[expect(clippy::unnecessary_wraps)] // we're following a type signature fn fake_broken_link_callback<'a>(_: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> { Some(("fake".into(), "fake".into())) @@ -828,14 +858,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[ return Some(DocHeaders::default()); } - let mut cb = fake_broken_link_callback; - check_for_code_clusters( cx, pulldown_cmark::Parser::new_with_broken_link_callback( &doc, main_body_opts() - Options::ENABLE_SMART_PUNCTUATION, - Some(&mut cb), + Some(&mut fake_broken_link_callback), ) .into_offset_iter(), &doc, @@ -845,9 +873,17 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[ }, ); + // NOTE: check_doc uses it own cb function, + // to avoid causing duplicated diagnostics for the broken link checker. 
+ let mut full_fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> { + broken_link::check(cx, &bl, &doc, &fragments); + Some(("fake".into(), "fake".into())) + }; + // disable smart punctuation to pick up ['link'] more easily let opts = main_body_opts() - Options::ENABLE_SMART_PUNCTUATION; - let parser = pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut cb)); + let parser = + pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut full_fake_broken_link_callback)); Some(check_doc( cx, diff --git a/src/tools/clippy/clippy_lints/src/doc/needless_doctest_main.rs b/src/tools/clippy/clippy_lints/src/doc/needless_doctest_main.rs index ec4538039a91..74283d7ba863 100644 --- a/src/tools/clippy/clippy_lints/src/doc/needless_doctest_main.rs +++ b/src/tools/clippy/clippy_lints/src/doc/needless_doctest_main.rs @@ -42,9 +42,8 @@ pub fn check( let mut test_attr_spans = vec![]; let filename = FileName::anon_source_code(&code); - let fallback_bundle = - rustc_errors::fallback_fluent_bundle(rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(), false); - let emitter = HumanEmitter::new(Box::new(io::sink()), fallback_bundle); + let translator = rustc_driver::default_translator(); + let emitter = HumanEmitter::new(Box::new(io::sink()), translator); let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings(); #[expect(clippy::arc_with_non_send_sync)] // `Arc` is expected by with_dcx let sm = Arc::new(SourceMap::new(FilePathMapping::empty())); @@ -72,6 +71,7 @@ pub fn check( if !ignore { get_test_spans(&item, *ident, &mut test_attr_spans); } + let is_async = matches!(sig.header.coroutine_kind, Some(CoroutineKind::Async { .. })); let returns_nothing = match &sig.decl.output { FnRetTy::Default(..) 
=> true, @@ -90,9 +90,14 @@ pub fn check( // Another function was found; this case is ignored for needless_doctest_main ItemKind::Fn(fn_) => { eligible = false; - if !ignore { - get_test_spans(&item, fn_.ident, &mut test_attr_spans); + if ignore { + // If ignore is active invalidating one lint, + // and we already found another function thus + // invalidating the other one, we have no + // business continuing. + return (false, test_attr_spans); } + get_test_spans(&item, fn_.ident, &mut test_attr_spans); }, // Tests with one of these items are ignored ItemKind::Static(..) @@ -105,7 +110,10 @@ pub fn check( }, Ok(None) => break, Err(e) => { - e.cancel(); + // See issue #15041. When calling `.cancel()` on the `Diag`, Clippy will unexpectedly panic + // when the `Diag` is unwinded. Meanwhile, we can just call `.emit()`, since the `DiagCtxt` + // is just a sink, nothing will be printed. + e.emit(); return (false, test_attr_spans); }, } @@ -120,6 +128,18 @@ pub fn check( let trailing_whitespace = text.len() - text.trim_end().len(); + // We currently only test for "fn main". Checking for the real + // entrypoint (with tcx.entry_fn(())) in each block would be unnecessarily + // expensive, as those are probably intended and relevant. Same goes for + // macros and other weird ways of declaring a main function. + // + // Also, as we only check for attribute names and don't do macro expansion, + // we can check only for #[test] + + if !((text.contains("main") && text.contains("fn")) || text.contains("#[test]")) { + return; + } + // Because of the global session, we need to create a new session in a different thread with // the edition we need. 
let text = text.to_owned(); diff --git a/src/tools/clippy/clippy_lints/src/empty_line_after.rs b/src/tools/clippy/clippy_lints/src/empty_line_after.rs index 0c5f8bbf4ca5..3bd74856165d 100644 --- a/src/tools/clippy/clippy_lints/src/empty_line_after.rs +++ b/src/tools/clippy/clippy_lints/src/empty_line_after.rs @@ -10,7 +10,7 @@ use rustc_errors::{Applicability, Diag, SuggestionStyle}; use rustc_lexer::TokenKind; use rustc_lint::{EarlyContext, EarlyLintPass, LintContext}; use rustc_session::impl_lint_pass; -use rustc_span::{BytePos, ExpnKind, Ident, InnerSpan, Span, SpanData, Symbol, kw}; +use rustc_span::{BytePos, ExpnKind, Ident, InnerSpan, Span, SpanData, Symbol, kw, sym}; declare_clippy_lint! { /// ### What it does @@ -129,10 +129,55 @@ struct Stop { kind: StopKind, first: usize, last: usize, + name: Option, } impl Stop { - fn convert_to_inner(&self) -> (Span, String) { + fn is_outer_attr_only(&self) -> bool { + let Some(name) = self.name else { + return false; + }; + // Check if the attribute only has effect when as an outer attribute + // The below attributes are collected from the builtin attributes of The Rust Reference + // https://doc.rust-lang.org/reference/attributes.html#r-attributes.builtin + // And the comments below are from compiler errors and warnings + matches!( + name, + // Cannot be used at crate level + sym::repr | sym::test | sym::derive | sym::automatically_derived | sym::path | sym::global_allocator | + // Only has an effect on macro definitions + sym::macro_export | + // Only be applied to trait definitions + sym::on_unimplemented | + // Only be placed on trait implementations + sym::do_not_recommend | + // Only has an effect on items + sym::ignore | sym::should_panic | sym::proc_macro | sym::proc_macro_derive | sym::proc_macro_attribute | + // Has no effect when applied to a module + sym::must_use | + // Should be applied to a foreign function or static + sym::link_name | sym::link_ordinal | sym::link_section | + // Should be applied to an 
`extern crate` item + sym::no_link | + // Should be applied to a free function, impl method or static + sym::export_name | sym::no_mangle | + // Should be applied to a `static` variable + sym::used | + // Should be applied to function or closure + sym::inline | + // Should be applied to a function definition + sym::cold | sym::target_feature | sym::track_caller | sym::instruction_set | + // Should be applied to a struct or enum + sym::non_exhaustive | + // Note: No any warning when it as an inner attribute, but it has no effect + sym::panic_handler + ) + } + + fn convert_to_inner(&self) -> Option<(Span, String)> { + if self.is_outer_attr_only() { + return None; + } let inner = match self.kind { // #![...] StopKind::Attr => InnerSpan::new(1, 1), @@ -140,7 +185,7 @@ impl Stop { // ^ ^ StopKind::Doc(_) => InnerSpan::new(2, 3), }; - (self.span.from_inner(inner), "!".into()) + Some((self.span.from_inner(inner), "!".into())) } fn comment_out(&self, cx: &EarlyContext<'_>, suggestions: &mut Vec<(Span, String)>) { @@ -177,6 +222,7 @@ impl Stop { }, first: file.lookup_line(file.relative_position(lo))?, last: file.lookup_line(file.relative_position(hi))?, + name: attr.name(), }) } } @@ -356,6 +402,12 @@ impl EmptyLineAfter { if let Some(parent) = self.items.iter().rev().nth(1) && (parent.kind == "module" || parent.kind == "crate") && parent.mod_items == Some(id) + && let suggestions = gaps + .iter() + .flat_map(|gap| gap.prev_chunk) + .filter_map(Stop::convert_to_inner) + .collect::>() + && !suggestions.is_empty() { let desc = if parent.kind == "module" { "parent module" @@ -367,10 +419,7 @@ impl EmptyLineAfter { StopKind::Attr => format!("if the attribute should apply to the {desc} use an inner attribute"), StopKind::Doc(_) => format!("if the comment should document the {desc} use an inner doc comment"), }, - gaps.iter() - .flat_map(|gap| gap.prev_chunk) - .map(Stop::convert_to_inner) - .collect(), + suggestions, Applicability::MaybeIncorrect, ); } @@ -425,6 +474,7 @@ impl 
EmptyLineAfter { first: line.line, // last doesn't need to be accurate here, we don't compare it with anything last: line.line, + name: None, }); } diff --git a/src/tools/clippy/clippy_lints/src/eta_reduction.rs b/src/tools/clippy/clippy_lints/src/eta_reduction.rs index 6ed7c87915b2..0288747d6f3e 100644 --- a/src/tools/clippy/clippy_lints/src/eta_reduction.rs +++ b/src/tools/clippy/clippy_lints/src/eta_reduction.rs @@ -1,4 +1,4 @@ -use clippy_utils::diagnostics::{span_lint_and_sugg, span_lint_and_then}; +use clippy_utils::diagnostics::span_lint_hir_and_then; use clippy_utils::higher::VecArgs; use clippy_utils::source::{snippet_opt, snippet_with_applicability}; use clippy_utils::ty::get_type_diagnostic_name; @@ -7,6 +7,7 @@ use clippy_utils::{ get_path_from_caller_to_method_type, is_adjusted, is_no_std_crate, path_to_local, path_to_local_id, }; use rustc_abi::ExternAbi; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_errors::Applicability; use rustc_hir::{BindingMode, Expr, ExprKind, FnRetTy, GenericArgs, Param, PatKind, QPath, Safety, TyKind}; use rustc_infer::infer::TyCtxtInferExt; @@ -108,14 +109,20 @@ fn check_closure<'tcx>(cx: &LateContext<'tcx>, outer_receiver: Option<&Expr<'tcx { let vec_crate = if is_no_std_crate(cx) { "alloc" } else { "std" }; // replace `|| vec![]` with `Vec::new` - span_lint_and_sugg( + span_lint_hir_and_then( cx, REDUNDANT_CLOSURE, + expr.hir_id, expr.span, "redundant closure", - "replace the closure with `Vec::new`", - format!("{vec_crate}::vec::Vec::new"), - Applicability::MachineApplicable, + |diag| { + diag.span_suggestion( + expr.span, + "replace the closure with `Vec::new`", + format!("{vec_crate}::vec::Vec::new"), + Applicability::MachineApplicable, + ); + }, ); } // skip `foo(|| macro!())` @@ -155,7 +162,7 @@ fn check_closure<'tcx>(cx: &LateContext<'tcx>, outer_receiver: Option<&Expr<'tcx let sig = match callee_ty_adjusted.kind() { ty::FnDef(def, _) => { // Rewriting `x(|| f())` to `x(f)` where f is marked 
`#[track_caller]` moves the `Location` - if cx.tcx.has_attr(*def, sym::track_caller) { + if find_attr!(cx.tcx.get_all_attrs(*def), AttributeKind::TrackCaller(..)) { return; } @@ -197,46 +204,53 @@ fn check_closure<'tcx>(cx: &LateContext<'tcx>, outer_receiver: Option<&Expr<'tcx // For now ignore all callee types which reference a type parameter. && !generic_args.types().any(|t| matches!(t.kind(), ty::Param(_))) { - span_lint_and_then(cx, REDUNDANT_CLOSURE, expr.span, "redundant closure", |diag| { - if let Some(mut snippet) = snippet_opt(cx, callee.span) { - if path_to_local(callee).is_some_and(|l| { - // FIXME: Do we really need this `local_used_in` check? - // Isn't it checking something like... `callee(callee)`? - // If somehow this check is needed, add some test for it, - // 'cuz currently nothing changes after deleting this check. - local_used_in(cx, l, args) || local_used_after_expr(cx, l, expr) - }) { - match cx - .tcx - .infer_ctxt() - .build(cx.typing_mode()) - .err_ctxt() - .type_implements_fn_trait( - cx.param_env, - Binder::bind_with_vars(callee_ty_adjusted, List::empty()), - ty::PredicatePolarity::Positive, - ) { - // Mutable closure is used after current expr; we cannot consume it. - Ok((ClosureKind::FnMut, _)) => snippet = format!("&mut {snippet}"), - Ok((ClosureKind::Fn, _)) if !callee_ty_raw.is_ref() => { - snippet = format!("&{snippet}"); - }, - _ => (), + span_lint_hir_and_then( + cx, + REDUNDANT_CLOSURE, + expr.hir_id, + expr.span, + "redundant closure", + |diag| { + if let Some(mut snippet) = snippet_opt(cx, callee.span) { + if path_to_local(callee).is_some_and(|l| { + // FIXME: Do we really need this `local_used_in` check? + // Isn't it checking something like... `callee(callee)`? + // If somehow this check is needed, add some test for it, + // 'cuz currently nothing changes after deleting this check. 
+ local_used_in(cx, l, args) || local_used_after_expr(cx, l, expr) + }) { + match cx + .tcx + .infer_ctxt() + .build(cx.typing_mode()) + .err_ctxt() + .type_implements_fn_trait( + cx.param_env, + Binder::bind_with_vars(callee_ty_adjusted, List::empty()), + ty::PredicatePolarity::Positive, + ) { + // Mutable closure is used after current expr; we cannot consume it. + Ok((ClosureKind::FnMut, _)) => snippet = format!("&mut {snippet}"), + Ok((ClosureKind::Fn, _)) if !callee_ty_raw.is_ref() => { + snippet = format!("&{snippet}"); + }, + _ => (), + } } + diag.span_suggestion( + expr.span, + "replace the closure with the function itself", + snippet, + Applicability::MachineApplicable, + ); } - diag.span_suggestion( - expr.span, - "replace the closure with the function itself", - snippet, - Applicability::MachineApplicable, - ); - } - }); + }, + ); } }, ExprKind::MethodCall(path, self_, args, _) if check_inputs(typeck, body.params, Some(self_), args) => { if let Some(method_def_id) = typeck.type_dependent_def_id(body.value.hir_id) - && !cx.tcx.has_attr(method_def_id, sym::track_caller) + && !find_attr!(cx.tcx.get_all_attrs(method_def_id), AttributeKind::TrackCaller(..)) && check_sig(closure_sig, cx.tcx.fn_sig(method_def_id).skip_binder().skip_binder()) { let mut app = Applicability::MachineApplicable; @@ -244,9 +258,10 @@ fn check_closure<'tcx>(cx: &LateContext<'tcx>, outer_receiver: Option<&Expr<'tcx Some(span) => format!("::{}", snippet_with_applicability(cx, span, "<..>", &mut app)), None => String::new(), }; - span_lint_and_then( + span_lint_hir_and_then( cx, REDUNDANT_CLOSURE_FOR_METHOD_CALLS, + expr.hir_id, expr.span, "redundant closure", |diag| { diff --git a/src/tools/clippy/clippy_lints/src/exhaustive_items.rs b/src/tools/clippy/clippy_lints/src/exhaustive_items.rs index 1fb0e4d24d06..86d9038ec45d 100644 --- a/src/tools/clippy/clippy_lints/src/exhaustive_items.rs +++ b/src/tools/clippy/clippy_lints/src/exhaustive_items.rs @@ -76,7 +76,7 @@ impl LateLintPass<'_> 
for ExhaustiveItems { "exported enums should not be exhaustive", [].as_slice(), ), - ItemKind::Struct(_, _, v) => ( + ItemKind::Struct(_, _, v) if v.fields().iter().all(|f| f.default.is_none()) => ( EXHAUSTIVE_STRUCTS, "exported structs should not be exhaustive", v.fields(), diff --git a/src/tools/clippy/clippy_lints/src/floating_point_arithmetic.rs b/src/tools/clippy/clippy_lints/src/floating_point_arithmetic.rs index 3c7e83b06972..b3c9e8607589 100644 --- a/src/tools/clippy/clippy_lints/src/floating_point_arithmetic.rs +++ b/src/tools/clippy/clippy_lints/src/floating_point_arithmetic.rs @@ -5,14 +5,13 @@ use clippy_utils::{ eq_expr_value, get_parent_expr, higher, is_in_const_context, is_inherent_method_call, is_no_std_crate, numeric_literal, peel_blocks, sugg, sym, }; +use rustc_ast::ast; use rustc_errors::Applicability; use rustc_hir::{BinOpKind, Expr, ExprKind, PathSegment, UnOp}; use rustc_lint::{LateContext, LateLintPass}; use rustc_middle::ty; use rustc_session::declare_lint_pass; use rustc_span::source_map::Spanned; - -use rustc_ast::ast; use std::f32::consts as f32_consts; use std::f64::consts as f64_consts; use sugg::Sugg; diff --git a/src/tools/clippy/clippy_lints/src/functions/must_use.rs b/src/tools/clippy/clippy_lints/src/functions/must_use.rs index 70655838b6af..d959981a83ce 100644 --- a/src/tools/clippy/clippy_lints/src/functions/must_use.rs +++ b/src/tools/clippy/clippy_lints/src/functions/must_use.rs @@ -14,6 +14,8 @@ use clippy_utils::source::SpanRangeExt; use clippy_utils::ty::is_must_use_ty; use clippy_utils::visitors::for_each_expr_without_closures; use clippy_utils::{return_ty, trait_ref_of_method}; +use rustc_attr_data_structures::{AttributeKind, find_attr}; +use rustc_span::Symbol; use rustc_trait_selection::error_reporting::InferCtxtErrorExt; use core::ops::ControlFlow; @@ -22,7 +24,7 @@ use super::{DOUBLE_MUST_USE, MUST_USE_CANDIDATE, MUST_USE_UNIT}; pub(super) fn check_item<'tcx>(cx: &LateContext<'tcx>, item: &'tcx hir::Item<'_>) { let 
attrs = cx.tcx.hir_attrs(item.hir_id()); - let attr = cx.tcx.get_attr(item.owner_id, sym::must_use); + let attr = find_attr!(cx.tcx.hir_attrs(item.hir_id()), AttributeKind::MustUse { span, reason } => (span, reason)); if let hir::ItemKind::Fn { ref sig, body: ref body_id, @@ -31,9 +33,19 @@ pub(super) fn check_item<'tcx>(cx: &LateContext<'tcx>, item: &'tcx hir::Item<'_> { let is_public = cx.effective_visibilities.is_exported(item.owner_id.def_id); let fn_header_span = item.span.with_hi(sig.decl.output.span().hi()); - if let Some(attr) = attr { - check_needless_must_use(cx, sig.decl, item.owner_id, item.span, fn_header_span, attr, attrs, sig); - } else if is_public && !is_proc_macro(attrs) && !attrs.iter().any(|a| a.has_name(sym::no_mangle)) { + if let Some((attr_span, reason)) = attr { + check_needless_must_use( + cx, + sig.decl, + item.owner_id, + item.span, + fn_header_span, + *attr_span, + *reason, + attrs, + sig, + ); + } else if is_public && !is_proc_macro(attrs) && !find_attr!(attrs, AttributeKind::NoMangle(..)) { check_must_use_candidate( cx, sig.decl, @@ -52,9 +64,20 @@ pub(super) fn check_impl_item<'tcx>(cx: &LateContext<'tcx>, item: &'tcx hir::Imp let is_public = cx.effective_visibilities.is_exported(item.owner_id.def_id); let fn_header_span = item.span.with_hi(sig.decl.output.span().hi()); let attrs = cx.tcx.hir_attrs(item.hir_id()); - let attr = cx.tcx.get_attr(item.owner_id, sym::must_use); - if let Some(attr) = attr { - check_needless_must_use(cx, sig.decl, item.owner_id, item.span, fn_header_span, attr, attrs, sig); + let attr = + find_attr!(cx.tcx.hir_attrs(item.hir_id()), AttributeKind::MustUse { span, reason } => (span, reason)); + if let Some((attr_span, reason)) = attr { + check_needless_must_use( + cx, + sig.decl, + item.owner_id, + item.span, + fn_header_span, + *attr_span, + *reason, + attrs, + sig, + ); } else if is_public && !is_proc_macro(attrs) && trait_ref_of_method(cx, item.owner_id).is_none() { check_must_use_candidate( cx, @@ -75,9 
+98,20 @@ pub(super) fn check_trait_item<'tcx>(cx: &LateContext<'tcx>, item: &'tcx hir::Tr let fn_header_span = item.span.with_hi(sig.decl.output.span().hi()); let attrs = cx.tcx.hir_attrs(item.hir_id()); - let attr = cx.tcx.get_attr(item.owner_id, sym::must_use); - if let Some(attr) = attr { - check_needless_must_use(cx, sig.decl, item.owner_id, item.span, fn_header_span, attr, attrs, sig); + let attr = + find_attr!(cx.tcx.hir_attrs(item.hir_id()), AttributeKind::MustUse { span, reason } => (span, reason)); + if let Some((attr_span, reason)) = attr { + check_needless_must_use( + cx, + sig.decl, + item.owner_id, + item.span, + fn_header_span, + *attr_span, + *reason, + attrs, + sig, + ); } else if let hir::TraitFn::Provided(eid) = *eid { let body = cx.tcx.hir_body(eid); if attr.is_none() && is_public && !is_proc_macro(attrs) { @@ -103,7 +137,8 @@ fn check_needless_must_use( item_id: hir::OwnerId, item_span: Span, fn_header_span: Span, - attr: &Attribute, + attr_span: Span, + reason: Option, attrs: &[Attribute], sig: &FnSig<'_>, ) { @@ -118,12 +153,7 @@ fn check_needless_must_use( fn_header_span, "this unit-returning function has a `#[must_use]` attribute", |diag| { - diag.span_suggestion( - attr.span(), - "remove the attribute", - "", - Applicability::MachineApplicable, - ); + diag.span_suggestion(attr_span, "remove the attribute", "", Applicability::MachineApplicable); }, ); } else { @@ -137,11 +167,11 @@ fn check_needless_must_use( MUST_USE_UNIT, fn_header_span, "this unit-returning function has a `#[must_use]` attribute", - Some(attr.span()), + Some(attr_span), "remove `must_use`", ); } - } else if attr.value_str().is_none() && is_must_use_ty(cx, return_ty(cx, item_id)) { + } else if reason.is_none() && is_must_use_ty(cx, return_ty(cx, item_id)) { // Ignore async functions unless Future::Output type is a must_use type if sig.header.is_async() { let infcx = cx.tcx.infer_ctxt().build(cx.typing_mode()); diff --git a/src/tools/clippy/clippy_lints/src/if_not_else.rs 
b/src/tools/clippy/clippy_lints/src/if_not_else.rs index 45f9aa0a53e4..ab7a965b3672 100644 --- a/src/tools/clippy/clippy_lints/src/if_not_else.rs +++ b/src/tools/clippy/clippy_lints/src/if_not_else.rs @@ -1,4 +1,4 @@ -use clippy_utils::consts::{ConstEvalCtxt, Constant}; +use clippy_utils::consts::is_zero_integer_const; use clippy_utils::diagnostics::{span_lint_and_help, span_lint_and_sugg}; use clippy_utils::is_else_clause; use clippy_utils::source::{HasSession, indent_of, reindent_multiline, snippet}; @@ -48,13 +48,6 @@ declare_clippy_lint! { declare_lint_pass!(IfNotElse => [IF_NOT_ELSE]); -fn is_zero_const(expr: &Expr<'_>, cx: &LateContext<'_>) -> bool { - if let Some(value) = ConstEvalCtxt::new(cx).eval_simple(expr) { - return Constant::Int(0) == value; - } - false -} - impl LateLintPass<'_> for IfNotElse { fn check_expr(&mut self, cx: &LateContext<'_>, e: &Expr<'_>) { if let ExprKind::If(cond, cond_inner, Some(els)) = e.kind @@ -68,7 +61,7 @@ impl LateLintPass<'_> for IfNotElse { ), // Don't lint on `… != 0`, as these are likely to be bit tests. // For example, `if foo & 0x0F00 != 0 { … } else { … }` is already in the "proper" order. 
- ExprKind::Binary(op, _, rhs) if op.node == BinOpKind::Ne && !is_zero_const(rhs, cx) => ( + ExprKind::Binary(op, _, rhs) if op.node == BinOpKind::Ne && !is_zero_integer_const(cx, rhs) => ( "unnecessary `!=` operation", "change to `==` and swap the blocks of the `if`/`else`", ), diff --git a/src/tools/clippy/clippy_lints/src/inline_fn_without_body.rs b/src/tools/clippy/clippy_lints/src/inline_fn_without_body.rs index 617c006795be..ffe6ad14f630 100644 --- a/src/tools/clippy/clippy_lints/src/inline_fn_without_body.rs +++ b/src/tools/clippy/clippy_lints/src/inline_fn_without_body.rs @@ -1,6 +1,6 @@ use clippy_utils::diagnostics::span_lint_and_then; use clippy_utils::sugg::DiagExt; -use rustc_attr_data_structures::{find_attr, AttributeKind}; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_errors::Applicability; use rustc_hir::{TraitFn, TraitItem, TraitItemKind}; use rustc_lint::{LateContext, LateLintPass}; @@ -33,10 +33,10 @@ impl<'tcx> LateLintPass<'tcx> for InlineFnWithoutBody { fn check_trait_item(&mut self, cx: &LateContext<'tcx>, item: &'tcx TraitItem<'_>) { if let TraitItemKind::Fn(_, TraitFn::Required(_)) = item.kind && let Some(attr_span) = find_attr!(cx - .tcx - .hir_attrs(item.hir_id()), - AttributeKind::Inline(_, span) => *span - ) + .tcx + .hir_attrs(item.hir_id()), + AttributeKind::Inline(_, span) => *span + ) { span_lint_and_then( cx, diff --git a/src/tools/clippy/clippy_lints/src/lib.rs b/src/tools/clippy/clippy_lints/src/lib.rs index be9142b17fe3..96a6dee58852 100644 --- a/src/tools/clippy/clippy_lints/src/lib.rs +++ b/src/tools/clippy/clippy_lints/src/lib.rs @@ -59,10 +59,10 @@ extern crate smallvec; extern crate thin_vec; #[macro_use] -mod declare_clippy_lint; +extern crate clippy_utils; #[macro_use] -extern crate clippy_utils; +extern crate declare_clippy_lint; mod utils; @@ -411,108 +411,9 @@ mod zombie_processes; use clippy_config::{Conf, get_configuration_metadata, sanitize_explanation}; use 
clippy_utils::macros::FormatArgsStorage; use rustc_data_structures::fx::FxHashSet; -use rustc_lint::{Lint, LintId}; +use rustc_lint::Lint; use utils::attr_collector::{AttrCollector, AttrStorage}; -#[derive(Default)] -struct RegistrationGroups { - all: Vec, - cargo: Vec, - complexity: Vec, - correctness: Vec, - nursery: Vec, - pedantic: Vec, - perf: Vec, - restriction: Vec, - style: Vec, - suspicious: Vec, -} - -impl RegistrationGroups { - #[rustfmt::skip] - fn register(self, store: &mut rustc_lint::LintStore) { - store.register_group(true, "clippy::all", Some("clippy_all"), self.all); - store.register_group(true, "clippy::cargo", Some("clippy_cargo"), self.cargo); - store.register_group(true, "clippy::complexity", Some("clippy_complexity"), self.complexity); - store.register_group(true, "clippy::correctness", Some("clippy_correctness"), self.correctness); - store.register_group(true, "clippy::nursery", Some("clippy_nursery"), self.nursery); - store.register_group(true, "clippy::pedantic", Some("clippy_pedantic"), self.pedantic); - store.register_group(true, "clippy::perf", Some("clippy_perf"), self.perf); - store.register_group(true, "clippy::restriction", Some("clippy_restriction"), self.restriction); - store.register_group(true, "clippy::style", Some("clippy_style"), self.style); - store.register_group(true, "clippy::suspicious", Some("clippy_suspicious"), self.suspicious); - } -} - -#[derive(Copy, Clone, Debug)] -pub(crate) enum LintCategory { - Cargo, - Complexity, - Correctness, - Nursery, - Pedantic, - Perf, - Restriction, - Style, - Suspicious, -} - -#[allow(clippy::enum_glob_use)] -use LintCategory::*; - -impl LintCategory { - fn is_all(self) -> bool { - matches!(self, Correctness | Suspicious | Style | Complexity | Perf) - } - - fn group(self, groups: &mut RegistrationGroups) -> &mut Vec { - match self { - Cargo => &mut groups.cargo, - Complexity => &mut groups.complexity, - Correctness => &mut groups.correctness, - Nursery => &mut groups.nursery, - 
Pedantic => &mut groups.pedantic, - Perf => &mut groups.perf, - Restriction => &mut groups.restriction, - Style => &mut groups.style, - Suspicious => &mut groups.suspicious, - } - } -} - -pub struct LintInfo { - /// Double reference to maintain pointer equality - pub lint: &'static &'static Lint, - category: LintCategory, - pub explanation: &'static str, - /// e.g. `clippy_lints/src/absolute_paths.rs#43` - pub location: &'static str, - pub version: Option<&'static str>, -} - -impl LintInfo { - /// Returns the lint name in lowercase without the `clippy::` prefix - #[allow(clippy::missing_panics_doc)] - pub fn name_lower(&self) -> String { - self.lint.name.strip_prefix("clippy::").unwrap().to_ascii_lowercase() - } - - /// Returns the name of the lint's category in lowercase (`style`, `pedantic`) - pub fn category_str(&self) -> &'static str { - match self.category { - Cargo => "cargo", - Complexity => "complexity", - Correctness => "correctness", - Nursery => "nursery", - Pedantic => "pedantic", - Perf => "perf", - Restriction => "restriction", - Style => "style", - Suspicious => "suspicious", - } - } -} - pub fn explain(name: &str) -> i32 { let target = format!("clippy::{}", name.to_ascii_uppercase()); @@ -535,30 +436,11 @@ pub fn explain(name: &str) -> i32 { } } -fn register_categories(store: &mut rustc_lint::LintStore) { - let mut groups = RegistrationGroups::default(); - - for LintInfo { lint, category, .. } in declared_lints::LINTS { - if category.is_all() { - groups.all.push(LintId::of(lint)); - } - - category.group(&mut groups).push(LintId::of(lint)); - } - - let lints: Vec<&'static Lint> = declared_lints::LINTS.iter().map(|info| *info.lint).collect(); - - store.register_lints(&lints); - groups.register(store); -} - /// Register all lints and lint groups with the rustc lint store /// /// Used in `./src/driver.rs`. 
#[expect(clippy::too_many_lines)] -pub fn register_lints(store: &mut rustc_lint::LintStore, conf: &'static Conf) { - register_categories(store); - +pub fn register_lint_passes(store: &mut rustc_lint::LintStore, conf: &'static Conf) { for (old_name, new_name) in deprecated_lints::RENAMED { store.register_renamed(old_name, new_name); } diff --git a/src/tools/clippy/clippy_lints/src/loops/same_item_push.rs b/src/tools/clippy/clippy_lints/src/loops/same_item_push.rs index 388034c39f52..e792edbe23e0 100644 --- a/src/tools/clippy/clippy_lints/src/loops/same_item_push.rs +++ b/src/tools/clippy/clippy_lints/src/loops/same_item_push.rs @@ -163,15 +163,14 @@ impl<'tcx> Visitor<'tcx> for SameItemPushVisitor<'_, 'tcx> { StmtKind::Expr(expr) | StmtKind::Semi(expr) => self.visit_expr(expr), _ => {}, } + } + // Current statement is a push ...check whether another + // push had been previously done + else if self.vec_push.is_none() { + self.vec_push = vec_push_option; } else { - // Current statement is a push ...check whether another - // push had been previously done - if self.vec_push.is_none() { - self.vec_push = vec_push_option; - } else { - // There are multiple pushes ... don't lint - self.multiple_pushes = true; - } + // There are multiple pushes ... 
don't lint + self.multiple_pushes = true; } } } diff --git a/src/tools/clippy/clippy_lints/src/manual_let_else.rs b/src/tools/clippy/clippy_lints/src/manual_let_else.rs index 0b3bec714c0e..9ff82cdcb664 100644 --- a/src/tools/clippy/clippy_lints/src/manual_let_else.rs +++ b/src/tools/clippy/clippy_lints/src/manual_let_else.rs @@ -13,7 +13,6 @@ use rustc_errors::Applicability; use rustc_hir::def::{CtorOf, DefKind, Res}; use rustc_hir::{Arm, Expr, ExprKind, HirId, MatchSource, Pat, PatExpr, PatExprKind, PatKind, QPath, Stmt, StmtKind}; use rustc_lint::{LateContext, LintContext}; - use rustc_span::Span; use rustc_span::symbol::{Symbol, sym}; use std::slice; diff --git a/src/tools/clippy/clippy_lints/src/manual_option_as_slice.rs b/src/tools/clippy/clippy_lints/src/manual_option_as_slice.rs index b55c11f2d5b6..922db174e3d4 100644 --- a/src/tools/clippy/clippy_lints/src/manual_option_as_slice.rs +++ b/src/tools/clippy/clippy_lints/src/manual_option_as_slice.rs @@ -1,7 +1,7 @@ use clippy_config::Conf; use clippy_utils::diagnostics::{span_lint, span_lint_and_sugg}; use clippy_utils::msrvs::Msrv; -use clippy_utils::{is_none_arm, msrvs, paths, peel_hir_expr_refs, sym}; +use clippy_utils::{is_none_arm, msrvs, peel_hir_expr_refs, sym}; use rustc_errors::Applicability; use rustc_hir::def::{DefKind, Res}; use rustc_hir::{Arm, Expr, ExprKind, LangItem, Pat, PatKind, QPath, is_range_literal}; @@ -220,5 +220,5 @@ fn is_empty_slice(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { } fn is_slice_from_ref(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { - paths::SLICE_FROM_REF.matches_path(cx, expr) + clippy_utils::is_path_diagnostic_item(cx, expr, sym::slice_from_ref) } diff --git a/src/tools/clippy/clippy_lints/src/matches/manual_ok_err.rs b/src/tools/clippy/clippy_lints/src/matches/manual_ok_err.rs index 4959908dad63..edbb556fd976 100644 --- a/src/tools/clippy/clippy_lints/src/matches/manual_ok_err.rs +++ b/src/tools/clippy/clippy_lints/src/matches/manual_ok_err.rs @@ -1,9 +1,9 @@ 
use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::source::{indent_of, reindent_multiline}; use clippy_utils::sugg::Sugg; -use clippy_utils::ty::option_arg_ty; +use clippy_utils::ty::{option_arg_ty, peel_mid_ty_refs_is_mutable}; use clippy_utils::{get_parent_expr, is_res_lang_ctor, path_res, peel_blocks, span_contains_comment}; -use rustc_ast::BindingMode; +use rustc_ast::{BindingMode, Mutability}; use rustc_errors::Applicability; use rustc_hir::LangItem::{OptionNone, OptionSome, ResultErr}; use rustc_hir::def::{DefKind, Res}; @@ -133,7 +133,21 @@ fn apply_lint(cx: &LateContext<'_>, expr: &Expr<'_>, scrutinee: &Expr<'_>, is_ok Applicability::MachineApplicable }; let scrut = Sugg::hir_with_applicability(cx, scrutinee, "..", &mut app).maybe_paren(); - let sugg = format!("{scrut}.{method}()"); + + let scrutinee_ty = cx.typeck_results().expr_ty(scrutinee); + let (_, n_ref, mutability) = peel_mid_ty_refs_is_mutable(scrutinee_ty); + let prefix = if n_ref > 0 { + if mutability == Mutability::Mut { + ".as_mut()" + } else { + ".as_ref()" + } + } else { + "" + }; + + let sugg = format!("{scrut}{prefix}.{method}()"); + // If the expression being expanded is the `if …` part of an `else if …`, it must be blockified. 
let sugg = if let Some(parent_expr) = get_parent_expr(cx, expr) && let ExprKind::If(_, _, Some(else_part)) = parent_expr.kind diff --git a/src/tools/clippy/clippy_lints/src/matches/match_wild_enum.rs b/src/tools/clippy/clippy_lints/src/matches/match_wild_enum.rs index 24b4a6758004..70a03ff93762 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_wild_enum.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_wild_enum.rs @@ -1,4 +1,5 @@ use clippy_utils::diagnostics::{span_lint_and_sugg, span_lint_and_then}; +use clippy_utils::source::SpanRangeExt; use clippy_utils::ty::is_type_diagnostic_item; use clippy_utils::{is_refutable, peel_hir_pat_refs, recurse_or_patterns}; use rustc_errors::Applicability; @@ -116,11 +117,12 @@ pub(crate) fn check(cx: &LateContext<'_>, ex: &Expr<'_>, arms: &[Arm<'_>]) { let format_suggestion = |variant: &VariantDef| { format!( "{}{}{}{}", - if let Some(ident) = wildcard_ident { - format!("{} @ ", ident.name) - } else { - String::new() - }, + wildcard_ident.map_or(String::new(), |ident| { + ident + .span + .get_source_text(cx) + .map_or_else(|| format!("{} @ ", ident.name), |s| format!("{s} @ ")) + }), if let CommonPrefixSearcher::Path(path_prefix) = path_prefix { let mut s = String::new(); for seg in path_prefix { @@ -138,7 +140,7 @@ pub(crate) fn check(cx: &LateContext<'_>, ex: &Expr<'_>, arms: &[Arm<'_>]) { Some(CtorKind::Fn) if variant.fields.len() == 1 => "(_)", Some(CtorKind::Fn) => "(..)", Some(CtorKind::Const) => "", - None => "{ .. }", + None => " { .. 
}", } ) }; diff --git a/src/tools/clippy/clippy_lints/src/methods/io_other_error.rs b/src/tools/clippy/clippy_lints/src/methods/io_other_error.rs index ec4b9c7ae2ee..9276261606e1 100644 --- a/src/tools/clippy/clippy_lints/src/methods/io_other_error.rs +++ b/src/tools/clippy/clippy_lints/src/methods/io_other_error.rs @@ -1,6 +1,6 @@ use clippy_utils::diagnostics::span_lint_and_then; use clippy_utils::msrvs::{self, Msrv}; -use clippy_utils::{expr_or_init, paths}; +use clippy_utils::{expr_or_init, is_path_diagnostic_item, sym}; use rustc_errors::Applicability; use rustc_hir::{Expr, ExprKind, QPath}; use rustc_lint::LateContext; @@ -10,8 +10,11 @@ pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, path: &Expr<'_>, args && !expr.span.from_expansion() && !error_kind.span.from_expansion() && let ExprKind::Path(QPath::TypeRelative(_, new_segment)) = path.kind - && paths::IO_ERROR_NEW.matches_path(cx, path) - && paths::IO_ERRORKIND_OTHER_CTOR.matches_path(cx, expr_or_init(cx, error_kind)) + && is_path_diagnostic_item(cx, path, sym::io_error_new) + && let ExprKind::Path(QPath::Resolved(_, init_path)) = &expr_or_init(cx, error_kind).kind + && let [.., error_kind_ty, error_kind_variant] = init_path.segments + && cx.tcx.is_diagnostic_item(sym::io_errorkind, error_kind_ty.res.def_id()) + && error_kind_variant.ident.name == sym::Other && msrv.meets(cx, msrvs::IO_ERROR_OTHER) { span_lint_and_then( diff --git a/src/tools/clippy/clippy_lints/src/methods/mod.rs b/src/tools/clippy/clippy_lints/src/methods/mod.rs index 347960e0003d..f2dabdd34387 100644 --- a/src/tools/clippy/clippy_lints/src/methods/mod.rs +++ b/src/tools/clippy/clippy_lints/src/methods/mod.rs @@ -4426,7 +4426,7 @@ declare_clippy_lint! 
{ /// ```no_run /// use std::io::{BufReader, Read}; /// use std::fs::File; - /// let file = BufReader::new(std::fs::File::open("./bytes.txt").unwrap()); + /// let file = BufReader::new(File::open("./bytes.txt").unwrap()); /// file.bytes(); /// ``` #[clippy::version = "1.87.0"] diff --git a/src/tools/clippy/clippy_lints/src/methods/or_fun_call.rs b/src/tools/clippy/clippy_lints/src/methods/or_fun_call.rs index 7bdd999bbbad..2139466ce746 100644 --- a/src/tools/clippy/clippy_lints/src/methods/or_fun_call.rs +++ b/src/tools/clippy/clippy_lints/src/methods/or_fun_call.rs @@ -136,7 +136,7 @@ pub(super) fn check<'tcx>( fun_span: Option, ) -> bool { // (path, fn_has_argument, methods, suffix) - const KNOW_TYPES: [(Symbol, bool, &[Symbol], &str); 4] = [ + const KNOW_TYPES: [(Symbol, bool, &[Symbol], &str); 5] = [ (sym::BTreeEntry, false, &[sym::or_insert], "with"), (sym::HashMapEntry, false, &[sym::or_insert], "with"), ( @@ -145,16 +145,17 @@ pub(super) fn check<'tcx>( &[sym::map_or, sym::ok_or, sym::or, sym::unwrap_or], "else", ), - (sym::Result, true, &[sym::or, sym::unwrap_or], "else"), + (sym::Option, false, &[sym::get_or_insert], "with"), + (sym::Result, true, &[sym::map_or, sym::or, sym::unwrap_or], "else"), ]; if KNOW_TYPES.iter().any(|k| k.2.contains(&name)) && switch_to_lazy_eval(cx, arg) && !contains_return(arg) && let self_ty = cx.typeck_results().expr_ty(self_expr) - && let Some(&(_, fn_has_arguments, poss, suffix)) = - KNOW_TYPES.iter().find(|&&i| is_type_diagnostic_item(cx, self_ty, i.0)) - && poss.contains(&name) + && let Some(&(_, fn_has_arguments, _, suffix)) = KNOW_TYPES + .iter() + .find(|&&i| is_type_diagnostic_item(cx, self_ty, i.0) && i.2.contains(&name)) { let ctxt = span.ctxt(); let mut app = Applicability::HasPlaceholders; diff --git a/src/tools/clippy/clippy_lints/src/missing_inline.rs b/src/tools/clippy/clippy_lints/src/missing_inline.rs index f835bbb7c561..25c95d234363 100644 --- a/src/tools/clippy/clippy_lints/src/missing_inline.rs +++ 
b/src/tools/clippy/clippy_lints/src/missing_inline.rs @@ -1,5 +1,5 @@ use clippy_utils::diagnostics::span_lint; -use rustc_attr_data_structures::{find_attr, AttributeKind}; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_hir as hir; use rustc_hir::Attribute; use rustc_lint::{LateContext, LateLintPass, LintContext}; diff --git a/src/tools/clippy/clippy_lints/src/needless_borrows_for_generic_args.rs b/src/tools/clippy/clippy_lints/src/needless_borrows_for_generic_args.rs index 8d2f8029112f..17d251a7bbb9 100644 --- a/src/tools/clippy/clippy_lints/src/needless_borrows_for_generic_args.rs +++ b/src/tools/clippy/clippy_lints/src/needless_borrows_for_generic_args.rs @@ -161,7 +161,7 @@ fn path_has_args(p: &QPath<'_>) -> bool { /// - `Copy` itself, or /// - the only use of a mutable reference, or /// - not a variable (created by a function call) -#[expect(clippy::too_many_arguments)] +#[expect(clippy::too_many_arguments, clippy::too_many_lines)] fn needless_borrow_count<'tcx>( cx: &LateContext<'tcx>, possible_borrowers: &mut Vec<(LocalDefId, PossibleBorrowerMap<'tcx, 'tcx>)>, @@ -232,11 +232,11 @@ fn needless_borrow_count<'tcx>( let mut args_with_referent_ty = callee_args.to_vec(); let mut check_reference_and_referent = |reference: &Expr<'tcx>, referent: &Expr<'tcx>| { - if let ExprKind::Field(base, _) = &referent.kind { - let base_ty = cx.typeck_results().expr_ty(base); - if drop_trait_def_id.is_some_and(|id| implements_trait(cx, base_ty, id, &[])) { - return false; - } + if let ExprKind::Field(base, _) = &referent.kind + && let base_ty = cx.typeck_results().expr_ty(base) + && drop_trait_def_id.is_some_and(|id| implements_trait(cx, base_ty, id, &[])) + { + return false; } let referent_ty = cx.typeck_results().expr_ty(referent); diff --git a/src/tools/clippy/clippy_lints/src/needless_parens_on_range_literals.rs b/src/tools/clippy/clippy_lints/src/needless_parens_on_range_literals.rs index 8a62106377c5..021a11593f3a 100644 --- 
a/src/tools/clippy/clippy_lints/src/needless_parens_on_range_literals.rs +++ b/src/tools/clippy/clippy_lints/src/needless_parens_on_range_literals.rs @@ -5,7 +5,6 @@ use clippy_utils::source::{snippet, snippet_with_applicability}; use rustc_ast::ast; use rustc_errors::Applicability; use rustc_hir::{Expr, ExprKind}; - use rustc_lint::{LateContext, LateLintPass}; use rustc_session::declare_lint_pass; diff --git a/src/tools/clippy/clippy_lints/src/needless_pass_by_value.rs b/src/tools/clippy/clippy_lints/src/needless_pass_by_value.rs index 9aede1dec934..c97ecce75b46 100644 --- a/src/tools/clippy/clippy_lints/src/needless_pass_by_value.rs +++ b/src/tools/clippy/clippy_lints/src/needless_pass_by_value.rs @@ -124,8 +124,10 @@ impl<'tcx> LateLintPass<'tcx> for NeedlessPassByValue { // Note that we do not want to deal with qualified predicates here. match pred.kind().no_bound_vars() { Some(ty::ClauseKind::Trait(pred)) - if pred.def_id() != sized_trait && pred.def_id() != meta_sized_trait - => Some(pred), + if pred.def_id() != sized_trait && pred.def_id() != meta_sized_trait => + { + Some(pred) + }, _ => None, } }) diff --git a/src/tools/clippy/clippy_lints/src/no_mangle_with_rust_abi.rs b/src/tools/clippy/clippy_lints/src/no_mangle_with_rust_abi.rs index b71dde906918..dee8efeb2910 100644 --- a/src/tools/clippy/clippy_lints/src/no_mangle_with_rust_abi.rs +++ b/src/tools/clippy/clippy_lints/src/no_mangle_with_rust_abi.rs @@ -1,8 +1,9 @@ use clippy_utils::diagnostics::span_lint_and_then; use clippy_utils::source::{snippet, snippet_with_applicability}; use rustc_abi::ExternAbi; +use rustc_attr_data_structures::AttributeKind; use rustc_errors::Applicability; -use rustc_hir::{Item, ItemKind}; +use rustc_hir::{Attribute, Item, ItemKind}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::declare_lint_pass; use rustc_span::{BytePos, Pos}; @@ -44,8 +45,7 @@ impl<'tcx> LateLintPass<'tcx> for NoMangleWithRustAbi { let mut app = Applicability::MaybeIncorrect; let 
fn_snippet = snippet_with_applicability(cx, fn_sig.span.with_hi(ident.span.lo()), "..", &mut app); for attr in attrs { - if let Some(ident) = attr.ident() - && ident.name == rustc_span::sym::no_mangle + if let Attribute::Parsed(AttributeKind::NoMangle(attr_span)) = attr && fn_sig.header.abi == ExternAbi::Rust && let Some((fn_attrs, _)) = fn_snippet.rsplit_once("fn") && !fn_attrs.contains("extern") @@ -54,7 +54,7 @@ impl<'tcx> LateLintPass<'tcx> for NoMangleWithRustAbi { .span .with_lo(fn_sig.span.lo() + BytePos::from_usize(fn_attrs.len())) .shrink_to_lo(); - let attr_snippet = snippet(cx, attr.span(), ".."); + let attr_snippet = snippet(cx, *attr_span, ".."); span_lint_and_then( cx, diff --git a/src/tools/clippy/clippy_lints/src/non_copy_const.rs b/src/tools/clippy/clippy_lints/src/non_copy_const.rs index a27c6aa75e36..5f10e1968f1d 100644 --- a/src/tools/clippy/clippy_lints/src/non_copy_const.rs +++ b/src/tools/clippy/clippy_lints/src/non_copy_const.rs @@ -617,7 +617,7 @@ impl<'tcx> NonCopyConst<'tcx> { // Then a type check. Note we only check the type here as the result // gets cached. - let ty = EarlyBinder::bind(typeck.expr_ty(src_expr)).instantiate(tcx, init_args); + let ty = typeck.expr_ty(src_expr); // Normalized as we need to check if this is an array later. 
let ty = tcx.try_normalize_erasing_regions(typing_env, ty).unwrap_or(ty); if self.is_ty_freeze(tcx, typing_env, ty).is_freeze() { diff --git a/src/tools/clippy/clippy_lints/src/operators/identity_op.rs b/src/tools/clippy/clippy_lints/src/operators/identity_op.rs index e1fd09549a4b..3efbb8963587 100644 --- a/src/tools/clippy/clippy_lints/src/operators/identity_op.rs +++ b/src/tools/clippy/clippy_lints/src/operators/identity_op.rs @@ -1,12 +1,13 @@ -use clippy_utils::consts::{ConstEvalCtxt, Constant, FullInt}; +use clippy_utils::consts::{ConstEvalCtxt, Constant, FullInt, integer_const, is_zero_integer_const}; use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::source::snippet_with_applicability; -use clippy_utils::{clip, peel_hir_expr_refs, unsext}; +use clippy_utils::{ExprUseNode, clip, expr_use_ctxt, peel_hir_expr_refs, unsext}; use rustc_errors::Applicability; -use rustc_hir::{BinOpKind, Expr, ExprKind, Node}; +use rustc_hir::def::{DefKind, Res}; +use rustc_hir::{BinOpKind, Expr, ExprKind, Node, Path, QPath}; use rustc_lint::LateContext; use rustc_middle::ty; -use rustc_span::Span; +use rustc_span::{Span, kw}; use super::IDENTITY_OP; @@ -17,7 +18,7 @@ pub(crate) fn check<'tcx>( left: &'tcx Expr<'_>, right: &'tcx Expr<'_>, ) { - if !is_allowed(cx, op, left, right) { + if !is_allowed(cx, expr, op, left, right) { return; } @@ -165,14 +166,27 @@ fn needs_parenthesis(cx: &LateContext<'_>, binary: &Expr<'_>, child: &Expr<'_>) Parens::Needed } -fn is_allowed(cx: &LateContext<'_>, cmp: BinOpKind, left: &Expr<'_>, right: &Expr<'_>) -> bool { +fn is_allowed<'tcx>( + cx: &LateContext<'tcx>, + expr: &'tcx Expr<'tcx>, + cmp: BinOpKind, + left: &Expr<'tcx>, + right: &Expr<'tcx>, +) -> bool { + // Exclude case where the left or right side is associated function call returns a type which is + // `Self` that is not given explicitly, and the expression is not a let binding's init + // expression and the let binding has a type annotation, or a function's return 
value. + if (is_assoc_fn_without_type_instance(cx, left) || is_assoc_fn_without_type_instance(cx, right)) + && !is_expr_used_with_type_annotation(cx, expr) + { + return false; + } + // This lint applies to integers and their references cx.typeck_results().expr_ty(left).peel_refs().is_integral() && cx.typeck_results().expr_ty(right).peel_refs().is_integral() // `1 << 0` is a common pattern in bit manipulation code - && !(cmp == BinOpKind::Shl - && ConstEvalCtxt::new(cx).eval_simple(right) == Some(Constant::Int(0)) - && ConstEvalCtxt::new(cx).eval_simple(left) == Some(Constant::Int(1))) + && !(cmp == BinOpKind::Shl && is_zero_integer_const(cx, right) && integer_const(cx, left) == Some(1)) } fn check_remainder(cx: &LateContext<'_>, left: &Expr<'_>, right: &Expr<'_>, span: Span, arg: Span) { @@ -234,3 +248,47 @@ fn span_ineffective_operation( applicability, ); } + +fn is_expr_used_with_type_annotation<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> bool { + match expr_use_ctxt(cx, expr).use_node(cx) { + ExprUseNode::LetStmt(letstmt) => letstmt.ty.is_some(), + ExprUseNode::Return(_) => true, + _ => false, + } +} + +/// Check if the expression is an associated function without a type instance. +/// Example: +/// ``` +/// trait Def { +/// fn def() -> Self; +/// } +/// impl Def for usize { +/// fn def() -> Self { +/// 0 +/// } +/// } +/// fn test() { +/// let _ = 0usize + &Default::default(); +/// let _ = 0usize + &Def::def(); +/// } +/// ``` +fn is_assoc_fn_without_type_instance<'tcx>(cx: &LateContext<'tcx>, expr: &Expr<'tcx>) -> bool { + if let ExprKind::Call(func, _) = peel_hir_expr_refs(expr).0.kind + && let ExprKind::Path(QPath::Resolved( + // If it's not None, don't need to go further. + None, + Path { + res: Res::Def(DefKind::AssocFn, def_id), + .. + }, + )) = func.kind + && let output_ty = cx.tcx.fn_sig(def_id).instantiate_identity().skip_binder().output() + && let ty::Param(ty::ParamTy { + name: kw::SelfUpper, .. 
+ }) = output_ty.kind() + { + return true; + } + false +} diff --git a/src/tools/clippy/clippy_lints/src/operators/manual_is_multiple_of.rs b/src/tools/clippy/clippy_lints/src/operators/manual_is_multiple_of.rs new file mode 100644 index 000000000000..821178a43158 --- /dev/null +++ b/src/tools/clippy/clippy_lints/src/operators/manual_is_multiple_of.rs @@ -0,0 +1,66 @@ +use clippy_utils::consts::is_zero_integer_const; +use clippy_utils::diagnostics::span_lint_and_sugg; +use clippy_utils::msrvs::{self, Msrv}; +use clippy_utils::sugg::Sugg; +use rustc_ast::BinOpKind; +use rustc_errors::Applicability; +use rustc_hir::{Expr, ExprKind}; +use rustc_lint::LateContext; +use rustc_middle::ty; + +use super::MANUAL_IS_MULTIPLE_OF; + +pub(super) fn check<'tcx>( + cx: &LateContext<'tcx>, + expr: &Expr<'_>, + op: BinOpKind, + lhs: &'tcx Expr<'tcx>, + rhs: &'tcx Expr<'tcx>, + msrv: Msrv, +) { + if msrv.meets(cx, msrvs::UNSIGNED_IS_MULTIPLE_OF) + && let Some(operand) = uint_compare_to_zero(cx, op, lhs, rhs) + && let ExprKind::Binary(operand_op, operand_left, operand_right) = operand.kind + && operand_op.node == BinOpKind::Rem + { + let mut app = Applicability::MachineApplicable; + let divisor = Sugg::hir_with_applicability(cx, operand_right, "_", &mut app); + span_lint_and_sugg( + cx, + MANUAL_IS_MULTIPLE_OF, + expr.span, + "manual implementation of `.is_multiple_of()`", + "replace with", + format!( + "{}{}.is_multiple_of({divisor})", + if op == BinOpKind::Eq { "" } else { "!" 
}, + Sugg::hir_with_applicability(cx, operand_left, "_", &mut app).maybe_paren() + ), + app, + ); + } +} + +// If we have a `x == 0`, `x != 0` or `x > 0` (or the reverted ones), return the non-zero operand +fn uint_compare_to_zero<'tcx>( + cx: &LateContext<'tcx>, + op: BinOpKind, + lhs: &'tcx Expr<'tcx>, + rhs: &'tcx Expr<'tcx>, +) -> Option<&'tcx Expr<'tcx>> { + let operand = if matches!(lhs.kind, ExprKind::Binary(..)) + && matches!(op, BinOpKind::Eq | BinOpKind::Ne | BinOpKind::Gt) + && is_zero_integer_const(cx, rhs) + { + lhs + } else if matches!(rhs.kind, ExprKind::Binary(..)) + && matches!(op, BinOpKind::Eq | BinOpKind::Ne | BinOpKind::Lt) + && is_zero_integer_const(cx, lhs) + { + rhs + } else { + return None; + }; + + matches!(cx.typeck_results().expr_ty_adjusted(operand).kind(), ty::Uint(_)).then_some(operand) +} diff --git a/src/tools/clippy/clippy_lints/src/operators/mod.rs b/src/tools/clippy/clippy_lints/src/operators/mod.rs index 2f4e8e995886..bdbbb3475cd5 100644 --- a/src/tools/clippy/clippy_lints/src/operators/mod.rs +++ b/src/tools/clippy/clippy_lints/src/operators/mod.rs @@ -11,6 +11,7 @@ mod float_cmp; mod float_equality_without_abs; mod identity_op; mod integer_division; +mod manual_is_multiple_of; mod manual_midpoint; mod misrefactored_assign_op; mod modulo_arithmetic; @@ -830,12 +831,42 @@ declare_clippy_lint! { "manual implementation of `midpoint` which can overflow" } +declare_clippy_lint! { + /// ### What it does + /// Checks for manual implementation of `.is_multiple_of()` on + /// unsigned integer types. + /// + /// ### Why is this bad? + /// `a.is_multiple_of(b)` is a clearer way to check for divisibility + /// of `a` by `b`. This expression can never panic. 
+ /// + /// ### Example + /// ```no_run + /// # let (a, b) = (3u64, 4u64); + /// if a % b == 0 { + /// println!("{a} is divisible by {b}"); + /// } + /// ``` + /// Use instead: + /// ```no_run + /// # let (a, b) = (3u64, 4u64); + /// if a.is_multiple_of(b) { + /// println!("{a} is divisible by {b}"); + /// } + /// ``` + #[clippy::version = "1.89.0"] + pub MANUAL_IS_MULTIPLE_OF, + complexity, + "manual implementation of `.is_multiple_of()`" +} + pub struct Operators { arithmetic_context: numeric_arithmetic::Context, verbose_bit_mask_threshold: u64, modulo_arithmetic_allow_comparison_to_zero: bool, msrv: Msrv, } + impl Operators { pub fn new(conf: &'static Conf) -> Self { Self { @@ -874,6 +905,7 @@ impl_lint_pass!(Operators => [ NEEDLESS_BITWISE_BOOL, SELF_ASSIGNMENT, MANUAL_MIDPOINT, + MANUAL_IS_MULTIPLE_OF, ]); impl<'tcx> LateLintPass<'tcx> for Operators { @@ -891,6 +923,7 @@ impl<'tcx> LateLintPass<'tcx> for Operators { identity_op::check(cx, e, op.node, lhs, rhs); needless_bitwise_bool::check(cx, e, op.node, lhs, rhs); manual_midpoint::check(cx, e, op.node, lhs, rhs, self.msrv); + manual_is_multiple_of::check(cx, e, op.node, lhs, rhs, self.msrv); } self.arithmetic_context.check_binary(cx, e, op.node, lhs, rhs); bit_mask::check(cx, e, op.node, lhs, rhs); diff --git a/src/tools/clippy/clippy_lints/src/pass_by_ref_or_value.rs b/src/tools/clippy/clippy_lints/src/pass_by_ref_or_value.rs index e18bdfb34ac8..b8005dfd6f8e 100644 --- a/src/tools/clippy/clippy_lints/src/pass_by_ref_or_value.rs +++ b/src/tools/clippy/clippy_lints/src/pass_by_ref_or_value.rs @@ -3,10 +3,10 @@ use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::source::snippet; use clippy_utils::ty::{for_each_top_level_late_bound_region, is_copy}; use clippy_utils::{is_self, is_self_ty}; -use rustc_attr_data_structures::{find_attr, AttributeKind, InlineAttr}; -use rustc_data_structures::fx::FxHashSet; use core::ops::ControlFlow; use rustc_abi::ExternAbi; +use 
rustc_attr_data_structures::{AttributeKind, InlineAttr, find_attr}; +use rustc_data_structures::fx::FxHashSet; use rustc_errors::Applicability; use rustc_hir as hir; use rustc_hir::intravisit::FnKind; diff --git a/src/tools/clippy/clippy_lints/src/question_mark.rs b/src/tools/clippy/clippy_lints/src/question_mark.rs index c02e5e0621c9..de12a25b03df 100644 --- a/src/tools/clippy/clippy_lints/src/question_mark.rs +++ b/src/tools/clippy/clippy_lints/src/question_mark.rs @@ -142,6 +142,7 @@ fn check_let_some_else_return_none(cx: &LateContext<'_>, stmt: &Stmt<'_>) { && let Some(ret) = find_let_else_ret_expression(els) && let Some(inner_pat) = pat_and_expr_can_be_question_mark(cx, pat, ret) && !span_contains_comment(cx.tcx.sess.source_map(), els.span) + && !span_contains_cfg(cx, els.span) { let mut applicability = Applicability::MaybeIncorrect; let init_expr_str = Sugg::hir_with_applicability(cx, init_expr, "..", &mut applicability).maybe_paren(); diff --git a/src/tools/clippy/clippy_lints/src/question_mark_used.rs b/src/tools/clippy/clippy_lints/src/question_mark_used.rs index 96ea485d7693..7bbbd0d25acf 100644 --- a/src/tools/clippy/clippy_lints/src/question_mark_used.rs +++ b/src/tools/clippy/clippy_lints/src/question_mark_used.rs @@ -1,5 +1,4 @@ use clippy_utils::diagnostics::span_lint_and_then; - use clippy_utils::macros::span_is_local; use rustc_hir::{Expr, ExprKind, MatchSource}; use rustc_lint::{LateContext, LateLintPass}; diff --git a/src/tools/clippy/clippy_lints/src/read_zero_byte_vec.rs b/src/tools/clippy/clippy_lints/src/read_zero_byte_vec.rs index 6b1dc864fb7a..acd840401c6b 100644 --- a/src/tools/clippy/clippy_lints/src/read_zero_byte_vec.rs +++ b/src/tools/clippy/clippy_lints/src/read_zero_byte_vec.rs @@ -3,11 +3,10 @@ use clippy_utils::higher::{VecInitKind, get_vec_init_kind}; use clippy_utils::source::snippet; use clippy_utils::{get_enclosing_block, sym}; -use hir::{Expr, ExprKind, HirId, LetStmt, PatKind, PathSegment, QPath, StmtKind}; use 
rustc_errors::Applicability; -use rustc_hir as hir; use rustc_hir::def::Res; use rustc_hir::intravisit::{Visitor, walk_expr}; +use rustc_hir::{self as hir, Expr, ExprKind, HirId, LetStmt, PatKind, PathSegment, QPath, StmtKind}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::declare_lint_pass; diff --git a/src/tools/clippy/clippy_lints/src/return_self_not_must_use.rs b/src/tools/clippy/clippy_lints/src/return_self_not_must_use.rs index 07ae92fa9843..25929b853af8 100644 --- a/src/tools/clippy/clippy_lints/src/return_self_not_must_use.rs +++ b/src/tools/clippy/clippy_lints/src/return_self_not_must_use.rs @@ -1,12 +1,13 @@ use clippy_utils::diagnostics::span_lint_and_help; use clippy_utils::ty::is_must_use_ty; use clippy_utils::{nth_arg, return_ty}; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_hir::def_id::LocalDefId; use rustc_hir::intravisit::FnKind; use rustc_hir::{Body, FnDecl, OwnerId, TraitItem, TraitItemKind}; use rustc_lint::{LateContext, LateLintPass, LintContext}; use rustc_session::declare_lint_pass; -use rustc_span::{Span, sym}; +use rustc_span::Span; declare_clippy_lint! { /// ### What it does @@ -74,7 +75,10 @@ fn check_method(cx: &LateContext<'_>, decl: &FnDecl<'_>, fn_def: LocalDefId, spa // We only show this warning for public exported methods. && cx.effective_visibilities.is_exported(fn_def) // We don't want to emit this lint if the `#[must_use]` attribute is already there. - && !cx.tcx.hir_attrs(owner_id.into()).iter().any(|attr| attr.has_name(sym::must_use)) + && !find_attr!( + cx.tcx.hir_attrs(owner_id.into()), + AttributeKind::MustUse { .. 
} + ) && cx.tcx.visibility(fn_def.to_def_id()).is_public() && let ret_ty = return_ty(cx, owner_id) && let self_arg = nth_arg(cx, owner_id, 0) diff --git a/src/tools/clippy/clippy_lints/src/single_component_path_imports.rs b/src/tools/clippy/clippy_lints/src/single_component_path_imports.rs index 62939912304b..38cf7e3822a1 100644 --- a/src/tools/clippy/clippy_lints/src/single_component_path_imports.rs +++ b/src/tools/clippy/clippy_lints/src/single_component_path_imports.rs @@ -219,22 +219,21 @@ impl SingleComponentPathImports { } } } - } else { - // keep track of `use self::some_module` usages - if segments[0].ident.name == kw::SelfLower { - // simple case such as `use self::module::SomeStruct` - if segments.len() > 1 { - imports_reused_with_self.push(segments[1].ident.name); - return; - } + } + // keep track of `use self::some_module` usages + else if segments[0].ident.name == kw::SelfLower { + // simple case such as `use self::module::SomeStruct` + if segments.len() > 1 { + imports_reused_with_self.push(segments[1].ident.name); + return; + } - // nested case such as `use self::{module1::Struct1, module2::Struct2}` - if let UseTreeKind::Nested { items, .. } = &use_tree.kind { - for tree in items { - let segments = &tree.0.prefix.segments; - if !segments.is_empty() { - imports_reused_with_self.push(segments[0].ident.name); - } + // nested case such as `use self::{module1::Struct1, module2::Struct2}` + if let UseTreeKind::Nested { items, .. 
} = &use_tree.kind { + for tree in items { + let segments = &tree.0.prefix.segments; + if !segments.is_empty() { + imports_reused_with_self.push(segments[0].ident.name); } } } diff --git a/src/tools/clippy/clippy_lints/src/single_range_in_vec_init.rs b/src/tools/clippy/clippy_lints/src/single_range_in_vec_init.rs index 54d09ff9ee40..dda2f8cc1d00 100644 --- a/src/tools/clippy/clippy_lints/src/single_range_in_vec_init.rs +++ b/src/tools/clippy/clippy_lints/src/single_range_in_vec_init.rs @@ -3,7 +3,7 @@ use clippy_utils::higher::VecArgs; use clippy_utils::macros::root_macro_call_first_node; use clippy_utils::source::SpanRangeExt; use clippy_utils::ty::implements_trait; -use clippy_utils::{is_no_std_crate, paths}; +use clippy_utils::{is_no_std_crate, sym}; use rustc_ast::{LitIntType, LitKind, UintTy}; use rustc_errors::Applicability; use rustc_hir::{Expr, ExprKind, LangItem, QPath, StructTailExpr}; @@ -100,7 +100,7 @@ impl LateLintPass<'_> for SingleRangeInVecInit { && let Some(start_snippet) = start.span.get_source_text(cx) && let Some(end_snippet) = end.span.get_source_text(cx) { - let should_emit_every_value = if let Some(step_def_id) = paths::ITER_STEP.only(cx) + let should_emit_every_value = if let Some(step_def_id) = cx.tcx.get_diagnostic_item(sym::range_step) && implements_trait(cx, ty, step_def_id, &[]) { true diff --git a/src/tools/clippy/clippy_lints/src/to_digit_is_some.rs b/src/tools/clippy/clippy_lints/src/to_digit_is_some.rs index 7d7d74f27b3c..3e847543e1c1 100644 --- a/src/tools/clippy/clippy_lints/src/to_digit_is_some.rs +++ b/src/tools/clippy/clippy_lints/src/to_digit_is_some.rs @@ -2,7 +2,7 @@ use clippy_config::Conf; use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::msrvs::{self, Msrv}; use clippy_utils::source::snippet_with_applicability; -use clippy_utils::{is_in_const_context, paths, sym}; +use clippy_utils::{is_in_const_context, is_path_diagnostic_item, sym}; use rustc_errors::Applicability; use rustc_hir as hir; use 
rustc_lint::{LateContext, LateLintPass}; @@ -62,7 +62,7 @@ impl<'tcx> LateLintPass<'tcx> for ToDigitIsSome { } }, hir::ExprKind::Call(to_digits_call, [char_arg, radix_arg]) => { - if paths::CHAR_TO_DIGIT.matches_path(cx, to_digits_call) { + if is_path_diagnostic_item(cx, to_digits_call, sym::char_to_digit) { Some((false, char_arg, radix_arg)) } else { None diff --git a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs index a2938c86c76a..92427473a8ee 100644 --- a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -606,32 +606,31 @@ fn span_from_macro_expansion_has_safety_comment(cx: &LateContext<'_>, span: Span let ctxt = span.ctxt(); if ctxt == SyntaxContext::root() { HasSafetyComment::Maybe - } else { - // From a macro expansion. Get the text from the start of the macro declaration to start of the - // unsafe block. - // macro_rules! foo { () => { stuff }; (x) => { unsafe { stuff } }; } - // ^--------------------------------------------^ - if let Ok(unsafe_line) = source_map.lookup_line(span.lo()) - && let Ok(macro_line) = source_map.lookup_line(ctxt.outer_expn_data().def_site.lo()) - && Arc::ptr_eq(&unsafe_line.sf, ¯o_line.sf) - && let Some(src) = unsafe_line.sf.src.as_deref() - { - if macro_line.line < unsafe_line.line { - match text_has_safety_comment( - src, - &unsafe_line.sf.lines()[macro_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos, - ) { - Some(b) => HasSafetyComment::Yes(b), - None => HasSafetyComment::No, - } - } else { - HasSafetyComment::No + } + // From a macro expansion. Get the text from the start of the macro declaration to start of the + // unsafe block. + // macro_rules! 
foo { () => { stuff }; (x) => { unsafe { stuff } }; } + // ^--------------------------------------------^ + else if let Ok(unsafe_line) = source_map.lookup_line(span.lo()) + && let Ok(macro_line) = source_map.lookup_line(ctxt.outer_expn_data().def_site.lo()) + && Arc::ptr_eq(&unsafe_line.sf, ¯o_line.sf) + && let Some(src) = unsafe_line.sf.src.as_deref() + { + if macro_line.line < unsafe_line.line { + match text_has_safety_comment( + src, + &unsafe_line.sf.lines()[macro_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos, + ) { + Some(b) => HasSafetyComment::Yes(b), + None => HasSafetyComment::No, } } else { - // Problem getting source text. Pretend a comment was found. - HasSafetyComment::Maybe + HasSafetyComment::No } + } else { + // Problem getting source text. Pretend a comment was found. + HasSafetyComment::Maybe } } diff --git a/src/tools/clippy/clippy_lints/src/useless_concat.rs b/src/tools/clippy/clippy_lints/src/useless_concat.rs index 1ed1fbb3b9c6..96845adb04a2 100644 --- a/src/tools/clippy/clippy_lints/src/useless_concat.rs +++ b/src/tools/clippy/clippy_lints/src/useless_concat.rs @@ -1,8 +1,7 @@ use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::macros::macro_backtrace; -use clippy_utils::paths::CONCAT; use clippy_utils::source::snippet_opt; -use clippy_utils::tokenize_with_text; +use clippy_utils::{sym, tokenize_with_text}; use rustc_ast::LitKind; use rustc_errors::Applicability; use rustc_hir::{Expr, ExprKind}; @@ -43,7 +42,7 @@ impl LateLintPass<'_> for UselessConcat { // Get the direct parent of the expression. && let Some(macro_call) = macro_backtrace(expr.span).next() // Check if the `concat` macro from the `core` library. - && CONCAT.matches(cx, macro_call.def_id) + && cx.tcx.is_diagnostic_item(sym::macro_concat, macro_call.def_id) // We get the original code to parse it. 
&& let Some(original_code) = snippet_opt(cx, macro_call.span) // This check allows us to ensure that the code snippet: diff --git a/src/tools/clippy/clippy_utils/Cargo.toml b/src/tools/clippy/clippy_utils/Cargo.toml index 615c0995e8b1..73291aa8cdf7 100644 --- a/src/tools/clippy/clippy_utils/Cargo.toml +++ b/src/tools/clippy/clippy_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "clippy_utils" -version = "0.1.89" +version = "0.1.90" edition = "2024" description = "Helpful tools for writing lints, provided as they are used in Clippy" repository = "https://github.com/rust-lang/rust-clippy" diff --git a/src/tools/clippy/clippy_utils/README.md b/src/tools/clippy/clippy_utils/README.md index 1aa16e3943c4..649748d1534b 100644 --- a/src/tools/clippy/clippy_utils/README.md +++ b/src/tools/clippy/clippy_utils/README.md @@ -8,7 +8,7 @@ This crate is only guaranteed to build with this `nightly` toolchain: ``` -nightly-2025-06-12 +nightly-2025-06-26 ``` diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs index 1ec5d11384f5..aaa071fd5c93 100644 --- a/src/tools/clippy/clippy_utils/src/consts.rs +++ b/src/tools/clippy/clippy_utils/src/consts.rs @@ -958,3 +958,18 @@ fn field_of_struct<'tcx>( None } } + +/// If `expr` evaluates to an integer constant, return its value. +pub fn integer_const(cx: &LateContext<'_>, expr: &Expr<'_>) -> Option { + if let Some(Constant::Int(value)) = ConstEvalCtxt::new(cx).eval_simple(expr) { + Some(value) + } else { + None + } +} + +/// Check if `expr` evaluates to an integer constant of 0. 
+#[inline] +pub fn is_zero_integer_const(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { + integer_const(cx, expr) == Some(0) +} diff --git a/src/tools/clippy/clippy_utils/src/diagnostics.rs b/src/tools/clippy/clippy_utils/src/diagnostics.rs index cd2098a89891..dc240dd067b1 100644 --- a/src/tools/clippy/clippy_utils/src/diagnostics.rs +++ b/src/tools/clippy/clippy_utils/src/diagnostics.rs @@ -109,7 +109,7 @@ pub fn span_lint(cx: &T, lint: &'static Lint, sp: impl Into( }); } -/// Like `span_lint` but with a `note` section instead of a `help` message. +/// Like [`span_lint`] but with a `note` section instead of a `help` message. /// /// The `note` message is presented separately from the main lint message /// and is attached to a specific span: @@ -226,7 +226,7 @@ pub fn span_lint_and_note( }); } -/// Like `span_lint` but allows to add notes, help and suggestions using a closure. +/// Like [`span_lint`] but allows to add notes, help and suggestions using a closure. /// /// If you need to customize your lint output a lot, use this function. 
/// If you change the signature, remember to update the internal lint `CollapsibleCalls` diff --git a/src/tools/clippy/clippy_utils/src/lib.rs b/src/tools/clippy/clippy_utils/src/lib.rs index c7a2375c8df7..a8b33418c8c0 100644 --- a/src/tools/clippy/clippy_utils/src/lib.rs +++ b/src/tools/clippy/clippy_utils/src/lib.rs @@ -122,7 +122,7 @@ use rustc_span::hygiene::{ExpnKind, MacroKind}; use rustc_span::source_map::SourceMap; use rustc_span::symbol::{Ident, Symbol, kw}; use rustc_span::{InnerSpan, Span}; -use source::walk_span_to_context; +use source::{SpanRangeExt, walk_span_to_context}; use visitors::{Visitable, for_each_unconsumed_temporary}; use crate::consts::{ConstEvalCtxt, Constant, mir_to_const}; @@ -1886,7 +1886,7 @@ pub fn is_must_use_func_call(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { _ => None, }; - did.is_some_and(|did| cx.tcx.has_attr(did, sym::must_use)) + did.is_some_and(|did| find_attr!(cx.tcx.get_all_attrs(did), AttributeKind::MustUse { .. })) } /// Checks if a function's body represents the identity function. Looks for bodies of the form: @@ -2710,7 +2710,7 @@ impl<'tcx> ExprUseNode<'tcx> { } /// Gets the context an expression's value is used in. -pub fn expr_use_ctxt<'tcx>(cx: &LateContext<'tcx>, e: &'tcx Expr<'tcx>) -> ExprUseCtxt<'tcx> { +pub fn expr_use_ctxt<'tcx>(cx: &LateContext<'tcx>, e: &Expr<'tcx>) -> ExprUseCtxt<'tcx> { let mut adjustments = [].as_slice(); let mut is_ty_unified = false; let mut moved_before_use = false; @@ -2787,6 +2787,19 @@ pub fn span_contains_comment(sm: &SourceMap, span: Span) -> bool { }); } +/// Checks whether a given span has any significant token. A significant token is a non-whitespace +/// token, including comments unless `skip_comments` is set. +/// This is useful to determine if there are any actual code tokens in the span that are omitted in +/// the late pass, such as platform-specific code. 
+pub fn span_contains_non_whitespace(cx: &impl source::HasSession, span: Span, skip_comments: bool) -> bool { + matches!(span.get_source_text(cx), Some(snippet) if tokenize_with_text(&snippet).any(|(token, _, _)| + match token { + TokenKind::Whitespace => false, + TokenKind::BlockComment { .. } | TokenKind::LineComment { .. } => !skip_comments, + _ => true, + } + )) +} /// Returns all the comments a given span contains /// /// Comments are returned wrapped with their relevant delimiters diff --git a/src/tools/clippy/clippy_utils/src/msrvs.rs b/src/tools/clippy/clippy_utils/src/msrvs.rs index a5e66ad463bb..7a0bef1a9bbb 100644 --- a/src/tools/clippy/clippy_utils/src/msrvs.rs +++ b/src/tools/clippy/clippy_utils/src/msrvs.rs @@ -24,7 +24,7 @@ macro_rules! msrv_aliases { // names may refer to stabilized feature flags or library items msrv_aliases! { 1,88,0 { LET_CHAINS } - 1,87,0 { OS_STR_DISPLAY, INT_MIDPOINT, CONST_CHAR_IS_DIGIT } + 1,87,0 { OS_STR_DISPLAY, INT_MIDPOINT, CONST_CHAR_IS_DIGIT, UNSIGNED_IS_MULTIPLE_OF } 1,85,0 { UINT_FLOAT_MIDPOINT, CONST_SIZE_OF_VAL } 1,84,0 { CONST_OPTION_AS_SLICE, MANUAL_DANGLING_PTR } 1,83,0 { CONST_EXTERN_FN, CONST_FLOAT_BITS_CONV, CONST_FLOAT_CLASSIFY, CONST_MUT_REFS, CONST_UNWRAP } @@ -42,6 +42,7 @@ msrv_aliases! { 1,65,0 { LET_ELSE, POINTER_CAST_CONSTNESS } 1,63,0 { CLONE_INTO, CONST_SLICE_FROM_REF } 1,62,0 { BOOL_THEN_SOME, DEFAULT_ENUM_ATTRIBUTE, CONST_EXTERN_C_FN } + 1,61,0 { CONST_FN_TRAIT_BOUND } 1,60,0 { ABS_DIFF } 1,59,0 { THREAD_LOCAL_CONST_INIT } 1,58,0 { FORMAT_ARGS_CAPTURE, PATTERN_TRAIT_CHAR_ARRAY, CONST_RAW_PTR_DEREF } diff --git a/src/tools/clippy/clippy_utils/src/paths.rs b/src/tools/clippy/clippy_utils/src/paths.rs index f37a609497eb..8bbcb220210a 100644 --- a/src/tools/clippy/clippy_utils/src/paths.rs +++ b/src/tools/clippy/clippy_utils/src/paths.rs @@ -126,15 +126,6 @@ path_macros! { macro_path: PathNS::Macro, } -// Paths in `core`/`alloc`/`std`. This should be avoided and cleaned up by adding diagnostic items. 
-pub static ALIGN_OF: PathLookup = value_path!(core::mem::align_of); -pub static CHAR_TO_DIGIT: PathLookup = value_path!(char::to_digit); -pub static CONCAT: PathLookup = macro_path!(core::concat); -pub static IO_ERROR_NEW: PathLookup = value_path!(std::io::Error::new); -pub static IO_ERRORKIND_OTHER_CTOR: PathLookup = value_path!(std::io::ErrorKind::Other); -pub static ITER_STEP: PathLookup = type_path!(core::iter::Step); -pub static SLICE_FROM_REF: PathLookup = value_path!(core::slice::from_ref); - // Paths in external crates pub static FUTURES_IO_ASYNCREADEXT: PathLookup = type_path!(futures_util::AsyncReadExt); pub static FUTURES_IO_ASYNCWRITEEXT: PathLookup = type_path!(futures_util::AsyncWriteExt); diff --git a/src/tools/clippy/clippy_utils/src/qualify_min_const_fn.rs b/src/tools/clippy/clippy_utils/src/qualify_min_const_fn.rs index e629012b187c..8f1ebb8ada6e 100644 --- a/src/tools/clippy/clippy_utils/src/qualify_min_const_fn.rs +++ b/src/tools/clippy/clippy_utils/src/qualify_min_const_fn.rs @@ -32,6 +32,21 @@ pub fn is_min_const_fn<'tcx>(cx: &LateContext<'tcx>, body: &Body<'tcx>, msrv: Ms for local in &body.local_decls { check_ty(cx, local.ty, local.source_info.span, msrv)?; } + if !msrv.meets(cx, msrvs::CONST_FN_TRAIT_BOUND) + && let Some(sized_did) = cx.tcx.lang_items().sized_trait() + && let Some(meta_sized_did) = cx.tcx.lang_items().meta_sized_trait() + && cx.tcx.param_env(def_id).caller_bounds().iter().any(|bound| { + bound.as_trait_clause().is_some_and(|clause| { + let did = clause.def_id(); + did != sized_did && did != meta_sized_did + }) + }) + { + return Err(( + body.span, + "non-`Sized` trait clause before `const_fn_trait_bound` is stabilized".into(), + )); + } // impl trait is gone in MIR, so check the return type manually check_ty( cx, @@ -436,7 +451,7 @@ fn is_ty_const_destruct<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>, body: &Body<'tcx> // FIXME(const_trait_impl, fee1-dead) revert to const destruct once it works again #[expect(unused)] fn 
is_ty_const_destruct_unused<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>, body: &Body<'tcx>) -> bool { - // If this doesn't need drop at all, then don't select `~const Destruct`. + // If this doesn't need drop at all, then don't select `[const] Destruct`. if !ty.needs_drop(tcx, body.typing_env(tcx)) { return false; } diff --git a/src/tools/clippy/clippy_utils/src/sugg.rs b/src/tools/clippy/clippy_utils/src/sugg.rs index 6974e6512e2c..7a24d07fa1df 100644 --- a/src/tools/clippy/clippy_utils/src/sugg.rs +++ b/src/tools/clippy/clippy_utils/src/sugg.rs @@ -494,7 +494,17 @@ impl Display for ParenHelper { /// operators have the same /// precedence. pub fn make_unop(op: &str, expr: Sugg<'_>) -> Sugg<'static> { - Sugg::MaybeParen(format!("{op}{}", expr.maybe_paren()).into()) + // If the `expr` starts with `op` already, do not add wrap it in + // parentheses. + let expr = if let Sugg::MaybeParen(ref sugg) = expr + && !has_enclosing_paren(sugg) + && sugg.starts_with(op) + { + expr + } else { + expr.maybe_paren() + }; + Sugg::MaybeParen(format!("{op}{expr}").into()) } /// Builds the string for ` ` adding parenthesis when necessary. @@ -1016,6 +1026,16 @@ mod test { let sugg = Sugg::BinOp(AssocOp::Binary(ast::BinOpKind::Add), "(1 + 1)".into(), "(1 + 1)".into()); assert_eq!("((1 + 1) + (1 + 1))", sugg.maybe_paren().to_string()); } + + #[test] + fn unop_parenthesize() { + let sugg = Sugg::NonParen("x".into()).mut_addr(); + assert_eq!("&mut x", sugg.to_string()); + let sugg = sugg.mut_addr(); + assert_eq!("&mut &mut x", sugg.to_string()); + assert_eq!("(&mut &mut x)", sugg.maybe_paren().to_string()); + } + #[test] fn not_op() { use ast::BinOpKind::{Add, And, Eq, Ge, Gt, Le, Lt, Ne, Or}; diff --git a/src/tools/clippy/clippy_utils/src/sym.rs b/src/tools/clippy/clippy_utils/src/sym.rs index 3b58dba5628f..8a8218c6976f 100644 --- a/src/tools/clippy/clippy_utils/src/sym.rs +++ b/src/tools/clippy/clippy_utils/src/sym.rs @@ -46,7 +46,6 @@ generate! 
{ DOUBLE_QUOTE: "\"", Deserialize, EarlyLintPass, - ErrorKind, IntoIter, Itertools, LF: "\n", @@ -65,7 +64,6 @@ generate! { RegexBuilder, RegexSet, Start, - Step, Symbol, SyntaxContext, TBD, @@ -158,7 +156,6 @@ generate! { from_ne_bytes, from_ptr, from_raw, - from_ref, from_str, from_str_radix, fs, @@ -166,6 +163,7 @@ generate! { futures_util, get, get_mut, + get_or_insert, get_or_insert_with, get_unchecked, get_unchecked_mut, @@ -216,7 +214,6 @@ generate! { max_by_key, max_value, maximum, - mem, min, min_by, min_by_key, diff --git a/src/tools/clippy/clippy_utils/src/ty/mod.rs b/src/tools/clippy/clippy_utils/src/ty/mod.rs index 32a992ccc2d7..bffbcf073ab0 100644 --- a/src/tools/clippy/clippy_utils/src/ty/mod.rs +++ b/src/tools/clippy/clippy_utils/src/ty/mod.rs @@ -6,6 +6,7 @@ use core::ops::ControlFlow; use itertools::Itertools; use rustc_abi::VariantIdx; use rustc_ast::ast::Mutability; +use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_hir as hir; use rustc_hir::def::{CtorKind, CtorOf, DefKind, Res}; @@ -20,8 +21,8 @@ use rustc_middle::traits::EvaluationResult; use rustc_middle::ty::layout::ValidityRequirement; use rustc_middle::ty::{ self, AdtDef, AliasTy, AssocItem, AssocTag, Binder, BoundRegion, FnSig, GenericArg, GenericArgKind, GenericArgsRef, - GenericParamDefKind, IntTy, Region, RegionKind, TraitRef, Ty, TyCtxt, TypeSuperVisitable, - TypeVisitable, TypeVisitableExt, TypeVisitor, UintTy, Upcast, VariantDef, VariantDiscr, + GenericParamDefKind, IntTy, Region, RegionKind, TraitRef, Ty, TyCtxt, TypeSuperVisitable, TypeVisitable, + TypeVisitableExt, TypeVisitor, UintTy, Upcast, VariantDef, VariantDiscr, }; use rustc_span::symbol::Ident; use rustc_span::{DUMMY_SP, Span, Symbol, sym}; @@ -326,8 +327,8 @@ pub fn has_drop<'tcx>(cx: &LateContext<'tcx>, ty: Ty<'tcx>) -> bool { // Returns whether the type has #[must_use] attribute pub fn is_must_use_ty<'tcx>(cx: &LateContext<'tcx>, ty: Ty<'tcx>) 
-> bool { match ty.kind() { - ty::Adt(adt, _) => cx.tcx.has_attr(adt.did(), sym::must_use), - ty::Foreign(did) => cx.tcx.has_attr(*did, sym::must_use), + ty::Adt(adt, _) => find_attr!(cx.tcx.get_all_attrs(adt.did()), AttributeKind::MustUse { .. }), + ty::Foreign(did) => find_attr!(cx.tcx.get_all_attrs(*did), AttributeKind::MustUse { .. }), ty::Slice(ty) | ty::Array(ty, _) | ty::RawPtr(ty, _) | ty::Ref(_, ty, _) => { // for the Array case we don't need to care for the len == 0 case // because we don't want to lint functions returning empty arrays @@ -337,7 +338,10 @@ pub fn is_must_use_ty<'tcx>(cx: &LateContext<'tcx>, ty: Ty<'tcx>) -> bool { ty::Alias(ty::Opaque, AliasTy { def_id, .. }) => { for (predicate, _) in cx.tcx.explicit_item_self_bounds(def_id).skip_binder() { if let ty::ClauseKind::Trait(trait_predicate) = predicate.kind().skip_binder() - && cx.tcx.has_attr(trait_predicate.trait_ref.def_id, sym::must_use) + && find_attr!( + cx.tcx.get_all_attrs(trait_predicate.trait_ref.def_id), + AttributeKind::MustUse { .. } + ) { return true; } @@ -347,7 +351,7 @@ pub fn is_must_use_ty<'tcx>(cx: &LateContext<'tcx>, ty: Ty<'tcx>) -> bool { ty::Dynamic(binder, _, _) => { for predicate in *binder { if let ty::ExistentialPredicate::Trait(ref trait_ref) = predicate.skip_binder() - && cx.tcx.has_attr(trait_ref.def_id, sym::must_use) + && find_attr!(cx.tcx.get_all_attrs(trait_ref.def_id), AttributeKind::MustUse { .. 
}) { return true; } diff --git a/src/tools/clippy/declare_clippy_lint/Cargo.toml b/src/tools/clippy/declare_clippy_lint/Cargo.toml new file mode 100644 index 000000000000..bd6b4dfdee4d --- /dev/null +++ b/src/tools/clippy/declare_clippy_lint/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "declare_clippy_lint" +version = "0.1.90" +edition = "2024" +repository = "https://github.com/rust-lang/rust-clippy" +license = "MIT OR Apache-2.0" + +[package.metadata.rust-analyzer] +# This crate uses #[feature(rustc_private)] +rustc_private = true diff --git a/src/tools/clippy/declare_clippy_lint/src/lib.rs b/src/tools/clippy/declare_clippy_lint/src/lib.rs new file mode 100644 index 000000000000..f7d9c64bfbd0 --- /dev/null +++ b/src/tools/clippy/declare_clippy_lint/src/lib.rs @@ -0,0 +1,280 @@ +#![feature(macro_metavar_expr_concat, rustc_private)] + +extern crate rustc_lint; + +use rustc_lint::{Lint, LintId, LintStore}; + +// Needed by `declare_clippy_lint!`. +pub extern crate rustc_session; + +#[derive(Default)] +pub struct LintListBuilder { + lints: Vec<&'static Lint>, + all: Vec, + cargo: Vec, + complexity: Vec, + correctness: Vec, + nursery: Vec, + pedantic: Vec, + perf: Vec, + restriction: Vec, + style: Vec, + suspicious: Vec, +} +impl LintListBuilder { + pub fn insert(&mut self, lints: &[&LintInfo]) { + #[allow(clippy::enum_glob_use)] + use LintCategory::*; + + self.lints.extend(lints.iter().map(|&x| x.lint)); + for &&LintInfo { lint, category, .. 
} in lints { + let (all, cat) = match category { + Complexity => (Some(&mut self.all), &mut self.complexity), + Correctness => (Some(&mut self.all), &mut self.correctness), + Perf => (Some(&mut self.all), &mut self.perf), + Style => (Some(&mut self.all), &mut self.style), + Suspicious => (Some(&mut self.all), &mut self.suspicious), + Cargo => (None, &mut self.cargo), + Nursery => (None, &mut self.nursery), + Pedantic => (None, &mut self.pedantic), + Restriction => (None, &mut self.restriction), + }; + if let Some(all) = all { + all.push(LintId::of(lint)); + } + cat.push(LintId::of(lint)); + } + } + + pub fn register(self, store: &mut LintStore) { + store.register_lints(&self.lints); + store.register_group(true, "clippy::all", Some("clippy_all"), self.all); + store.register_group(true, "clippy::cargo", Some("clippy_cargo"), self.cargo); + store.register_group(true, "clippy::complexity", Some("clippy_complexity"), self.complexity); + store.register_group( + true, + "clippy::correctness", + Some("clippy_correctness"), + self.correctness, + ); + store.register_group(true, "clippy::nursery", Some("clippy_nursery"), self.nursery); + store.register_group(true, "clippy::pedantic", Some("clippy_pedantic"), self.pedantic); + store.register_group(true, "clippy::perf", Some("clippy_perf"), self.perf); + store.register_group( + true, + "clippy::restriction", + Some("clippy_restriction"), + self.restriction, + ); + store.register_group(true, "clippy::style", Some("clippy_style"), self.style); + store.register_group(true, "clippy::suspicious", Some("clippy_suspicious"), self.suspicious); + } +} + +#[derive(Copy, Clone, Debug)] +pub enum LintCategory { + Cargo, + Complexity, + Correctness, + Nursery, + Pedantic, + Perf, + Restriction, + Style, + Suspicious, +} +impl LintCategory { + #[must_use] + pub fn name(self) -> &'static str { + match self { + Self::Cargo => "cargo", + Self::Complexity => "complexity", + Self::Correctness => "correctness", + Self::Nursery => "nursery", + 
Self::Pedantic => "pedantic", + Self::Perf => "perf", + Self::Restriction => "restriction", + Self::Style => "style", + Self::Suspicious => "suspicious", + } + } +} + +pub struct LintInfo { + pub lint: &'static Lint, + pub category: LintCategory, + pub explanation: &'static str, + /// e.g. `clippy_lints/src/absolute_paths.rs#43` + pub location: &'static str, + pub version: &'static str, +} + +impl LintInfo { + /// Returns the lint name in lowercase without the `clippy::` prefix + #[must_use] + #[expect(clippy::missing_panics_doc)] + pub fn name_lower(&self) -> String { + self.lint.name.strip_prefix("clippy::").unwrap().to_ascii_lowercase() + } +} + +#[macro_export] +macro_rules! declare_clippy_lint_inner { + ( + $(#[doc = $docs:literal])* + #[clippy::version = $version:literal] + $vis:vis $lint_name:ident, + $level:ident, + $category:ident, + $desc:literal + $(, @eval_always = $eval_always:literal)? + ) => { + $crate::rustc_session::declare_tool_lint! { + $(#[doc = $docs])* + #[clippy::version = $version] + $vis clippy::$lint_name, + $level, + $desc, + report_in_external_macro:true + $(, @eval_always = $eval_always)? + } + + pub(crate) static ${concat($lint_name, _INFO)}: &'static $crate::LintInfo = &$crate::LintInfo { + lint: $lint_name, + category: $crate::LintCategory::$category, + explanation: concat!($($docs,"\n",)*), + location: concat!(file!(), "#L", line!()), + version: $version, + }; + }; +} + +#[macro_export] +macro_rules! declare_clippy_lint { + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + correctness, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Deny, + Correctness, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + complexity, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! 
{ + $(#[$($meta)*])* + $vis $lint_name, + Warn, + Complexity, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + perf, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Warn, + Perf, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + style, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Warn, + Style, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + suspicious, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Warn, + Suspicious, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + cargo, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Allow, + Cargo, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + nursery, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Allow, + Nursery, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + pedantic, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! { + $(#[$($meta)*])* + $vis $lint_name, + Allow, + Pedantic, + $($rest)* + } + }; + ( + $(#[$($meta:tt)*])* + $vis:vis $lint_name:ident, + restriction, + $($rest:tt)* + ) => { + $crate::declare_clippy_lint_inner! 
{ + $(#[$($meta)*])* + $vis $lint_name, + Allow, + Restriction, + $($rest)* + } + }; +} diff --git a/src/tools/clippy/lintcheck/src/main.rs b/src/tools/clippy/lintcheck/src/main.rs index 841838314328..eb390eecbcca 100644 --- a/src/tools/clippy/lintcheck/src/main.rs +++ b/src/tools/clippy/lintcheck/src/main.rs @@ -45,7 +45,7 @@ use rayon::prelude::*; #[must_use] pub fn target_dir() -> String { - env::var("CARGO_TARGET_DIR").unwrap_or("target".to_owned()) + env::var("CARGO_TARGET_DIR").unwrap_or_else(|_| "target".to_owned()) } fn lintcheck_sources() -> String { diff --git a/src/tools/clippy/rust-toolchain.toml b/src/tools/clippy/rust-toolchain.toml index 3fc5a1224a8d..124756a36009 100644 --- a/src/tools/clippy/rust-toolchain.toml +++ b/src/tools/clippy/rust-toolchain.toml @@ -1,6 +1,6 @@ [toolchain] # begin autogenerated nightly -channel = "nightly-2025-06-12" +channel = "nightly-2025-06-26" # end autogenerated nightly components = ["cargo", "llvm-tools", "rust-src", "rust-std", "rustc", "rustc-dev", "rustfmt"] profile = "minimal" diff --git a/src/tools/clippy/src/driver.rs b/src/tools/clippy/src/driver.rs index 37adb14169a3..c4076cbaa77b 100644 --- a/src/tools/clippy/src/driver.rs +++ b/src/tools/clippy/src/driver.rs @@ -13,7 +13,13 @@ extern crate rustc_interface; extern crate rustc_session; extern crate rustc_span; +// See docs in https://github.com/rust-lang/rust/blob/master/compiler/rustc/src/main.rs +// about jemalloc. 
+#[cfg(feature = "jemalloc")] +extern crate tikv_jemalloc_sys as jemalloc_sys; + use clippy_utils::sym; +use declare_clippy_lint::LintListBuilder; use rustc_interface::interface; use rustc_session::EarlyDiagCtxt; use rustc_session::config::ErrorOutputType; @@ -151,8 +157,13 @@ impl rustc_driver::Callbacks for ClippyCallbacks { (previous)(sess, lint_store); } + let mut list_builder = LintListBuilder::default(); + list_builder.insert(clippy_lints::declared_lints::LINTS); + list_builder.register(lint_store); + let conf = clippy_config::Conf::read(sess, &conf_path); - clippy_lints::register_lints(lint_store, conf); + clippy_lints::register_lint_passes(lint_store, conf); + #[cfg(feature = "internal")] clippy_lints_internal::register_lints(lint_store); })); @@ -181,6 +192,36 @@ const BUG_REPORT_URL: &str = "https://github.com/rust-lang/rust-clippy/issues/ne #[allow(clippy::too_many_lines)] #[allow(clippy::ignored_unit_patterns)] pub fn main() { + // See docs in https://github.com/rust-lang/rust/blob/master/compiler/rustc/src/main.rs + // about jemalloc. 
+ #[cfg(feature = "jemalloc")] + { + use std::os::raw::{c_int, c_void}; + + #[used] + static _F1: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::calloc; + #[used] + static _F2: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int = jemalloc_sys::posix_memalign; + #[used] + static _F3: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::aligned_alloc; + #[used] + static _F4: unsafe extern "C" fn(usize) -> *mut c_void = jemalloc_sys::malloc; + #[used] + static _F5: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = jemalloc_sys::realloc; + #[used] + static _F6: unsafe extern "C" fn(*mut c_void) = jemalloc_sys::free; + + #[cfg(target_os = "macos")] + { + unsafe extern "C" { + fn _rjem_je_zone_register(); + } + + #[used] + static _F7: unsafe extern "C" fn() = _rjem_je_zone_register; + } + } + let early_dcx = EarlyDiagCtxt::new(ErrorOutputType::default()); rustc_driver::init_rustc_env_logger(&early_dcx); diff --git a/src/tools/clippy/src/main.rs b/src/tools/clippy/src/main.rs index c9853e53f3b3..3c2eec1f05b9 100644 --- a/src/tools/clippy/src/main.rs +++ b/src/tools/clippy/src/main.rs @@ -107,7 +107,7 @@ impl ClippyCmd { } fn into_std_cmd(self) -> Command { - let mut cmd = Command::new(env::var("CARGO").unwrap_or("cargo".into())); + let mut cmd = Command::new(env::var("CARGO").unwrap_or_else(|_| "cargo".into())); let clippy_args: String = self .clippy_args .iter() diff --git a/src/tools/clippy/tests/compile-test.rs b/src/tools/clippy/tests/compile-test.rs index 99a01257a7b6..cefe654fef68 100644 --- a/src/tools/clippy/tests/compile-test.rs +++ b/src/tools/clippy/tests/compile-test.rs @@ -7,9 +7,9 @@ use askama::filters::Safe; use cargo_metadata::Message; use cargo_metadata::diagnostic::{Applicability, Diagnostic}; use clippy_config::ClippyConfiguration; -use clippy_lints::LintInfo; use clippy_lints::declared_lints::LINTS; use clippy_lints::deprecated_lints::{DEPRECATED, DEPRECATED_VERSION, RENAMED}; +use 
declare_clippy_lint::LintInfo; use pulldown_cmark::{Options, Parser, html}; use serde::Deserialize; use test_utils::IS_RUSTC_TEST_SUITE; @@ -568,10 +568,10 @@ impl LintMetadata { Self { id: name, id_location: Some(lint.location), - group: lint.category_str(), + group: lint.category.name(), level: lint.lint.default_level.as_str(), docs, - version: lint.version.unwrap(), + version: lint.version, applicability, } } diff --git a/src/tools/clippy/tests/dogfood.rs b/src/tools/clippy/tests/dogfood.rs index 4ac2bd532851..389616801fca 100644 --- a/src/tools/clippy/tests/dogfood.rs +++ b/src/tools/clippy/tests/dogfood.rs @@ -40,6 +40,7 @@ fn dogfood() { "clippy_lints", "clippy_utils", "clippy_config", + "declare_clippy_lint", "lintcheck", "rustc_tools_util", ] { diff --git a/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.fixed b/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.fixed new file mode 100644 index 000000000000..0dc0fc230c8d --- /dev/null +++ b/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.fixed @@ -0,0 +1,50 @@ +#![allow(clippy::eq_op, clippy::nonminimal_bool)] + +#[rustfmt::skip] +#[warn(clippy::collapsible_if)] +fn main() { + let (x, y) = ("hello", "world"); + + if x == "hello" { + todo!() + } + // Comment must be kept + else if y == "world" { + println!("Hello world!"); + } + //~^^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } // Inner comment + else if y == "world" { + println!("Hello world!"); + } + //~^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } + /* Inner comment */ + else if y == "world" { + println!("Hello world!"); + } + //~^^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } /* Inner comment */ + else if y == "world" { + println!("Hello world!"); + } + //~^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } /* This should not be removed */ /* So does this */ + // Comment must be kept + else if y == "world" { + println!("Hello world!"); + } + 
//~^^^^^^ collapsible_else_if +} diff --git a/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.rs b/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.rs new file mode 100644 index 000000000000..8344c122f16c --- /dev/null +++ b/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.rs @@ -0,0 +1,55 @@ +#![allow(clippy::eq_op, clippy::nonminimal_bool)] + +#[rustfmt::skip] +#[warn(clippy::collapsible_if)] +fn main() { + let (x, y) = ("hello", "world"); + + if x == "hello" { + todo!() + } else { + // Comment must be kept + if y == "world" { + println!("Hello world!"); + } + } + //~^^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } else { // Inner comment + if y == "world" { + println!("Hello world!"); + } + } + //~^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } else { + /* Inner comment */ + if y == "world" { + println!("Hello world!"); + } + } + //~^^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } else { /* Inner comment */ + if y == "world" { + println!("Hello world!"); + } + } + //~^^^^^ collapsible_else_if + + if x == "hello" { + todo!() + } /* This should not be removed */ else /* So does this */ { + // Comment must be kept + if y == "world" { + println!("Hello world!"); + } + } + //~^^^^^^ collapsible_else_if +} diff --git a/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.stderr b/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.stderr new file mode 100644 index 000000000000..0ffe5f0a960d --- /dev/null +++ b/src/tools/clippy/tests/ui-toml/collapsible_if/collapsible_else_if.stderr @@ -0,0 +1,105 @@ +error: this `else { if .. 
}` block can be collapsed + --> tests/ui-toml/collapsible_if/collapsible_else_if.rs:10:12 + | +LL | } else { + | ____________^ +LL | | // Comment must be kept +LL | | if y == "world" { +LL | | println!("Hello world!"); +LL | | } +LL | | } + | |_____^ + | + = note: `-D clippy::collapsible-else-if` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::collapsible_else_if)]` +help: collapse nested if block + | +LL ~ } +LL | // Comment must be kept +LL ~ else if y == "world" { +LL | println!("Hello world!"); +LL ~ } + | + +error: this `else { if .. }` block can be collapsed + --> tests/ui-toml/collapsible_if/collapsible_else_if.rs:20:12 + | +LL | } else { // Inner comment + | ____________^ +LL | | if y == "world" { +LL | | println!("Hello world!"); +LL | | } +LL | | } + | |_____^ + | +help: collapse nested if block + | +LL ~ } // Inner comment +LL ~ else if y == "world" { +LL | println!("Hello world!"); +LL ~ } + | + +error: this `else { if .. }` block can be collapsed + --> tests/ui-toml/collapsible_if/collapsible_else_if.rs:29:12 + | +LL | } else { + | ____________^ +LL | | /* Inner comment */ +LL | | if y == "world" { +LL | | println!("Hello world!"); +LL | | } +LL | | } + | |_____^ + | +help: collapse nested if block + | +LL ~ } +LL | /* Inner comment */ +LL ~ else if y == "world" { +LL | println!("Hello world!"); +LL ~ } + | + +error: this `else { if .. }` block can be collapsed + --> tests/ui-toml/collapsible_if/collapsible_else_if.rs:39:12 + | +LL | } else { /* Inner comment */ + | ____________^ +LL | | if y == "world" { +LL | | println!("Hello world!"); +LL | | } +LL | | } + | |_____^ + | +help: collapse nested if block + | +LL ~ } /* Inner comment */ +LL ~ else if y == "world" { +LL | println!("Hello world!"); +LL ~ } + | + +error: this `else { if .. 
}` block can be collapsed + --> tests/ui-toml/collapsible_if/collapsible_else_if.rs:48:64 + | +LL | } /* This should not be removed */ else /* So does this */ { + | ________________________________________________________________^ +LL | | // Comment must be kept +LL | | if y == "world" { +LL | | println!("Hello world!"); +LL | | } +LL | | } + | |_____^ + | +help: collapse nested if block + | +LL ~ } /* This should not be removed */ /* So does this */ +LL | // Comment must be kept +LL ~ else if y == "world" { +LL | println!("Hello world!"); +LL ~ } + | + +error: aborting due to 5 previous errors + diff --git a/src/tools/clippy/tests/ui/assign_ops.fixed b/src/tools/clippy/tests/ui/assign_ops.fixed index 3bc6885d7c3e..99beea850a25 100644 --- a/src/tools/clippy/tests/ui/assign_ops.fixed +++ b/src/tools/clippy/tests/ui/assign_ops.fixed @@ -91,7 +91,7 @@ mod issue14871 { impl const NumberConstants for T where - T: Number + ~const core::ops::Add, + T: Number + [const] core::ops::Add, { fn constant(value: usize) -> Self { let mut res = Self::ZERO; diff --git a/src/tools/clippy/tests/ui/assign_ops.rs b/src/tools/clippy/tests/ui/assign_ops.rs index f1f8f9daff95..900d5ad38e03 100644 --- a/src/tools/clippy/tests/ui/assign_ops.rs +++ b/src/tools/clippy/tests/ui/assign_ops.rs @@ -91,7 +91,7 @@ mod issue14871 { impl const NumberConstants for T where - T: Number + ~const core::ops::Add, + T: Number + [const] core::ops::Add, { fn constant(value: usize) -> Self { let mut res = Self::ZERO; diff --git a/src/tools/clippy/tests/ui/borrow_deref_ref.fixed b/src/tools/clippy/tests/ui/borrow_deref_ref.fixed index 765dd75fceb9..6d06fcc3037a 100644 --- a/src/tools/clippy/tests/ui/borrow_deref_ref.fixed +++ b/src/tools/clippy/tests/ui/borrow_deref_ref.fixed @@ -124,3 +124,50 @@ mod issue_11346 { //~^ borrow_deref_ref } } + +fn issue_14934() { + let x: &'static str = "x"; + let y = "y".to_string(); + { + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*x; // Do not lint + *x = &*y; + } + 
{ + let mut x = x; + //~^ borrow_deref_ref + x = &*y; + } + { + #[expect(clippy::toplevel_ref_arg, clippy::needless_borrow)] + let ref x = x; + //~^ borrow_deref_ref + } + { + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = std::convert::identity(x); + //~^ borrow_deref_ref + *x = &*y; + } + { + #[derive(Clone)] + struct S(&'static str); + let s = S("foo"); + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*s.0; // Do not lint + *x = "bar"; + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = s.clone().0; + //~^ borrow_deref_ref + *x = "bar"; + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*std::convert::identity(&s).0; + *x = "bar"; + } + { + let y = &1; + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = { y }; + //~^ borrow_deref_ref + } +} diff --git a/src/tools/clippy/tests/ui/borrow_deref_ref.rs b/src/tools/clippy/tests/ui/borrow_deref_ref.rs index 8ee66bfa881a..b43f4c93bf2b 100644 --- a/src/tools/clippy/tests/ui/borrow_deref_ref.rs +++ b/src/tools/clippy/tests/ui/borrow_deref_ref.rs @@ -124,3 +124,50 @@ mod issue_11346 { //~^ borrow_deref_ref } } + +fn issue_14934() { + let x: &'static str = "x"; + let y = "y".to_string(); + { + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*x; // Do not lint + *x = &*y; + } + { + let mut x = &*x; + //~^ borrow_deref_ref + x = &*y; + } + { + #[expect(clippy::toplevel_ref_arg, clippy::needless_borrow)] + let ref x = &*x; + //~^ borrow_deref_ref + } + { + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*std::convert::identity(x); + //~^ borrow_deref_ref + *x = &*y; + } + { + #[derive(Clone)] + struct S(&'static str); + let s = S("foo"); + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*s.0; // Do not lint + *x = "bar"; + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*s.clone().0; + //~^ borrow_deref_ref + *x = "bar"; + #[expect(clippy::toplevel_ref_arg)] + let ref mut x = &*std::convert::identity(&s).0; + *x = "bar"; + } + { + let y = &1; + 
#[expect(clippy::toplevel_ref_arg)] + let ref mut x = { &*y }; + //~^ borrow_deref_ref + } +} diff --git a/src/tools/clippy/tests/ui/borrow_deref_ref.stderr b/src/tools/clippy/tests/ui/borrow_deref_ref.stderr index 3d55da25b9b2..3a1f968b4be1 100644 --- a/src/tools/clippy/tests/ui/borrow_deref_ref.stderr +++ b/src/tools/clippy/tests/ui/borrow_deref_ref.stderr @@ -25,5 +25,35 @@ error: deref on an immutable reference LL | (&*s).foo(); | ^^^^^ help: if you would like to reborrow, try removing `&*`: `s` -error: aborting due to 4 previous errors +error: deref on an immutable reference + --> tests/ui/borrow_deref_ref.rs:137:21 + | +LL | let mut x = &*x; + | ^^^ help: if you would like to reborrow, try removing `&*`: `x` + +error: deref on an immutable reference + --> tests/ui/borrow_deref_ref.rs:143:21 + | +LL | let ref x = &*x; + | ^^^ help: if you would like to reborrow, try removing `&*`: `x` + +error: deref on an immutable reference + --> tests/ui/borrow_deref_ref.rs:148:25 + | +LL | let ref mut x = &*std::convert::identity(x); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: if you would like to reborrow, try removing `&*`: `std::convert::identity(x)` + +error: deref on an immutable reference + --> tests/ui/borrow_deref_ref.rs:160:25 + | +LL | let ref mut x = &*s.clone().0; + | ^^^^^^^^^^^^^ help: if you would like to reborrow, try removing `&*`: `s.clone().0` + +error: deref on an immutable reference + --> tests/ui/borrow_deref_ref.rs:170:27 + | +LL | let ref mut x = { &*y }; + | ^^^ help: if you would like to reborrow, try removing `&*`: `y` + +error: aborting due to 9 previous errors diff --git a/src/tools/clippy/tests/ui/borrow_interior_mutable_const.rs b/src/tools/clippy/tests/ui/borrow_interior_mutable_const.rs index 0f439f789150..674450a73ad2 100644 --- a/src/tools/clippy/tests/ui/borrow_interior_mutable_const.rs +++ b/src/tools/clippy/tests/ui/borrow_interior_mutable_const.rs @@ -218,4 +218,20 @@ fn main() { let _ = &S::VALUE.1; //~ borrow_interior_mutable_const let _ 
= &S::VALUE.2; } + { + pub struct Foo(pub Entry, pub T); + + pub struct Entry(pub Cell<[u32; N]>); + + impl Entry { + const INIT: Self = Self(Cell::new([42; N])); + } + + impl Foo { + pub fn make_foo(v: T) -> Self { + // Used to ICE due to incorrect instantiation. + Foo(Entry::INIT, v) + } + } + } } diff --git a/src/tools/clippy/tests/ui/box_default.fixed b/src/tools/clippy/tests/ui/box_default.fixed index 80000f5de4fd..ed00494433b9 100644 --- a/src/tools/clippy/tests/ui/box_default.fixed +++ b/src/tools/clippy/tests/ui/box_default.fixed @@ -126,7 +126,7 @@ fn issue_10381() { impl Bar for Foo {} fn maybe_get_bar(i: u32) -> Option> { - if i % 2 == 0 { + if i.is_multiple_of(2) { Some(Box::new(Foo::default())) } else { None diff --git a/src/tools/clippy/tests/ui/box_default.rs b/src/tools/clippy/tests/ui/box_default.rs index 4681016d7cd3..801d92f5c290 100644 --- a/src/tools/clippy/tests/ui/box_default.rs +++ b/src/tools/clippy/tests/ui/box_default.rs @@ -126,7 +126,7 @@ fn issue_10381() { impl Bar for Foo {} fn maybe_get_bar(i: u32) -> Option> { - if i % 2 == 0 { + if i.is_multiple_of(2) { Some(Box::new(Foo::default())) } else { None diff --git a/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.rs b/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.rs index 922d30443fcc..fa322dc28a78 100644 --- a/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.rs +++ b/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.rs @@ -276,3 +276,27 @@ mod issue14873 { } } } + +fn issue15004() { + let a = 12u32; + let b = 13u32; + let mut c = 8u32; + + let mut result = if b > a { + c += 1; + 0 + } else { + c += 2; + 0 + //~^ branches_sharing_code + }; + + result = if b > a { + c += 1; + 1 + } else { + c += 2; + 1 + //~^ branches_sharing_code + }; +} diff --git a/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.stderr b/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.stderr index 
f437db8b7331..1c470fb0da5e 100644 --- a/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.stderr +++ b/src/tools/clippy/tests/ui/branches_sharing_code/shared_at_bottom.stderr @@ -172,5 +172,35 @@ LL ~ } LL + let y = 1; | -error: aborting due to 10 previous errors +error: all if blocks contain the same code at the end + --> tests/ui/branches_sharing_code/shared_at_bottom.rs:290:5 + | +LL | / 0 +LL | | +LL | | }; + | |_____^ + | + = note: the end suggestion probably needs some adjustments to use the expression result correctly +help: consider moving these statements after the if + | +LL ~ } +LL ~ 0; + | + +error: all if blocks contain the same code at the end + --> tests/ui/branches_sharing_code/shared_at_bottom.rs:299:5 + | +LL | / 1 +LL | | +LL | | }; + | |_____^ + | + = note: the end suggestion probably needs some adjustments to use the expression result correctly +help: consider moving these statements after the if + | +LL ~ } +LL ~ 1; + | + +error: aborting due to 12 previous errors diff --git a/src/tools/clippy/tests/ui/collapsible_else_if.fixed b/src/tools/clippy/tests/ui/collapsible_else_if.fixed index 9f530ad670a0..fed75244c6f7 100644 --- a/src/tools/clippy/tests/ui/collapsible_else_if.fixed +++ b/src/tools/clippy/tests/ui/collapsible_else_if.fixed @@ -86,3 +86,21 @@ fn issue_7318() { }else if false {} //~^^^ collapsible_else_if } + +fn issue14799() { + use std::ops::ControlFlow; + + let c: ControlFlow<_, ()> = ControlFlow::Break(Some(42)); + if let ControlFlow::Break(Some(_)) = c { + todo!(); + } else { + #[cfg(target_os = "freebsd")] + todo!(); + + if let ControlFlow::Break(None) = c { + todo!(); + } else { + todo!(); + } + } +} diff --git a/src/tools/clippy/tests/ui/collapsible_else_if.rs b/src/tools/clippy/tests/ui/collapsible_else_if.rs index 2c646cd1d4da..e50e781fb698 100644 --- a/src/tools/clippy/tests/ui/collapsible_else_if.rs +++ b/src/tools/clippy/tests/ui/collapsible_else_if.rs @@ -102,3 +102,21 @@ fn issue_7318() { } //~^^^ 
collapsible_else_if } + +fn issue14799() { + use std::ops::ControlFlow; + + let c: ControlFlow<_, ()> = ControlFlow::Break(Some(42)); + if let ControlFlow::Break(Some(_)) = c { + todo!(); + } else { + #[cfg(target_os = "freebsd")] + todo!(); + + if let ControlFlow::Break(None) = c { + todo!(); + } else { + todo!(); + } + } +} diff --git a/src/tools/clippy/tests/ui/collapsible_if.fixed b/src/tools/clippy/tests/ui/collapsible_if.fixed index b553182a4454..77bc791ea8e9 100644 --- a/src/tools/clippy/tests/ui/collapsible_if.fixed +++ b/src/tools/clippy/tests/ui/collapsible_if.fixed @@ -154,3 +154,12 @@ fn issue14722() { None }; } + +fn issue14799() { + if true { + #[cfg(target_os = "freebsd")] + todo!(); + + if true {} + }; +} diff --git a/src/tools/clippy/tests/ui/collapsible_if.rs b/src/tools/clippy/tests/ui/collapsible_if.rs index f5998457ca6c..d30df157d5eb 100644 --- a/src/tools/clippy/tests/ui/collapsible_if.rs +++ b/src/tools/clippy/tests/ui/collapsible_if.rs @@ -164,3 +164,12 @@ fn issue14722() { None }; } + +fn issue14799() { + if true { + #[cfg(target_os = "freebsd")] + todo!(); + + if true {} + }; +} diff --git a/src/tools/clippy/tests/ui/doc/needless_doctest_main.rs b/src/tools/clippy/tests/ui/doc/needless_doctest_main.rs index 633a435ca5ed..8c3217624d44 100644 --- a/src/tools/clippy/tests/ui/doc/needless_doctest_main.rs +++ b/src/tools/clippy/tests/ui/doc/needless_doctest_main.rs @@ -1,5 +1,3 @@ -//@ check-pass - #![warn(clippy::needless_doctest_main)] //! issue 10491: //! 
```rust,no_test @@ -19,4 +17,114 @@ /// ``` fn foo() {} +#[rustfmt::skip] +/// Description +/// ```rust +/// fn main() { +//~^ error: needless `fn main` in doctest +/// let a = 0; +/// } +/// ``` +fn mulpipulpi() {} + +#[rustfmt::skip] +/// With a `#[no_main]` +/// ```rust +/// #[no_main] +/// fn a() { +/// let _ = 0; +/// } +/// ``` +fn pulpimulpi() {} + +// Without a `#[no_main]` attribute +/// ```rust +/// fn a() { +/// let _ = 0; +/// } +/// ``` +fn plumilupi() {} + +#[rustfmt::skip] +/// Additional function, shouldn't trigger +/// ```rust +/// fn additional_function() { +/// let _ = 0; +/// // Thus `fn main` is actually relevant! +/// } +/// fn main() { +/// let _ = 0; +/// } +/// ``` +fn mlupipupi() {} + +#[rustfmt::skip] +/// Additional function AFTER main, shouldn't trigger +/// ```rust +/// fn main() { +/// let _ = 0; +/// } +/// fn additional_function() { +/// let _ = 0; +/// // Thus `fn main` is actually relevant! +/// } +/// ``` +fn lumpimupli() {} + +#[rustfmt::skip] +/// Ignore code block, should not lint at all +/// ```rust, ignore +/// fn main() { +//~^ error: needless `fn main` in doctest +/// // Hi! +/// let _ = 0; +/// } +/// ``` +fn mpulpilumi() {} + +#[rustfmt::skip] +/// Spaces in weird positions (including an \u{A0} after `main`) +/// ```rust +/// fn main (){ +//~^ error: needless `fn main` in doctest +/// let _ = 0; +/// } +/// ``` +fn plumpiplupi() {} + +/// 4 Functions, this should not lint because there are several function +/// +/// ```rust +/// fn a() {let _ = 0; } +/// fn b() {let _ = 0; } +/// fn main() { let _ = 0; } +/// fn d() { let _ = 0; } +/// ``` +fn pulmipulmip() {} + +/// 3 Functions but main is first, should also not lint +/// +///```rust +/// fn main() { let _ = 0; } +/// fn b() { let _ = 0; } +/// fn c() { let _ = 0; } +/// ``` +fn pmuplimulip() {} + fn main() {} + +fn issue8244() -> Result<(), ()> { + //! ```compile_fail + //! fn test() -> Result< {} + //! 
``` + Ok(()) +} + +/// # Examples +/// +/// ``` +/// use std::error::Error; +/// fn main() -> Result<(), Box/* > */ { +/// } +/// ``` +fn issue15041() {} diff --git a/src/tools/clippy/tests/ui/doc/needless_doctest_main.stderr b/src/tools/clippy/tests/ui/doc/needless_doctest_main.stderr new file mode 100644 index 000000000000..dd5474ccb85a --- /dev/null +++ b/src/tools/clippy/tests/ui/doc/needless_doctest_main.stderr @@ -0,0 +1,36 @@ +error: needless `fn main` in doctest + --> tests/ui/doc/needless_doctest_main.rs:23:5 + | +LL | /// fn main() { + | _____^ +LL | | +LL | | /// let a = 0; +LL | | /// } + | |_____^ + | + = note: `-D clippy::needless-doctest-main` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::needless_doctest_main)]` + +error: needless `fn main` in doctest + --> tests/ui/doc/needless_doctest_main.rs:77:5 + | +LL | /// fn main() { + | _____^ +LL | | +LL | | /// // Hi! +LL | | /// let _ = 0; +LL | | /// } + | |_____^ + +error: needless `fn main` in doctest + --> tests/ui/doc/needless_doctest_main.rs:88:5 + | +LL | /// fn main (){ + | _____^ +LL | | +LL | | /// let _ = 0; +LL | | /// } + | |_____^ + +error: aborting due to 3 previous errors + diff --git a/src/tools/clippy/tests/ui/doc_broken_link.rs b/src/tools/clippy/tests/ui/doc_broken_link.rs new file mode 100644 index 000000000000..7d9c0ef13b3c --- /dev/null +++ b/src/tools/clippy/tests/ui/doc_broken_link.rs @@ -0,0 +1,72 @@ +#![warn(clippy::doc_broken_link)] + +fn main() {} + +pub struct FakeType {} + +/// This might be considered a link false positive +/// and should be ignored by this lint rule: +/// Example of referencing some code with brackets [FakeType]. +pub fn doc_ignore_link_false_positive_1() {} + +/// This might be considered a link false positive +/// and should be ignored by this lint rule: +/// [`FakeType`]. Continue text after brackets, +/// then (something in +/// parenthesis). 
+pub fn doc_ignore_link_false_positive_2() {} + +/// Test valid link, whole link single line. +/// [doc valid link](https://test.fake/doc_valid_link) +pub fn doc_valid_link() {} + +/// Test valid link, whole link single line but it has special chars such as brackets and +/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2) +pub fn doc_valid_link_url_invalid_char() {} + +/// Test valid link, text tag broken across multiple lines. +/// [doc valid link broken +/// text](https://test.fake/doc_valid_link_broken_text) +pub fn doc_valid_link_broken_text() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line. +/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first +/// ) +pub fn doc_valid_link_broken_url_tag_two_lines_first() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line. +/// [doc valid link broken url tag two lines second]( +/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second) +pub fn doc_valid_link_broken_url_tag_two_lines_second() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line, but the closing pharentesis +/// in a third line. +/// [doc valid link broken url tag three lines]( +/// https://test.fake/doc_valid_link_broken_url_tag_three_lines +/// ) +pub fn doc_valid_link_broken_url_tag_three_lines() {} + +/// Test invalid link, url part broken across multiple lines. +/// [doc invalid link broken url scheme part](https:// +/// test.fake/doc_invalid_link_broken_url_scheme_part) +//~^^ ERROR: possible broken doc link: broken across multiple lines +pub fn doc_invalid_link_broken_url_scheme_part() {} + +/// Test invalid link, url part broken across multiple lines. 
+/// [doc invalid link broken url host part](https://test +/// .fake/doc_invalid_link_broken_url_host_part) +//~^^ ERROR: possible broken doc link: broken across multiple lines +pub fn doc_invalid_link_broken_url_host_part() {} + +/// Test invalid link, for multiple urls in the same block of comment. +/// There is a [fist link - invalid](https://test +/// .fake) then it continues +//~^^ ERROR: possible broken doc link: broken across multiple lines +/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test +/// .fake). It ends with another +//~^^ ERROR: possible broken doc link: broken across multiple lines +/// line of comment. +pub fn doc_multiple_invalid_link_broken_url() {} diff --git a/src/tools/clippy/tests/ui/doc_broken_link.stderr b/src/tools/clippy/tests/ui/doc_broken_link.stderr new file mode 100644 index 000000000000..179ed97635ee --- /dev/null +++ b/src/tools/clippy/tests/ui/doc_broken_link.stderr @@ -0,0 +1,29 @@ +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:53:5 + | +LL | /// [doc invalid link broken url scheme part](https:// + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: `-D clippy::doc-broken-link` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]` + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:59:5 + | +LL | /// [doc invalid link broken url host part](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:65:16 + | +LL | /// There is a [fist link - invalid](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:68:80 + | +LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link 
- invalid](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 4 previous errors + diff --git a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.1.fixed b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.1.fixed index 36d80a2c95bf..e36e3c2aea6a 100644 --- a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.1.fixed +++ b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.1.fixed @@ -105,4 +105,13 @@ second line ")] pub struct Args; +mod issue_14980 { + //~v empty_line_after_outer_attr + #[repr(align(536870912))] + enum Aligned { + Zero = 0, + One = 1, + } +} + fn main() {} diff --git a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.2.fixed b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.2.fixed index 0e8e4129e858..b0908fc72147 100644 --- a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.2.fixed +++ b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.2.fixed @@ -108,4 +108,13 @@ second line ")] pub struct Args; +mod issue_14980 { + //~v empty_line_after_outer_attr + #[repr(align(536870912))] + enum Aligned { + Zero = 0, + One = 1, + } +} + fn main() {} diff --git a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.rs b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.rs index 1295088ac00e..4ae113c68f52 100644 --- a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.rs +++ b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.rs @@ -116,4 +116,14 @@ second line ")] pub struct Args; +mod issue_14980 { + //~v empty_line_after_outer_attr + #[repr(align(536870912))] + + enum Aligned { + Zero = 0, + One = 1, + } +} + fn main() {} diff --git a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.stderr b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.stderr index 519ba6e67615..331bc7c8856d 100644 --- a/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.stderr +++ 
b/src/tools/clippy/tests/ui/empty_line_after/outer_attribute.stderr @@ -111,5 +111,16 @@ LL | pub fn isolated_comment() {} | = help: if the empty lines are unintentional, remove them -error: aborting due to 9 previous errors +error: empty line after outer attribute + --> tests/ui/empty_line_after/outer_attribute.rs:121:5 + | +LL | / #[repr(align(536870912))] +LL | | + | |_^ +LL | enum Aligned { + | ------------ the attribute applies to this enum + | + = help: if the empty line is unintentional, remove it + +error: aborting due to 10 previous errors diff --git a/src/tools/clippy/tests/ui/eta.fixed b/src/tools/clippy/tests/ui/eta.fixed index 0ba631fda051..c93b83f53ecb 100644 --- a/src/tools/clippy/tests/ui/eta.fixed +++ b/src/tools/clippy/tests/ui/eta.fixed @@ -543,3 +543,21 @@ mod issue_13073 { //~^ redundant_closure } } + +fn issue_14789() { + _ = Some(1u8).map( + #[expect(clippy::redundant_closure)] + |a| foo(a), + ); + + _ = Some("foo").map( + #[expect(clippy::redundant_closure_for_method_calls)] + |s| s.to_owned(), + ); + + let _: Vec = None.map_or_else( + #[expect(clippy::redundant_closure)] + || vec![], + std::convert::identity, + ); +} diff --git a/src/tools/clippy/tests/ui/eta.rs b/src/tools/clippy/tests/ui/eta.rs index 4d8b29d450c5..273c8b21f4ad 100644 --- a/src/tools/clippy/tests/ui/eta.rs +++ b/src/tools/clippy/tests/ui/eta.rs @@ -543,3 +543,21 @@ mod issue_13073 { //~^ redundant_closure } } + +fn issue_14789() { + _ = Some(1u8).map( + #[expect(clippy::redundant_closure)] + |a| foo(a), + ); + + _ = Some("foo").map( + #[expect(clippy::redundant_closure_for_method_calls)] + |s| s.to_owned(), + ); + + let _: Vec = None.map_or_else( + #[expect(clippy::redundant_closure)] + || vec![], + std::convert::identity, + ); +} diff --git a/src/tools/clippy/tests/ui/exhaustive_items.fixed b/src/tools/clippy/tests/ui/exhaustive_items.fixed index 79c74aeefbd8..3b2f33dbd2ce 100644 --- a/src/tools/clippy/tests/ui/exhaustive_items.fixed +++ 
b/src/tools/clippy/tests/ui/exhaustive_items.fixed @@ -1,3 +1,4 @@ +#![feature(default_field_values)] #![deny(clippy::exhaustive_enums, clippy::exhaustive_structs)] #![allow(unused)] @@ -90,3 +91,9 @@ pub mod structs { pub bar: String, } } + +pub mod issue14992 { + pub struct A { + pub a: isize = 42, + } +} diff --git a/src/tools/clippy/tests/ui/exhaustive_items.rs b/src/tools/clippy/tests/ui/exhaustive_items.rs index 4e851f4c492e..b0a6a7170766 100644 --- a/src/tools/clippy/tests/ui/exhaustive_items.rs +++ b/src/tools/clippy/tests/ui/exhaustive_items.rs @@ -1,3 +1,4 @@ +#![feature(default_field_values)] #![deny(clippy::exhaustive_enums, clippy::exhaustive_structs)] #![allow(unused)] @@ -87,3 +88,9 @@ pub mod structs { pub bar: String, } } + +pub mod issue14992 { + pub struct A { + pub a: isize = 42, + } +} diff --git a/src/tools/clippy/tests/ui/exhaustive_items.stderr b/src/tools/clippy/tests/ui/exhaustive_items.stderr index c92c8a9efaae..55928fa458d3 100644 --- a/src/tools/clippy/tests/ui/exhaustive_items.stderr +++ b/src/tools/clippy/tests/ui/exhaustive_items.stderr @@ -1,5 +1,5 @@ error: exported enums should not be exhaustive - --> tests/ui/exhaustive_items.rs:9:5 + --> tests/ui/exhaustive_items.rs:10:5 | LL | / pub enum Exhaustive { LL | | @@ -11,7 +11,7 @@ LL | | } | |_____^ | note: the lint level is defined here - --> tests/ui/exhaustive_items.rs:1:9 + --> tests/ui/exhaustive_items.rs:2:9 | LL | #![deny(clippy::exhaustive_enums, clippy::exhaustive_structs)] | ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -22,7 +22,7 @@ LL ~ pub enum Exhaustive { | error: exported enums should not be exhaustive - --> tests/ui/exhaustive_items.rs:19:5 + --> tests/ui/exhaustive_items.rs:20:5 | LL | / pub enum ExhaustiveWithAttrs { LL | | @@ -40,7 +40,7 @@ LL ~ pub enum ExhaustiveWithAttrs { | error: exported structs should not be exhaustive - --> tests/ui/exhaustive_items.rs:55:5 + --> tests/ui/exhaustive_items.rs:56:5 | LL | / pub struct Exhaustive { LL | | @@ -50,7 +50,7 @@ LL | | } | |_____^ 
| note: the lint level is defined here - --> tests/ui/exhaustive_items.rs:1:35 + --> tests/ui/exhaustive_items.rs:2:35 | LL | #![deny(clippy::exhaustive_enums, clippy::exhaustive_structs)] | ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/tools/clippy/tests/ui/identity_op.fixed b/src/tools/clippy/tests/ui/identity_op.fixed index a1b556029987..4e14e1a5e33f 100644 --- a/src/tools/clippy/tests/ui/identity_op.fixed +++ b/src/tools/clippy/tests/ui/identity_op.fixed @@ -312,3 +312,49 @@ fn issue_13470() { let _: u64 = 1u64 + ((x as i32 + y as i32) as u64); //~^ identity_op } + +fn issue_14932() { + let _ = 0usize + &Default::default(); // no error + + 0usize + &Default::default(); // no error + + ::default(); + //~^ identity_op + + let _ = usize::default(); + //~^ identity_op + + let _n: usize = Default::default(); + //~^ identity_op +} + +// Expr's type can be inferred by the function's return type +fn issue_14932_2() -> usize { + Default::default() + //~^ identity_op +} + +trait Def { + fn def() -> Self; +} + +impl Def for usize { + fn def() -> Self { + 0 + } +} + +fn issue_14932_3() { + let _ = 0usize + &Def::def(); // no error + + 0usize + &Def::def(); // no error + + ::def(); + //~^ identity_op + + let _ = usize::def(); + //~^ identity_op + + let _n: usize = Def::def(); + //~^ identity_op +} diff --git a/src/tools/clippy/tests/ui/identity_op.rs b/src/tools/clippy/tests/ui/identity_op.rs index f603e1078e4e..ebbef5723ffb 100644 --- a/src/tools/clippy/tests/ui/identity_op.rs +++ b/src/tools/clippy/tests/ui/identity_op.rs @@ -312,3 +312,49 @@ fn issue_13470() { let _: u64 = 1u64 + ((x as i32 + y as i32) as u64 + 0u64); //~^ identity_op } + +fn issue_14932() { + let _ = 0usize + &Default::default(); // no error + + 0usize + &Default::default(); // no error + + 0usize + &::default(); + //~^ identity_op + + let _ = 0usize + &usize::default(); + //~^ identity_op + + let _n: usize = 0usize + &Default::default(); + //~^ identity_op +} + +// Expr's type can be inferred by the 
function's return type +fn issue_14932_2() -> usize { + 0usize + &Default::default() + //~^ identity_op +} + +trait Def { + fn def() -> Self; +} + +impl Def for usize { + fn def() -> Self { + 0 + } +} + +fn issue_14932_3() { + let _ = 0usize + &Def::def(); // no error + + 0usize + &Def::def(); // no error + + 0usize + &::def(); + //~^ identity_op + + let _ = 0usize + &usize::def(); + //~^ identity_op + + let _n: usize = 0usize + &Def::def(); + //~^ identity_op +} diff --git a/src/tools/clippy/tests/ui/identity_op.stderr b/src/tools/clippy/tests/ui/identity_op.stderr index 8f9c2b603c49..24fa5db08ce5 100644 --- a/src/tools/clippy/tests/ui/identity_op.stderr +++ b/src/tools/clippy/tests/ui/identity_op.stderr @@ -379,5 +379,47 @@ error: this operation has no effect LL | let _: u64 = 1u64 + ((x as i32 + y as i32) as u64 + 0u64); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `((x as i32 + y as i32) as u64)` -error: aborting due to 63 previous errors +error: this operation has no effect + --> tests/ui/identity_op.rs:321:5 + | +LL | 0usize + &::default(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `::default()` + +error: this operation has no effect + --> tests/ui/identity_op.rs:324:13 + | +LL | let _ = 0usize + &usize::default(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `usize::default()` + +error: this operation has no effect + --> tests/ui/identity_op.rs:327:21 + | +LL | let _n: usize = 0usize + &Default::default(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `Default::default()` + +error: this operation has no effect + --> tests/ui/identity_op.rs:333:5 + | +LL | 0usize + &Default::default() + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `Default::default()` + +error: this operation has no effect + --> tests/ui/identity_op.rs:352:5 + | +LL | 0usize + &::def(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `::def()` + +error: this 
operation has no effect + --> tests/ui/identity_op.rs:355:13 + | +LL | let _ = 0usize + &usize::def(); + | ^^^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `usize::def()` + +error: this operation has no effect + --> tests/ui/identity_op.rs:358:21 + | +LL | let _n: usize = 0usize + &Def::def(); + | ^^^^^^^^^^^^^^^^^^^^ help: consider reducing it to: `Def::def()` + +error: aborting due to 70 previous errors diff --git a/src/tools/clippy/tests/ui/infinite_iter.rs b/src/tools/clippy/tests/ui/infinite_iter.rs index 002a791a6579..701a86534ba0 100644 --- a/src/tools/clippy/tests/ui/infinite_iter.rs +++ b/src/tools/clippy/tests/ui/infinite_iter.rs @@ -38,7 +38,7 @@ fn infinite_iters() { //~^ infinite_iter // infinite iter - (0_u64..).filter(|x| x % 2 == 0).last(); + (0_u64..).filter(|x| x.is_multiple_of(2)).last(); //~^ infinite_iter // not an infinite, because ranges are double-ended diff --git a/src/tools/clippy/tests/ui/infinite_iter.stderr b/src/tools/clippy/tests/ui/infinite_iter.stderr index 47133a2ea62e..b9e7c008f93e 100644 --- a/src/tools/clippy/tests/ui/infinite_iter.stderr +++ b/src/tools/clippy/tests/ui/infinite_iter.stderr @@ -42,8 +42,8 @@ LL | (0_usize..).flat_map(|x| 0..x).product::(); error: infinite iteration detected --> tests/ui/infinite_iter.rs:41:5 | -LL | (0_u64..).filter(|x| x % 2 == 0).last(); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | (0_u64..).filter(|x| x.is_multiple_of(2)).last(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: possible infinite iteration detected --> tests/ui/infinite_iter.rs:53:5 diff --git a/src/tools/clippy/tests/ui/iter_kv_map.fixed b/src/tools/clippy/tests/ui/iter_kv_map.fixed index 874f749b33d0..b18dda358877 100644 --- a/src/tools/clippy/tests/ui/iter_kv_map.fixed +++ b/src/tools/clippy/tests/ui/iter_kv_map.fixed @@ -30,15 +30,19 @@ fn main() { let _ = map.clone().values().collect::>(); //~^ iter_kv_map - let _ = map.keys().filter(|x| *x % 2 == 0).count(); + let _ = map.keys().filter(|x| 
x.is_multiple_of(2)).count(); //~^ iter_kv_map // Don't lint - let _ = map.iter().filter(|(_, val)| *val % 2 == 0).map(|(key, _)| key).count(); + let _ = map + .iter() + .filter(|(_, val)| val.is_multiple_of(2)) + .map(|(key, _)| key) + .count(); let _ = map.iter().map(get_key).collect::>(); // Linting the following could be an improvement to the lint - // map.iter().filter_map(|(_, val)| (val % 2 == 0).then(val * 17)).count(); + // map.iter().filter_map(|(_, val)| (val.is_multiple_of(2)).then(val * 17)).count(); // Lint let _ = map.keys().map(|key| key * 9).count(); @@ -84,15 +88,19 @@ fn main() { let _ = map.clone().values().collect::>(); //~^ iter_kv_map - let _ = map.keys().filter(|x| *x % 2 == 0).count(); + let _ = map.keys().filter(|x| x.is_multiple_of(2)).count(); //~^ iter_kv_map // Don't lint - let _ = map.iter().filter(|(_, val)| *val % 2 == 0).map(|(key, _)| key).count(); + let _ = map + .iter() + .filter(|(_, val)| val.is_multiple_of(2)) + .map(|(key, _)| key) + .count(); let _ = map.iter().map(get_key).collect::>(); // Linting the following could be an improvement to the lint - // map.iter().filter_map(|(_, val)| (val % 2 == 0).then(val * 17)).count(); + // map.iter().filter_map(|(_, val)| (val.is_multiple_of(2)).then(val * 17)).count(); // Lint let _ = map.keys().map(|key| key * 9).count(); diff --git a/src/tools/clippy/tests/ui/iter_kv_map.rs b/src/tools/clippy/tests/ui/iter_kv_map.rs index f570e3c32cb6..729e4e8a266c 100644 --- a/src/tools/clippy/tests/ui/iter_kv_map.rs +++ b/src/tools/clippy/tests/ui/iter_kv_map.rs @@ -30,15 +30,19 @@ fn main() { let _ = map.clone().iter().map(|(_, val)| val).collect::>(); //~^ iter_kv_map - let _ = map.iter().map(|(key, _)| key).filter(|x| *x % 2 == 0).count(); + let _ = map.iter().map(|(key, _)| key).filter(|x| x.is_multiple_of(2)).count(); //~^ iter_kv_map // Don't lint - let _ = map.iter().filter(|(_, val)| *val % 2 == 0).map(|(key, _)| key).count(); + let _ = map + .iter() + .filter(|(_, val)| 
val.is_multiple_of(2)) + .map(|(key, _)| key) + .count(); let _ = map.iter().map(get_key).collect::>(); // Linting the following could be an improvement to the lint - // map.iter().filter_map(|(_, val)| (val % 2 == 0).then(val * 17)).count(); + // map.iter().filter_map(|(_, val)| (val.is_multiple_of(2)).then(val * 17)).count(); // Lint let _ = map.iter().map(|(key, _value)| key * 9).count(); @@ -86,15 +90,19 @@ fn main() { let _ = map.clone().iter().map(|(_, val)| val).collect::>(); //~^ iter_kv_map - let _ = map.iter().map(|(key, _)| key).filter(|x| *x % 2 == 0).count(); + let _ = map.iter().map(|(key, _)| key).filter(|x| x.is_multiple_of(2)).count(); //~^ iter_kv_map // Don't lint - let _ = map.iter().filter(|(_, val)| *val % 2 == 0).map(|(key, _)| key).count(); + let _ = map + .iter() + .filter(|(_, val)| val.is_multiple_of(2)) + .map(|(key, _)| key) + .count(); let _ = map.iter().map(get_key).collect::>(); // Linting the following could be an improvement to the lint - // map.iter().filter_map(|(_, val)| (val % 2 == 0).then(val * 17)).count(); + // map.iter().filter_map(|(_, val)| (val.is_multiple_of(2)).then(val * 17)).count(); // Lint let _ = map.iter().map(|(key, _value)| key * 9).count(); diff --git a/src/tools/clippy/tests/ui/iter_kv_map.stderr b/src/tools/clippy/tests/ui/iter_kv_map.stderr index 31ee76c25b7a..8f73541f5033 100644 --- a/src/tools/clippy/tests/ui/iter_kv_map.stderr +++ b/src/tools/clippy/tests/ui/iter_kv_map.stderr @@ -52,29 +52,29 @@ LL | let _ = map.clone().iter().map(|(_, val)| val).collect::>(); error: iterating on a map's keys --> tests/ui/iter_kv_map.rs:33:13 | -LL | let _ = map.iter().map(|(key, _)| key).filter(|x| *x % 2 == 0).count(); +LL | let _ = map.iter().map(|(key, _)| key).filter(|x| x.is_multiple_of(2)).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:44:13 + --> tests/ui/iter_kv_map.rs:48:13 | LL | let _ = map.iter().map(|(key, _value)| key * 
9).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys().map(|key| key * 9)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:46:13 + --> tests/ui/iter_kv_map.rs:50:13 | LL | let _ = map.iter().map(|(_key, value)| value * 17).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values().map(|value| value * 17)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:50:13 + --> tests/ui/iter_kv_map.rs:54:13 | LL | let _ = map.clone().into_iter().map(|(_, ref val)| ref_acceptor(val)).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values().map(|ref val| ref_acceptor(val))` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:54:13 + --> tests/ui/iter_kv_map.rs:58:13 | LL | let _ = map | _____________^ @@ -97,85 +97,85 @@ LL + }) | error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:65:13 + --> tests/ui/iter_kv_map.rs:69:13 | LL | let _ = map.clone().into_iter().map(|(_, mut val)| val).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:70:13 + --> tests/ui/iter_kv_map.rs:74:13 | LL | let _ = map.iter().map(|(key, _)| key).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:72:13 + --> tests/ui/iter_kv_map.rs:76:13 | LL | let _ = map.iter().map(|(_, value)| value).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:74:13 + --> tests/ui/iter_kv_map.rs:78:13 | LL | let _ = map.iter().map(|(_, v)| v + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values().map(|v| v + 2)` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:77:13 + --> tests/ui/iter_kv_map.rs:81:13 | LL | let _ = map.clone().into_iter().map(|(key, _)| 
key).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_keys()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:79:13 + --> tests/ui/iter_kv_map.rs:83:13 | LL | let _ = map.clone().into_iter().map(|(key, _)| key + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_keys().map(|key| key + 2)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:82:13 + --> tests/ui/iter_kv_map.rs:86:13 | LL | let _ = map.clone().into_iter().map(|(_, val)| val).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:84:13 + --> tests/ui/iter_kv_map.rs:88:13 | LL | let _ = map.clone().into_iter().map(|(_, val)| val + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values().map(|val| val + 2)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:87:13 + --> tests/ui/iter_kv_map.rs:91:13 | LL | let _ = map.clone().iter().map(|(_, val)| val).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().values()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:89:13 + --> tests/ui/iter_kv_map.rs:93:13 | -LL | let _ = map.iter().map(|(key, _)| key).filter(|x| *x % 2 == 0).count(); +LL | let _ = map.iter().map(|(key, _)| key).filter(|x| x.is_multiple_of(2)).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:100:13 + --> tests/ui/iter_kv_map.rs:108:13 | LL | let _ = map.iter().map(|(key, _value)| key * 9).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys().map(|key| key * 9)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:102:13 + --> tests/ui/iter_kv_map.rs:110:13 | LL | let _ = map.iter().map(|(_key, value)| value * 17).count(); | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values().map(|value| value * 17)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:106:13 + --> tests/ui/iter_kv_map.rs:114:13 | LL | let _ = map.clone().into_iter().map(|(_, ref val)| ref_acceptor(val)).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values().map(|ref val| ref_acceptor(val))` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:110:13 + --> tests/ui/iter_kv_map.rs:118:13 | LL | let _ = map | _____________^ @@ -198,73 +198,73 @@ LL + }) | error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:121:13 + --> tests/ui/iter_kv_map.rs:129:13 | LL | let _ = map.clone().into_iter().map(|(_, mut val)| val).count(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:137:13 + --> tests/ui/iter_kv_map.rs:145:13 | LL | let _ = map.iter().map(|(key, _)| key).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:140:13 + --> tests/ui/iter_kv_map.rs:148:13 | LL | let _ = map.iter().map(|(_, value)| value).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:143:13 + --> tests/ui/iter_kv_map.rs:151:13 | LL | let _ = map.iter().map(|(_, v)| v + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values().map(|v| v + 2)` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:152:13 + --> tests/ui/iter_kv_map.rs:160:13 | LL | let _ = map.clone().into_iter().map(|(key, _)| key).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_keys()` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:155:13 + --> tests/ui/iter_kv_map.rs:163:13 | LL | let _ = map.clone().into_iter().map(|(key, _)| 
key + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_keys().map(|key| key + 2)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:158:13 + --> tests/ui/iter_kv_map.rs:166:13 | LL | let _ = map.clone().into_iter().map(|(_, val)| val).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:161:13 + --> tests/ui/iter_kv_map.rs:169:13 | LL | let _ = map.clone().into_iter().map(|(_, val)| val + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.clone().into_values().map(|val| val + 2)` error: iterating on a map's keys - --> tests/ui/iter_kv_map.rs:164:13 + --> tests/ui/iter_kv_map.rs:172:13 | LL | let _ = map.iter().map(|(key, _)| key).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.keys()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:167:13 + --> tests/ui/iter_kv_map.rs:175:13 | LL | let _ = map.iter().map(|(_, value)| value).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values()` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:170:13 + --> tests/ui/iter_kv_map.rs:178:13 | LL | let _ = map.iter().map(|(_, v)| v + 2).collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.values().map(|v| v + 2)` error: iterating on a map's values - --> tests/ui/iter_kv_map.rs:185:13 + --> tests/ui/iter_kv_map.rs:193:13 | LL | let _ = map.as_ref().iter().map(|(_, v)| v).copied().collect::>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `map.as_ref().values()` diff --git a/src/tools/clippy/tests/ui/let_unit.fixed b/src/tools/clippy/tests/ui/let_unit.fixed index 5e7a2ad37a84..304eacecd942 100644 --- a/src/tools/clippy/tests/ui/let_unit.fixed +++ b/src/tools/clippy/tests/ui/let_unit.fixed @@ -61,7 +61,7 @@ fn multiline_sugg() { //~^ let_unit_value .into_iter() .map(|i| i * 2) - .filter(|i| i % 2 == 0) + 
.filter(|i| i.is_multiple_of(2)) .map(|_| ()) .next() .unwrap(); diff --git a/src/tools/clippy/tests/ui/let_unit.rs b/src/tools/clippy/tests/ui/let_unit.rs index 7b06f6940121..a02cb346ff99 100644 --- a/src/tools/clippy/tests/ui/let_unit.rs +++ b/src/tools/clippy/tests/ui/let_unit.rs @@ -61,7 +61,7 @@ fn multiline_sugg() { //~^ let_unit_value .into_iter() .map(|i| i * 2) - .filter(|i| i % 2 == 0) + .filter(|i| i.is_multiple_of(2)) .map(|_| ()) .next() .unwrap(); diff --git a/src/tools/clippy/tests/ui/let_unit.stderr b/src/tools/clippy/tests/ui/let_unit.stderr index d7d01d304cad..d743110c99dd 100644 --- a/src/tools/clippy/tests/ui/let_unit.stderr +++ b/src/tools/clippy/tests/ui/let_unit.stderr @@ -25,7 +25,7 @@ LL ~ v LL + LL + .into_iter() LL + .map(|i| i * 2) -LL + .filter(|i| i % 2 == 0) +LL + .filter(|i| i.is_multiple_of(2)) LL + .map(|_| ()) LL + .next() LL + .unwrap(); diff --git a/src/tools/clippy/tests/ui/manual_contains.fixed b/src/tools/clippy/tests/ui/manual_contains.fixed index d26c948a7817..18171f0b2b40 100644 --- a/src/tools/clippy/tests/ui/manual_contains.fixed +++ b/src/tools/clippy/tests/ui/manual_contains.fixed @@ -58,7 +58,7 @@ fn should_not_lint() { let vec: Vec = vec![1, 2, 3, 4, 5, 6]; let values = &vec[..]; - let _ = values.iter().any(|&v| v % 2 == 0); + let _ = values.iter().any(|&v| v.is_multiple_of(2)); let _ = values.iter().any(|&v| v * 2 == 6); let _ = values.iter().any(|&v| v == v); let _ = values.iter().any(|&v| 4 == 4); diff --git a/src/tools/clippy/tests/ui/manual_contains.rs b/src/tools/clippy/tests/ui/manual_contains.rs index fe67d2ee5d5c..918f4d6b8dd7 100644 --- a/src/tools/clippy/tests/ui/manual_contains.rs +++ b/src/tools/clippy/tests/ui/manual_contains.rs @@ -58,7 +58,7 @@ fn should_not_lint() { let vec: Vec = vec![1, 2, 3, 4, 5, 6]; let values = &vec[..]; - let _ = values.iter().any(|&v| v % 2 == 0); + let _ = values.iter().any(|&v| v.is_multiple_of(2)); let _ = values.iter().any(|&v| v * 2 == 6); let _ = values.iter().any(|&v| 
v == v); let _ = values.iter().any(|&v| 4 == 4); diff --git a/src/tools/clippy/tests/ui/manual_find_fixable.fixed b/src/tools/clippy/tests/ui/manual_find_fixable.fixed index 01b3ebacbebc..c69b0cb11e3c 100644 --- a/src/tools/clippy/tests/ui/manual_find_fixable.fixed +++ b/src/tools/clippy/tests/ui/manual_find_fixable.fixed @@ -11,7 +11,7 @@ fn lookup(n: u32) -> Option { } fn with_pat(arr: Vec<(u32, u32)>) -> Option { - arr.into_iter().map(|(a, _)| a).find(|&a| a % 2 == 0) + arr.into_iter().map(|(a, _)| a).find(|&a| a.is_multiple_of(2)) } struct Data { @@ -63,7 +63,7 @@ fn with_side_effects(arr: Vec) -> Option { fn with_else(arr: Vec) -> Option { for el in arr { - if el % 2 == 0 { + if el.is_multiple_of(2) { return Some(el); } else { println!("{}", el); diff --git a/src/tools/clippy/tests/ui/manual_find_fixable.rs b/src/tools/clippy/tests/ui/manual_find_fixable.rs index ce62a4beba1c..db7092f020c1 100644 --- a/src/tools/clippy/tests/ui/manual_find_fixable.rs +++ b/src/tools/clippy/tests/ui/manual_find_fixable.rs @@ -19,7 +19,7 @@ fn lookup(n: u32) -> Option { fn with_pat(arr: Vec<(u32, u32)>) -> Option { for (a, _) in arr { //~^ manual_find - if a % 2 == 0 { + if a.is_multiple_of(2) { return Some(a); } } @@ -111,7 +111,7 @@ fn with_side_effects(arr: Vec) -> Option { fn with_else(arr: Vec) -> Option { for el in arr { - if el % 2 == 0 { + if el.is_multiple_of(2) { return Some(el); } else { println!("{}", el); diff --git a/src/tools/clippy/tests/ui/manual_find_fixable.stderr b/src/tools/clippy/tests/ui/manual_find_fixable.stderr index 020635d90bb5..0c05c0d2c440 100644 --- a/src/tools/clippy/tests/ui/manual_find_fixable.stderr +++ b/src/tools/clippy/tests/ui/manual_find_fixable.stderr @@ -17,11 +17,11 @@ error: manual implementation of `Iterator::find` | LL | / for (a, _) in arr { LL | | -LL | | if a % 2 == 0 { +LL | | if a.is_multiple_of(2) { LL | | return Some(a); ... 
| LL | | None - | |________^ help: replace with an iterator: `arr.into_iter().map(|(a, _)| a).find(|&a| a % 2 == 0)` + | |________^ help: replace with an iterator: `arr.into_iter().map(|(a, _)| a).find(|&a| a.is_multiple_of(2))` error: manual implementation of `Iterator::find` --> tests/ui/manual_find_fixable.rs:34:5 diff --git a/src/tools/clippy/tests/ui/manual_is_multiple_of.fixed b/src/tools/clippy/tests/ui/manual_is_multiple_of.fixed new file mode 100644 index 000000000000..6735b99f298c --- /dev/null +++ b/src/tools/clippy/tests/ui/manual_is_multiple_of.fixed @@ -0,0 +1,25 @@ +//@aux-build: proc_macros.rs +#![warn(clippy::manual_is_multiple_of)] + +fn main() {} + +#[clippy::msrv = "1.87"] +fn f(a: u64, b: u64) { + let _ = a.is_multiple_of(b); //~ manual_is_multiple_of + let _ = (a + 1).is_multiple_of(b + 1); //~ manual_is_multiple_of + let _ = !a.is_multiple_of(b); //~ manual_is_multiple_of + let _ = !(a + 1).is_multiple_of(b + 1); //~ manual_is_multiple_of + + let _ = !a.is_multiple_of(b); //~ manual_is_multiple_of + let _ = !a.is_multiple_of(b); //~ manual_is_multiple_of + + proc_macros::external! { + let a: u64 = 23424; + let _ = a % 4096 == 0; + } +} + +#[clippy::msrv = "1.86"] +fn g(a: u64, b: u64) { + let _ = a % b == 0; +} diff --git a/src/tools/clippy/tests/ui/manual_is_multiple_of.rs b/src/tools/clippy/tests/ui/manual_is_multiple_of.rs new file mode 100644 index 000000000000..00b638e4fd9f --- /dev/null +++ b/src/tools/clippy/tests/ui/manual_is_multiple_of.rs @@ -0,0 +1,25 @@ +//@aux-build: proc_macros.rs +#![warn(clippy::manual_is_multiple_of)] + +fn main() {} + +#[clippy::msrv = "1.87"] +fn f(a: u64, b: u64) { + let _ = a % b == 0; //~ manual_is_multiple_of + let _ = (a + 1) % (b + 1) == 0; //~ manual_is_multiple_of + let _ = a % b != 0; //~ manual_is_multiple_of + let _ = (a + 1) % (b + 1) != 0; //~ manual_is_multiple_of + + let _ = a % b > 0; //~ manual_is_multiple_of + let _ = 0 < a % b; //~ manual_is_multiple_of + + proc_macros::external! 
{ + let a: u64 = 23424; + let _ = a % 4096 == 0; + } +} + +#[clippy::msrv = "1.86"] +fn g(a: u64, b: u64) { + let _ = a % b == 0; +} diff --git a/src/tools/clippy/tests/ui/manual_is_multiple_of.stderr b/src/tools/clippy/tests/ui/manual_is_multiple_of.stderr new file mode 100644 index 000000000000..0b1ae70c2a70 --- /dev/null +++ b/src/tools/clippy/tests/ui/manual_is_multiple_of.stderr @@ -0,0 +1,41 @@ +error: manual implementation of `.is_multiple_of()` + --> tests/ui/manual_is_multiple_of.rs:8:13 + | +LL | let _ = a % b == 0; + | ^^^^^^^^^^ help: replace with: `a.is_multiple_of(b)` + | + = note: `-D clippy::manual-is-multiple-of` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::manual_is_multiple_of)]` + +error: manual implementation of `.is_multiple_of()` + --> tests/ui/manual_is_multiple_of.rs:9:13 + | +LL | let _ = (a + 1) % (b + 1) == 0; + | ^^^^^^^^^^^^^^^^^^^^^^ help: replace with: `(a + 1).is_multiple_of(b + 1)` + +error: manual implementation of `.is_multiple_of()` + --> tests/ui/manual_is_multiple_of.rs:10:13 + | +LL | let _ = a % b != 0; + | ^^^^^^^^^^ help: replace with: `!a.is_multiple_of(b)` + +error: manual implementation of `.is_multiple_of()` + --> tests/ui/manual_is_multiple_of.rs:11:13 + | +LL | let _ = (a + 1) % (b + 1) != 0; + | ^^^^^^^^^^^^^^^^^^^^^^ help: replace with: `!(a + 1).is_multiple_of(b + 1)` + +error: manual implementation of `.is_multiple_of()` + --> tests/ui/manual_is_multiple_of.rs:13:13 + | +LL | let _ = a % b > 0; + | ^^^^^^^^^ help: replace with: `!a.is_multiple_of(b)` + +error: manual implementation of `.is_multiple_of()` + --> tests/ui/manual_is_multiple_of.rs:14:13 + | +LL | let _ = 0 < a % b; + | ^^^^^^^^^ help: replace with: `!a.is_multiple_of(b)` + +error: aborting due to 6 previous errors + diff --git a/src/tools/clippy/tests/ui/manual_is_variant_and.fixed b/src/tools/clippy/tests/ui/manual_is_variant_and.fixed index 18a72188ab59..6425f32c09c4 100644 --- 
a/src/tools/clippy/tests/ui/manual_is_variant_and.fixed +++ b/src/tools/clippy/tests/ui/manual_is_variant_and.fixed @@ -77,7 +77,7 @@ fn option_methods() { let _ = opt_map!(opt2, |x| x == 'a').unwrap_or_default(); // should not lint // Should not lint. - let _ = Foo::(0).map(|x| x % 2 == 0) == Some(true); + let _ = Foo::(0).map(|x| x.is_multiple_of(2)) == Some(true); let _ = Some(2).map(|x| x % 2 == 0) != foo(); let _ = mac!(eq Some(2).map(|x| x % 2 == 0), Some(true)); let _ = mac!(some 2).map(|x| x % 2 == 0) == Some(true); @@ -96,11 +96,11 @@ fn result_methods() { }); let _ = res.is_ok_and(|x| x > 1); - let _ = Ok::(2).is_ok_and(|x| x % 2 == 0); + let _ = Ok::(2).is_ok_and(|x| x.is_multiple_of(2)); //~^ manual_is_variant_and - let _ = !Ok::(2).is_ok_and(|x| x % 2 == 0); + let _ = !Ok::(2).is_ok_and(|x| x.is_multiple_of(2)); //~^ manual_is_variant_and - let _ = !Ok::(2).is_ok_and(|x| x % 2 == 0); + let _ = !Ok::(2).is_ok_and(|x| x.is_multiple_of(2)); //~^ manual_is_variant_and // won't fix because the return type of the closure is not `bool` diff --git a/src/tools/clippy/tests/ui/manual_is_variant_and.rs b/src/tools/clippy/tests/ui/manual_is_variant_and.rs index a92f7c043695..e069e97a04dd 100644 --- a/src/tools/clippy/tests/ui/manual_is_variant_and.rs +++ b/src/tools/clippy/tests/ui/manual_is_variant_and.rs @@ -83,7 +83,7 @@ fn option_methods() { let _ = opt_map!(opt2, |x| x == 'a').unwrap_or_default(); // should not lint // Should not lint. 
- let _ = Foo::(0).map(|x| x % 2 == 0) == Some(true); + let _ = Foo::(0).map(|x| x.is_multiple_of(2)) == Some(true); let _ = Some(2).map(|x| x % 2 == 0) != foo(); let _ = mac!(eq Some(2).map(|x| x % 2 == 0), Some(true)); let _ = mac!(some 2).map(|x| x % 2 == 0) == Some(true); @@ -105,11 +105,11 @@ fn result_methods() { //~^ manual_is_variant_and .unwrap_or_default(); - let _ = Ok::(2).map(|x| x % 2 == 0) == Ok(true); + let _ = Ok::(2).map(|x| x.is_multiple_of(2)) == Ok(true); //~^ manual_is_variant_and - let _ = Ok::(2).map(|x| x % 2 == 0) != Ok(true); + let _ = Ok::(2).map(|x| x.is_multiple_of(2)) != Ok(true); //~^ manual_is_variant_and - let _ = Ok::(2).map(|x| x % 2 == 0) != Ok(true); + let _ = Ok::(2).map(|x| x.is_multiple_of(2)) != Ok(true); //~^ manual_is_variant_and // won't fix because the return type of the closure is not `bool` diff --git a/src/tools/clippy/tests/ui/manual_is_variant_and.stderr b/src/tools/clippy/tests/ui/manual_is_variant_and.stderr index 1fb437a8bc74..f770319a2681 100644 --- a/src/tools/clippy/tests/ui/manual_is_variant_and.stderr +++ b/src/tools/clippy/tests/ui/manual_is_variant_and.stderr @@ -105,20 +105,20 @@ LL | | .unwrap_or_default(); error: called `.map() == Ok()` --> tests/ui/manual_is_variant_and.rs:108:13 | -LL | let _ = Ok::(2).map(|x| x % 2 == 0) == Ok(true); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use: `Ok::(2).is_ok_and(|x| x % 2 == 0)` +LL | let _ = Ok::(2).map(|x| x.is_multiple_of(2)) == Ok(true); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use: `Ok::(2).is_ok_and(|x| x.is_multiple_of(2))` error: called `.map() != Ok()` --> tests/ui/manual_is_variant_and.rs:110:13 | -LL | let _ = Ok::(2).map(|x| x % 2 == 0) != Ok(true); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use: `!Ok::(2).is_ok_and(|x| x % 2 == 0)` +LL | let _ = Ok::(2).map(|x| x.is_multiple_of(2)) != Ok(true); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use: 
`!Ok::(2).is_ok_and(|x| x.is_multiple_of(2))` error: called `.map() != Ok()` --> tests/ui/manual_is_variant_and.rs:112:13 | -LL | let _ = Ok::(2).map(|x| x % 2 == 0) != Ok(true); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use: `!Ok::(2).is_ok_and(|x| x % 2 == 0)` +LL | let _ = Ok::(2).map(|x| x.is_multiple_of(2)) != Ok(true); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use: `!Ok::(2).is_ok_and(|x| x.is_multiple_of(2))` error: called `map().unwrap_or_default()` on a `Result` value --> tests/ui/manual_is_variant_and.rs:119:18 diff --git a/src/tools/clippy/tests/ui/manual_ok_err.fixed b/src/tools/clippy/tests/ui/manual_ok_err.fixed index e6f799aa58d6..9b70ce0df43a 100644 --- a/src/tools/clippy/tests/ui/manual_ok_err.fixed +++ b/src/tools/clippy/tests/ui/manual_ok_err.fixed @@ -103,3 +103,27 @@ fn issue14239() { }; //~^^^^^ manual_ok_err } + +mod issue15051 { + struct Container { + field: Result, + } + + #[allow(clippy::needless_borrow)] + fn with_addr_of(x: &Container) -> Option<&bool> { + (&x.field).as_ref().ok() + } + + fn from_fn(x: &Container) -> Option<&bool> { + let result_with_ref = || &x.field; + result_with_ref().as_ref().ok() + } + + fn result_with_ref_mut(x: &mut Container) -> &mut Result { + &mut x.field + } + + fn from_fn_mut(x: &mut Container) -> Option<&mut bool> { + result_with_ref_mut(x).as_mut().ok() + } +} diff --git a/src/tools/clippy/tests/ui/manual_ok_err.rs b/src/tools/clippy/tests/ui/manual_ok_err.rs index 972b2c41ee7a..dee904638245 100644 --- a/src/tools/clippy/tests/ui/manual_ok_err.rs +++ b/src/tools/clippy/tests/ui/manual_ok_err.rs @@ -141,3 +141,39 @@ fn issue14239() { }; //~^^^^^ manual_ok_err } + +mod issue15051 { + struct Container { + field: Result, + } + + #[allow(clippy::needless_borrow)] + fn with_addr_of(x: &Container) -> Option<&bool> { + match &x.field { + //~^ manual_ok_err + Ok(panel) => Some(panel), + Err(_) => None, + } + } + + fn from_fn(x: &Container) -> Option<&bool> { + let 
result_with_ref = || &x.field; + match result_with_ref() { + //~^ manual_ok_err + Ok(panel) => Some(panel), + Err(_) => None, + } + } + + fn result_with_ref_mut(x: &mut Container) -> &mut Result { + &mut x.field + } + + fn from_fn_mut(x: &mut Container) -> Option<&mut bool> { + match result_with_ref_mut(x) { + //~^ manual_ok_err + Ok(panel) => Some(panel), + Err(_) => None, + } + } +} diff --git a/src/tools/clippy/tests/ui/manual_ok_err.stderr b/src/tools/clippy/tests/ui/manual_ok_err.stderr index 040e170f397e..448fbffc0509 100644 --- a/src/tools/clippy/tests/ui/manual_ok_err.stderr +++ b/src/tools/clippy/tests/ui/manual_ok_err.stderr @@ -111,5 +111,35 @@ LL + "1".parse::().ok() LL ~ }; | -error: aborting due to 9 previous errors +error: manual implementation of `ok` + --> tests/ui/manual_ok_err.rs:152:9 + | +LL | / match &x.field { +LL | | +LL | | Ok(panel) => Some(panel), +LL | | Err(_) => None, +LL | | } + | |_________^ help: replace with: `(&x.field).as_ref().ok()` + +error: manual implementation of `ok` + --> tests/ui/manual_ok_err.rs:161:9 + | +LL | / match result_with_ref() { +LL | | +LL | | Ok(panel) => Some(panel), +LL | | Err(_) => None, +LL | | } + | |_________^ help: replace with: `result_with_ref().as_ref().ok()` + +error: manual implementation of `ok` + --> tests/ui/manual_ok_err.rs:173:9 + | +LL | / match result_with_ref_mut(x) { +LL | | +LL | | Ok(panel) => Some(panel), +LL | | Err(_) => None, +LL | | } + | |_________^ help: replace with: `result_with_ref_mut(x).as_mut().ok()` + +error: aborting due to 12 previous errors diff --git a/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.fixed b/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.fixed index 7e0d4fccaae2..f1d5579a7230 100644 --- a/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.fixed +++ b/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.fixed @@ -25,7 +25,7 @@ const fn can_be_const() { 0u64.method(); } -// False negative, see FIXME comment in 
`clipy_utils::qualify_min_const` +// False negative, see FIXME comment in `clippy_utils::qualify_min_const_fn` fn could_be_const_but_does_not_trigger(t: T) where T: const ConstTrait, diff --git a/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.rs b/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.rs index 439da4622d7e..d495759526d3 100644 --- a/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.rs +++ b/src/tools/clippy/tests/ui/missing_const_for_fn/const_trait.rs @@ -25,7 +25,7 @@ fn can_be_const() { 0u64.method(); } -// False negative, see FIXME comment in `clipy_utils::qualify_min_const` +// False negative, see FIXME comment in `clippy_utils::qualify_min_const_fn` fn could_be_const_but_does_not_trigger(t: T) where T: const ConstTrait, diff --git a/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.fixed b/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.fixed index 65eb2d5938b6..95bf63ed1df6 100644 --- a/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.fixed +++ b/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.fixed @@ -221,3 +221,60 @@ const fn mut_add(x: &mut i32) { //~^ missing_const_for_fn *x += 1; } + +mod issue_15079 { + pub trait Trait {} + + pub struct Struct { + _t: Option, + } + + impl Struct { + #[clippy::msrv = "1.60"] + pub fn new_1_60() -> Self { + Self { _t: None } + } + + #[clippy::msrv = "1.61"] + pub const fn new_1_61() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + } + + pub struct S2 { + _t: Option, + } + + impl S2 { + #[clippy::msrv = "1.60"] + pub const fn new_1_60() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + + #[clippy::msrv = "1.61"] + pub const fn new_1_61() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + } + + pub struct S3 { + _t: Option<&'static T>, + } + + impl S3 { + #[clippy::msrv = "1.60"] + pub const fn new_1_60() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + + #[clippy::msrv = 
"1.61"] + pub const fn new_1_61() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + } +} diff --git a/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.rs b/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.rs index 3690d2f799ff..8290be675462 100644 --- a/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.rs +++ b/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.rs @@ -221,3 +221,60 @@ fn mut_add(x: &mut i32) { //~^ missing_const_for_fn *x += 1; } + +mod issue_15079 { + pub trait Trait {} + + pub struct Struct { + _t: Option, + } + + impl Struct { + #[clippy::msrv = "1.60"] + pub fn new_1_60() -> Self { + Self { _t: None } + } + + #[clippy::msrv = "1.61"] + pub fn new_1_61() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + } + + pub struct S2 { + _t: Option, + } + + impl S2 { + #[clippy::msrv = "1.60"] + pub fn new_1_60() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + + #[clippy::msrv = "1.61"] + pub fn new_1_61() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + } + + pub struct S3 { + _t: Option<&'static T>, + } + + impl S3 { + #[clippy::msrv = "1.60"] + pub fn new_1_60() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + + #[clippy::msrv = "1.61"] + pub fn new_1_61() -> Self { + //~^ missing_const_for_fn + Self { _t: None } + } + } +} diff --git a/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.stderr b/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.stderr index 10e07d12f5a4..17cbc4312766 100644 --- a/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.stderr +++ b/src/tools/clippy/tests/ui/missing_const_for_fn/could_be_const.stderr @@ -332,5 +332,75 @@ help: make the function `const` LL | const fn mut_add(x: &mut i32) { | +++++ -error: aborting due to 25 previous errors +error: this could be a `const fn` + --> tests/ui/missing_const_for_fn/could_be_const.rs:239:9 + | +LL | / pub fn new_1_61() -> Self { 
+LL | | +LL | | Self { _t: None } +LL | | } + | |_________^ + | +help: make the function `const` + | +LL | pub const fn new_1_61() -> Self { + | +++++ + +error: this could be a `const fn` + --> tests/ui/missing_const_for_fn/could_be_const.rs:251:9 + | +LL | / pub fn new_1_60() -> Self { +LL | | +LL | | Self { _t: None } +LL | | } + | |_________^ + | +help: make the function `const` + | +LL | pub const fn new_1_60() -> Self { + | +++++ + +error: this could be a `const fn` + --> tests/ui/missing_const_for_fn/could_be_const.rs:257:9 + | +LL | / pub fn new_1_61() -> Self { +LL | | +LL | | Self { _t: None } +LL | | } + | |_________^ + | +help: make the function `const` + | +LL | pub const fn new_1_61() -> Self { + | +++++ + +error: this could be a `const fn` + --> tests/ui/missing_const_for_fn/could_be_const.rs:269:9 + | +LL | / pub fn new_1_60() -> Self { +LL | | +LL | | Self { _t: None } +LL | | } + | |_________^ + | +help: make the function `const` + | +LL | pub const fn new_1_60() -> Self { + | +++++ + +error: this could be a `const fn` + --> tests/ui/missing_const_for_fn/could_be_const.rs:275:9 + | +LL | / pub fn new_1_61() -> Self { +LL | | +LL | | Self { _t: None } +LL | | } + | |_________^ + | +help: make the function `const` + | +LL | pub const fn new_1_61() -> Self { + | +++++ + +error: aborting due to 30 previous errors diff --git a/src/tools/clippy/tests/ui/nonminimal_bool.stderr b/src/tools/clippy/tests/ui/nonminimal_bool.stderr index 0e3e4cf7988e..ecb82a23da03 100644 --- a/src/tools/clippy/tests/ui/nonminimal_bool.stderr +++ b/src/tools/clippy/tests/ui/nonminimal_bool.stderr @@ -179,7 +179,7 @@ error: inequality checks against true can be replaced by a negation --> tests/ui/nonminimal_bool.rs:186:8 | LL | if !b != true {} - | ^^^^^^^^^^ help: try simplifying it as shown: `!(!b)` + | ^^^^^^^^^^ help: try simplifying it as shown: `!!b` error: this boolean expression can be simplified --> tests/ui/nonminimal_bool.rs:189:8 @@ -209,7 +209,7 @@ error: inequality 
checks against true can be replaced by a negation --> tests/ui/nonminimal_bool.rs:193:8 | LL | if true != !b {} - | ^^^^^^^^^^ help: try simplifying it as shown: `!(!b)` + | ^^^^^^^^^^ help: try simplifying it as shown: `!!b` error: this boolean expression can be simplified --> tests/ui/nonminimal_bool.rs:196:8 diff --git a/src/tools/clippy/tests/ui/or_fun_call.fixed b/src/tools/clippy/tests/ui/or_fun_call.fixed index a1119d75c231..34f3e0468419 100644 --- a/src/tools/clippy/tests/ui/or_fun_call.fixed +++ b/src/tools/clippy/tests/ui/or_fun_call.fixed @@ -5,6 +5,7 @@ clippy::uninlined_format_args, clippy::unnecessary_wraps, clippy::unnecessary_literal_unwrap, + clippy::unnecessary_result_map_or_else, clippy::useless_vec )] @@ -409,4 +410,33 @@ fn fn_call_in_nested_expr() { //~^ or_fun_call } +mod result_map_or { + fn g() -> i32 { + 3 + } + + fn f(n: i32) -> i32 { + n + } + + fn test_map_or() { + let x: Result = Ok(4); + let _ = x.map_or_else(|_| g(), |v| v); + //~^ or_fun_call + let _ = x.map_or_else(|_| g(), f); + //~^ or_fun_call + let _ = x.map_or(0, f); + } +} + +fn test_option_get_or_insert() { + // assume that this is slow call + fn g() -> u8 { + 99 + } + let mut x = Some(42_u8); + let _ = x.get_or_insert_with(g); + //~^ or_fun_call +} + fn main() {} diff --git a/src/tools/clippy/tests/ui/or_fun_call.rs b/src/tools/clippy/tests/ui/or_fun_call.rs index a7cd632bf166..dc57bd6060ac 100644 --- a/src/tools/clippy/tests/ui/or_fun_call.rs +++ b/src/tools/clippy/tests/ui/or_fun_call.rs @@ -5,6 +5,7 @@ clippy::uninlined_format_args, clippy::unnecessary_wraps, clippy::unnecessary_literal_unwrap, + clippy::unnecessary_result_map_or_else, clippy::useless_vec )] @@ -409,4 +410,33 @@ fn fn_call_in_nested_expr() { //~^ or_fun_call } +mod result_map_or { + fn g() -> i32 { + 3 + } + + fn f(n: i32) -> i32 { + n + } + + fn test_map_or() { + let x: Result = Ok(4); + let _ = x.map_or(g(), |v| v); + //~^ or_fun_call + let _ = x.map_or(g(), f); + //~^ or_fun_call + let _ = x.map_or(0, 
f); + } +} + +fn test_option_get_or_insert() { + // assume that this is slow call + fn g() -> u8 { + 99 + } + let mut x = Some(42_u8); + let _ = x.get_or_insert(g()); + //~^ or_fun_call +} + fn main() {} diff --git a/src/tools/clippy/tests/ui/or_fun_call.stderr b/src/tools/clippy/tests/ui/or_fun_call.stderr index 35bda7e4d331..0f159fe8bff4 100644 --- a/src/tools/clippy/tests/ui/or_fun_call.stderr +++ b/src/tools/clippy/tests/ui/or_fun_call.stderr @@ -1,5 +1,5 @@ error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:52:22 + --> tests/ui/or_fun_call.rs:53:22 | LL | with_constructor.unwrap_or(make()); | ^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(make)` @@ -8,7 +8,7 @@ LL | with_constructor.unwrap_or(make()); = help: to override `-D warnings` add `#[allow(clippy::or_fun_call)]` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:56:14 + --> tests/ui/or_fun_call.rs:57:14 | LL | with_new.unwrap_or(Vec::new()); | ^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` @@ -17,199 +17,199 @@ LL | with_new.unwrap_or(Vec::new()); = help: to override `-D warnings` add `#[allow(clippy::unwrap_or_default)]` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:60:21 + --> tests/ui/or_fun_call.rs:61:21 | LL | with_const_args.unwrap_or(Vec::with_capacity(12)); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| Vec::with_capacity(12))` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:64:14 + --> tests/ui/or_fun_call.rs:65:14 | LL | with_err.unwrap_or(make()); | ^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|_| make())` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:68:19 + --> tests/ui/or_fun_call.rs:69:19 | LL | with_err_args.unwrap_or(Vec::with_capacity(12)); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|_| Vec::with_capacity(12))` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:72:24 + --> 
tests/ui/or_fun_call.rs:73:24 | LL | with_default_trait.unwrap_or(Default::default()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:76:23 + --> tests/ui/or_fun_call.rs:77:23 | LL | with_default_type.unwrap_or(u64::default()); | ^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:80:18 + --> tests/ui/or_fun_call.rs:81:18 | LL | self_default.unwrap_or(::default()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(::default)` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:84:18 + --> tests/ui/or_fun_call.rs:85:18 | LL | real_default.unwrap_or(::default()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:88:14 + --> tests/ui/or_fun_call.rs:89:14 | LL | with_vec.unwrap_or(vec![]); | ^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:92:21 + --> tests/ui/or_fun_call.rs:93:21 | LL | without_default.unwrap_or(Foo::new()); | ^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(Foo::new)` error: use of `or_insert` to construct default value - --> tests/ui/or_fun_call.rs:96:19 + --> tests/ui/or_fun_call.rs:97:19 | LL | map.entry(42).or_insert(String::new()); | ^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `or_default()` error: use of `or_insert` to construct default value - --> tests/ui/or_fun_call.rs:100:23 + --> tests/ui/or_fun_call.rs:101:23 | LL | map_vec.entry(42).or_insert(vec![]); | ^^^^^^^^^^^^^^^^^ help: try: `or_default()` error: use of `or_insert` to construct default value - --> tests/ui/or_fun_call.rs:104:21 + --> tests/ui/or_fun_call.rs:105:21 | LL | btree.entry(42).or_insert(String::new()); | ^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `or_default()` error: use of 
`or_insert` to construct default value - --> tests/ui/or_fun_call.rs:108:25 + --> tests/ui/or_fun_call.rs:109:25 | LL | btree_vec.entry(42).or_insert(vec![]); | ^^^^^^^^^^^^^^^^^ help: try: `or_default()` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:112:21 + --> tests/ui/or_fun_call.rs:113:21 | LL | let _ = stringy.unwrap_or(String::new()); | ^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: function call inside of `ok_or` - --> tests/ui/or_fun_call.rs:117:17 + --> tests/ui/or_fun_call.rs:118:17 | LL | let _ = opt.ok_or(format!("{} world.", hello)); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `ok_or_else(|| format!("{} world.", hello))` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:122:21 + --> tests/ui/or_fun_call.rs:123:21 | LL | let _ = Some(1).unwrap_or(map[&1]); | ^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| map[&1])` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:125:21 + --> tests/ui/or_fun_call.rs:126:21 | LL | let _ = Some(1).unwrap_or(map[&1]); | ^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| map[&1])` error: function call inside of `or` - --> tests/ui/or_fun_call.rs:150:35 + --> tests/ui/or_fun_call.rs:151:35 | LL | let _ = Some("a".to_string()).or(Some("b".to_string())); | ^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `or_else(|| Some("b".to_string()))` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:193:18 + --> tests/ui/or_fun_call.rs:194:18 | LL | None.unwrap_or(ptr_to_ref(s)); | ^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| ptr_to_ref(s))` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:201:14 + --> tests/ui/or_fun_call.rs:202:14 | LL | None.unwrap_or(unsafe { ptr_to_ref(s) }); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| unsafe { ptr_to_ref(s) })` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:204:14 + --> tests/ui/or_fun_call.rs:205:14 
| LL | None.unwrap_or( unsafe { ptr_to_ref(s) } ); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| unsafe { ptr_to_ref(s) })` error: function call inside of `map_or` - --> tests/ui/or_fun_call.rs:280:25 + --> tests/ui/or_fun_call.rs:281:25 | LL | let _ = Some(4).map_or(g(), |v| v); | ^^^^^^^^^^^^^^^^^^ help: try: `map_or_else(g, |v| v)` error: function call inside of `map_or` - --> tests/ui/or_fun_call.rs:282:25 + --> tests/ui/or_fun_call.rs:283:25 | LL | let _ = Some(4).map_or(g(), f); | ^^^^^^^^^^^^^^ help: try: `map_or_else(g, f)` error: use of `unwrap_or_else` to construct default value - --> tests/ui/or_fun_call.rs:314:18 + --> tests/ui/or_fun_call.rs:315:18 | LL | with_new.unwrap_or_else(Vec::new); | ^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: use of `unwrap_or_else` to construct default value - --> tests/ui/or_fun_call.rs:318:28 + --> tests/ui/or_fun_call.rs:319:28 | LL | with_default_trait.unwrap_or_else(Default::default); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: use of `unwrap_or_else` to construct default value - --> tests/ui/or_fun_call.rs:322:27 + --> tests/ui/or_fun_call.rs:323:27 | LL | with_default_type.unwrap_or_else(u64::default); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: use of `unwrap_or_else` to construct default value - --> tests/ui/or_fun_call.rs:326:22 + --> tests/ui/or_fun_call.rs:327:22 | LL | real_default.unwrap_or_else(::default); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: use of `or_insert_with` to construct default value - --> tests/ui/or_fun_call.rs:330:23 + --> tests/ui/or_fun_call.rs:331:23 | LL | map.entry(42).or_insert_with(String::new); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `or_default()` error: use of `or_insert_with` to construct default value - --> tests/ui/or_fun_call.rs:334:25 + --> tests/ui/or_fun_call.rs:335:25 | LL | btree.entry(42).or_insert_with(String::new); | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `or_default()` error: use of `unwrap_or_else` to construct default value - --> tests/ui/or_fun_call.rs:338:25 + --> tests/ui/or_fun_call.rs:339:25 | LL | let _ = stringy.unwrap_or_else(String::new); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:380:17 + --> tests/ui/or_fun_call.rs:381:17 | LL | let _ = opt.unwrap_or({ f() }); // suggest `.unwrap_or_else(f)` | ^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(f)` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:385:17 + --> tests/ui/or_fun_call.rs:386:17 | LL | let _ = opt.unwrap_or(f() + 1); // suggest `.unwrap_or_else(|| f() + 1)` | ^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| f() + 1)` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:390:17 + --> tests/ui/or_fun_call.rs:391:17 | LL | let _ = opt.unwrap_or({ | _________________^ @@ -229,22 +229,40 @@ LL ~ }); | error: function call inside of `map_or` - --> tests/ui/or_fun_call.rs:396:17 + --> tests/ui/or_fun_call.rs:397:17 | LL | let _ = opt.map_or(f() + 1, |v| v); // suggest `.map_or_else(|| f() + 1, |v| v)` | ^^^^^^^^^^^^^^^^^^^^^^ help: try: `map_or_else(|| f() + 1, |v| v)` error: use of `unwrap_or` to construct default value - --> tests/ui/or_fun_call.rs:401:17 + --> tests/ui/or_fun_call.rs:402:17 | LL | let _ = opt.unwrap_or({ i32::default() }); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_default()` error: function call inside of `unwrap_or` - --> tests/ui/or_fun_call.rs:408:21 + --> tests/ui/or_fun_call.rs:409:21 | LL | let _ = opt_foo.unwrap_or(Foo { val: String::default() }); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `unwrap_or_else(|| Foo { val: String::default() })` -error: aborting due to 38 previous errors +error: function call inside of `map_or` + --> tests/ui/or_fun_call.rs:424:19 + | +LL | let _ = x.map_or(g(), |v| v); + | ^^^^^^^^^^^^^^^^^^ help: try: 
`map_or_else(|_| g(), |v| v)` + +error: function call inside of `map_or` + --> tests/ui/or_fun_call.rs:426:19 + | +LL | let _ = x.map_or(g(), f); + | ^^^^^^^^^^^^^^ help: try: `map_or_else(|_| g(), f)` + +error: function call inside of `get_or_insert` + --> tests/ui/or_fun_call.rs:438:15 + | +LL | let _ = x.get_or_insert(g()); + | ^^^^^^^^^^^^^^^^^^ help: try: `get_or_insert_with(g)` + +error: aborting due to 41 previous errors diff --git a/src/tools/clippy/tests/ui/question_mark.fixed b/src/tools/clippy/tests/ui/question_mark.fixed index 60dc1c101b6e..8d6f5fbadca5 100644 --- a/src/tools/clippy/tests/ui/question_mark.fixed +++ b/src/tools/clippy/tests/ui/question_mark.fixed @@ -453,3 +453,15 @@ fn const_in_pattern(x: Option<(i32, i32)>) -> Option<()> { None } + +fn issue_13642(x: Option) -> Option<()> { + let Some(x) = x else { + #[cfg(false)] + panic!(); + + #[cfg(true)] + return None; + }; + + None +} diff --git a/src/tools/clippy/tests/ui/question_mark.rs b/src/tools/clippy/tests/ui/question_mark.rs index 99d0122a98fa..f13eee29c113 100644 --- a/src/tools/clippy/tests/ui/question_mark.rs +++ b/src/tools/clippy/tests/ui/question_mark.rs @@ -549,3 +549,15 @@ fn const_in_pattern(x: Option<(i32, i32)>) -> Option<()> { None } + +fn issue_13642(x: Option) -> Option<()> { + let Some(x) = x else { + #[cfg(false)] + panic!(); + + #[cfg(true)] + return None; + }; + + None +} diff --git a/src/tools/clippy/tests/ui/trait_duplication_in_bounds.fixed b/src/tools/clippy/tests/ui/trait_duplication_in_bounds.fixed index 666ff78b2189..cf52ecf2f032 100644 --- a/src/tools/clippy/tests/ui/trait_duplication_in_bounds.fixed +++ b/src/tools/clippy/tests/ui/trait_duplication_in_bounds.fixed @@ -169,9 +169,9 @@ where // #13476 #[const_trait] trait ConstTrait {} -const fn const_trait_bounds_good() {} +const fn const_trait_bounds_good() {} -const fn const_trait_bounds_bad() {} +const fn const_trait_bounds_bad() {} //~^ trait_duplication_in_bounds fn projections() diff --git 
a/src/tools/clippy/tests/ui/trait_duplication_in_bounds.rs b/src/tools/clippy/tests/ui/trait_duplication_in_bounds.rs index a1a86fe058e6..955562f08dc3 100644 --- a/src/tools/clippy/tests/ui/trait_duplication_in_bounds.rs +++ b/src/tools/clippy/tests/ui/trait_duplication_in_bounds.rs @@ -169,9 +169,9 @@ where // #13476 #[const_trait] trait ConstTrait {} -const fn const_trait_bounds_good() {} +const fn const_trait_bounds_good() {} -const fn const_trait_bounds_bad() {} +const fn const_trait_bounds_bad() {} //~^ trait_duplication_in_bounds fn projections() diff --git a/src/tools/clippy/tests/ui/trait_duplication_in_bounds.stderr b/src/tools/clippy/tests/ui/trait_duplication_in_bounds.stderr index d76b4e458480..ab31721ef515 100644 --- a/src/tools/clippy/tests/ui/trait_duplication_in_bounds.stderr +++ b/src/tools/clippy/tests/ui/trait_duplication_in_bounds.stderr @@ -61,8 +61,8 @@ LL | fn bad_trait_object(arg0: &(dyn Any + Send + Send)) { error: these bounds contain repeated elements --> tests/ui/trait_duplication_in_bounds.rs:174:36 | -LL | const fn const_trait_bounds_bad() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `~const ConstTrait` +LL | const fn const_trait_bounds_bad() {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: try: `[const] ConstTrait` error: these where clauses contain repeated elements --> tests/ui/trait_duplication_in_bounds.rs:181:8 diff --git a/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.rs b/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.rs index 6652efd9ae1d..66590be3d054 100644 --- a/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.rs +++ b/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.rs @@ -21,3 +21,16 @@ fn main() { let _: String = format!("{:?}", os_str); //~ unnecessary_debug_formatting let _: String = format!("{:?}", os_string); //~ unnecessary_debug_formatting } + +#[clippy::msrv = "1.86"] +fn msrv_1_86() { + let os_str = OsStr::new("test"); + println!("{:?}", 
os_str); +} + +#[clippy::msrv = "1.87"] +fn msrv_1_87() { + let os_str = OsStr::new("test"); + println!("{:?}", os_str); + //~^ unnecessary_debug_formatting +} diff --git a/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.stderr b/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.stderr index 382e59b04619..f04d2d5bdc82 100644 --- a/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.stderr +++ b/src/tools/clippy/tests/ui/unnecessary_os_str_debug_formatting.stderr @@ -54,5 +54,14 @@ LL | let _: String = format!("{:?}", os_string); = help: use `Display` formatting and change this to `os_string.display()` = note: switching to `Display` formatting will change how the value is shown; escaped characters will no longer be escaped and surrounding quotes will be removed -error: aborting due to 6 previous errors +error: unnecessary `Debug` formatting in `println!` args + --> tests/ui/unnecessary_os_str_debug_formatting.rs:34:22 + | +LL | println!("{:?}", os_str); + | ^^^^^^ + | + = help: use `Display` formatting and change this to `os_str.display()` + = note: switching to `Display` formatting will change how the value is shown; escaped characters will no longer be escaped and surrounding quotes will be removed + +error: aborting due to 7 previous errors diff --git a/src/tools/clippy/tests/ui/wildcard_enum_match_arm.fixed b/src/tools/clippy/tests/ui/wildcard_enum_match_arm.fixed index 141ff6eb2ac7..5f738a254dcd 100644 --- a/src/tools/clippy/tests/ui/wildcard_enum_match_arm.fixed +++ b/src/tools/clippy/tests/ui/wildcard_enum_match_arm.fixed @@ -90,6 +90,21 @@ fn main() { _ => {}, } + { + pub enum Enum { + A, + B, + C(u8), + D(u8, u8), + E { e: u8 }, + }; + match Enum::A { + Enum::A => (), + Enum::B | Enum::C(_) | Enum::D(..) | Enum::E { .. 
} => (), + //~^ wildcard_enum_match_arm + } + } + { #![allow(clippy::manual_non_exhaustive)] pub enum Enum { @@ -105,3 +120,17 @@ fn main() { } } } + +fn issue15091() { + enum Foo { + A, + B, + C, + } + + match Foo::A { + Foo::A => {}, + r#type @ Foo::B | r#type @ Foo::C => {}, + //~^ wildcard_enum_match_arm + } +} diff --git a/src/tools/clippy/tests/ui/wildcard_enum_match_arm.rs b/src/tools/clippy/tests/ui/wildcard_enum_match_arm.rs index a13684e9100b..4bc4bfdcb794 100644 --- a/src/tools/clippy/tests/ui/wildcard_enum_match_arm.rs +++ b/src/tools/clippy/tests/ui/wildcard_enum_match_arm.rs @@ -90,6 +90,21 @@ fn main() { _ => {}, } + { + pub enum Enum { + A, + B, + C(u8), + D(u8, u8), + E { e: u8 }, + }; + match Enum::A { + Enum::A => (), + _ => (), + //~^ wildcard_enum_match_arm + } + } + { #![allow(clippy::manual_non_exhaustive)] pub enum Enum { @@ -105,3 +120,17 @@ fn main() { } } } + +fn issue15091() { + enum Foo { + A, + B, + C, + } + + match Foo::A { + Foo::A => {}, + r#type => {}, + //~^ wildcard_enum_match_arm + } +} diff --git a/src/tools/clippy/tests/ui/wildcard_enum_match_arm.stderr b/src/tools/clippy/tests/ui/wildcard_enum_match_arm.stderr index 088c6b7b2841..d0929989494a 100644 --- a/src/tools/clippy/tests/ui/wildcard_enum_match_arm.stderr +++ b/src/tools/clippy/tests/ui/wildcard_enum_match_arm.stderr @@ -37,8 +37,20 @@ LL | _ => {}, error: wildcard match will also match any future added variants --> tests/ui/wildcard_enum_match_arm.rs:103:13 | +LL | _ => (), + | ^ help: try: `Enum::B | Enum::C(_) | Enum::D(..) | Enum::E { .. 
}` + +error: wildcard match will also match any future added variants + --> tests/ui/wildcard_enum_match_arm.rs:118:13 + | LL | _ => (), | ^ help: try: `Enum::B | Enum::__Private` -error: aborting due to 6 previous errors +error: wildcard match will also match any future added variants + --> tests/ui/wildcard_enum_match_arm.rs:133:9 + | +LL | r#type => {}, + | ^^^^^^ help: try: `r#type @ Foo::B | r#type @ Foo::C` + +error: aborting due to 8 previous errors diff --git a/src/tools/clippy/tests/versioncheck.rs b/src/tools/clippy/tests/versioncheck.rs index f6fc2354ca08..b0179387b2b8 100644 --- a/src/tools/clippy/tests/versioncheck.rs +++ b/src/tools/clippy/tests/versioncheck.rs @@ -27,6 +27,7 @@ fn consistent_clippy_crate_versions() { "clippy_config/Cargo.toml", "clippy_lints/Cargo.toml", "clippy_utils/Cargo.toml", + "declare_clippy_lint/Cargo.toml", ]; for path in paths { diff --git a/src/tools/clippy/triagebot.toml b/src/tools/clippy/triagebot.toml index 16557a4bebb8..4f370758c006 100644 --- a/src/tools/clippy/triagebot.toml +++ b/src/tools/clippy/triagebot.toml @@ -17,6 +17,9 @@ allow-unauthenticated = [ [issue-links] +[mentions."clippy_lints/src/doc"] +cc = ["@notriddle"] + # Prevents mentions in commits to avoid users being spammed [no-mentions] diff --git a/src/tools/clippy/util/versions.py b/src/tools/clippy/util/versions.py index fee0d292df16..6e06d77a7714 100755 --- a/src/tools/clippy/util/versions.py +++ b/src/tools/clippy/util/versions.py @@ -6,11 +6,11 @@ import os import sys def key(v): - if v == "master": - return sys.maxsize if v == "stable": - return sys.maxsize - 1 + return sys.maxsize if v == "beta": + return sys.maxsize - 1 + if v == "master": return sys.maxsize - 2 if v == "pre-1.29.0": return -1 diff --git a/src/tools/compiletest/src/errors.rs b/src/tools/compiletest/src/errors.rs index b5a2b7feac9d..9fa26305f6b0 100644 --- a/src/tools/compiletest/src/errors.rs +++ b/src/tools/compiletest/src/errors.rs @@ -16,6 +16,8 @@ pub enum ErrorKind { 
Suggestion, Warning, Raw, + /// Used for better recovery and diagnostics in compiletest. + Unknown, } impl ErrorKind { @@ -31,21 +33,25 @@ impl ErrorKind { /// Either the canonical uppercase string, or some additional versions for compatibility. /// FIXME: consider keeping only the canonical versions here. - pub fn from_user_str(s: &str) -> ErrorKind { - match s { + fn from_user_str(s: &str) -> Option { + Some(match s { "HELP" | "help" => ErrorKind::Help, "ERROR" | "error" => ErrorKind::Error, - // `MONO_ITEM` makes annotations in `codegen-units` tests syntactically correct, - // but those tests never use the error kind later on. - "NOTE" | "note" | "MONO_ITEM" => ErrorKind::Note, + "NOTE" | "note" => ErrorKind::Note, "SUGGESTION" => ErrorKind::Suggestion, "WARN" | "WARNING" | "warn" | "warning" => ErrorKind::Warning, "RAW" => ErrorKind::Raw, - _ => panic!( + _ => return None, + }) + } + + pub fn expect_from_user_str(s: &str) -> ErrorKind { + ErrorKind::from_user_str(s).unwrap_or_else(|| { + panic!( "unexpected diagnostic kind `{s}`, expected \ - `ERROR`, `WARN`, `NOTE`, `HELP` or `SUGGESTION`" - ), - } + `ERROR`, `WARN`, `NOTE`, `HELP`, `SUGGESTION` or `RAW`" + ) + }) } } @@ -58,6 +64,7 @@ impl fmt::Display for ErrorKind { ErrorKind::Suggestion => write!(f, "SUGGESTION"), ErrorKind::Warning => write!(f, "WARN"), ErrorKind::Raw => write!(f, "RAW"), + ErrorKind::Unknown => write!(f, "UNKNOWN"), } } } @@ -65,6 +72,7 @@ impl fmt::Display for ErrorKind { #[derive(Debug)] pub struct Error { pub line_num: Option, + pub column_num: Option, /// What kind of message we expect (e.g., warning, error, suggestion). 
pub kind: ErrorKind, pub msg: String, @@ -74,17 +82,6 @@ pub struct Error { pub require_annotation: bool, } -impl Error { - pub fn render_for_expected(&self) -> String { - use colored::Colorize; - format!("{: <10}line {: >3}: {}", self.kind, self.line_num_str(), self.msg.cyan()) - } - - pub fn line_num_str(&self) -> String { - self.line_num.map_or("?".to_string(), |line_num| line_num.to_string()) - } -} - /// Looks for either "//~| KIND MESSAGE" or "//~^^... KIND MESSAGE" /// The former is a "follow" that inherits its target from the preceding line; /// the latter is an "adjusts" that goes that many lines up. @@ -168,8 +165,10 @@ fn parse_expected( let rest = line[tag.end()..].trim_start(); let (kind_str, _) = rest.split_once(|c: char| c != '_' && !c.is_ascii_alphabetic()).unwrap_or((rest, "")); - let kind = ErrorKind::from_user_str(kind_str); - let untrimmed_msg = &rest[kind_str.len()..]; + let (kind, untrimmed_msg) = match ErrorKind::from_user_str(kind_str) { + Some(kind) => (kind, &rest[kind_str.len()..]), + None => (ErrorKind::Unknown, rest), + }; let msg = untrimmed_msg.strip_prefix(':').unwrap_or(untrimmed_msg).trim().to_owned(); let line_num_adjust = &captures["adjust"]; @@ -182,6 +181,7 @@ fn parse_expected( } else { (false, Some(line_num - line_num_adjust.len())) }; + let column_num = Some(tag.start() + 1); debug!( "line={:?} tag={:?} follow_prev={:?} kind={:?} msg={:?}", @@ -191,7 +191,7 @@ fn parse_expected( kind, msg ); - Some((follow_prev, Error { line_num, kind, msg, require_annotation: true })) + Some((follow_prev, Error { line_num, column_num, kind, msg, require_annotation: true })) } #[cfg(test)] diff --git a/src/tools/compiletest/src/executor.rs b/src/tools/compiletest/src/executor.rs index e774c5e2047c..0c4ef36828a0 100644 --- a/src/tools/compiletest/src/executor.rs +++ b/src/tools/compiletest/src/executor.rs @@ -40,9 +40,9 @@ pub(crate) fn run_tests(config: &Config, tests: Vec) -> bool { // In that case, the tests will effectively be run 
serially anyway. loop { // Spawn new test threads, up to the concurrency limit. - // FIXME(let_chains): Use a let-chain here when stable in bootstrap. - 'spawn: while running_tests.len() < concurrency { - let Some((id, test)) = fresh_tests.next() else { break 'spawn }; + while running_tests.len() < concurrency + && let Some((id, test)) = fresh_tests.next() + { listener.test_started(test); deadline_queue.push(id, test); let join_handle = spawn_test_thread(id, test, completion_tx.clone()); diff --git a/src/tools/compiletest/src/header.rs b/src/tools/compiletest/src/header.rs index 8bee9caacc94..2b203bb309c6 100644 --- a/src/tools/compiletest/src/header.rs +++ b/src/tools/compiletest/src/header.rs @@ -593,7 +593,7 @@ impl TestProps { config.parse_name_value_directive(ln, DONT_REQUIRE_ANNOTATIONS) { self.dont_require_annotations - .insert(ErrorKind::from_user_str(err_kind.trim())); + .insert(ErrorKind::expect_from_user_str(err_kind.trim())); } }, ); diff --git a/src/tools/compiletest/src/json.rs b/src/tools/compiletest/src/json.rs index 6ed2b52c66d2..a8e6416e56c8 100644 --- a/src/tools/compiletest/src/json.rs +++ b/src/tools/compiletest/src/json.rs @@ -36,9 +36,7 @@ struct UnusedExternNotification { struct DiagnosticSpan { file_name: String, line_start: usize, - line_end: usize, column_start: usize, - column_end: usize, is_primary: bool, label: Option, suggested_replacement: Option, @@ -148,6 +146,7 @@ pub fn parse_output(file_name: &str, output: &str) -> Vec { Ok(diagnostic) => push_actual_errors(&mut errors, &diagnostic, &[], file_name), Err(_) => errors.push(Error { line_num: None, + column_num: None, kind: ErrorKind::Raw, msg: line.to_string(), require_annotation: false, @@ -193,25 +192,9 @@ fn push_actual_errors( // also ensure that `//~ ERROR E123` *always* works. The // assumption is that these multi-line error messages are on their // way out anyhow. 
- let with_code = |span: Option<&DiagnosticSpan>, text: &str| { - // FIXME(#33000) -- it'd be better to use a dedicated - // UI harness than to include the line/col number like - // this, but some current tests rely on it. - // - // Note: Do NOT include the filename. These can easily - // cause false matches where the expected message - // appears in the filename, and hence the message - // changes but the test still passes. - let span_str = match span { - Some(DiagnosticSpan { line_start, column_start, line_end, column_end, .. }) => { - format!("{line_start}:{column_start}: {line_end}:{column_end}") - } - None => format!("?:?: ?:?"), - }; - match &diagnostic.code { - Some(code) => format!("{span_str}: {text} [{}]", code.code), - None => format!("{span_str}: {text}"), - } + let with_code = |text| match &diagnostic.code { + Some(code) => format!("{text} [{}]", code.code), + None => format!("{text}"), }; // Convert multi-line messages into multiple errors. @@ -225,8 +208,9 @@ fn push_actual_errors( || Regex::new(r"aborting due to \d+ previous errors?|\d+ warnings? 
emitted").unwrap(); errors.push(Error { line_num: None, + column_num: None, kind, - msg: with_code(None, first_line), + msg: with_code(first_line), require_annotation: diagnostic.level != "failure-note" && !RE.get_or_init(re_init).is_match(first_line), }); @@ -234,8 +218,9 @@ fn push_actual_errors( for span in primary_spans { errors.push(Error { line_num: Some(span.line_start), + column_num: Some(span.column_start), kind, - msg: with_code(Some(span), first_line), + msg: with_code(first_line), require_annotation: true, }); } @@ -244,16 +229,18 @@ fn push_actual_errors( if primary_spans.is_empty() { errors.push(Error { line_num: None, + column_num: None, kind, - msg: with_code(None, next_line), + msg: with_code(next_line), require_annotation: false, }); } else { for span in primary_spans { errors.push(Error { line_num: Some(span.line_start), + column_num: Some(span.column_start), kind, - msg: with_code(Some(span), next_line), + msg: with_code(next_line), require_annotation: false, }); } @@ -266,6 +253,7 @@ fn push_actual_errors( for (index, line) in suggested_replacement.lines().enumerate() { errors.push(Error { line_num: Some(span.line_start + index), + column_num: Some(span.column_start), kind: ErrorKind::Suggestion, msg: line.to_string(), // Empty suggestions (suggestions to remove something) are common @@ -288,6 +276,7 @@ fn push_actual_errors( if let Some(label) = &span.label { errors.push(Error { line_num: Some(span.line_start), + column_num: Some(span.column_start), kind: ErrorKind::Note, msg: label.clone(), // Empty labels (only underlining spans) are common and do not need annotations. 
@@ -310,6 +299,7 @@ fn push_backtrace( if Path::new(&expansion.span.file_name) == Path::new(&file_name) { errors.push(Error { line_num: Some(expansion.span.line_start), + column_num: Some(expansion.span.column_start), kind: ErrorKind::Note, msg: format!("in this expansion of {}", expansion.macro_decl_name), require_annotation: true, diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index 42c851ea9991..980e89889abb 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -11,7 +11,7 @@ use std::{env, iter, str}; use build_helper::fs::remove_and_create_dir_all; use camino::{Utf8Path, Utf8PathBuf}; -use colored::Colorize; +use colored::{Color, Colorize}; use regex::{Captures, Regex}; use tracing::*; @@ -677,9 +677,6 @@ impl<'test> TestCx<'test> { return; } - // On Windows, translate all '\' path separators to '/' - let file_name = self.testpaths.file.to_string().replace(r"\", "/"); - // On Windows, keep all '\' path separators to match the paths reported in the JSON output // from the compiler let diagnostic_file_name = if self.props.remap_src_base { @@ -704,6 +701,7 @@ impl<'test> TestCx<'test> { .map(|e| Error { msg: self.normalize_output(&e.msg, &[]), ..e }); let mut unexpected = Vec::new(); + let mut unimportant = Vec::new(); let mut found = vec![false; expected_errors.len()]; for actual_error in actual_errors { for pattern in &self.props.error_patterns { @@ -738,14 +736,9 @@ impl<'test> TestCx<'test> { && expected_kinds.contains(&actual_error.kind) && !self.props.dont_require_annotations.contains(&actual_error.kind) { - self.error(&format!( - "{}:{}: unexpected {}: '{}'", - file_name, - actual_error.line_num_str(), - actual_error.kind, - actual_error.msg - )); unexpected.push(actual_error); + } else { + unimportant.push(actual_error); } } } @@ -755,39 +748,140 @@ impl<'test> TestCx<'test> { // anything not yet found is a problem for (index, expected_error) in 
expected_errors.iter().enumerate() { if !found[index] { - self.error(&format!( - "{}:{}: expected {} not found: {}", - file_name, - expected_error.line_num_str(), - expected_error.kind, - expected_error.msg - )); not_found.push(expected_error); } } if !unexpected.is_empty() || !not_found.is_empty() { self.error(&format!( - "{} unexpected errors found, {} expected errors not found", + "{} unexpected diagnostics reported, {} expected diagnostics not reported", unexpected.len(), not_found.len() )); - println!("status: {}\ncommand: {}\n", proc_res.status, proc_res.cmdline); + + // Emit locations in a format that is short (relative paths) but "clickable" in editors. + // Also normalize path separators to `/`. + let file_name = self + .testpaths + .file + .strip_prefix(self.config.src_root.as_str()) + .unwrap_or(&self.testpaths.file) + .to_string() + .replace(r"\", "/"); + let line_str = |e: &Error| { + let line_num = e.line_num.map_or("?".to_string(), |line_num| line_num.to_string()); + // `file:?:NUM` may be confusing to editors and unclickable. + let opt_col_num = match e.column_num { + Some(col_num) if line_num != "?" => format!(":{col_num}"), + _ => "".to_string(), + }; + format!("{file_name}:{line_num}{opt_col_num}") + }; + let print_error = |e| println!("{}: {}: {}", line_str(e), e.kind, e.msg.cyan()); + let push_suggestion = + |suggestions: &mut Vec<_>, e: &Error, kind, line, msg, color, rank| { + let mut ret = String::new(); + if kind { + ret += &format!("{} {}", "with kind".color(color), e.kind); + } + if line { + if !ret.is_empty() { + ret.push(' '); + } + ret += &format!("{} {}", "on line".color(color), line_str(e)); + } + if msg { + if !ret.is_empty() { + ret.push(' '); + } + ret += &format!("{} {}", "with message".color(color), e.msg.cyan()); + } + suggestions.push((ret, rank)); + }; + let show_suggestions = |mut suggestions: Vec<_>, prefix: &str, color| { + // Only show suggestions with the highest rank. 
+ suggestions.sort_by_key(|(_, rank)| *rank); + if let Some(&(_, top_rank)) = suggestions.first() { + for (suggestion, rank) in suggestions { + if rank == top_rank { + println!(" {} {suggestion}", prefix.color(color)); + } + } + } + }; + + // Fuzzy matching quality: + // - message and line / message and kind - great, suggested + // - only message - good, suggested + // - known line and kind - ok, suggested + // - only known line - meh, but suggested + // - others are not worth suggesting if !unexpected.is_empty() { - println!("{}", "--- unexpected errors (from JSON output) ---".green()); + let header = "--- reported in JSON output but not expected in test file ---"; + println!("{}", header.green()); for error in &unexpected { - println!("{}", error.render_for_expected()); + print_error(error); + let mut suggestions = Vec::new(); + for candidate in ¬_found { + let mut push_red_suggestion = |line, msg, rank| { + push_suggestion( + &mut suggestions, + candidate, + candidate.kind != error.kind, + line, + msg, + Color::Red, + rank, + ) + }; + if error.msg.contains(&candidate.msg) { + push_red_suggestion(candidate.line_num != error.line_num, false, 0); + } else if candidate.line_num.is_some() + && candidate.line_num == error.line_num + { + push_red_suggestion(false, true, 1); + } + } + + show_suggestions(suggestions, "expected", Color::Red); } println!("{}", "---".green()); } if !not_found.is_empty() { - println!("{}", "--- not found errors (from test file) ---".red()); + let header = "--- expected in test file but not reported in JSON output ---"; + println!("{}", header.red()); for error in ¬_found { - println!("{}", error.render_for_expected()); + print_error(error); + let mut suggestions = Vec::new(); + for candidate in unexpected.iter().chain(&unimportant) { + let mut push_green_suggestion = |line, msg, rank| { + push_suggestion( + &mut suggestions, + candidate, + candidate.kind != error.kind, + line, + msg, + Color::Green, + rank, + ) + }; + if 
candidate.msg.contains(&error.msg) { + push_green_suggestion(candidate.line_num != error.line_num, false, 0); + } else if candidate.line_num.is_some() + && candidate.line_num == error.line_num + { + push_green_suggestion(false, true, 1); + } + } + + show_suggestions(suggestions, "reported", Color::Green); } - println!("{}", "---\n".red()); + println!("{}", "---".red()); } - panic!("errors differ from expected"); + panic!( + "errors differ from expected\nstatus: {}\ncommand: {}\n", + proc_res.status, proc_res.cmdline + ); } } @@ -2073,7 +2167,6 @@ impl<'test> TestCx<'test> { println!("{}", String::from_utf8_lossy(&output.stdout)); eprintln!("{}", String::from_utf8_lossy(&output.stderr)); } else { - use colored::Colorize; eprintln!("warning: no pager configured, falling back to unified diff"); eprintln!( "help: try configuring a git pager (e.g. `delta`) with `git config --global core.pager delta`" diff --git a/src/tools/compiletest/src/runtest/run_make.rs b/src/tools/compiletest/src/runtest/run_make.rs index 029da1c18983..60e8e16e25ef 100644 --- a/src/tools/compiletest/src/runtest/run_make.rs +++ b/src/tools/compiletest/src/runtest/run_make.rs @@ -221,6 +221,10 @@ impl TestCx<'_> { cmd.env("REMOTE_TEST_CLIENT", remote_test_client); } + if let Some(runner) = &self.config.runner { + cmd.env("RUNNER", runner); + } + // We don't want RUSTFLAGS set from the outside to interfere with // compiler flags set in the test cases: cmd.env_remove("RUSTFLAGS"); diff --git a/src/tools/enzyme b/src/tools/enzyme index a35f4f773118..b5098d515d5e 160000 --- a/src/tools/enzyme +++ b/src/tools/enzyme @@ -1 +1 @@ -Subproject commit a35f4f773118ccfbd8d05102eb12a34097b1ee55 +Subproject commit b5098d515d5e1bd0f5470553bc0d18da9794ca8b diff --git a/src/tools/jsondoclint/src/validator.rs b/src/tools/jsondoclint/src/validator.rs index 8c9e4c8bb3a6..0a4051fcbe8c 100644 --- a/src/tools/jsondoclint/src/validator.rs +++ b/src/tools/jsondoclint/src/validator.rs @@ -271,7 +271,7 @@ impl<'a> 
Validator<'a> { Type::RawPointer { is_mutable: _, type_ } => self.check_type(&**type_), Type::BorrowedRef { lifetime: _, is_mutable: _, type_ } => self.check_type(&**type_), Type::QualifiedPath { name: _, args, self_type, trait_ } => { - self.check_generic_args(&**args); + self.check_opt_generic_args(&args); self.check_type(&**self_type); if let Some(trait_) = trait_ { self.check_path(trait_, PathKind::Trait); @@ -309,13 +309,12 @@ impl<'a> Validator<'a> { self.fail(&x.id, ErrorKind::Custom(format!("No entry in '$.paths' for {x:?}"))); } - if let Some(args) = &x.args { - self.check_generic_args(&**args); - } + self.check_opt_generic_args(&x.args); } - fn check_generic_args(&mut self, x: &'a GenericArgs) { - match x { + fn check_opt_generic_args(&mut self, x: &'a Option>) { + let Some(x) = x else { return }; + match &**x { GenericArgs::AngleBracketed { args, constraints } => { args.iter().for_each(|arg| self.check_generic_arg(arg)); constraints.iter().for_each(|bind| self.check_assoc_item_constraint(bind)); @@ -355,7 +354,7 @@ impl<'a> Validator<'a> { } fn check_assoc_item_constraint(&mut self, bind: &'a AssocItemConstraint) { - self.check_generic_args(&bind.args); + self.check_opt_generic_args(&bind.args); match &bind.binding { AssocItemConstraintKind::Equality(term) => self.check_term(term), AssocItemConstraintKind::Constraint(bounds) => { diff --git a/src/tools/linkchecker/linkcheck.sh b/src/tools/linkchecker/linkcheck.sh index 6c1e668a7f0d..d230610a6e79 100755 --- a/src/tools/linkchecker/linkcheck.sh +++ b/src/tools/linkchecker/linkcheck.sh @@ -98,6 +98,7 @@ then nightly_hash=$(rustc +nightly -Vv | grep commit-hash | cut -f2 -d" ") url="https://raw.githubusercontent.com/rust-lang/rust" mkdir linkchecker + curl -o linkchecker/Cargo.lock ${url}/${nightly_hash}/Cargo.lock curl -o linkchecker/Cargo.toml ${url}/${nightly_hash}/src/tools/linkchecker/Cargo.toml curl -o linkchecker/main.rs ${url}/${nightly_hash}/src/tools/linkchecker/main.rs fi diff --git 
a/src/tools/lint-docs/Cargo.toml b/src/tools/lint-docs/Cargo.toml index f1ffda75ac0f..e914a2df2bad 100644 --- a/src/tools/lint-docs/Cargo.toml +++ b/src/tools/lint-docs/Cargo.toml @@ -7,7 +7,7 @@ description = "A script to extract the lint documentation for the rustc book." # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -rustc-literal-escaper = "0.0.2" +rustc-literal-escaper = "0.0.4" serde_json = "1.0.57" tempfile = "3.1.0" walkdir = "2.3.1" diff --git a/src/tools/lint-docs/src/lib.rs b/src/tools/lint-docs/src/lib.rs index 6bb18c2bced7..b33344ca5dda 100644 --- a/src/tools/lint-docs/src/lib.rs +++ b/src/tools/lint-docs/src/lib.rs @@ -4,7 +4,7 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; -use rustc_literal_escaper::{Mode, unescape_unicode}; +use rustc_literal_escaper::unescape_str; use walkdir::WalkDir; mod groups; @@ -218,7 +218,7 @@ impl<'a> LintExtractor<'a> { } else if let Some(text) = line.strip_prefix("#[doc = \"") { let escaped = text.strip_suffix("\"]").unwrap(); let mut buf = String::new(); - unescape_unicode(escaped, Mode::Str, &mut |_, c| match c { + unescape_str(escaped, |_, res| match res { Ok(c) => buf.push(c), Err(err) => { assert!(!err.is_fatal(), "failed to unescape string literal") diff --git a/src/tools/miri/cargo-miri/src/setup.rs b/src/tools/miri/cargo-miri/src/setup.rs index b9b58c04f9e4..e399f66fbc9c 100644 --- a/src/tools/miri/cargo-miri/src/setup.rs +++ b/src/tools/miri/cargo-miri/src/setup.rs @@ -83,7 +83,7 @@ pub fn setup( SysrootConfig::NoStd } else { SysrootConfig::WithStd { - std_features: ["panic_unwind", "backtrace"].into_iter().map(Into::into).collect(), + std_features: ["panic-unwind", "backtrace"].into_iter().map(Into::into).collect(), } }; let cargo_cmd = { diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 2ef8d717d548..b2ab17fd76ad 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ 
-1 +1 @@ -255aa220821c05c3eac7605fce4ea1c9ab2cbdb4 +d41e12f1f4e4884c356f319b881921aa37040de5 diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index ef3dd9823371..c3ea7270e8a6 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -189,7 +189,7 @@ impl IsolatedAlloc { }; assert_ne!(page_ptr.addr(), usize::MAX, "mmap failed"); // `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page. - assert!(self.page_size % COMPRESSION_FACTOR == 0); + assert!(self.page_size.is_multiple_of(COMPRESSION_FACTOR)); self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR)); self.page_ptrs.push(NonNull::new(page_ptr).unwrap()); (NonNull::new(page_ptr).unwrap(), self.page_infos.last_mut().unwrap()) diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs index ab6aaed5e3e1..b6cc017f7723 100644 --- a/src/tools/miri/src/alloc_addresses/reuse_pool.rs +++ b/src/tools/miri/src/alloc_addresses/reuse_pool.rs @@ -129,7 +129,7 @@ impl ReusePool { let idx = rng.random_range(begin..end); // Remove it from the pool and return. let (chosen_addr, chosen_size, chosen_thread, clock) = subpool.remove(idx); - debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0); + debug_assert!(chosen_size >= size && chosen_addr.is_multiple_of(align.bytes())); debug_assert!(cross_thread_reuse || chosen_thread == thread); // No synchronization needed if we reused from the current thread. 
Some((chosen_addr, if chosen_thread == thread { None } else { Some(clock) })) diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index d410d7bcc875..2e82dbee34d9 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -296,6 +296,7 @@ impl rustc_driver::Callbacks for MiriBeRustCompilerCalls { level: SymbolExportLevel::C, kind: SymbolExportKind::Text, used: false, + rustc_std_internal_symbol: false, }, )) } else { diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs index bc57ba697b38..b8bcacf7c994 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs @@ -814,7 +814,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> { info: RetagInfo, // diagnostics info about this retag ) -> InterpResult<'tcx, MPlaceTy<'tcx>> { let this = self.eval_context_mut(); - let size = this.size_and_align_of_mplace(place)?.map(|(size, _)| size); + let size = this.size_and_align_of_val(place)?.map(|(size, _)| size); // FIXME: If we cannot determine the size (because the unsized tail is an `extern type`), // bail out -- we cannot reasonably figure out which memory range to reborrow. // See https://github.com/rust-lang/unsafe-code-guidelines/issues/276. diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index ce8fe03ee477..a0761cb07a1d 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -468,10 +468,8 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // - when `extern type` is involved we use the size of the known prefix, // - if the pointer is not reborrowed (raw pointer) then we override the size // to do a zero-length reborrow. - let reborrow_size = this - .size_and_align_of_mplace(place)? 
- .map(|(size, _)| size) - .unwrap_or(place.layout.size); + let reborrow_size = + this.size_and_align_of_val(place)?.map(|(size, _)| size).unwrap_or(place.layout.size); trace!("Creating new permission: {:?} with size {:?}", new_perm, reborrow_size); // This new tag is not guaranteed to actually be used. diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs index dcd5a6cb0230..ad0a565dfd85 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs @@ -327,7 +327,7 @@ mod tests { for i in 0..1000 { i.hash(&mut hasher); let rng = hasher.finish(); - let op = rng % 3 == 0; + let op = rng.is_multiple_of(3); let key = (rng / 2) % 50; let val = (rng / 100) % 1000; if op { diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index 0ad20cefb922..c0dc484187a8 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -11,7 +11,7 @@ use rustc_abi::ExternAbi; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_hir::def::Namespace; use rustc_hir::def_id::DefId; -use rustc_middle::ty::layout::{LayoutCx, LayoutOf}; +use rustc_middle::ty::layout::LayoutCx; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_session::config::EntryFnType; diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index 4edecc864dd4..fb34600fa37d 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -489,7 +489,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { trace!("visit_frozen(place={:?}, size={:?})", *place, size); debug_assert_eq!( size, - this.size_and_align_of_mplace(place)? + this.size_and_align_of_val(place)? .map(|(size, _)| size) .unwrap_or_else(|| place.layout.size) ); @@ -530,7 +530,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { trace!("unsafe_cell_action on {:?}", place.ptr()); // We need a size to go on. 
let unsafe_cell_size = this - .size_and_align_of_mplace(place)? + .size_and_align_of_val(place)? .map(|(size, _)| size) // for extern types, just cover what we can .unwrap_or_else(|| place.layout.size); diff --git a/src/tools/miri/src/intrinsics/simd.rs b/src/tools/miri/src/intrinsics/simd.rs index dbe193bdbda8..52224f33c730 100644 --- a/src/tools/miri/src/intrinsics/simd.rs +++ b/src/tools/miri/src/intrinsics/simd.rs @@ -3,7 +3,6 @@ use rand::Rng; use rustc_abi::{Endian, HasDataLayout}; use rustc_apfloat::{Float, Round}; use rustc_middle::ty::FloatTy; -use rustc_middle::ty::layout::LayoutOf; use rustc_middle::{mir, ty}; use rustc_span::{Symbol, sym}; diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index b4d7db34efa7..3a748c4c6872 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1056,7 +1056,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { // What's the offset between us and the promised alignment? let distance = offset.bytes().wrapping_sub(promised_offset.bytes()); // That must also be aligned. - if distance % align.bytes() == 0 { + if distance.is_multiple_of(align.bytes()) { // All looking good! None } else { @@ -1612,7 +1612,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { ecx.machine.since_gc += 1; // Possibly report our progress. This will point at the terminator we are about to execute. 
if let Some(report_progress) = ecx.machine.report_progress { - if ecx.machine.basic_block_count % u64::from(report_progress) == 0 { + if ecx.machine.basic_block_count.is_multiple_of(u64::from(report_progress)) { ecx.emit_diagnostic(NonHaltingDiagnostic::ProgressReport { block_count: ecx.machine.basic_block_count, }); diff --git a/src/tools/miri/src/shims/backtrace.rs b/src/tools/miri/src/shims/backtrace.rs index feb83ca8829a..dd00b270b38f 100644 --- a/src/tools/miri/src/shims/backtrace.rs +++ b/src/tools/miri/src/shims/backtrace.rs @@ -1,5 +1,4 @@ use rustc_abi::{CanonAbi, FieldIdx, Size}; -use rustc_middle::ty::layout::LayoutOf as _; use rustc_middle::ty::{self, Instance, Ty}; use rustc_span::{BytePos, Loc, Symbol, hygiene}; use rustc_target::callconv::FnAbi; diff --git a/src/tools/miri/src/shims/os_str.rs b/src/tools/miri/src/shims/os_str.rs index 7080edb26a5f..b9391a0ffe07 100644 --- a/src/tools/miri/src/shims/os_str.rs +++ b/src/tools/miri/src/shims/os_str.rs @@ -7,7 +7,6 @@ use std::os::windows::ffi::{OsStrExt, OsStringExt}; use std::path::{Path, PathBuf}; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf; use crate::*; diff --git a/src/tools/miri/src/shims/unix/env.rs b/src/tools/miri/src/shims/unix/env.rs index 62ac7ee38065..604fb0974d29 100644 --- a/src/tools/miri/src/shims/unix/env.rs +++ b/src/tools/miri/src/shims/unix/env.rs @@ -6,7 +6,6 @@ use rustc_abi::{FieldIdx, Size}; use rustc_data_structures::fx::FxHashMap; use rustc_index::IndexVec; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf; use crate::*; diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs index f34b95e730b1..b3c58397a02b 100644 --- a/src/tools/miri/src/shims/unix/foreign_items.rs +++ b/src/tools/miri/src/shims/unix/foreign_items.rs @@ -3,7 +3,6 @@ use std::str; use rustc_abi::{CanonAbi, ExternAbi, Size}; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf; use rustc_span::Symbol; use 
rustc_target::callconv::FnAbi; diff --git a/src/tools/miri/src/shims/unix/linux/mem.rs b/src/tools/miri/src/shims/unix/linux/mem.rs index 8e5a3021b1c0..47732f811f1a 100644 --- a/src/tools/miri/src/shims/unix/linux/mem.rs +++ b/src/tools/miri/src/shims/unix/linux/mem.rs @@ -22,8 +22,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let flags = this.read_scalar(flags)?.to_i32()?; // old_address must be a multiple of the page size - #[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero - if old_address.addr().bytes() % this.machine.page_size != 0 || new_size == 0 { + if !old_address.addr().bytes().is_multiple_of(this.machine.page_size) || new_size == 0 { this.set_last_error(LibcError("EINVAL"))?; return interp_ok(this.eval_libc("MAP_FAILED")); } diff --git a/src/tools/miri/src/shims/unix/mem.rs b/src/tools/miri/src/shims/unix/mem.rs index aefeee6f7a3a..4bbbbc69c08a 100644 --- a/src/tools/miri/src/shims/unix/mem.rs +++ b/src/tools/miri/src/shims/unix/mem.rs @@ -130,8 +130,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // addr must be a multiple of the page size, but apart from that munmap is just implemented // as a dealloc. - #[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero - if addr.addr().bytes() % this.machine.page_size != 0 { + if !addr.addr().bytes().is_multiple_of(this.machine.page_size) { return this.set_last_error_and_return_i32(LibcError("EINVAL")); } diff --git a/src/tools/miri/src/shims/unix/sync.rs b/src/tools/miri/src/shims/unix/sync.rs index eee2bbcb903d..50eb4d922891 100644 --- a/src/tools/miri/src/shims/unix/sync.rs +++ b/src/tools/miri/src/shims/unix/sync.rs @@ -17,7 +17,7 @@ fn bytewise_equal_atomic_relaxed<'tcx>( // We do this in chunks of 4, so that we are okay to race with (sufficiently aligned) // 4-byte atomic accesses. 
- assert!(size.bytes() % 4 == 0); + assert!(size.bytes().is_multiple_of(4)); for i in 0..(size.bytes() / 4) { let offset = Size::from_bytes(i.strict_mul(4)); let load = |place: &MPlaceTy<'tcx>| { diff --git a/src/tools/miri/src/shims/windows/thread.rs b/src/tools/miri/src/shims/windows/thread.rs index 981742391b98..a155ec65a51f 100644 --- a/src/tools/miri/src/shims/windows/thread.rs +++ b/src/tools/miri/src/shims/windows/thread.rs @@ -1,5 +1,4 @@ use rustc_abi::ExternAbi; -use rustc_middle::ty::layout::LayoutOf; use self::shims::windows::handle::{EvalContextExt as _, Handle, PseudoHandle}; use crate::*; diff --git a/src/tools/miri/src/shims/x86/aesni.rs b/src/tools/miri/src/shims/x86/aesni.rs index 7191284b5a3f..058ca24e730f 100644 --- a/src/tools/miri/src/shims/x86/aesni.rs +++ b/src/tools/miri/src/shims/x86/aesni.rs @@ -1,6 +1,5 @@ use rustc_abi::CanonAbi; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf as _; use rustc_span::Symbol; use rustc_target::callconv::FnAbi; diff --git a/src/tools/miri/src/shims/x86/avx.rs b/src/tools/miri/src/shims/x86/avx.rs index 37539fec7485..83d23d6ad369 100644 --- a/src/tools/miri/src/shims/x86/avx.rs +++ b/src/tools/miri/src/shims/x86/avx.rs @@ -2,7 +2,6 @@ use rustc_abi::CanonAbi; use rustc_apfloat::ieee::{Double, Single}; use rustc_middle::mir; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf as _; use rustc_span::Symbol; use rustc_target::callconv::FnAbi; diff --git a/src/tools/miri/src/shims/x86/avx2.rs b/src/tools/miri/src/shims/x86/avx2.rs index 5dfe5cc2c542..49d5977078b0 100644 --- a/src/tools/miri/src/shims/x86/avx2.rs +++ b/src/tools/miri/src/shims/x86/avx2.rs @@ -1,7 +1,6 @@ use rustc_abi::CanonAbi; use rustc_middle::mir; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf as _; use rustc_span::Symbol; use rustc_target::callconv::FnAbi; diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 1e82f521249f..fbfe459711e0 100644 --- 
a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -2,7 +2,6 @@ use rustc_abi::{CanonAbi, FieldIdx, Size}; use rustc_apfloat::Float; use rustc_apfloat::ieee::Single; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf as _; use rustc_middle::{mir, ty}; use rustc_span::Symbol; use rustc_target::callconv::FnAbi; diff --git a/src/tools/miri/src/shims/x86/sse42.rs b/src/tools/miri/src/shims/x86/sse42.rs index 830513f02911..7e1e1482ef47 100644 --- a/src/tools/miri/src/shims/x86/sse42.rs +++ b/src/tools/miri/src/shims/x86/sse42.rs @@ -1,7 +1,6 @@ use rustc_abi::{CanonAbi, Size}; use rustc_middle::mir; use rustc_middle::ty::Ty; -use rustc_middle::ty::layout::LayoutOf as _; use rustc_span::Symbol; use rustc_target::callconv::FnAbi; diff --git a/src/tools/run-make-support/src/lib.rs b/src/tools/run-make-support/src/lib.rs index 947f815fd697..67d8c351a59f 100644 --- a/src/tools/run-make-support/src/lib.rs +++ b/src/tools/run-make-support/src/lib.rs @@ -83,7 +83,7 @@ pub use run::{cmd, run, run_fail, run_with_args}; /// Helpers for checking target information. pub use targets::{ - apple_os, is_aix, is_darwin, is_msvc, is_windows, is_windows_gnu, is_win7, llvm_components_contain, + apple_os, is_aix, is_darwin, is_msvc, is_windows, is_windows_gnu, is_windows_msvc, is_win7, llvm_components_contain, target, uname, }; diff --git a/src/tools/run-make-support/src/run.rs b/src/tools/run-make-support/src/run.rs index 60e711d34027..b95f3a5cfe5a 100644 --- a/src/tools/run-make-support/src/run.rs +++ b/src/tools/run-make-support/src/run.rs @@ -1,4 +1,4 @@ -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::path::PathBuf; use std::{env, panic}; @@ -21,6 +21,20 @@ fn run_common(name: &str, args: Option<&[&str]>) -> Command { // will have to be changed (and the support files will have to be uploaded). 
cmd.arg("0"); cmd.arg(bin_path); + cmd + } else if let Ok(runner) = std::env::var("RUNNER") { + let mut args = split_maybe_args(&runner); + + let prog = args.remove(0); + let mut cmd = Command::new(prog); + + for arg in args { + cmd.arg(arg); + } + + cmd.arg("--"); + cmd.arg(bin_path); + cmd } else { Command::new(bin_path) @@ -92,3 +106,12 @@ pub fn cmd>(program: S) -> Command { command.env("LC_ALL", "C"); // force english locale command } + +fn split_maybe_args(s: &str) -> Vec { + // FIXME(132599): implement proper env var/shell argument splitting. + s.split(' ') + .filter_map(|s| { + if s.chars().all(|c| c.is_whitespace()) { None } else { Some(OsString::from(s)) } + }) + .collect() +} diff --git a/src/tools/run-make-support/src/targets.rs b/src/tools/run-make-support/src/targets.rs index 86edbdf750bb..1ab2e2ab2be4 100644 --- a/src/tools/run-make-support/src/targets.rs +++ b/src/tools/run-make-support/src/targets.rs @@ -28,6 +28,12 @@ pub fn is_windows_gnu() -> bool { target().ends_with("windows-gnu") } +/// Check if target is windows-msvc. +#[must_use] +pub fn is_windows_msvc() -> bool { + target().ends_with("windows-msvc") +} + /// Check if target is win7. 
#[must_use] pub fn is_win7() -> bool { diff --git a/src/tools/rust-analyzer/crates/hir-def/src/db.rs b/src/tools/rust-analyzer/crates/hir-def/src/db.rs index c618e4bdce7d..00408e95ae6f 100644 --- a/src/tools/rust-analyzer/crates/hir-def/src/db.rs +++ b/src/tools/rust-analyzer/crates/hir-def/src/db.rs @@ -11,12 +11,12 @@ use syntax::{AstPtr, ast}; use triomphe::Arc; use crate::{ - AssocItemId, AttrDefId, ConstId, ConstLoc, DefWithBodyId, EnumId, EnumLoc, EnumVariantId, - EnumVariantLoc, ExternBlockId, ExternBlockLoc, ExternCrateId, ExternCrateLoc, FunctionId, - FunctionLoc, GenericDefId, ImplId, ImplLoc, LocalFieldId, Macro2Id, Macro2Loc, MacroExpander, - MacroId, MacroRulesId, MacroRulesLoc, MacroRulesLocFlags, ProcMacroId, ProcMacroLoc, StaticId, - StaticLoc, StructId, StructLoc, TraitAliasId, TraitAliasLoc, TraitId, TraitLoc, TypeAliasId, - TypeAliasLoc, UnionId, UnionLoc, UseId, UseLoc, VariantId, + AssocItemId, AttrDefId, BlockId, BlockLoc, ConstId, ConstLoc, DefWithBodyId, EnumId, EnumLoc, + EnumVariantId, EnumVariantLoc, ExternBlockId, ExternBlockLoc, ExternCrateId, ExternCrateLoc, + FunctionId, FunctionLoc, GenericDefId, ImplId, ImplLoc, LocalFieldId, Macro2Id, Macro2Loc, + MacroExpander, MacroId, MacroRulesId, MacroRulesLoc, MacroRulesLocFlags, ProcMacroId, + ProcMacroLoc, StaticId, StaticLoc, StructId, StructLoc, TraitAliasId, TraitAliasLoc, TraitId, + TraitLoc, TypeAliasId, TypeAliasLoc, UnionId, UnionLoc, UseId, UseLoc, VariantId, attr::{Attrs, AttrsWithOwner}, expr_store::{ Body, BodySourceMap, ExpressionStore, ExpressionStoreSourceMap, scope::ExprScopes, @@ -90,7 +90,10 @@ pub trait InternDatabase: RootQueryDb { #[salsa::interned] fn intern_macro_rules(&self, loc: MacroRulesLoc) -> MacroRulesId; - // // endregion: items + // endregion: items + + #[salsa::interned] + fn intern_block(&self, loc: BlockLoc) -> BlockId; } #[query_group::query_group] diff --git a/src/tools/rust-analyzer/crates/hir-def/src/expr_store/lower.rs 
b/src/tools/rust-analyzer/crates/hir-def/src/expr_store/lower.rs index 03683ec9203c..efa1374a4465 100644 --- a/src/tools/rust-analyzer/crates/hir-def/src/expr_store/lower.rs +++ b/src/tools/rust-analyzer/crates/hir-def/src/expr_store/lower.rs @@ -11,7 +11,7 @@ use base_db::FxIndexSet; use cfg::CfgOptions; use either::Either; use hir_expand::{ - HirFileId, InFile, Intern, MacroDefId, + HirFileId, InFile, MacroDefId, mod_path::tool_path, name::{AsName, Name}, span_map::SpanMapRef, @@ -2148,7 +2148,7 @@ impl ExprCollector<'_> { ) -> ExprId { let block_id = self.expander.ast_id_map().ast_id_for_block(&block).map(|file_local_id| { let ast_id = self.expander.in_file(file_local_id); - BlockLoc { ast_id, module: self.module }.intern(self.db) + self.db.intern_block(BlockLoc { ast_id, module: self.module }) }); let (module, def_map) = @@ -2815,6 +2815,51 @@ impl ExprCollector<'_> { mutability: Mutability::Shared, }) }; + + // Assume that rustc version >= 1.89.0 iff lang item `format_arguments` exists + // but `format_unsafe_arg` does not + let fmt_args = + || crate::lang_item::lang_item(self.db, self.module.krate(), LangItem::FormatArguments); + let fmt_unsafe_arg = + || crate::lang_item::lang_item(self.db, self.module.krate(), LangItem::FormatUnsafeArg); + let use_format_args_since_1_89_0 = fmt_args().is_some() && fmt_unsafe_arg().is_none(); + + let idx = if use_format_args_since_1_89_0 { + self.collect_format_args_impl( + syntax_ptr, + fmt, + hygiene, + argmap, + lit_pieces, + format_options, + ) + } else { + self.collect_format_args_before_1_89_0_impl( + syntax_ptr, + fmt, + argmap, + lit_pieces, + format_options, + ) + }; + + self.source_map + .template_map + .get_or_insert_with(Default::default) + .format_args_to_captures + .insert(idx, (hygiene, mappings)); + idx + } + + /// `format_args!` expansion implementation for rustc versions < `1.89.0` + fn collect_format_args_before_1_89_0_impl( + &mut self, + syntax_ptr: AstPtr, + fmt: FormatArgs, + argmap: FxIndexSet<(usize, 
ArgumentType)>, + lit_pieces: ExprId, + format_options: ExprId, + ) -> ExprId { let arguments = &*fmt.arguments.arguments; let args = if arguments.is_empty() { @@ -2902,19 +2947,181 @@ impl ExprCollector<'_> { }); } - let idx = self.alloc_expr( + self.alloc_expr( Expr::Call { callee: new_v1_formatted, args: Box::new([lit_pieces, args, format_options, unsafe_arg_new]), }, syntax_ptr, - ); - self.source_map - .template_map - .get_or_insert_with(Default::default) - .format_args_to_captures - .insert(idx, (hygiene, mappings)); - idx + ) + } + + /// `format_args!` expansion implementation for rustc versions >= `1.89.0`, + /// especially since [this PR](https://github.com/rust-lang/rust/pull/140748) + fn collect_format_args_impl( + &mut self, + syntax_ptr: AstPtr, + fmt: FormatArgs, + hygiene: HygieneId, + argmap: FxIndexSet<(usize, ArgumentType)>, + lit_pieces: ExprId, + format_options: ExprId, + ) -> ExprId { + let arguments = &*fmt.arguments.arguments; + + let (let_stmts, args) = if arguments.is_empty() { + ( + // Generate: + // [] + vec![], + self.alloc_expr_desugared(Expr::Array(Array::ElementList { + elements: Box::default(), + })), + ) + } else if argmap.len() == 1 && arguments.len() == 1 { + // Only one argument, so we don't need to make the `args` tuple. 
+ // + // Generate: + // super let args = [::new_display(&arg)]; + let args = argmap + .iter() + .map(|&(arg_index, ty)| { + let ref_arg = self.alloc_expr_desugared(Expr::Ref { + expr: arguments[arg_index].expr, + rawness: Rawness::Ref, + mutability: Mutability::Shared, + }); + self.make_argument(ref_arg, ty) + }) + .collect(); + let args = + self.alloc_expr_desugared(Expr::Array(Array::ElementList { elements: args })); + let args_name = Name::new_symbol_root(sym::args); + let args_binding = + self.alloc_binding(args_name.clone(), BindingAnnotation::Unannotated, hygiene); + let args_pat = self.alloc_pat_desugared(Pat::Bind { id: args_binding, subpat: None }); + self.add_definition_to_binding(args_binding, args_pat); + // TODO: We don't have `super let` yet. + let let_stmt = Statement::Let { + pat: args_pat, + type_ref: None, + initializer: Some(args), + else_branch: None, + }; + (vec![let_stmt], self.alloc_expr_desugared(Expr::Path(Path::from(args_name)))) + } else { + // Generate: + // super let args = (&arg0, &arg1, &...); + let args_name = Name::new_symbol_root(sym::args); + let args_binding = + self.alloc_binding(args_name.clone(), BindingAnnotation::Unannotated, hygiene); + let args_pat = self.alloc_pat_desugared(Pat::Bind { id: args_binding, subpat: None }); + self.add_definition_to_binding(args_binding, args_pat); + let elements = arguments + .iter() + .map(|arg| { + self.alloc_expr_desugared(Expr::Ref { + expr: arg.expr, + rawness: Rawness::Ref, + mutability: Mutability::Shared, + }) + }) + .collect(); + let args_tuple = self.alloc_expr_desugared(Expr::Tuple { exprs: elements }); + // TODO: We don't have `super let` yet + let let_stmt1 = Statement::Let { + pat: args_pat, + type_ref: None, + initializer: Some(args_tuple), + else_branch: None, + }; + + // Generate: + // super let args = [ + // ::new_display(args.0), + // ::new_lower_hex(args.1), + // ::new_debug(args.0), + // … + // ]; + let args = argmap + .iter() + .map(|&(arg_index, ty)| { + let 
args_ident_expr = + self.alloc_expr_desugared(Expr::Path(args_name.clone().into())); + let arg = self.alloc_expr_desugared(Expr::Field { + expr: args_ident_expr, + name: Name::new_tuple_field(arg_index), + }); + self.make_argument(arg, ty) + }) + .collect(); + let array = + self.alloc_expr_desugared(Expr::Array(Array::ElementList { elements: args })); + let args_binding = + self.alloc_binding(args_name.clone(), BindingAnnotation::Unannotated, hygiene); + let args_pat = self.alloc_pat_desugared(Pat::Bind { id: args_binding, subpat: None }); + self.add_definition_to_binding(args_binding, args_pat); + let let_stmt2 = Statement::Let { + pat: args_pat, + type_ref: None, + initializer: Some(array), + else_branch: None, + }; + (vec![let_stmt1, let_stmt2], self.alloc_expr_desugared(Expr::Path(args_name.into()))) + }; + + // Generate: + // &args + let args = self.alloc_expr_desugared(Expr::Ref { + expr: args, + rawness: Rawness::Ref, + mutability: Mutability::Shared, + }); + + let call_block = { + // Generate: + // unsafe { + // ::new_v1_formatted( + // lit_pieces, + // args, + // format_options, + // ) + // } + + let new_v1_formatted = LangItem::FormatArguments.ty_rel_path( + self.db, + self.module.krate(), + Name::new_symbol_root(sym::new_v1_formatted), + ); + let new_v1_formatted = + self.alloc_expr_desugared(new_v1_formatted.map_or(Expr::Missing, Expr::Path)); + let args = [lit_pieces, args, format_options]; + let call = self + .alloc_expr_desugared(Expr::Call { callee: new_v1_formatted, args: args.into() }); + + Expr::Unsafe { id: None, statements: Box::default(), tail: Some(call) } + }; + + if !let_stmts.is_empty() { + // Generate: + // { + // super let … + // super let … + // ::new_…(…) + // } + let call = self.alloc_expr_desugared(call_block); + self.alloc_expr( + Expr::Block { + id: None, + statements: let_stmts.into(), + tail: Some(call), + label: None, + }, + syntax_ptr, + ) + } else { + self.alloc_expr(call_block, syntax_ptr) + } } /// Generate a hir expression 
for a format_args placeholder specification. diff --git a/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body.rs b/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body.rs index 29e249b07a72..927e280d7394 100644 --- a/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body.rs +++ b/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body.rs @@ -178,14 +178,14 @@ fn main() { } #[test] -fn desugar_builtin_format_args() { +fn desugar_builtin_format_args_before_1_89_0() { let (db, body, def) = lower( r#" -//- minicore: fmt +//- minicore: fmt_before_1_89_0 fn main() { let are = "are"; let count = 10; - builtin#format_args("\u{1b}hello {count:02} {} friends, we {are:?} {0}{last}", "fancy", last = "!"); + builtin#format_args("\u{1b}hello {count:02} {} friends, we {are:?} {0}{last}", "fancy", orphan = (), last = "!"); } "#, ); @@ -249,14 +249,100 @@ fn main() { builtin#lang(Count::Implied), ), ], - unsafe { - builtin#lang(UnsafeArg::new)() + { + (); + unsafe { + builtin#lang(UnsafeArg::new)() + } }, ); }"#]] .assert_eq(&body.pretty_print(&db, def, Edition::CURRENT)) } +#[test] +fn desugar_builtin_format_args() { + let (db, body, def) = lower( + r#" +//- minicore: fmt +fn main() { + let are = "are"; + let count = 10; + builtin#format_args("\u{1b}hello {count:02} {} friends, we {are:?} {0}{last}", "fancy", orphan = (), last = "!"); +} +"#, + ); + + expect![[r#" + fn main() { + let are = "are"; + let count = 10; + { + let args = (&"fancy", &(), &"!", &count, &are, ); + let args = [ + builtin#lang(Argument::new_display)( + args.3, + ), builtin#lang(Argument::new_display)( + args.0, + ), builtin#lang(Argument::new_debug)( + args.4, + ), builtin#lang(Argument::new_display)( + args.2, + ), + ]; + unsafe { + builtin#lang(Arguments::new_v1_formatted)( + &[ + "\u{1b}hello ", " ", " friends, we ", " ", "", + ], + &args, + &[ + builtin#lang(Placeholder::new)( + 0usize, + ' ', + builtin#lang(Alignment::Unknown), + 8u32, + 
builtin#lang(Count::Implied), + builtin#lang(Count::Is)( + 2, + ), + ), builtin#lang(Placeholder::new)( + 1usize, + ' ', + builtin#lang(Alignment::Unknown), + 0u32, + builtin#lang(Count::Implied), + builtin#lang(Count::Implied), + ), builtin#lang(Placeholder::new)( + 2usize, + ' ', + builtin#lang(Alignment::Unknown), + 0u32, + builtin#lang(Count::Implied), + builtin#lang(Count::Implied), + ), builtin#lang(Placeholder::new)( + 1usize, + ' ', + builtin#lang(Alignment::Unknown), + 0u32, + builtin#lang(Count::Implied), + builtin#lang(Count::Implied), + ), builtin#lang(Placeholder::new)( + 3usize, + ' ', + builtin#lang(Alignment::Unknown), + 0u32, + builtin#lang(Count::Implied), + builtin#lang(Count::Implied), + ), + ], + ) + } + }; + }"#]] + .assert_eq(&body.pretty_print(&db, def, Edition::CURRENT)) +} + #[test] fn test_macro_hygiene() { let (db, body, def) = lower( @@ -295,29 +381,31 @@ impl SsrError { expect![[r#" fn main() { _ = ra_test_fixture::error::SsrError::new( - builtin#lang(Arguments::new_v1_formatted)( - &[ - "Failed to resolve path `", "`", - ], - &[ + { + let args = [ builtin#lang(Argument::new_display)( &node.text(), ), - ], - &[ - builtin#lang(Placeholder::new)( - 0usize, - ' ', - builtin#lang(Alignment::Unknown), - 0u32, - builtin#lang(Count::Implied), - builtin#lang(Count::Implied), - ), - ], + ]; unsafe { - builtin#lang(UnsafeArg::new)() - }, - ), + builtin#lang(Arguments::new_v1_formatted)( + &[ + "Failed to resolve path `", "`", + ], + &args, + &[ + builtin#lang(Placeholder::new)( + 0usize, + ' ', + builtin#lang(Alignment::Unknown), + 0u32, + builtin#lang(Count::Implied), + builtin#lang(Count::Implied), + ), + ], + ) + } + }, ); }"#]] .assert_eq(&body.pretty_print(&db, def, Edition::CURRENT)) @@ -327,7 +415,7 @@ impl SsrError { fn regression_10300() { let (db, body, def) = lower( r#" -//- minicore: concat, panic +//- minicore: concat, panic, fmt_before_1_89_0 mod private { pub use core::concat; } diff --git 
a/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body/block.rs b/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body/block.rs index bb0b70bc5bf2..c7707378a5b3 100644 --- a/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body/block.rs +++ b/src/tools/rust-analyzer/crates/hir-def/src/expr_store/tests/body/block.rs @@ -189,8 +189,8 @@ fn f() { } "#, expect![[r#" - BlockIdLt { [salsa id]: Id(3c01) } in BlockRelativeModuleId { block: Some(BlockIdLt { [salsa id]: Id(3c00) }), local_id: Idx::(1) } - BlockIdLt { [salsa id]: Id(3c00) } in BlockRelativeModuleId { block: None, local_id: Idx::(0) } + BlockId(3c01) in BlockRelativeModuleId { block: Some(BlockId(3c00)), local_id: Idx::(1) } + BlockId(3c00) in BlockRelativeModuleId { block: None, local_id: Idx::(0) } crate scope "#]], ); diff --git a/src/tools/rust-analyzer/crates/hir-def/src/lib.rs b/src/tools/rust-analyzer/crates/hir-def/src/lib.rs index a542214d3031..a562f2d0af2f 100644 --- a/src/tools/rust-analyzer/crates/hir-def/src/lib.rs +++ b/src/tools/rust-analyzer/crates/hir-def/src/lib.rs @@ -384,26 +384,7 @@ pub struct BlockLoc { /// The containing module. pub module: ModuleId, } -#[salsa_macros::tracked(debug)] -#[derive(PartialOrd, Ord)] -pub struct BlockIdLt<'db> { - pub loc: BlockLoc, -} -pub type BlockId = BlockIdLt<'static>; -impl hir_expand::Intern for BlockLoc { - type Database = dyn DefDatabase; - type ID = BlockId; - fn intern(self, db: &Self::Database) -> Self::ID { - unsafe { std::mem::transmute::, BlockId>(BlockIdLt::new(db, self)) } - } -} -impl hir_expand::Lookup for BlockId { - type Database = dyn DefDatabase; - type Data = BlockLoc; - fn lookup(&self, db: &Self::Database) -> Self::Data { - self.loc(db) - } -} +impl_intern!(BlockId, BlockLoc, intern_block, lookup_intern_block); /// A `ModuleId` that is always a crate's root module. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/src/tools/rust-analyzer/crates/hir-ty/src/infer/closure.rs b/src/tools/rust-analyzer/crates/hir-ty/src/infer/closure.rs index d1432cacf8d8..b756bb859d3e 100644 --- a/src/tools/rust-analyzer/crates/hir-ty/src/infer/closure.rs +++ b/src/tools/rust-analyzer/crates/hir-ty/src/infer/closure.rs @@ -1230,11 +1230,15 @@ impl InferenceContext<'_> { self.select_from_expr(*expr); } } + Expr::Let { pat: _, expr } => { + self.walk_expr(*expr); + let place = self.place_of_expr(*expr); + self.ref_expr(*expr, place); + } Expr::UnaryOp { expr, op: _ } | Expr::Array(Array::Repeat { initializer: expr, repeat: _ }) | Expr::Await { expr } | Expr::Loop { body: expr, label: _ } - | Expr::Let { pat: _, expr } | Expr::Box { expr } | Expr::Cast { expr, type_ref: _ } => { self.consume_expr(*expr); diff --git a/src/tools/rust-analyzer/crates/hir-ty/src/layout.rs b/src/tools/rust-analyzer/crates/hir-ty/src/layout.rs index c253fe25672f..c58bd1b773e2 100644 --- a/src/tools/rust-analyzer/crates/hir-ty/src/layout.rs +++ b/src/tools/rust-analyzer/crates/hir-ty/src/layout.rs @@ -268,7 +268,7 @@ pub fn layout_of_ty_query( // let pointee = tcx.normalize_erasing_regions(param_env, pointee); // if pointee.is_sized(tcx.at(DUMMY_SP), param_env) { - // return Ok(tcx.mk_layout(LayoutS::scalar(cx, data_ptr))); + // return Ok(tcx.mk_layout(LayoutData::scalar(cx, data_ptr))); // } let mut unsized_part = struct_tail_erasing_lifetimes(db, pointee.clone()); diff --git a/src/tools/rust-analyzer/crates/hir-ty/src/tests/closure_captures.rs b/src/tools/rust-analyzer/crates/hir-ty/src/tests/closure_captures.rs index 88d21be81ea6..7fb981752de8 100644 --- a/src/tools/rust-analyzer/crates/hir-ty/src/tests/closure_captures.rs +++ b/src/tools/rust-analyzer/crates/hir-ty/src/tests/closure_captures.rs @@ -444,3 +444,22 @@ fn main() { expect!["99..165;49..54;120..121,133..134 ByRef(Mut { kind: Default }) a &'? 
mut A"], ); } + +#[test] +fn let_binding_is_a_ref_capture() { + check_closure_captures( + r#" +//- minicore:copy +struct S; +fn main() { + let mut s = S; + let s_ref = &mut s; + let closure = || { + if let ref cb = s_ref { + } + }; +} +"#, + expect!["83..135;49..54;112..117 ByRef(Shared) s_ref &'? &'? mut S"], + ); +} diff --git a/src/tools/rust-analyzer/crates/hir/src/attrs.rs b/src/tools/rust-analyzer/crates/hir/src/attrs.rs index b1cf30b98f5b..0bce69a179b8 100644 --- a/src/tools/rust-analyzer/crates/hir/src/attrs.rs +++ b/src/tools/rust-analyzer/crates/hir/src/attrs.rs @@ -242,9 +242,9 @@ fn resolve_assoc_or_field( resolve_field(db, variant_def, name, ns) } -fn resolve_assoc_item( - db: &dyn HirDatabase, - ty: &Type, +fn resolve_assoc_item<'db>( + db: &'db dyn HirDatabase, + ty: &Type<'db>, name: &Name, ns: Option, ) -> Option { @@ -256,10 +256,10 @@ fn resolve_assoc_item( }) } -fn resolve_impl_trait_item( - db: &dyn HirDatabase, +fn resolve_impl_trait_item<'db>( + db: &'db dyn HirDatabase, resolver: Resolver<'_>, - ty: &Type, + ty: &Type<'db>, name: &Name, ns: Option, ) -> Option { diff --git a/src/tools/rust-analyzer/crates/hir/src/diagnostics.rs b/src/tools/rust-analyzer/crates/hir/src/diagnostics.rs index f7b140e03d43..074bde91fb69 100644 --- a/src/tools/rust-analyzer/crates/hir/src/diagnostics.rs +++ b/src/tools/rust-analyzer/crates/hir/src/diagnostics.rs @@ -36,15 +36,15 @@ pub use hir_ty::{ }; macro_rules! diagnostics { - ($($diag:ident,)*) => { + ($($diag:ident $(<$lt:lifetime>)?,)*) => { #[derive(Debug)] - pub enum AnyDiagnostic {$( - $diag(Box<$diag>), + pub enum AnyDiagnostic<'db> {$( + $diag(Box<$diag $(<$lt>)?>), )*} $( - impl From<$diag> for AnyDiagnostic { - fn from(d: $diag) -> AnyDiagnostic { + impl<'db> From<$diag $(<$lt>)?> for AnyDiagnostic<'db> { + fn from(d: $diag $(<$lt>)?) -> AnyDiagnostic<'db> { AnyDiagnostic::$diag(Box::new(d)) } } @@ -69,12 +69,12 @@ macro_rules! 
diagnostics { diagnostics![ AwaitOutsideOfAsync, BreakOutsideOfLoop, - CastToUnsized, - ExpectedFunction, + CastToUnsized<'db>, + ExpectedFunction<'db>, InactiveCode, IncoherentImpl, IncorrectCase, - InvalidCast, + InvalidCast<'db>, InvalidDeriveTarget, MacroDefError, MacroError, @@ -85,7 +85,7 @@ diagnostics![ MissingFields, MissingMatchArms, MissingUnsafe, - MovedOutOfRef, + MovedOutOfRef<'db>, NeedMut, NonExhaustiveLet, NoSuchField, @@ -98,17 +98,17 @@ diagnostics![ TraitImplMissingAssocItems, TraitImplOrphan, TraitImplRedundantAssocItems, - TypedHole, - TypeMismatch, + TypedHole<'db>, + TypeMismatch<'db>, UndeclaredLabel, UnimplementedBuiltinMacro, UnreachableLabel, UnresolvedAssocItem, UnresolvedExternCrate, - UnresolvedField, + UnresolvedField<'db>, UnresolvedImport, UnresolvedMacroCall, - UnresolvedMethodCall, + UnresolvedMethodCall<'db>, UnresolvedModule, UnresolvedIdent, UnusedMut, @@ -130,9 +130,9 @@ pub struct BreakOutsideOfLoop { } #[derive(Debug)] -pub struct TypedHole { +pub struct TypedHole<'db> { pub expr: InFile, - pub expected: Type, + pub expected: Type<'db>, } #[derive(Debug)] @@ -242,25 +242,25 @@ pub struct MismatchedTupleStructPatArgCount { } #[derive(Debug)] -pub struct ExpectedFunction { +pub struct ExpectedFunction<'db> { pub call: InFile, - pub found: Type, + pub found: Type<'db>, } #[derive(Debug)] -pub struct UnresolvedField { +pub struct UnresolvedField<'db> { pub expr: InFile, - pub receiver: Type, + pub receiver: Type<'db>, pub name: Name, pub method_with_same_name_exists: bool, } #[derive(Debug)] -pub struct UnresolvedMethodCall { +pub struct UnresolvedMethodCall<'db> { pub expr: InFile, - pub receiver: Type, + pub receiver: Type<'db>, pub name: Name, - pub field_with_same_name: Option, + pub field_with_same_name: Option>, pub assoc_func_with_same_name: Option, } @@ -329,10 +329,10 @@ pub struct NonExhaustiveLet { } #[derive(Debug)] -pub struct TypeMismatch { +pub struct TypeMismatch<'db> { pub expr_or_pat: InFile, - pub expected: 
Type, - pub actual: Type, + pub expected: Type<'db>, + pub actual: Type<'db>, } #[derive(Debug)] @@ -352,8 +352,8 @@ pub struct UnusedVariable { } #[derive(Debug)] -pub struct MovedOutOfRef { - pub ty: Type, +pub struct MovedOutOfRef<'db> { + pub ty: Type<'db>, pub span: InFile, } @@ -403,17 +403,17 @@ pub struct RemoveUnnecessaryElse { } #[derive(Debug)] -pub struct CastToUnsized { +pub struct CastToUnsized<'db> { pub expr: InFile, - pub cast_ty: Type, + pub cast_ty: Type<'db>, } #[derive(Debug)] -pub struct InvalidCast { +pub struct InvalidCast<'db> { pub expr: InFile, pub error: CastError, - pub expr_ty: Type, - pub cast_ty: Type, + pub expr_ty: Type<'db>, + pub cast_ty: Type<'db>, } #[derive(Debug)] @@ -482,12 +482,12 @@ pub struct IncorrectGenericsOrder { pub expected_kind: GenericArgKind, } -impl AnyDiagnostic { +impl<'db> AnyDiagnostic<'db> { pub(crate) fn body_validation_diagnostic( - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, diagnostic: BodyValidationDiagnostic, source_map: &hir_def::expr_store::BodySourceMap, - ) -> Option { + ) -> Option> { match diagnostic { BodyValidationDiagnostic::RecordMissingFields { record, variant, missed_fields } => { let variant_data = variant.variant_data(db); @@ -618,12 +618,12 @@ impl AnyDiagnostic { } pub(crate) fn inference_diagnostic( - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, def: DefWithBodyId, d: &InferenceDiagnostic, source_map: &hir_def::expr_store::BodySourceMap, sig_map: &hir_def::expr_store::ExpressionStoreSourceMap, - ) -> Option { + ) -> Option> { let expr_syntax = |expr| { source_map .expr_syntax(expr) @@ -819,7 +819,7 @@ impl AnyDiagnostic { fn path_diagnostic( diag: &PathLoweringDiagnostic, path: InFile, - ) -> Option { + ) -> Option> { Some(match *diag { PathLoweringDiagnostic::GenericArgsProhibited { segment, reason } => { let segment = hir_segment_to_ast_segment(&path.value, segment)?; @@ -912,8 +912,8 @@ impl AnyDiagnostic { pub(crate) fn ty_diagnostic( diag: &TyLoweringDiagnostic, 
source_map: &ExpressionStoreSourceMap, - db: &dyn HirDatabase, - ) -> Option { + db: &'db dyn HirDatabase, + ) -> Option> { let Ok(source) = source_map.type_syntax(diag.source) else { stdx::never!("error on synthetic type syntax"); return None; diff --git a/src/tools/rust-analyzer/crates/hir/src/display.rs b/src/tools/rust-analyzer/crates/hir/src/display.rs index 124ab8e274af..112558bdd04a 100644 --- a/src/tools/rust-analyzer/crates/hir/src/display.rs +++ b/src/tools/rust-analyzer/crates/hir/src/display.rs @@ -431,7 +431,7 @@ impl HirDisplay for Variant { } } -impl HirDisplay for Type { +impl HirDisplay for Type<'_> { fn hir_fmt(&self, f: &mut HirFormatter<'_>) -> Result<(), HirDisplayError> { self.ty.hir_fmt(f) } @@ -743,7 +743,7 @@ impl HirDisplay for Static { } } -impl HirDisplay for TraitRef { +impl HirDisplay for TraitRef<'_> { fn hir_fmt(&self, f: &mut HirFormatter<'_>) -> Result<(), HirDisplayError> { self.trait_ref.hir_fmt(f) } diff --git a/src/tools/rust-analyzer/crates/hir/src/has_source.rs b/src/tools/rust-analyzer/crates/hir/src/has_source.rs index fe7429c86725..4767d4792e71 100644 --- a/src/tools/rust-analyzer/crates/hir/src/has_source.rs +++ b/src/tools/rust-analyzer/crates/hir/src/has_source.rs @@ -225,7 +225,7 @@ impl HasSource for LocalSource { } } -impl HasSource for Param { +impl HasSource for Param<'_> { type Ast = Either; fn source(self, db: &dyn HirDatabase) -> Option> { diff --git a/src/tools/rust-analyzer/crates/hir/src/lib.rs b/src/tools/rust-analyzer/crates/hir/src/lib.rs index adae335627ba..3b39707cf609 100644 --- a/src/tools/rust-analyzer/crates/hir/src/lib.rs +++ b/src/tools/rust-analyzer/crates/hir/src/lib.rs @@ -84,7 +84,7 @@ use nameres::diagnostics::DefDiagnosticKind; use rustc_hash::FxHashSet; use smallvec::SmallVec; use span::{AstIdNode, Edition, FileId}; -use stdx::{format_to, impl_from, never}; +use stdx::{format_to, impl_from, never, variance::PhantomCovariantLifetime}; use syntax::{ AstNode, AstPtr, SmolStr, SyntaxNode, 
SyntaxNodePtr, T, TextRange, ToSmolStr, ast::{self, HasAttrs as _, HasName, HasVisibility as _}, @@ -400,7 +400,11 @@ impl ModuleDef { Some(name) } - pub fn diagnostics(self, db: &dyn HirDatabase, style_lints: bool) -> Vec { + pub fn diagnostics<'db>( + self, + db: &'db dyn HirDatabase, + style_lints: bool, + ) -> Vec> { let id = match self { ModuleDef::Adt(it) => match it { Adt::Struct(it) => it.id.into(), @@ -612,10 +616,10 @@ impl Module { } /// Fills `acc` with the module's diagnostics. - pub fn diagnostics( + pub fn diagnostics<'db>( self, - db: &dyn HirDatabase, - acc: &mut Vec, + db: &'db dyn HirDatabase, + acc: &mut Vec>, style_lints: bool, ) { let _p = tracing::info_span!("diagnostics", name = ?self.name(db)).entered(); @@ -970,10 +974,10 @@ impl Module { } } -fn macro_call_diagnostics( - db: &dyn HirDatabase, +fn macro_call_diagnostics<'db>( + db: &'db dyn HirDatabase, macro_call_id: MacroCallId, - acc: &mut Vec, + acc: &mut Vec>, ) { let Some(e) = db.parse_macro_expansion_error(macro_call_id) else { return; @@ -1010,7 +1014,11 @@ fn macro_call_diagnostics( } } -fn emit_macro_def_diagnostics(db: &dyn HirDatabase, acc: &mut Vec, m: Macro) { +fn emit_macro_def_diagnostics<'db>( + db: &'db dyn HirDatabase, + acc: &mut Vec>, + m: Macro, +) { let id = db.macro_def(m.id); if let hir_expand::db::TokenExpander::DeclarativeMacro(expander) = db.macro_expander(id) { if let Some(e) = expander.mac.err() { @@ -1030,18 +1038,18 @@ fn emit_macro_def_diagnostics(db: &dyn HirDatabase, acc: &mut Vec } } -fn emit_def_diagnostic( - db: &dyn HirDatabase, - acc: &mut Vec, +fn emit_def_diagnostic<'db>( + db: &'db dyn HirDatabase, + acc: &mut Vec>, diag: &DefDiagnostic, edition: Edition, ) { emit_def_diagnostic_(db, acc, &diag.kind, edition) } -fn emit_def_diagnostic_( - db: &dyn HirDatabase, - acc: &mut Vec, +fn emit_def_diagnostic_<'db>( + db: &'db dyn HirDatabase, + acc: &mut Vec>, diag: &DefDiagnosticKind, edition: Edition, ) { @@ -1251,14 +1259,18 @@ impl TupleField { 
Name::new_tuple_field(self.index as usize) } - pub fn ty(&self, db: &dyn HirDatabase) -> Type { + pub fn ty<'db>(&self, db: &'db dyn HirDatabase) -> Type<'db> { let ty = db.infer(self.owner).tuple_field_access_types[&self.tuple] .as_slice(Interner) .get(self.index as usize) .and_then(|arg| arg.ty(Interner)) .cloned() .unwrap_or_else(|| TyKind::Error.intern(Interner)); - Type { env: db.trait_environment_for_body(self.owner), ty } + Type { + env: db.trait_environment_for_body(self.owner), + ty, + _pd: PhantomCovariantLifetime::new(), + } } } @@ -1309,7 +1321,7 @@ impl Field { /// Returns the type as in the signature of the struct (i.e., with /// placeholder types for type parameters). Only use this in the context of /// the field definition. - pub fn ty(&self, db: &dyn HirDatabase) -> Type { + pub fn ty<'db>(&self, db: &'db dyn HirDatabase) -> Type<'db> { let var_id = self.parent.into(); let generic_def_id: GenericDefId = match self.parent { VariantDef::Struct(it) => it.id.into(), @@ -1322,7 +1334,11 @@ impl Field { } // FIXME: Find better API to also handle const generics - pub fn ty_with_args(&self, db: &dyn HirDatabase, generics: impl Iterator) -> Type { + pub fn ty_with_args<'db>( + &self, + db: &'db dyn HirDatabase, + generics: impl Iterator>, + ) -> Type<'db> { let var_id = self.parent.into(); let def_id: AdtId = match self.parent { VariantDef::Struct(it) => it.id.into(), @@ -1394,15 +1410,15 @@ impl Struct { .collect() } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_def(db, self.id) } - pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type { + pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_def_placeholders(db, self.id) } - pub fn constructor_ty(self, db: &dyn HirDatabase) -> Type { + pub fn constructor_ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_value_def(db, self.id) } @@ -1449,15 +1465,15 @@ impl Union { Module { id: self.id.lookup(db).container 
} } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_def(db, self.id) } - pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type { + pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_def_placeholders(db, self.id) } - pub fn constructor_ty(self, db: &dyn HirDatabase) -> Type { + pub fn constructor_ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_value_def(db, self.id) } @@ -1515,16 +1531,16 @@ impl Enum { db.enum_signature(self.id).repr } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty<'db>(self, db: &'db dyn HirDatabase) -> Type<'db> { Type::from_def(db, self.id) } - pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type { + pub fn ty_placeholders<'db>(self, db: &'db dyn HirDatabase) -> Type<'db> { Type::from_def_placeholders(db, self.id) } /// The type of the enum variant bodies. - pub fn variant_body_ty(self, db: &dyn HirDatabase) -> Type { + pub fn variant_body_ty<'db>(self, db: &'db dyn HirDatabase) -> Type<'db> { Type::new_for_crate( self.id.lookup(db).container.krate(), TyBuilder::builtin(match db.enum_signature(self.id).variant_body_type() { @@ -1599,7 +1615,7 @@ impl Variant { self.id.lookup(db).parent.into() } - pub fn constructor_ty(self, db: &dyn HirDatabase) -> Type { + pub fn constructor_ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_value_def(db, self.id) } @@ -1701,14 +1717,18 @@ impl Adt { /// Turns this ADT into a type. Any type parameters of the ADT will be /// turned into unknown types, which is good for e.g. finding the most /// general set of completions, but will not look very nice when printed. - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { let id = AdtId::from(self); Type::from_def(db, id) } /// Turns this ADT into a type with the given type parameters. This isn't /// the greatest API, FIXME find a better one. 
- pub fn ty_with_args(self, db: &dyn HirDatabase, args: impl Iterator) -> Type { + pub fn ty_with_args<'db>( + self, + db: &'db dyn HirDatabase, + args: impl Iterator>, + ) -> Type<'db> { let id = AdtId::from(self); let mut it = args.map(|t| t.ty); let ty = TyBuilder::def_ty(db, id.into(), None) @@ -1841,7 +1861,7 @@ impl DefWithBody { } /// Returns the type this def's body has to evaluate to. - pub fn body_type(self, db: &dyn HirDatabase) -> Type { + pub fn body_type(self, db: &dyn HirDatabase) -> Type<'_> { match self { DefWithBody::Function(it) => it.ret_type(db), DefWithBody::Static(it) => it.ty(db), @@ -1874,10 +1894,10 @@ impl DefWithBody { } } - pub fn diagnostics( + pub fn diagnostics<'db>( self, - db: &dyn HirDatabase, - acc: &mut Vec, + db: &'db dyn HirDatabase, + acc: &mut Vec>, style_lints: bool, ) { let krate = self.module(db).id.krate(); @@ -2107,7 +2127,7 @@ impl DefWithBody { fn expr_store_diagnostics( db: &dyn HirDatabase, - acc: &mut Vec, + acc: &mut Vec>, source_map: &ExpressionStoreSourceMap, ) { for diag in source_map.diagnostics() { @@ -2172,11 +2192,11 @@ impl Function { db.function_signature(self.id).name.clone() } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_value_def(db, self.id) } - pub fn fn_ptr_type(self, db: &dyn HirDatabase) -> Type { + pub fn fn_ptr_type(self, db: &dyn HirDatabase) -> Type<'_> { let resolver = self.id.resolver(db); let substs = TyBuilder::placeholder_subst(db, self.id); let callable_sig = db.callable_item_signature(self.id.into()).substitute(Interner, &substs); @@ -2185,7 +2205,7 @@ impl Function { } /// Get this function's return type - pub fn ret_type(self, db: &dyn HirDatabase) -> Type { + pub fn ret_type(self, db: &dyn HirDatabase) -> Type<'_> { let resolver = self.id.resolver(db); let substs = TyBuilder::placeholder_subst(db, self.id); let callable_sig = db.callable_item_signature(self.id.into()).substitute(Interner, &substs); @@ -2194,11 
+2214,11 @@ impl Function { } // FIXME: Find better API to also handle const generics - pub fn ret_type_with_args( + pub fn ret_type_with_args<'db>( self, - db: &dyn HirDatabase, - generics: impl Iterator, - ) -> Type { + db: &'db dyn HirDatabase, + generics: impl Iterator>, + ) -> Type<'db> { let resolver = self.id.resolver(db); let parent_id: Option = match self.id.lookup(db).container { ItemContainerId::ImplId(it) => Some(it.into()), @@ -2223,7 +2243,7 @@ impl Function { Type::new_with_resolver_inner(db, &resolver, ty) } - pub fn async_ret_type(self, db: &dyn HirDatabase) -> Option { + pub fn async_ret_type<'db>(self, db: &'db dyn HirDatabase) -> Option> { if !self.is_async(db) { return None; } @@ -2247,7 +2267,7 @@ impl Function { self.has_self_param(db).then_some(SelfParam { func: self.id }) } - pub fn assoc_fn_params(self, db: &dyn HirDatabase) -> Vec { + pub fn assoc_fn_params(self, db: &dyn HirDatabase) -> Vec> { let environment = db.trait_environment(self.id.into()); let substs = TyBuilder::placeholder_subst(db, self.id); let callable_sig = db.callable_item_signature(self.id.into()).substitute(Interner, &substs); @@ -2256,7 +2276,11 @@ impl Function { .iter() .enumerate() .map(|(idx, ty)| { - let ty = Type { env: environment.clone(), ty: ty.clone() }; + let ty = Type { + env: environment.clone(), + ty: ty.clone(), + _pd: PhantomCovariantLifetime::new(), + }; Param { func: Callee::Def(CallableDefId::FunctionId(self.id)), ty, idx } }) .collect() @@ -2266,12 +2290,12 @@ impl Function { db.function_signature(self.id).params.len() } - pub fn method_params(self, db: &dyn HirDatabase) -> Option> { + pub fn method_params(self, db: &dyn HirDatabase) -> Option>> { self.self_param(db)?; Some(self.params_without_self(db)) } - pub fn params_without_self(self, db: &dyn HirDatabase) -> Vec { + pub fn params_without_self(self, db: &dyn HirDatabase) -> Vec> { let environment = db.trait_environment(self.id.into()); let substs = TyBuilder::placeholder_subst(db, self.id); let 
callable_sig = db.callable_item_signature(self.id.into()).substitute(Interner, &substs); @@ -2282,18 +2306,22 @@ impl Function { .enumerate() .skip(skip) .map(|(idx, ty)| { - let ty = Type { env: environment.clone(), ty: ty.clone() }; + let ty = Type { + env: environment.clone(), + ty: ty.clone(), + _pd: PhantomCovariantLifetime::new(), + }; Param { func: Callee::Def(CallableDefId::FunctionId(self.id)), ty, idx } }) .collect() } // FIXME: Find better API to also handle const generics - pub fn params_without_self_with_args( + pub fn params_without_self_with_args<'db>( self, - db: &dyn HirDatabase, - generics: impl Iterator, - ) -> Vec { + db: &'db dyn HirDatabase, + generics: impl Iterator>, + ) -> Vec> { let environment = db.trait_environment(self.id.into()); let parent_id: Option = match self.id.lookup(db).container { ItemContainerId::ImplId(it) => Some(it.into()), @@ -2328,7 +2356,11 @@ impl Function { .enumerate() .skip(skip) .map(|(idx, ty)| { - let ty = Type { env: environment.clone(), ty: ty.clone() }; + let ty = Type { + env: environment.clone(), + ty: ty.clone(), + _pd: PhantomCovariantLifetime::new(), + }; Param { func: Callee::Def(CallableDefId::FunctionId(self.id)), ty, idx } }) .collect() @@ -2358,7 +2390,8 @@ impl Function { return true; } - let Some(impl_traits) = self.ret_type(db).as_impl_traits(db) else { return false }; + let ret_type = self.ret_type(db); + let Some(impl_traits) = ret_type.as_impl_traits(db) else { return false }; let Some(future_trait_id) = LangItem::Future.resolve_trait(db, self.ty(db).env.krate) else { return false; @@ -2501,14 +2534,14 @@ impl From for Access { } #[derive(Clone, PartialEq, Eq, Hash, Debug)] -pub struct Param { +pub struct Param<'db> { func: Callee, /// The index in parameter list, including self parameter. 
idx: usize, - ty: Type, + ty: Type<'db>, } -impl Param { +impl<'db> Param<'db> { pub fn parent_fn(&self) -> Option { match self.func { Callee::Def(CallableDefId::FunctionId(f)) => Some(f.into()), @@ -2524,7 +2557,7 @@ impl Param { self.idx } - pub fn ty(&self) -> &Type { + pub fn ty(&self) -> &Type<'db> { &self.ty } @@ -2591,17 +2624,21 @@ impl SelfParam { Function::from(self.func) } - pub fn ty(&self, db: &dyn HirDatabase) -> Type { + pub fn ty<'db>(&self, db: &'db dyn HirDatabase) -> Type<'db> { let substs = TyBuilder::placeholder_subst(db, self.func); let callable_sig = db.callable_item_signature(self.func.into()).substitute(Interner, &substs); let environment = db.trait_environment(self.func.into()); let ty = callable_sig.params()[0].clone(); - Type { env: environment, ty } + Type { env: environment, ty, _pd: PhantomCovariantLifetime::new() } } // FIXME: Find better API to also handle const generics - pub fn ty_with_args(&self, db: &dyn HirDatabase, generics: impl Iterator) -> Type { + pub fn ty_with_args<'db>( + &self, + db: &'db dyn HirDatabase, + generics: impl Iterator>, + ) -> Type<'db> { let parent_id: GenericDefId = match self.func.lookup(db).container { ItemContainerId::ImplId(it) => it.into(), ItemContainerId::TraitId(it) => it.into(), @@ -2626,7 +2663,7 @@ impl SelfParam { db.callable_item_signature(self.func.into()).substitute(Interner, &substs); let environment = db.trait_environment(self.func.into()); let ty = callable_sig.params()[0].clone(); - Type { env: environment, ty } + Type { env: environment, ty, _pd: PhantomCovariantLifetime::new() } } } @@ -2714,7 +2751,7 @@ impl Const { self.source(db)?.value.body() } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_value_def(db, self.id) } @@ -2791,7 +2828,7 @@ impl Static { self.source(db)?.value.body() } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_value_def(db, 
self.id) } @@ -2961,11 +2998,11 @@ impl TypeAlias { Module { id: self.id.module(db) } } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_def(db, self.id) } - pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type { + pub fn ty_placeholders(self, db: &dyn HirDatabase) -> Type<'_> { Type::from_def_placeholders(db, self.id) } @@ -3010,7 +3047,7 @@ impl BuiltinType { BuiltinType { inner: hir_def::builtin_type::BuiltinType::Str } } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty<'db>(self, db: &'db dyn HirDatabase) -> Type<'db> { let core = Crate::core(db).map(|core| core.id).unwrap_or_else(|| db.all_crates()[0]); Type::new_for_crate(core, TyBuilder::builtin(self.inner)) } @@ -3472,7 +3509,7 @@ impl AssocItem { } } - pub fn implementing_ty(self, db: &dyn HirDatabase) -> Option { + pub fn implementing_ty(self, db: &dyn HirDatabase) -> Option> { match self.container(db) { AssocItemContainer::Impl(i) => Some(i.self_ty(db)), _ => None, @@ -3500,10 +3537,10 @@ impl AssocItem { } } - pub fn diagnostics( + pub fn diagnostics<'db>( self, - db: &dyn HirDatabase, - acc: &mut Vec, + db: &'db dyn HirDatabase, + acc: &mut Vec>, style_lints: bool, ) { match self { @@ -3625,7 +3662,7 @@ impl GenericDef { } } - pub fn diagnostics(self, db: &dyn HirDatabase, acc: &mut Vec) { + pub fn diagnostics<'db>(self, db: &'db dyn HirDatabase, acc: &mut Vec>) { let def = self.id(); let generics = db.generic_params(def); @@ -3690,18 +3727,19 @@ impl GenericDef { // We cannot call this `Substitution` unfortunately... 
#[derive(Debug)] -pub struct GenericSubstitution { +pub struct GenericSubstitution<'db> { def: GenericDefId, subst: Substitution, env: Arc, + _pd: PhantomCovariantLifetime<'db>, } -impl GenericSubstitution { +impl<'db> GenericSubstitution<'db> { fn new(def: GenericDefId, subst: Substitution, env: Arc) -> Self { - Self { def, subst, env } + Self { def, subst, env, _pd: PhantomCovariantLifetime::new() } } - pub fn types(&self, db: &dyn HirDatabase) -> Vec<(Symbol, Type)> { + pub fn types(&self, db: &'db dyn HirDatabase) -> Vec<(Symbol, Type<'db>)> { let container = match self.def { GenericDefId::ConstId(id) => Some(id.lookup(db).container), GenericDefId::FunctionId(id) => Some(id.lookup(db).container), @@ -3744,7 +3782,10 @@ impl GenericSubstitution { container_params .chain(self_params) .filter_map(|(ty, name)| { - Some((name?.symbol().clone(), Type { ty, env: self.env.clone() })) + Some(( + name?.symbol().clone(), + Type { ty, env: self.env.clone(), _pd: PhantomCovariantLifetime::new() }, + )) }) .collect() } @@ -3847,7 +3888,7 @@ impl Local { self.parent(db).module(db) } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { let def = self.parent; let infer = db.infer(def); let ty = infer[self.binding_id].clone(); @@ -4109,6 +4150,10 @@ impl TypeParam { self.merge().name(db) } + pub fn parent(self, _db: &dyn HirDatabase) -> GenericDef { + self.id.parent().into() + } + pub fn module(self, db: &dyn HirDatabase) -> Module { self.id.parent().module(db).into() } @@ -4124,7 +4169,7 @@ impl TypeParam { } } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { let resolver = self.id.parent().resolver(db); let ty = TyKind::Placeholder(hir_ty::to_placeholder_idx(db, self.id.into())).intern(Interner); @@ -4146,7 +4191,7 @@ impl TypeParam { .collect() } - pub fn default(self, db: &dyn HirDatabase) -> Option { + pub fn default(self, db: &dyn HirDatabase) -> Option> { let ty = 
generic_arg_from_param(db, self.id.into())?; let resolver = self.id.parent().resolver(db); match ty.data(Interner) { @@ -4211,7 +4256,7 @@ impl ConstParam { self.id.parent().into() } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { Type::new(db, self.id.parent(), db.const_param_ty(self.id)) } @@ -4268,7 +4313,7 @@ impl TypeOrConstParam { } } - pub fn ty(self, db: &dyn HirDatabase) -> Type { + pub fn ty(self, db: &dyn HirDatabase) -> Type<'_> { match self.split(db) { Either::Left(it) => it.ty(db), Either::Right(it) => it.ty(db), @@ -4313,7 +4358,10 @@ impl Impl { module.id.def_map(db)[module.id.local_id].scope.impls().map(Into::into).collect() } - pub fn all_for_type(db: &dyn HirDatabase, Type { ty, env }: Type) -> Vec { + pub fn all_for_type<'db>( + db: &'db dyn HirDatabase, + Type { ty, env, _pd: _ }: Type<'db>, + ) -> Vec { let def_crates = match method_resolution::def_crates(db, &ty, env.krate) { Some(def_crates) => def_crates, None => return Vec::new(), @@ -4398,14 +4446,14 @@ impl Impl { Some(Trait { id }) } - pub fn trait_ref(self, db: &dyn HirDatabase) -> Option { + pub fn trait_ref(self, db: &dyn HirDatabase) -> Option> { let substs = TyBuilder::placeholder_subst(db, self.id); let trait_ref = db.impl_trait(self.id)?.substitute(Interner, &substs); let resolver = self.id.resolver(db); Some(TraitRef::new_with_resolver(db, &resolver, trait_ref)) } - pub fn self_ty(self, db: &dyn HirDatabase) -> Type { + pub fn self_ty(self, db: &dyn HirDatabase) -> Type<'_> { let resolver = self.id.resolver(db); let substs = TyBuilder::placeholder_subst(db, self.id); let ty = db.impl_self_ty(self.id).substitute(Interner, &substs); @@ -4467,21 +4515,22 @@ impl Impl { } #[derive(Clone, PartialEq, Eq, Debug, Hash)] -pub struct TraitRef { +pub struct TraitRef<'db> { env: Arc, trait_ref: hir_ty::TraitRef, + _pd: PhantomCovariantLifetime<'db>, } -impl TraitRef { +impl<'db> TraitRef<'db> { pub(crate) fn new_with_resolver( - db: 
&dyn HirDatabase, + db: &'db dyn HirDatabase, resolver: &Resolver<'_>, trait_ref: hir_ty::TraitRef, - ) -> TraitRef { + ) -> Self { let env = resolver .generic_def() .map_or_else(|| TraitEnvironment::empty(resolver.krate()), |d| db.trait_environment(d)); - TraitRef { env, trait_ref } + TraitRef { env, trait_ref, _pd: PhantomCovariantLifetime::new() } } pub fn trait_(&self) -> Trait { @@ -4489,21 +4538,21 @@ impl TraitRef { Trait { id } } - pub fn self_ty(&self) -> Type { + pub fn self_ty(&self) -> Type<'_> { let ty = self.trait_ref.self_type_parameter(Interner); - Type { env: self.env.clone(), ty } + Type { env: self.env.clone(), ty, _pd: PhantomCovariantLifetime::new() } } /// Returns `idx`-th argument of this trait reference if it is a type argument. Note that the /// first argument is the `Self` type. - pub fn get_type_argument(&self, idx: usize) -> Option { + pub fn get_type_argument(&self, idx: usize) -> Option> { self.trait_ref .substitution .as_slice(Interner) .get(idx) .and_then(|arg| arg.ty(Interner)) .cloned() - .map(|ty| Type { env: self.env.clone(), ty }) + .map(|ty| Type { env: self.env.clone(), ty, _pd: PhantomCovariantLifetime::new() }) } } @@ -4551,7 +4600,7 @@ impl Closure { .collect() } - pub fn capture_types(&self, db: &dyn HirDatabase) -> Vec { + pub fn capture_types<'db>(&self, db: &'db dyn HirDatabase) -> Vec> { let owner = db.lookup_intern_closure((self.id).into()).0; let infer = &db.infer(owner); let (captures, _) = infer.closure_info(&self.id); @@ -4560,6 +4609,7 @@ impl Closure { .map(|capture| Type { env: db.trait_environment_for_body(owner), ty: capture.ty(&self.subst), + _pd: PhantomCovariantLifetime::new(), }) .collect() } @@ -4691,40 +4741,45 @@ impl CaptureUsageSource { } #[derive(Clone, PartialEq, Eq, Debug, Hash)] -pub struct Type { +pub struct Type<'db> { env: Arc, ty: Ty, + _pd: PhantomCovariantLifetime<'db>, } -impl Type { - pub(crate) fn new_with_resolver(db: &dyn HirDatabase, resolver: &Resolver<'_>, ty: Ty) -> Type { 
+impl<'db> Type<'db> { + pub(crate) fn new_with_resolver( + db: &'db dyn HirDatabase, + resolver: &Resolver<'_>, + ty: Ty, + ) -> Self { Type::new_with_resolver_inner(db, resolver, ty) } pub(crate) fn new_with_resolver_inner( - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, resolver: &Resolver<'_>, ty: Ty, - ) -> Type { + ) -> Self { let environment = resolver .generic_def() .map_or_else(|| TraitEnvironment::empty(resolver.krate()), |d| db.trait_environment(d)); - Type { env: environment, ty } + Type { env: environment, ty, _pd: PhantomCovariantLifetime::new() } } - pub(crate) fn new_for_crate(krate: base_db::Crate, ty: Ty) -> Type { - Type { env: TraitEnvironment::empty(krate), ty } + pub(crate) fn new_for_crate(krate: base_db::Crate, ty: Ty) -> Self { + Type { env: TraitEnvironment::empty(krate), ty, _pd: PhantomCovariantLifetime::new() } } - fn new(db: &dyn HirDatabase, lexical_env: impl HasResolver, ty: Ty) -> Type { + fn new(db: &'db dyn HirDatabase, lexical_env: impl HasResolver, ty: Ty) -> Self { let resolver = lexical_env.resolver(db); let environment = resolver .generic_def() .map_or_else(|| TraitEnvironment::empty(resolver.krate()), |d| db.trait_environment(d)); - Type { env: environment, ty } + Type { env: environment, ty, _pd: PhantomCovariantLifetime::new() } } - fn from_def(db: &dyn HirDatabase, def: impl Into + HasResolver) -> Type { + fn from_def(db: &'db dyn HirDatabase, def: impl Into + HasResolver) -> Self { let ty = db.ty(def.into()); let substs = TyBuilder::unknown_subst( db, @@ -4737,7 +4792,10 @@ impl Type { Type::new(db, def, ty.substitute(Interner, &substs)) } - fn from_def_placeholders(db: &dyn HirDatabase, def: impl Into + HasResolver) -> Type { + fn from_def_placeholders( + db: &'db dyn HirDatabase, + def: impl Into + HasResolver, + ) -> Self { let ty = db.ty(def.into()); let substs = TyBuilder::placeholder_subst( db, @@ -4750,7 +4808,10 @@ impl Type { Type::new(db, def, ty.substitute(Interner, &substs)) } - fn from_value_def(db: &dyn 
HirDatabase, def: impl Into + HasResolver) -> Type { + fn from_value_def( + db: &'db dyn HirDatabase, + def: impl Into + HasResolver, + ) -> Self { let Some(ty) = db.value_ty(def.into()) else { return Type::new(db, def, TyKind::Error.intern(Interner)); }; @@ -4770,13 +4831,17 @@ impl Type { Type::new(db, def, ty.substitute(Interner, &substs)) } - pub fn new_slice(ty: Type) -> Type { - Type { env: ty.env, ty: TyBuilder::slice(ty.ty) } + pub fn new_slice(ty: Self) -> Self { + Type { env: ty.env, ty: TyBuilder::slice(ty.ty), _pd: PhantomCovariantLifetime::new() } } - pub fn new_tuple(krate: base_db::Crate, tys: &[Type]) -> Type { + pub fn new_tuple(krate: base_db::Crate, tys: &[Self]) -> Self { let tys = tys.iter().map(|it| it.ty.clone()); - Type { env: TraitEnvironment::empty(krate), ty: TyBuilder::tuple_with(tys) } + Type { + env: TraitEnvironment::empty(krate), + ty: TyBuilder::tuple_with(tys), + _pd: PhantomCovariantLifetime::new(), + } } pub fn is_unit(&self) -> bool { @@ -4803,7 +4868,7 @@ impl Type { matches!(self.ty.kind(Interner), TyKind::Ref(..)) } - pub fn contains_reference(&self, db: &dyn HirDatabase) -> bool { + pub fn contains_reference(&self, db: &'db dyn HirDatabase) -> bool { return go(db, self.env.krate, &self.ty); fn go(db: &dyn HirDatabase, krate: base_db::Crate, ty: &Ty) -> bool { @@ -4847,13 +4912,13 @@ impl Type { } } - pub fn as_reference(&self) -> Option<(Type, Mutability)> { + pub fn as_reference(&self) -> Option<(Type<'db>, Mutability)> { let (ty, _lt, m) = self.ty.as_reference()?; let m = Mutability::from_mutable(matches!(m, hir_ty::Mutability::Mut)); Some((self.derived(ty.clone()), m)) } - pub fn add_reference(&self, mutability: Mutability) -> Type { + pub fn add_reference(&self, mutability: Mutability) -> Self { let ty_mutability = match mutability { Mutability::Shared => hir_ty::Mutability::Not, Mutability::Mut => hir_ty::Mutability::Mut, @@ -4889,25 +4954,25 @@ impl Type { matches!(self.ty.kind(Interner), TyKind::Tuple(..)) } - pub fn 
remove_ref(&self) -> Option { + pub fn remove_ref(&self) -> Option> { match &self.ty.kind(Interner) { TyKind::Ref(.., ty) => Some(self.derived(ty.clone())), _ => None, } } - pub fn as_slice(&self) -> Option { + pub fn as_slice(&self) -> Option> { match &self.ty.kind(Interner) { TyKind::Slice(ty) => Some(self.derived(ty.clone())), _ => None, } } - pub fn strip_references(&self) -> Type { + pub fn strip_references(&self) -> Self { self.derived(self.ty.strip_references().clone()) } - pub fn strip_reference(&self) -> Type { + pub fn strip_reference(&self) -> Self { self.derived(self.ty.strip_reference().clone()) } @@ -4918,7 +4983,7 @@ impl Type { /// Checks that particular type `ty` implements `std::future::IntoFuture` or /// `std::future::Future` and returns the `Output` associated type. /// This function is used in `.await` syntax completion. - pub fn into_future_output(&self, db: &dyn HirDatabase) -> Option { + pub fn into_future_output(&self, db: &'db dyn HirDatabase) -> Option> { let trait_ = LangItem::IntoFutureIntoFuture .resolve_function(db, self.env.krate) .and_then(|into_future_fn| { @@ -4940,13 +5005,13 @@ impl Type { } /// This does **not** resolve `IntoFuture`, only `Future`. - pub fn future_output(self, db: &dyn HirDatabase) -> Option { + pub fn future_output(self, db: &'db dyn HirDatabase) -> Option> { let future_output = LangItem::FutureOutput.resolve_type_alias(db, self.env.krate)?; self.normalize_trait_assoc_type(db, &[], future_output.into()) } /// This does **not** resolve `IntoIterator`, only `Iterator`. 
- pub fn iterator_item(self, db: &dyn HirDatabase) -> Option { + pub fn iterator_item(self, db: &'db dyn HirDatabase) -> Option> { let iterator_trait = LangItem::Iterator.resolve_trait(db, self.env.krate)?; let iterator_item = db .trait_items(iterator_trait) @@ -4954,7 +5019,7 @@ impl Type { self.normalize_trait_assoc_type(db, &[], iterator_item.into()) } - pub fn impls_iterator(self, db: &dyn HirDatabase) -> bool { + pub fn impls_iterator(self, db: &'db dyn HirDatabase) -> bool { let Some(iterator_trait) = LangItem::Iterator.resolve_trait(db, self.env.krate) else { return false; }; @@ -4964,7 +5029,7 @@ impl Type { } /// Resolves the projection `::IntoIter` and returns the resulting type - pub fn into_iterator_iter(self, db: &dyn HirDatabase) -> Option { + pub fn into_iterator_iter(self, db: &'db dyn HirDatabase) -> Option> { let trait_ = LangItem::IntoIterIntoIter.resolve_function(db, self.env.krate).and_then( |into_iter_fn| { let assoc_item = as_assoc_item(db, AssocItem::Function, into_iter_fn)?; @@ -4989,7 +5054,7 @@ impl Type { /// /// This function can be used to check if a particular type is callable, since FnOnce is a /// supertrait of Fn and FnMut, so all callable types implements at least FnOnce. 
- pub fn impls_fnonce(&self, db: &dyn HirDatabase) -> bool { + pub fn impls_fnonce(&self, db: &'db dyn HirDatabase) -> bool { let fnonce_trait = match FnTrait::FnOnce.get_id(db, self.env.krate) { Some(it) => it, None => return false, @@ -5001,7 +5066,7 @@ impl Type { } // FIXME: Find better API that also handles const generics - pub fn impls_trait(&self, db: &dyn HirDatabase, trait_: Trait, args: &[Type]) -> bool { + pub fn impls_trait(&self, db: &'db dyn HirDatabase, trait_: Trait, args: &[Type<'db>]) -> bool { let mut it = args.iter().map(|t| t.ty.clone()); let trait_ref = TyBuilder::trait_ref(db, trait_.id) .push(self.ty.clone()) @@ -5029,10 +5094,10 @@ impl Type { pub fn normalize_trait_assoc_type( &self, - db: &dyn HirDatabase, - args: &[Type], + db: &'db dyn HirDatabase, + args: &[Type<'db>], alias: TypeAlias, - ) -> Option { + ) -> Option> { let mut args = args.iter(); let trait_id = match alias.id.lookup(db).container { ItemContainerId::TraitId(id) => id, @@ -5056,14 +5121,14 @@ impl Type { if ty.is_unknown() { None } else { Some(self.derived(ty)) } } - pub fn is_copy(&self, db: &dyn HirDatabase) -> bool { + pub fn is_copy(&self, db: &'db dyn HirDatabase) -> bool { let Some(copy_trait) = LangItem::Copy.resolve_trait(db, self.env.krate) else { return false; }; self.impls_trait(db, copy_trait.into(), &[]) } - pub fn as_callable(&self, db: &dyn HirDatabase) -> Option { + pub fn as_callable(&self, db: &'db dyn HirDatabase) -> Option> { let callee = match self.ty.kind(Interner) { TyKind::Closure(id, subst) => Callee::Closure(*id, subst.clone()), TyKind::Function(_) => Callee::FnPtr, @@ -5117,7 +5182,7 @@ impl Type { matches!(self.ty.kind(Interner), TyKind::Array(..)) } - pub fn is_packed(&self, db: &dyn HirDatabase) -> bool { + pub fn is_packed(&self, db: &'db dyn HirDatabase) -> bool { let adt_id = match *self.ty.kind(Interner) { TyKind::Adt(hir_ty::AdtId(adt_id), ..) 
=> adt_id, _ => return false, @@ -5134,7 +5199,7 @@ impl Type { matches!(self.ty.kind(Interner), TyKind::Raw(..)) } - pub fn remove_raw_ptr(&self) -> Option { + pub fn remove_raw_ptr(&self) -> Option> { if let TyKind::Raw(_, ty) = self.ty.kind(Interner) { Some(self.derived(ty.clone())) } else { @@ -5182,7 +5247,7 @@ impl Type { } } - pub fn fields(&self, db: &dyn HirDatabase) -> Vec<(Field, Type)> { + pub fn fields(&self, db: &'db dyn HirDatabase) -> Vec<(Field, Self)> { let (variant_id, substs) = match self.ty.kind(Interner) { TyKind::Adt(hir_ty::AdtId(AdtId::StructId(s)), substs) => ((*s).into(), substs), TyKind::Adt(hir_ty::AdtId(AdtId::UnionId(u)), substs) => ((*u).into(), substs), @@ -5199,7 +5264,7 @@ impl Type { .collect() } - pub fn tuple_fields(&self, _db: &dyn HirDatabase) -> Vec { + pub fn tuple_fields(&self, _db: &'db dyn HirDatabase) -> Vec { if let TyKind::Tuple(_, substs) = &self.ty.kind(Interner) { substs .iter(Interner) @@ -5210,7 +5275,7 @@ impl Type { } } - pub fn as_array(&self, db: &dyn HirDatabase) -> Option<(Type, usize)> { + pub fn as_array(&self, db: &'db dyn HirDatabase) -> Option<(Self, usize)> { if let TyKind::Array(ty, len) = &self.ty.kind(Interner) { try_const_usize(db, len).map(|it| (self.derived(ty.clone()), it as usize)) } else { @@ -5228,14 +5293,14 @@ impl Type { /// Returns types that this type dereferences to (including this type itself). The returned /// iterator won't yield the same type more than once even if the deref chain contains a cycle. 
- pub fn autoderef<'db>( + pub fn autoderef( &self, db: &'db dyn HirDatabase, - ) -> impl Iterator + use<'_, 'db> { + ) -> impl Iterator> + use<'_, 'db> { self.autoderef_(db).map(move |ty| self.derived(ty)) } - fn autoderef_(&self, db: &dyn HirDatabase) -> impl Iterator { + fn autoderef_(&self, db: &'db dyn HirDatabase) -> impl Iterator { // There should be no inference vars in types passed here let canonical = hir_ty::replace_errors_with_variables(&self.ty); autoderef(db, self.env.clone(), canonical) @@ -5245,7 +5310,7 @@ impl Type { // lifetime problems, because we need to borrow temp `CrateImplDefs`. pub fn iterate_assoc_items( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, krate: Crate, mut callback: impl FnMut(AssocItem) -> Option, ) -> Option { @@ -5259,7 +5324,7 @@ impl Type { fn iterate_assoc_items_dyn( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, krate: Crate, callback: &mut dyn FnMut(AssocItemId) -> bool, ) { @@ -5298,7 +5363,7 @@ impl Type { /// - "String" /// - "U" /// ``` - pub fn type_arguments(&self) -> impl Iterator + '_ { + pub fn type_arguments(&self) -> impl Iterator> + '_ { self.ty .strip_references() .as_adt() @@ -5368,7 +5433,7 @@ impl Type { pub fn iterate_method_candidates_with_traits( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, scope: &SemanticsScope<'_>, traits_in_scope: &FxHashSet, with_local_impls: Option, @@ -5396,7 +5461,7 @@ impl Type { pub fn iterate_method_candidates( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, scope: &SemanticsScope<'_>, with_local_impls: Option, name: Option<&Name>, @@ -5418,7 +5483,7 @@ impl Type { /// are considered inherent methods. 
pub fn iterate_method_candidates_split_inherent( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, scope: &SemanticsScope<'_>, traits_in_scope: &FxHashSet, with_local_impls: Option, @@ -5486,7 +5551,7 @@ impl Type { #[tracing::instrument(skip_all, fields(name = ?name))] pub fn iterate_path_candidates( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, scope: &SemanticsScope<'_>, traits_in_scope: &FxHashSet, with_local_impls: Option, @@ -5521,7 +5586,7 @@ impl Type { #[tracing::instrument(skip_all, fields(name = ?name))] pub fn iterate_path_candidates_split_inherent( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, scope: &SemanticsScope<'_>, traits_in_scope: &FxHashSet, with_local_impls: Option, @@ -5584,10 +5649,10 @@ impl Type { /// If a type can be represented as `dyn Trait`, returns all traits accessible via this type, /// or an empty iterator otherwise. - pub fn applicable_inherent_traits<'a>( - &'a self, - db: &'a dyn HirDatabase, - ) -> impl Iterator + 'a { + pub fn applicable_inherent_traits( + &self, + db: &'db dyn HirDatabase, + ) -> impl Iterator { let _p = tracing::info_span!("applicable_inherent_traits").entered(); self.autoderef_(db) .filter_map(|ty| ty.dyn_trait()) @@ -5595,7 +5660,7 @@ impl Type { .map(Trait::from) } - pub fn env_traits<'a>(&'a self, db: &'a dyn HirDatabase) -> impl Iterator + 'a { + pub fn env_traits(&self, db: &'db dyn HirDatabase) -> impl Iterator { let _p = tracing::info_span!("env_traits").entered(); self.autoderef_(db) .filter(|ty| matches!(ty.kind(Interner), TyKind::Placeholder(_))) @@ -5607,10 +5672,7 @@ impl Type { .map(Trait::from) } - pub fn as_impl_traits( - &self, - db: &dyn HirDatabase, - ) -> Option + use<>> { + pub fn as_impl_traits(&self, db: &'db dyn HirDatabase) -> Option> { self.ty.impl_trait_bounds(db).map(|it| { it.into_iter().filter_map(|pred| match pred.skip_binders() { hir_ty::WhereClause::Implemented(trait_ref) => { @@ -5621,33 +5683,33 @@ impl Type { }) } - pub fn 
as_associated_type_parent_trait(&self, db: &dyn HirDatabase) -> Option { + pub fn as_associated_type_parent_trait(&self, db: &'db dyn HirDatabase) -> Option { self.ty.associated_type_parent_trait(db).map(Into::into) } - fn derived(&self, ty: Ty) -> Type { - Type { env: self.env.clone(), ty } + fn derived(&self, ty: Ty) -> Self { + Type { env: self.env.clone(), ty, _pd: PhantomCovariantLifetime::new() } } /// Visits every type, including generic arguments, in this type. `cb` is called with type /// itself first, and then with its generic arguments. - pub fn walk(&self, db: &dyn HirDatabase, mut cb: impl FnMut(Type)) { - fn walk_substs( - db: &dyn HirDatabase, - type_: &Type, + pub fn walk(&self, db: &'db dyn HirDatabase, mut cb: impl FnMut(Type<'db>)) { + fn walk_substs<'db>( + db: &'db dyn HirDatabase, + type_: &Type<'db>, substs: &Substitution, - cb: &mut impl FnMut(Type), + cb: &mut impl FnMut(Type<'db>), ) { for ty in substs.iter(Interner).filter_map(|a| a.ty(Interner)) { walk_type(db, &type_.derived(ty.clone()), cb); } } - fn walk_bounds( - db: &dyn HirDatabase, - type_: &Type, + fn walk_bounds<'db>( + db: &'db dyn HirDatabase, + type_: &Type<'db>, bounds: &[QuantifiedWhereClause], - cb: &mut impl FnMut(Type), + cb: &mut impl FnMut(Type<'db>), ) { for pred in bounds { if let WhereClause::Implemented(trait_ref) = pred.skip_binders() { @@ -5664,7 +5726,11 @@ impl Type { } } - fn walk_type(db: &dyn HirDatabase, type_: &Type, cb: &mut impl FnMut(Type)) { + fn walk_type<'db>( + db: &'db dyn HirDatabase, + type_: &Type<'db>, + cb: &mut impl FnMut(Type<'db>), + ) { let ty = type_.ty.strip_references(); match ty.kind(Interner) { TyKind::Adt(_, substs) => { @@ -5732,7 +5798,7 @@ impl Type { /// /// Note that we consider placeholder types to unify with everything. /// For example `Option` and `Option` unify although there is unresolved goal `T = U`. 
- pub fn could_unify_with(&self, db: &dyn HirDatabase, other: &Type) -> bool { + pub fn could_unify_with(&self, db: &'db dyn HirDatabase, other: &Type<'db>) -> bool { let tys = hir_ty::replace_errors_with_variables(&(self.ty.clone(), other.ty.clone())); hir_ty::could_unify(db, self.env.clone(), &tys) } @@ -5741,17 +5807,17 @@ impl Type { /// /// This means that placeholder types are not considered to unify if there are any bounds set on /// them. For example `Option` and `Option` do not unify as we cannot show that `T = U` - pub fn could_unify_with_deeply(&self, db: &dyn HirDatabase, other: &Type) -> bool { + pub fn could_unify_with_deeply(&self, db: &'db dyn HirDatabase, other: &Type<'db>) -> bool { let tys = hir_ty::replace_errors_with_variables(&(self.ty.clone(), other.ty.clone())); hir_ty::could_unify_deeply(db, self.env.clone(), &tys) } - pub fn could_coerce_to(&self, db: &dyn HirDatabase, to: &Type) -> bool { + pub fn could_coerce_to(&self, db: &'db dyn HirDatabase, to: &Type<'db>) -> bool { let tys = hir_ty::replace_errors_with_variables(&(self.ty.clone(), to.ty.clone())); hir_ty::could_coerce(db, self.env.clone(), &tys) } - pub fn as_type_param(&self, db: &dyn HirDatabase) -> Option { + pub fn as_type_param(&self, db: &'db dyn HirDatabase) -> Option { match self.ty.kind(Interner) { TyKind::Placeholder(p) => Some(TypeParam { id: TypeParamId::from_unchecked(hir_ty::from_placeholder_idx(db, *p)), @@ -5761,19 +5827,19 @@ impl Type { } /// Returns unique `GenericParam`s contained in this type. 
- pub fn generic_params(&self, db: &dyn HirDatabase) -> FxHashSet { + pub fn generic_params(&self, db: &'db dyn HirDatabase) -> FxHashSet { hir_ty::collect_placeholders(&self.ty, db) .into_iter() .map(|id| TypeOrConstParam { id }.split(db).either_into()) .collect() } - pub fn layout(&self, db: &dyn HirDatabase) -> Result { + pub fn layout(&self, db: &'db dyn HirDatabase) -> Result { db.layout_of_ty(self.ty.clone(), self.env.clone()) .map(|layout| Layout(layout, db.target_data_layout(self.env.krate).unwrap())) } - pub fn drop_glue(&self, db: &dyn HirDatabase) -> DropGlue { + pub fn drop_glue(&self, db: &'db dyn HirDatabase) -> DropGlue { db.has_drop_glue(self.ty.clone(), self.env.clone()) } } @@ -5800,8 +5866,8 @@ impl InlineAsmOperand { // FIXME: Document this #[derive(Debug)] -pub struct Callable { - ty: Type, +pub struct Callable<'db> { + ty: Type<'db>, sig: CallableSig, callee: Callee, /// Whether this is a method that was called with method call syntax. @@ -5825,7 +5891,7 @@ pub enum CallableKind { FnImpl(FnTrait), } -impl Callable { +impl<'db> Callable<'db> { pub fn kind(&self) -> CallableKind { match self.callee { Callee::Def(CallableDefId::FunctionId(it)) => CallableKind::Function(it.into()), @@ -5840,7 +5906,7 @@ impl Callable { Callee::FnImpl(fn_) => CallableKind::FnImpl(fn_), } } - pub fn receiver_param(&self, db: &dyn HirDatabase) -> Option<(SelfParam, Type)> { + pub fn receiver_param(&self, db: &'db dyn HirDatabase) -> Option<(SelfParam, Type<'db>)> { let func = match self.callee { Callee::Def(CallableDefId::FunctionId(it)) if self.is_bound_method => it, _ => return None, @@ -5851,7 +5917,7 @@ impl Callable { pub fn n_params(&self) -> usize { self.sig.params().len() - if self.is_bound_method { 1 } else { 0 } } - pub fn params(&self) -> Vec { + pub fn params(&self) -> Vec> { self.sig .params() .iter() @@ -5861,14 +5927,14 @@ impl Callable { .map(|(idx, ty)| Param { func: self.callee.clone(), idx, ty }) .collect() } - pub fn return_type(&self) -> Type { + 
pub fn return_type(&self) -> Type<'db> { self.ty.derived(self.sig.ret().clone()) } pub fn sig(&self) -> &CallableSig { &self.sig } - pub fn ty(&self) -> &Type { + pub fn ty(&self) -> &Type<'db> { &self.ty } } @@ -6070,9 +6136,9 @@ impl From for ScopeDef { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct Adjustment { - pub source: Type, - pub target: Type, +pub struct Adjustment<'db> { + pub source: Type<'db>, + pub target: Type<'db>, pub kind: Adjust, } @@ -6171,7 +6237,7 @@ impl HasCrate for TypeAlias { } } -impl HasCrate for Type { +impl HasCrate for Type<'_> { fn krate(&self, _db: &dyn HirDatabase) -> Crate { self.env.krate.into() } @@ -6325,9 +6391,9 @@ pub enum DocLinkDef { SelfType(Trait), } -fn push_ty_diagnostics( - db: &dyn HirDatabase, - acc: &mut Vec, +fn push_ty_diagnostics<'db>( + db: &'db dyn HirDatabase, + acc: &mut Vec>, diagnostics: Option>, source_map: &ExpressionStoreSourceMap, ) { diff --git a/src/tools/rust-analyzer/crates/hir/src/semantics.rs b/src/tools/rust-analyzer/crates/hir/src/semantics.rs index 10498958242a..d96975831e06 100644 --- a/src/tools/rust-analyzer/crates/hir/src/semantics.rs +++ b/src/tools/rust-analyzer/crates/hir/src/semantics.rs @@ -123,15 +123,15 @@ impl PathResolutionPerNs { } #[derive(Debug)] -pub struct TypeInfo { +pub struct TypeInfo<'db> { /// The original type of the expression or pattern. - pub original: Type, + pub original: Type<'db>, /// The adjusted type, if an adjustment happened. - pub adjusted: Option, + pub adjusted: Option>, } -impl TypeInfo { - pub fn original(self) -> Type { +impl<'db> TypeInfo<'db> { + pub fn original(self) -> Type<'db> { self.original } @@ -140,7 +140,7 @@ impl TypeInfo { } /// The adjusted type, or the original in case no adjustments occurred. 
- pub fn adjusted(self) -> Type { + pub fn adjusted(self) -> Type<'db> { self.adjusted.unwrap_or(self.original) } } @@ -1534,7 +1534,7 @@ impl<'db> SemanticsImpl<'db> { Some(Label { parent, label_id }) } - pub fn resolve_type(&self, ty: &ast::Type) -> Option { + pub fn resolve_type(&self, ty: &ast::Type) -> Option> { let analyze = self.analyze(ty.syntax())?; analyze.type_of_type(self.db, ty) } @@ -1553,7 +1553,7 @@ impl<'db> SemanticsImpl<'db> { } } - pub fn expr_adjustments(&self, expr: &ast::Expr) -> Option> { + pub fn expr_adjustments(&self, expr: &ast::Expr) -> Option>> { let mutability = |m| match m { hir_ty::Mutability::Not => Mutability::Shared, hir_ty::Mutability::Mut => Mutability::Mut, @@ -1596,13 +1596,13 @@ impl<'db> SemanticsImpl<'db> { }) } - pub fn type_of_expr(&self, expr: &ast::Expr) -> Option { + pub fn type_of_expr(&self, expr: &ast::Expr) -> Option> { self.analyze(expr.syntax())? .type_of_expr(self.db, expr) .map(|(ty, coerced)| TypeInfo { original: ty, adjusted: coerced }) } - pub fn type_of_pat(&self, pat: &ast::Pat) -> Option { + pub fn type_of_pat(&self, pat: &ast::Pat) -> Option> { self.analyze(pat.syntax())? .type_of_pat(self.db, pat) .map(|(ty, coerced)| TypeInfo { original: ty, adjusted: coerced }) @@ -1611,15 +1611,15 @@ impl<'db> SemanticsImpl<'db> { /// It also includes the changes that binding mode makes in the type. 
For example in /// `let ref x @ Some(_) = None` the result of `type_of_pat` is `Option` but the result /// of this function is `&mut Option` - pub fn type_of_binding_in_pat(&self, pat: &ast::IdentPat) -> Option { + pub fn type_of_binding_in_pat(&self, pat: &ast::IdentPat) -> Option> { self.analyze(pat.syntax())?.type_of_binding_in_pat(self.db, pat) } - pub fn type_of_self(&self, param: &ast::SelfParam) -> Option { + pub fn type_of_self(&self, param: &ast::SelfParam) -> Option> { self.analyze(param.syntax())?.type_of_self(self.db, param) } - pub fn pattern_adjustments(&self, pat: &ast::Pat) -> SmallVec<[Type; 1]> { + pub fn pattern_adjustments(&self, pat: &ast::Pat) -> SmallVec<[Type<'db>; 1]> { self.analyze(pat.syntax()) .and_then(|it| it.pattern_adjustments(self.db, pat)) .unwrap_or_default() @@ -1629,7 +1629,7 @@ impl<'db> SemanticsImpl<'db> { self.analyze(pat.syntax())?.binding_mode_of_pat(self.db, pat) } - pub fn resolve_expr_as_callable(&self, call: &ast::Expr) -> Option { + pub fn resolve_expr_as_callable(&self, call: &ast::Expr) -> Option> { self.analyze(call.syntax())?.resolve_expr_as_callable(self.db, call) } @@ -1641,7 +1641,7 @@ impl<'db> SemanticsImpl<'db> { pub fn resolve_method_call_fallback( &self, call: &ast::MethodCallExpr, - ) -> Option<(Either, Option)> { + ) -> Option<(Either, Option>)> { self.analyze(call.syntax())?.resolve_method_call_fallback(self.db, call) } @@ -1649,10 +1649,10 @@ impl<'db> SemanticsImpl<'db> { // FIXME: better api for the trait environment pub fn resolve_trait_impl_method( &self, - env: Type, + env: Type<'db>, trait_: Trait, func: Function, - subst: impl IntoIterator, + subst: impl IntoIterator>, ) -> Option { let mut substs = hir_ty::TyBuilder::subst_for_def(self.db, TraitId::from(trait_), None); for s in subst { @@ -1691,7 +1691,10 @@ impl<'db> SemanticsImpl<'db> { // This does not resolve the method call to the correct trait impl! // We should probably fix that. 
- pub fn resolve_method_call_as_callable(&self, call: &ast::MethodCallExpr) -> Option { + pub fn resolve_method_call_as_callable( + &self, + call: &ast::MethodCallExpr, + ) -> Option> { self.analyze(call.syntax())?.resolve_method_call_as_callable(self.db, call) } @@ -1702,14 +1705,15 @@ impl<'db> SemanticsImpl<'db> { pub fn resolve_field_fallback( &self, field: &ast::FieldExpr, - ) -> Option<(Either, Function>, Option)> { + ) -> Option<(Either, Function>, Option>)> + { self.analyze(field.syntax())?.resolve_field_fallback(self.db, field) } pub fn resolve_record_field( &self, field: &ast::RecordExprField, - ) -> Option<(Field, Option, Type)> { + ) -> Option<(Field, Option, Type<'db>)> { self.resolve_record_field_with_substitution(field) .map(|(field, local, ty, _)| (field, local, ty)) } @@ -1717,18 +1721,21 @@ impl<'db> SemanticsImpl<'db> { pub fn resolve_record_field_with_substitution( &self, field: &ast::RecordExprField, - ) -> Option<(Field, Option, Type, GenericSubstitution)> { + ) -> Option<(Field, Option, Type<'db>, GenericSubstitution<'db>)> { self.analyze(field.syntax())?.resolve_record_field(self.db, field) } - pub fn resolve_record_pat_field(&self, field: &ast::RecordPatField) -> Option<(Field, Type)> { + pub fn resolve_record_pat_field( + &self, + field: &ast::RecordPatField, + ) -> Option<(Field, Type<'db>)> { self.resolve_record_pat_field_with_subst(field).map(|(field, ty, _)| (field, ty)) } pub fn resolve_record_pat_field_with_subst( &self, field: &ast::RecordPatField, - ) -> Option<(Field, Type, GenericSubstitution)> { + ) -> Option<(Field, Type<'db>, GenericSubstitution<'db>)> { self.analyze(field.syntax())?.resolve_record_pat_field(self.db, field) } @@ -1801,7 +1808,7 @@ impl<'db> SemanticsImpl<'db> { pub fn resolve_path_with_subst( &self, path: &ast::Path, - ) -> Option<(PathResolution, Option)> { + ) -> Option<(PathResolution, Option>)> { self.analyze(path.syntax())?.resolve_path(self.db, path) } @@ -1812,7 +1819,7 @@ impl<'db> SemanticsImpl<'db> { 
pub fn resolve_offset_of_field( &self, name_ref: &ast::NameRef, - ) -> Option<(Either, GenericSubstitution)> { + ) -> Option<(Either, GenericSubstitution<'db>)> { self.analyze_no_infer(name_ref.syntax())?.resolve_offset_of_field(self.db, name_ref) } @@ -1834,13 +1841,19 @@ impl<'db> SemanticsImpl<'db> { self.analyze(pat.syntax())?.resolve_bind_pat_to_const(self.db, pat) } - pub fn record_literal_missing_fields(&self, literal: &ast::RecordExpr) -> Vec<(Field, Type)> { + pub fn record_literal_missing_fields( + &self, + literal: &ast::RecordExpr, + ) -> Vec<(Field, Type<'db>)> { self.analyze(literal.syntax()) .and_then(|it| it.record_literal_missing_fields(self.db, literal)) .unwrap_or_default() } - pub fn record_pattern_missing_fields(&self, pattern: &ast::RecordPat) -> Vec<(Field, Type)> { + pub fn record_pattern_missing_fields( + &self, + pattern: &ast::RecordPat, + ) -> Vec<(Field, Type<'db>)> { self.analyze(pattern.syntax()) .and_then(|it| it.record_pattern_missing_fields(self.db, pattern)) .unwrap_or_default() diff --git a/src/tools/rust-analyzer/crates/hir/src/source_analyzer.rs b/src/tools/rust-analyzer/crates/hir/src/source_analyzer.rs index 3273358b78e9..48543ca581ff 100644 --- a/src/tools/rust-analyzer/crates/hir/src/source_analyzer.rs +++ b/src/tools/rust-analyzer/crates/hir/src/source_analyzer.rs @@ -257,7 +257,11 @@ impl<'db> SourceAnalyzer<'db> { infer.expr_adjustments.get(&expr_id).map(|v| &**v) } - pub(crate) fn type_of_type(&self, db: &'db dyn HirDatabase, ty: &ast::Type) -> Option { + pub(crate) fn type_of_type( + &self, + db: &'db dyn HirDatabase, + ty: &ast::Type, + ) -> Option> { let type_ref = self.type_id(ty)?; let ty = TyLoweringContext::new( db, @@ -277,7 +281,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, expr: &ast::Expr, - ) -> Option<(Type, Option)> { + ) -> Option<(Type<'db>, Option>)> { let expr_id = self.expr_id(expr.clone())?; let infer = self.infer()?; let coerced = expr_id @@ -293,7 +297,7 @@ impl<'db> 
SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, pat: &ast::Pat, - ) -> Option<(Type, Option)> { + ) -> Option<(Type<'db>, Option>)> { let expr_or_pat_id = self.pat_id(pat)?; let infer = self.infer()?; let coerced = match expr_or_pat_id { @@ -316,7 +320,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, pat: &ast::IdentPat, - ) -> Option { + ) -> Option> { let binding_id = self.binding_id_of_pat(pat)?; let infer = self.infer()?; let ty = infer[binding_id].clone(); @@ -328,7 +332,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, _param: &ast::SelfParam, - ) -> Option { + ) -> Option> { let binding = self.body()?.self_param?; let ty = self.infer()?[binding].clone(); Some(Type::new_with_resolver(db, &self.resolver, ty)) @@ -353,7 +357,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, pat: &ast::Pat, - ) -> Option> { + ) -> Option; 1]>> { let pat_id = self.pat_id(pat)?; let infer = self.infer()?; Some( @@ -370,7 +374,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, call: &ast::MethodCallExpr, - ) -> Option { + ) -> Option> { let expr_id = self.expr_id(call.clone().into())?.as_expr()?; let (func, substs) = self.infer()?.method_resolution(expr_id)?; let ty = db.value_ty(func.into())?.substitute(Interner, &substs); @@ -395,7 +399,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, call: &ast::MethodCallExpr, - ) -> Option<(Either, Option)> { + ) -> Option<(Either, Option>)> { let expr_id = self.expr_id(call.clone().into())?.as_expr()?; let inference_result = self.infer()?; match inference_result.method_resolution(expr_id) { @@ -419,7 +423,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, call: &ast::Expr, - ) -> Option { + ) -> Option> { let (orig, adjusted) = self.type_of_expr(db, &call.clone())?; adjusted.unwrap_or(orig).as_callable(db) } @@ -440,7 +444,7 @@ impl<'db> SourceAnalyzer<'db> { field_expr: ExprId, infer: &InferenceResult, db: &'db dyn 
HirDatabase, - ) -> Option { + ) -> Option> { let body = self.store()?; if let Expr::Field { expr: object_expr, name: _ } = body[field_expr] { let (adt, subst) = type_of_expr_including_adjust(infer, object_expr)?.as_adt()?; @@ -457,7 +461,8 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, field: &ast::FieldExpr, - ) -> Option<(Either, Function>, Option)> { + ) -> Option<(Either, Function>, Option>)> + { let (def, ..) = self.body_()?; let expr_id = self.expr_id(field.clone().into())?.as_expr()?; let inference_result = self.infer()?; @@ -680,7 +685,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, field: &ast::RecordExprField, - ) -> Option<(Field, Option, Type, GenericSubstitution)> { + ) -> Option<(Field, Option, Type<'db>, GenericSubstitution<'db>)> { let record_expr = ast::RecordExpr::cast(field.syntax().parent().and_then(|p| p.parent())?)?; let expr = ast::Expr::from(record_expr); let expr_id = self.store_sm()?.node_expr(InFile::new(self.file_id, &expr))?; @@ -724,7 +729,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, field: &ast::RecordPatField, - ) -> Option<(Field, Type, GenericSubstitution)> { + ) -> Option<(Field, Type<'db>, GenericSubstitution<'db>)> { let field_name = field.field_name()?.as_name(); let record_pat = ast::RecordPat::cast(field.syntax().parent().and_then(|p| p.parent())?)?; let pat_id = self.pat_id(&record_pat.into())?; @@ -779,7 +784,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, name_ref: &ast::NameRef, - ) -> Option<(Either, GenericSubstitution)> { + ) -> Option<(Either, GenericSubstitution<'db>)> { let offset_of_expr = ast::OffsetOfExpr::cast(name_ref.syntax().parent()?)?; let container = offset_of_expr.ty()?; let container = self.type_of_type(db, &container)?; @@ -851,7 +856,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, path: &ast::Path, - ) -> Option<(PathResolution, Option)> { + ) -> Option<(PathResolution, Option>)> { let 
parent = path.syntax().parent(); let parent = || parent.clone(); @@ -1216,7 +1221,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, literal: &ast::RecordExpr, - ) -> Option> { + ) -> Option)>> { let body = self.store()?; let infer = self.infer()?; @@ -1239,7 +1244,7 @@ impl<'db> SourceAnalyzer<'db> { &self, db: &'db dyn HirDatabase, pattern: &ast::RecordPat, - ) -> Option> { + ) -> Option)>> { let body = self.store()?; let infer = self.infer()?; @@ -1258,7 +1263,7 @@ impl<'db> SourceAnalyzer<'db> { substs: &Substitution, variant: VariantId, missing_fields: Vec, - ) -> Vec<(Field, Type)> { + ) -> Vec<(Field, Type<'db>)> { let field_types = db.field_types(variant); missing_fields diff --git a/src/tools/rust-analyzer/crates/hir/src/term_search.rs b/src/tools/rust-analyzer/crates/hir/src/term_search.rs index af72179305c8..4b354e640628 100644 --- a/src/tools/rust-analyzer/crates/hir/src/term_search.rs +++ b/src/tools/rust-analyzer/crates/hir/src/term_search.rs @@ -22,20 +22,20 @@ enum NewTypesKey { /// Helper enum to squash big number of alternative trees into `Many` variant as there is too many /// to take into account. 
#[derive(Debug)] -enum AlternativeExprs { +enum AlternativeExprs<'db> { /// There are few trees, so we keep track of them all - Few(FxHashSet), + Few(FxHashSet>), /// There are too many trees to keep track of Many, } -impl AlternativeExprs { +impl<'db> AlternativeExprs<'db> { /// Construct alternative trees /// /// # Arguments /// `threshold` - threshold value for many trees (more than that is many) /// `exprs` - expressions iterator - fn new(threshold: usize, exprs: impl Iterator) -> AlternativeExprs { + fn new(threshold: usize, exprs: impl Iterator>) -> AlternativeExprs<'db> { let mut it = AlternativeExprs::Few(Default::default()); it.extend_with_threshold(threshold, exprs); it @@ -45,7 +45,7 @@ impl AlternativeExprs { /// /// # Arguments /// `ty` - Type of expressions queried (this is used to give type to `Expr::Many`) - fn exprs(&self, ty: &Type) -> Vec { + fn exprs(&self, ty: &Type<'db>) -> Vec> { match self { AlternativeExprs::Few(exprs) => exprs.iter().cloned().collect(), AlternativeExprs::Many => vec![Expr::Many(ty.clone())], @@ -57,7 +57,7 @@ impl AlternativeExprs { /// # Arguments /// `threshold` - threshold value for many trees (more than that is many) /// `exprs` - expressions iterator - fn extend_with_threshold(&mut self, threshold: usize, exprs: impl Iterator) { + fn extend_with_threshold(&mut self, threshold: usize, exprs: impl Iterator>) { match self { AlternativeExprs::Few(tts) => { for it in exprs { @@ -88,20 +88,20 @@ impl AlternativeExprs { /// Both of them are to speed up the term search by leaving out types / ScopeDefs that likely do /// not produce any new results. 
#[derive(Default, Debug)] -struct LookupTable { +struct LookupTable<'db> { /// All the `Expr`s in "value" produce the type of "key" - data: FxHashMap, + data: FxHashMap, AlternativeExprs<'db>>, /// New types reached since last query by the `NewTypesKey` - new_types: FxHashMap>, + new_types: FxHashMap>>, /// Types queried but not present - types_wishlist: FxHashSet, + types_wishlist: FxHashSet>, /// Threshold to squash trees to `Many` many_threshold: usize, } -impl LookupTable { +impl<'db> LookupTable<'db> { /// Initialize lookup table - fn new(many_threshold: usize, goal: Type) -> Self { + fn new(many_threshold: usize, goal: Type<'db>) -> Self { let mut res = Self { many_threshold, ..Default::default() }; res.new_types.insert(NewTypesKey::ImplMethod, Vec::new()); res.new_types.insert(NewTypesKey::StructProjection, Vec::new()); @@ -110,7 +110,7 @@ impl LookupTable { } /// Find all `Expr`s that unify with the `ty` - fn find(&mut self, db: &dyn HirDatabase, ty: &Type) -> Option> { + fn find(&mut self, db: &'db dyn HirDatabase, ty: &Type<'db>) -> Option>> { let res = self .data .iter() @@ -135,7 +135,7 @@ impl LookupTable { /// /// For example if we have type `i32` in data and we query for `&i32` it map all the type /// trees we have for `i32` with `Expr::Reference` and returns them. - fn find_autoref(&mut self, db: &dyn HirDatabase, ty: &Type) -> Option> { + fn find_autoref(&mut self, db: &'db dyn HirDatabase, ty: &Type<'db>) -> Option>> { let res = self .data .iter() @@ -174,7 +174,7 @@ impl LookupTable { /// Note that the types have to be the same, unification is not enough as unification is not /// transitive. For example Vec and FxHashSet both unify with Iterator, /// but they clearly do not unify themselves. 
- fn insert(&mut self, ty: Type, exprs: impl Iterator) { + fn insert(&mut self, ty: Type<'db>, exprs: impl Iterator>) { match self.data.get_mut(&ty) { Some(it) => { it.extend_with_threshold(self.many_threshold, exprs); @@ -192,14 +192,14 @@ impl LookupTable { } /// Iterate all the reachable types - fn iter_types(&self) -> impl Iterator + '_ { + fn iter_types(&self) -> impl Iterator> + '_ { self.data.keys().cloned() } /// Query new types reached since last query by key /// /// Create new key if you wish to query it to avoid conflicting with existing queries. - fn new_types(&mut self, key: NewTypesKey) -> Vec { + fn new_types(&mut self, key: NewTypesKey) -> Vec> { match self.new_types.get_mut(&key) { Some(it) => std::mem::take(it), None => Vec::new(), @@ -207,20 +207,20 @@ impl LookupTable { } /// Types queried but not found - fn types_wishlist(&mut self) -> &FxHashSet { + fn types_wishlist(&mut self) -> &FxHashSet> { &self.types_wishlist } } /// Context for the `term_search` function #[derive(Debug)] -pub struct TermSearchCtx<'a, DB: HirDatabase> { +pub struct TermSearchCtx<'db, DB: HirDatabase> { /// Semantics for the program - pub sema: &'a Semantics<'a, DB>, + pub sema: &'db Semantics<'db, DB>, /// Semantic scope, captures context for the term search - pub scope: &'a SemanticsScope<'a>, + pub scope: &'db SemanticsScope<'db>, /// Target / expected output type - pub goal: Type, + pub goal: Type<'db>, /// Configuration for term search pub config: TermSearchConfig, } @@ -263,7 +263,7 @@ impl Default for TermSearchConfig { /// Note that there are usually more ways we can get to the `goal` type but some are discarded to /// reduce the memory consumption. It is also unlikely anyone is willing ti browse through /// thousands of possible responses so we currently take first 10 from every tactic. 
-pub fn term_search(ctx: &TermSearchCtx<'_, DB>) -> Vec { +pub fn term_search<'db, DB: HirDatabase>(ctx: &'db TermSearchCtx<'db, DB>) -> Vec> { let module = ctx.scope.module(); let mut defs = FxHashSet::default(); defs.insert(ScopeDef::ModuleDef(ModuleDef::Module(module))); @@ -285,7 +285,7 @@ pub fn term_search(ctx: &TermSearchCtx<'_, DB>) -> Vec { }; // Try trivial tactic first, also populates lookup table - let mut solutions: Vec = tactics::trivial(ctx, &defs, &mut lookup).collect(); + let mut solutions: Vec> = tactics::trivial(ctx, &defs, &mut lookup).collect(); // Use well known types tactic before iterations as it does not depend on other tactics solutions.extend(tactics::famous_types(ctx, &defs, &mut lookup)); solutions.extend(tactics::assoc_const(ctx, &defs, &mut lookup)); diff --git a/src/tools/rust-analyzer/crates/hir/src/term_search/expr.rs b/src/tools/rust-analyzer/crates/hir/src/term_search/expr.rs index 78ee3b5aa683..843831948adc 100644 --- a/src/tools/rust-analyzer/crates/hir/src/term_search/expr.rs +++ b/src/tools/rust-analyzer/crates/hir/src/term_search/expr.rs @@ -59,7 +59,7 @@ fn mod_item_path_str( /// So in short it pretty much gives us a way to get type `Option` using the items we have in /// scope. 
#[derive(Debug, Clone, Eq, Hash, PartialEq)] -pub enum Expr { +pub enum Expr<'db> { /// Constant Const(Const), /// Static variable @@ -69,26 +69,31 @@ pub enum Expr { /// Constant generic parameter ConstParam(ConstParam), /// Well known type (such as `true` for bool) - FamousType { ty: Type, value: &'static str }, + FamousType { ty: Type<'db>, value: &'static str }, /// Function call (does not take self param) - Function { func: Function, generics: Vec, params: Vec }, + Function { func: Function, generics: Vec>, params: Vec> }, /// Method call (has self param) - Method { func: Function, generics: Vec, target: Box, params: Vec }, + Method { + func: Function, + generics: Vec>, + target: Box>, + params: Vec>, + }, /// Enum variant construction - Variant { variant: Variant, generics: Vec, params: Vec }, + Variant { variant: Variant, generics: Vec>, params: Vec> }, /// Struct construction - Struct { strukt: Struct, generics: Vec, params: Vec }, + Struct { strukt: Struct, generics: Vec>, params: Vec> }, /// Tuple construction - Tuple { ty: Type, params: Vec }, + Tuple { ty: Type<'db>, params: Vec> }, /// Struct field access - Field { expr: Box, field: Field }, + Field { expr: Box>, field: Field }, /// Passing type as reference (with `&`) - Reference(Box), + Reference(Box>), /// Indicates possibility of many different options that all evaluate to `ty` - Many(Type), + Many(Type<'db>), } -impl Expr { +impl<'db> Expr<'db> { /// Generate source code for type tree. /// /// Note that trait imports are not added to generated code. @@ -96,8 +101,8 @@ impl Expr { /// by `traits_used` method are also imported. pub fn gen_source_code( &self, - sema_scope: &SemanticsScope<'_>, - many_formatter: &mut dyn FnMut(&Type) -> String, + sema_scope: &SemanticsScope<'db>, + many_formatter: &mut dyn FnMut(&Type<'db>) -> String, cfg: ImportPathConfig, display_target: DisplayTarget, ) -> Result { @@ -298,7 +303,7 @@ impl Expr { /// Get type of the type tree. 
/// /// Same as getting the type of root node - pub fn ty(&self, db: &dyn HirDatabase) -> Type { + pub fn ty(&self, db: &'db dyn HirDatabase) -> Type<'db> { match self { Expr::Const(it) => it.ty(db), Expr::Static(it) => it.ty(db), diff --git a/src/tools/rust-analyzer/crates/hir/src/term_search/tactics.rs b/src/tools/rust-analyzer/crates/hir/src/term_search/tactics.rs index bcff44fcd016..9df131f90e40 100644 --- a/src/tools/rust-analyzer/crates/hir/src/term_search/tactics.rs +++ b/src/tools/rust-analyzer/crates/hir/src/term_search/tactics.rs @@ -40,11 +40,11 @@ use super::{LookupTable, NewTypesKey, TermSearchCtx}; /// /// _Note that there is no use of calling this tactic in every iteration as the output does not /// depend on the current state of `lookup`_ -pub(super) fn trivial<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn trivial<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, defs: &'a FxHashSet, - lookup: &'a mut LookupTable, -) -> impl Iterator + 'a { + lookup: &'lt mut LookupTable<'db>, +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; defs.iter().filter_map(|def| { let expr = match def { @@ -104,11 +104,11 @@ pub(super) fn trivial<'a, DB: HirDatabase>( /// /// _Note that there is no use of calling this tactic in every iteration as the output does not /// depend on the current state of `lookup`_ -pub(super) fn assoc_const<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn assoc_const<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, defs: &'a FxHashSet, - lookup: &'a mut LookupTable, -) -> impl Iterator + 'a { + lookup: &'lt mut LookupTable<'db>, +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); @@ -152,12 +152,12 @@ pub(super) fn assoc_const<'a, DB: HirDatabase>( /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types /// * `should_continue` - Function that 
indicates when to stop iterating -pub(super) fn data_constructor<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn data_constructor<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, _defs: &'a FxHashSet, - lookup: &'a mut LookupTable, + lookup: &'lt mut LookupTable<'db>, should_continue: &'a dyn std::ops::Fn() -> bool, -) -> impl Iterator + 'a { +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); lookup @@ -199,14 +199,14 @@ pub(super) fn data_constructor<'a, DB: HirDatabase>( let generics: Vec<_> = ty.type_arguments().collect(); // Early exit if some param cannot be filled from lookup - let param_exprs: Vec> = fields + let param_exprs: Vec>> = fields .into_iter() .map(|field| lookup.find(db, &field.ty_with_args(db, generics.iter().cloned()))) .collect::>()?; // Note that we need special case for 0 param constructors because of multi cartesian // product - let exprs: Vec = if param_exprs.is_empty() { + let exprs: Vec> = if param_exprs.is_empty() { vec![Expr::Struct { strukt, generics, params: Vec::new() }] } else { param_exprs @@ -247,7 +247,7 @@ pub(super) fn data_constructor<'a, DB: HirDatabase>( .into_iter() .filter_map(|variant| { // Early exit if some param cannot be filled from lookup - let param_exprs: Vec> = variant + let param_exprs: Vec>> = variant .fields(db) .into_iter() .map(|field| { @@ -257,7 +257,7 @@ pub(super) fn data_constructor<'a, DB: HirDatabase>( // Note that we need special case for 0 param constructors because of multi cartesian // product - let variant_exprs: Vec = if param_exprs.is_empty() { + let variant_exprs: Vec> = if param_exprs.is_empty() { vec![Expr::Variant { variant, generics: generics.clone(), @@ -301,12 +301,12 @@ pub(super) fn data_constructor<'a, DB: HirDatabase>( /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types /// * `should_continue` - Function that indicates when to stop 
iterating -pub(super) fn free_function<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn free_function<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, defs: &'a FxHashSet, - lookup: &'a mut LookupTable, + lookup: &'lt mut LookupTable<'db>, should_continue: &'a dyn std::ops::Fn() -> bool, -) -> impl Iterator + 'a { +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); defs.iter() @@ -375,7 +375,7 @@ pub(super) fn free_function<'a, DB: HirDatabase>( } // Early exit if some param cannot be filled from lookup - let param_exprs: Vec> = it + let param_exprs: Vec>> = it .params_without_self_with_args(db, generics.iter().cloned()) .into_iter() .map(|field| { @@ -389,7 +389,7 @@ pub(super) fn free_function<'a, DB: HirDatabase>( // Note that we need special case for 0 param constructors because of multi cartesian // product - let fn_exprs: Vec = if param_exprs.is_empty() { + let fn_exprs: Vec> = if param_exprs.is_empty() { vec![Expr::Function { func: *it, generics, params: Vec::new() }] } else { param_exprs @@ -432,12 +432,12 @@ pub(super) fn free_function<'a, DB: HirDatabase>( /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types /// * `should_continue` - Function that indicates when to stop iterating -pub(super) fn impl_method<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn impl_method<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, _defs: &'a FxHashSet, - lookup: &'a mut LookupTable, + lookup: &'lt mut LookupTable<'db>, should_continue: &'a dyn std::ops::Fn() -> bool, -) -> impl Iterator + 'a { +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); lookup @@ -507,14 +507,14 @@ pub(super) fn impl_method<'a, DB: HirDatabase>( let target_type_exprs = lookup.find(db, &ty).expect("Type not in lookup"); // Early exit if some param cannot be filled 
from lookup - let param_exprs: Vec> = it + let param_exprs: Vec>> = it .params_without_self_with_args(db, ty.type_arguments()) .into_iter() .map(|field| lookup.find_autoref(db, field.ty())) .collect::>()?; let generics: Vec<_> = ty.type_arguments().collect(); - let fn_exprs: Vec = std::iter::once(target_type_exprs) + let fn_exprs: Vec> = std::iter::once(target_type_exprs) .chain(param_exprs) .multi_cartesian_product() .map(|params| { @@ -547,12 +547,12 @@ pub(super) fn impl_method<'a, DB: HirDatabase>( /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types /// * `should_continue` - Function that indicates when to stop iterating -pub(super) fn struct_projection<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn struct_projection<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, _defs: &'a FxHashSet, - lookup: &'a mut LookupTable, + lookup: &'lt mut LookupTable<'db>, should_continue: &'a dyn std::ops::Fn() -> bool, -) -> impl Iterator + 'a { +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); lookup @@ -589,11 +589,11 @@ pub(super) fn struct_projection<'a, DB: HirDatabase>( /// * `ctx` - Context for the term search /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types -pub(super) fn famous_types<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn famous_types<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, _defs: &'a FxHashSet, - lookup: &'a mut LookupTable, -) -> impl Iterator + 'a { + lookup: &'lt mut LookupTable<'db>, +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); [ @@ -620,12 +620,12 @@ pub(super) fn famous_types<'a, DB: HirDatabase>( /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types /// * `should_continue` - Function that 
indicates when to stop iterating -pub(super) fn impl_static_method<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn impl_static_method<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, _defs: &'a FxHashSet, - lookup: &'a mut LookupTable, + lookup: &'lt mut LookupTable<'db>, should_continue: &'a dyn std::ops::Fn() -> bool, -) -> impl Iterator + 'a { +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); lookup @@ -683,7 +683,7 @@ pub(super) fn impl_static_method<'a, DB: HirDatabase>( } // Early exit if some param cannot be filled from lookup - let param_exprs: Vec> = it + let param_exprs: Vec>> = it .params_without_self_with_args(db, ty.type_arguments()) .into_iter() .map(|field| lookup.find_autoref(db, field.ty())) @@ -692,7 +692,7 @@ pub(super) fn impl_static_method<'a, DB: HirDatabase>( // Note that we need special case for 0 param constructors because of multi cartesian // product let generics = ty.type_arguments().collect(); - let fn_exprs: Vec = if param_exprs.is_empty() { + let fn_exprs: Vec> = if param_exprs.is_empty() { vec![Expr::Function { func: it, generics, params: Vec::new() }] } else { param_exprs @@ -722,12 +722,12 @@ pub(super) fn impl_static_method<'a, DB: HirDatabase>( /// * `defs` - Set of items in scope at term search target location /// * `lookup` - Lookup table for types /// * `should_continue` - Function that indicates when to stop iterating -pub(super) fn make_tuple<'a, DB: HirDatabase>( - ctx: &'a TermSearchCtx<'a, DB>, +pub(super) fn make_tuple<'a, 'lt, 'db, DB: HirDatabase>( + ctx: &'a TermSearchCtx<'db, DB>, _defs: &'a FxHashSet, - lookup: &'a mut LookupTable, + lookup: &'lt mut LookupTable<'db>, should_continue: &'a dyn std::ops::Fn() -> bool, -) -> impl Iterator + 'a { +) -> impl Iterator> + use<'a, 'db, 'lt, DB> { let db = ctx.sema.db; let module = ctx.scope.module(); @@ -749,15 +749,15 @@ pub(super) fn make_tuple<'a, DB: HirDatabase>( } // Early 
exit if some param cannot be filled from lookup - let param_exprs: Vec> = + let param_exprs: Vec>> = ty.type_arguments().map(|field| lookup.find(db, &field)).collect::>()?; - let exprs: Vec = param_exprs + let exprs: Vec> = param_exprs .into_iter() .multi_cartesian_product() .filter(|_| should_continue()) .map(|params| { - let tys: Vec = params.iter().map(|it| it.ty(db)).collect(); + let tys: Vec> = params.iter().map(|it| it.ty(db)).collect(); let tuple_ty = Type::new_tuple(module.krate().into(), &tys); let expr = Expr::Tuple { ty: tuple_ty.clone(), params }; diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/assist_config.rs b/src/tools/rust-analyzer/crates/ide-assists/src/assist_config.rs index fb569f8cdae0..57ced8d8534b 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/assist_config.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/assist_config.rs @@ -22,6 +22,7 @@ pub struct AssistConfig { pub term_search_borrowck: bool, pub code_action_grouping: bool, pub expr_fill_default: ExprFillDefaultMode, + pub prefer_self_ty: bool, } impl AssistConfig { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/assist_context.rs b/src/tools/rust-analyzer/crates/ide-assists/src/assist_context.rs index 9eb9452a2b83..207a7548f49b 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/assist_context.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/assist_context.rs @@ -95,7 +95,7 @@ impl<'a> AssistContext<'a> { } } - pub(crate) fn db(&self) -> &RootDatabase { + pub(crate) fn db(&self) -> &'a RootDatabase { self.sema.db } diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_impl_members.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_impl_members.rs index 6a55f39e6934..9f9d21923ff7 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_impl_members.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_impl_members.rs @@ -192,7 +192,7 @@ 
fn add_missing_impl_members_inner( fn try_gen_trait_body( ctx: &AssistContext<'_>, func: &ast::Fn, - trait_ref: hir::TraitRef, + trait_ref: hir::TraitRef<'_>, impl_def: &ast::Impl, edition: Edition, ) -> Option<()> { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_match_arms.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_match_arms.rs index 858d4369914a..1ece7ddab101 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_match_arms.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/add_missing_match_arms.rs @@ -1,12 +1,13 @@ use std::iter::{self, Peekable}; use either::Either; -use hir::{Adt, Crate, HasAttrs, ImportPathConfig, ModuleDef, Semantics, sym}; +use hir::{Adt, AsAssocItem, Crate, HasAttrs, ImportPathConfig, ModuleDef, Semantics, sym}; use ide_db::RootDatabase; use ide_db::assists::ExprFillDefaultMode; use ide_db::syntax_helpers::suggest_name; use ide_db::{famous_defs::FamousDefs, helpers::mod_path_to_ast}; use itertools::Itertools; +use syntax::ToSmolStr; use syntax::ast::edit::IndentLevel; use syntax::ast::edit_in_place::Indent; use syntax::ast::syntax_factory::SyntaxFactory; @@ -79,12 +80,20 @@ pub(crate) fn add_missing_match_arms(acc: &mut Assists, ctx: &AssistContext<'_>) let make = SyntaxFactory::with_mappings(); - let module = ctx.sema.scope(expr.syntax())?.module(); + let scope = ctx.sema.scope(expr.syntax())?; + let module = scope.module(); + let self_ty = if ctx.config.prefer_self_ty { + scope + .containing_function() + .and_then(|function| function.as_assoc_item(ctx.db())?.implementing_ty(ctx.db())) + } else { + None + }; let (mut missing_pats, is_non_exhaustive, has_hidden_variants): ( Peekable>>, bool, bool, - ) = if let Some(enum_def) = resolve_enum_def(&ctx.sema, &expr) { + ) = if let Some(enum_def) = resolve_enum_def(&ctx.sema, &expr, self_ty.as_ref()) { let is_non_exhaustive = enum_def.is_non_exhaustive(ctx.db(), module.krate()); let variants 
= enum_def.variants(ctx.db()); @@ -102,8 +111,9 @@ pub(crate) fn add_missing_match_arms(acc: &mut Assists, ctx: &AssistContext<'_>) }) .filter(|(variant_pat, _)| is_variant_missing(&top_lvl_pats, variant_pat)); - let option_enum = FamousDefs(&ctx.sema, module.krate()).core_option_Option().map(lift_enum); - let missing_pats: Box> = if Some(enum_def) == option_enum { + let option_enum = FamousDefs(&ctx.sema, module.krate()).core_option_Option(); + let missing_pats: Box> = if matches!(enum_def, ExtendedEnum::Enum { enum_: e, .. } if Some(e) == option_enum) + { // Match `Some` variant first. cov_mark::hit!(option_order); Box::new(missing_pats.rev()) @@ -111,7 +121,7 @@ pub(crate) fn add_missing_match_arms(acc: &mut Assists, ctx: &AssistContext<'_>) Box::new(missing_pats) }; (missing_pats.peekable(), is_non_exhaustive, has_hidden_variants) - } else if let Some(enum_defs) = resolve_tuple_of_enum_def(&ctx.sema, &expr) { + } else if let Some(enum_defs) = resolve_tuple_of_enum_def(&ctx.sema, &expr, self_ty.as_ref()) { let is_non_exhaustive = enum_defs.iter().any(|enum_def| enum_def.is_non_exhaustive(ctx.db(), module.krate())); @@ -159,7 +169,9 @@ pub(crate) fn add_missing_match_arms(acc: &mut Assists, ctx: &AssistContext<'_>) is_non_exhaustive, has_hidden_variants, ) - } else if let Some((enum_def, len)) = resolve_array_of_enum_def(&ctx.sema, &expr) { + } else if let Some((enum_def, len)) = + resolve_array_of_enum_def(&ctx.sema, &expr, self_ty.as_ref()) + { let is_non_exhaustive = enum_def.is_non_exhaustive(ctx.db(), module.krate()); let variants = enum_def.variants(ctx.db()); @@ -373,23 +385,23 @@ fn does_pat_match_variant(pat: &Pat, var: &Pat) -> bool { } } -#[derive(Eq, PartialEq, Clone, Copy)] +#[derive(Eq, PartialEq, Clone)] enum ExtendedEnum { Bool, - Enum(hir::Enum), + Enum { enum_: hir::Enum, use_self: bool }, } #[derive(Eq, PartialEq, Clone, Copy, Debug)] enum ExtendedVariant { True, False, - Variant(hir::Variant), + Variant { variant: hir::Variant, use_self: bool 
}, } impl ExtendedVariant { fn should_be_hidden(self, db: &RootDatabase, krate: Crate) -> bool { match self { - ExtendedVariant::Variant(var) => { + ExtendedVariant::Variant { variant: var, .. } => { var.attrs(db).has_doc_hidden() && var.module(db).krate() != krate } _ => false, @@ -397,25 +409,35 @@ impl ExtendedVariant { } } -fn lift_enum(e: hir::Enum) -> ExtendedEnum { - ExtendedEnum::Enum(e) -} - impl ExtendedEnum { - fn is_non_exhaustive(self, db: &RootDatabase, krate: Crate) -> bool { + fn enum_( + db: &RootDatabase, + enum_: hir::Enum, + enum_ty: &hir::Type<'_>, + self_ty: Option<&hir::Type<'_>>, + ) -> Self { + ExtendedEnum::Enum { + enum_, + use_self: self_ty.is_some_and(|self_ty| self_ty.could_unify_with_deeply(db, enum_ty)), + } + } + + fn is_non_exhaustive(&self, db: &RootDatabase, krate: Crate) -> bool { match self { - ExtendedEnum::Enum(e) => { + ExtendedEnum::Enum { enum_: e, .. } => { e.attrs(db).by_key(sym::non_exhaustive).exists() && e.module(db).krate() != krate } _ => false, } } - fn variants(self, db: &RootDatabase) -> Vec { - match self { - ExtendedEnum::Enum(e) => { - e.variants(db).into_iter().map(ExtendedVariant::Variant).collect::>() - } + fn variants(&self, db: &RootDatabase) -> Vec { + match *self { + ExtendedEnum::Enum { enum_: e, use_self } => e + .variants(db) + .into_iter() + .map(|variant| ExtendedVariant::Variant { variant, use_self }) + .collect::>(), ExtendedEnum::Bool => { Vec::::from([ExtendedVariant::True, ExtendedVariant::False]) } @@ -423,9 +445,13 @@ impl ExtendedEnum { } } -fn resolve_enum_def(sema: &Semantics<'_, RootDatabase>, expr: &ast::Expr) -> Option { +fn resolve_enum_def( + sema: &Semantics<'_, RootDatabase>, + expr: &ast::Expr, + self_ty: Option<&hir::Type<'_>>, +) -> Option { sema.type_of_expr(expr)?.adjusted().autoderef(sema.db).find_map(|ty| match ty.as_adt() { - Some(Adt::Enum(e)) => Some(ExtendedEnum::Enum(e)), + Some(Adt::Enum(e)) => Some(ExtendedEnum::enum_(sema.db, e, &ty, self_ty)), _ => 
ty.is_bool().then_some(ExtendedEnum::Bool), }) } @@ -433,6 +459,7 @@ fn resolve_enum_def(sema: &Semantics<'_, RootDatabase>, expr: &ast::Expr) -> Opt fn resolve_tuple_of_enum_def( sema: &Semantics<'_, RootDatabase>, expr: &ast::Expr, + self_ty: Option<&hir::Type<'_>>, ) -> Option> { sema.type_of_expr(expr)? .adjusted() @@ -441,7 +468,7 @@ fn resolve_tuple_of_enum_def( .map(|ty| { ty.autoderef(sema.db).find_map(|ty| { match ty.as_adt() { - Some(Adt::Enum(e)) => Some(lift_enum(e)), + Some(Adt::Enum(e)) => Some(ExtendedEnum::enum_(sema.db, e, &ty, self_ty)), // For now we only handle expansion for a tuple of enums. Here // we map non-enum items to None and rely on `collect` to // convert Vec> into Option>. @@ -456,10 +483,11 @@ fn resolve_tuple_of_enum_def( fn resolve_array_of_enum_def( sema: &Semantics<'_, RootDatabase>, expr: &ast::Expr, + self_ty: Option<&hir::Type<'_>>, ) -> Option<(ExtendedEnum, usize)> { sema.type_of_expr(expr)?.adjusted().as_array(sema.db).and_then(|(ty, len)| { ty.autoderef(sema.db).find_map(|ty| match ty.as_adt() { - Some(Adt::Enum(e)) => Some((lift_enum(e), len)), + Some(Adt::Enum(e)) => Some((ExtendedEnum::enum_(sema.db, e, &ty, self_ty), len)), _ => ty.is_bool().then_some((ExtendedEnum::Bool, len)), }) }) @@ -474,9 +502,21 @@ fn build_pat( ) -> Option { let db = ctx.db(); match var { - ExtendedVariant::Variant(var) => { + ExtendedVariant::Variant { variant: var, use_self } => { let edition = module.krate().edition(db); - let path = mod_path_to_ast(&module.find_path(db, ModuleDef::from(var), cfg)?, edition); + let path = if use_self { + make::path_from_segments( + [ + make::path_segment(make::name_ref_self_ty()), + make::path_segment(make::name_ref( + &var.name(db).display(db, edition).to_smolstr(), + )), + ], + false, + ) + } else { + mod_path_to_ast(&module.find_path(db, ModuleDef::from(var), cfg)?, edition) + }; let fields = var.fields(db); let pat: ast::Pat = match var.kind(db) { hir::StructKind::Tuple => { @@ -509,8 +549,10 @@ fn 
build_pat( #[cfg(test)] mod tests { + use crate::AssistConfig; use crate::tests::{ - check_assist, check_assist_not_applicable, check_assist_target, check_assist_unresolved, + TEST_CONFIG, check_assist, check_assist_not_applicable, check_assist_target, + check_assist_unresolved, check_assist_with_config, }; use super::add_missing_match_arms; @@ -2095,4 +2137,111 @@ fn f() { "#, ); } + + #[test] + fn prefer_self() { + check_assist_with_config( + add_missing_match_arms, + AssistConfig { prefer_self_ty: true, ..TEST_CONFIG }, + r#" +enum Foo { + Bar, + Baz, +} + +impl Foo { + fn qux(&self) { + match self { + $0_ => {} + } + } +} + "#, + r#" +enum Foo { + Bar, + Baz, +} + +impl Foo { + fn qux(&self) { + match self { + Self::Bar => ${1:todo!()}, + Self::Baz => ${2:todo!()},$0 + } + } +} + "#, + ); + } + + #[test] + fn prefer_self_with_generics() { + check_assist_with_config( + add_missing_match_arms, + AssistConfig { prefer_self_ty: true, ..TEST_CONFIG }, + r#" +enum Foo { + Bar(T), + Baz, +} + +impl Foo { + fn qux(&self) { + match self { + $0_ => {} + } + } +} + "#, + r#" +enum Foo { + Bar(T), + Baz, +} + +impl Foo { + fn qux(&self) { + match self { + Self::Bar(${1:_}) => ${2:todo!()}, + Self::Baz => ${3:todo!()},$0 + } + } +} + "#, + ); + check_assist_with_config( + add_missing_match_arms, + AssistConfig { prefer_self_ty: true, ..TEST_CONFIG }, + r#" +enum Foo { + Bar(T), + Baz, +} + +impl Foo { + fn qux(v: Foo) { + match v { + $0_ => {} + } + } +} + "#, + r#" +enum Foo { + Bar(T), + Baz, +} + +impl Foo { + fn qux(v: Foo) { + match v { + Foo::Bar(${1:_}) => ${2:todo!()}, + Foo::Baz => ${3:todo!()},$0 + } + } +} + "#, + ); + } } diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/auto_import.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/auto_import.rs index f3243d369a0b..bb6a10d40b71 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/auto_import.rs +++ 
b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/auto_import.rs @@ -164,9 +164,9 @@ pub(crate) fn auto_import(acc: &mut Assists, ctx: &AssistContext<'_>) -> Option< Some(()) } -pub(super) fn find_importable_node( - ctx: &AssistContext<'_>, -) -> Option<(ImportAssets, SyntaxNode, Option)> { +pub(super) fn find_importable_node<'a: 'db, 'db>( + ctx: &'a AssistContext<'db>, +) -> Option<(ImportAssets<'db>, SyntaxNode, Option>)> { // Deduplicate this with the `expected_type_and_name` logic for completions let expected = |expr_or_pat: Either| match expr_or_pat { Either::Left(expr) => { @@ -226,7 +226,7 @@ pub(super) fn find_importable_node( } } -fn group_label(import_candidate: &ImportCandidate) -> GroupLabel { +fn group_label(import_candidate: &ImportCandidate<'_>) -> GroupLabel { let name = match import_candidate { ImportCandidate::Path(candidate) => format!("Import {}", candidate.name.text()), ImportCandidate::TraitAssocItem(candidate) => { @@ -244,7 +244,7 @@ fn group_label(import_candidate: &ImportCandidate) -> GroupLabel { pub(crate) fn relevance_score( ctx: &AssistContext<'_>, import: &LocatedImport, - expected: Option<&Type>, + expected: Option<&Type<'_>>, current_module: Option<&Module>, ) -> i32 { let mut score = 0; diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/extract_function.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/extract_function.rs index cf45ea0a30d0..00cbef1c01c0 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/extract_function.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/extract_function.rs @@ -309,23 +309,23 @@ fn extraction_target(node: &SyntaxNode, selection_range: TextRange) -> Option { name: ast::NameRef, self_param: Option, - params: Vec, - control_flow: ControlFlow, - ret_ty: RetType, + params: Vec>, + control_flow: ControlFlow<'db>, + ret_ty: RetType<'db>, body: FunctionBody, outliving_locals: Vec, /// Whether at least one of the container's tail expr is 
contained in the range we're extracting. contains_tail_expr: bool, - mods: ContainerInfo, + mods: ContainerInfo<'db>, } #[derive(Debug)] -struct Param { +struct Param<'db> { var: Local, - ty: hir::Type, + ty: hir::Type<'db>, move_local: bool, requires_mut: bool, is_copy: bool, @@ -340,10 +340,10 @@ enum ParamKind { } #[derive(Debug)] -enum FunType { +enum FunType<'db> { Unit, - Single(hir::Type), - Tuple(Vec), + Single(hir::Type<'db>), + Tuple(Vec>), } /// Where to put extracted function definition @@ -358,19 +358,19 @@ enum Anchor { // FIXME: ControlFlow and ContainerInfo both track some function modifiers, feels like these two should // probably be merged somehow. #[derive(Debug)] -struct ControlFlow { - kind: Option, +struct ControlFlow<'db> { + kind: Option>, is_async: bool, is_unsafe: bool, } /// The thing whose expression we are extracting from. Can be a function, const, static, const arg, ... #[derive(Clone, Debug)] -struct ContainerInfo { +struct ContainerInfo<'db> { is_const: bool, parent_loop: Option, /// The function's return type, const's type etc. 
- ret_type: Option, + ret_type: Option>, generic_param_lists: Vec, where_clauses: Vec, edition: Edition, @@ -389,11 +389,11 @@ struct ContainerInfo { /// } /// ``` #[derive(Debug, Clone)] -enum FlowKind { +enum FlowKind<'db> { /// Return with value (`return $expr;`) Return(Option), Try { - kind: TryKind, + kind: TryKind<'db>, }, /// Break with label and value (`break 'label $expr;`) Break(Option, Option), @@ -402,18 +402,18 @@ enum FlowKind { } #[derive(Debug, Clone)] -enum TryKind { +enum TryKind<'db> { Option, - Result { ty: hir::Type }, + Result { ty: hir::Type<'db> }, } #[derive(Debug)] -enum RetType { - Expr(hir::Type), +enum RetType<'db> { + Expr(hir::Type<'db>), Stmt, } -impl RetType { +impl RetType<'_> { fn is_unit(&self) -> bool { match self { RetType::Expr(ty) => ty.is_unit(), @@ -456,8 +456,8 @@ impl LocalUsages { } } -impl Function { - fn return_type(&self, ctx: &AssistContext<'_>) -> FunType { +impl<'db> Function<'db> { + fn return_type(&self, ctx: &AssistContext<'db>) -> FunType<'db> { match &self.ret_ty { RetType::Expr(ty) if ty.is_unit() => FunType::Unit, RetType::Expr(ty) => FunType::Single(ty.clone()), @@ -487,7 +487,7 @@ impl ParamKind { } } -impl Param { +impl<'db> Param<'db> { fn kind(&self) -> ParamKind { match (self.move_local, self.requires_mut, self.is_copy) { (false, true, _) => ParamKind::MutRef, @@ -497,7 +497,7 @@ impl Param { } } - fn to_arg(&self, ctx: &AssistContext<'_>, edition: Edition) -> ast::Expr { + fn to_arg(&self, ctx: &AssistContext<'db>, edition: Edition) -> ast::Expr { let var = path_expr_from_local(ctx, self.var, edition); match self.kind() { ParamKind::Value | ParamKind::MutValue => var, @@ -532,8 +532,12 @@ impl Param { } } -impl TryKind { - fn of_ty(ty: hir::Type, ctx: &AssistContext<'_>, edition: Edition) -> Option { +impl<'db> TryKind<'db> { + fn of_ty( + ty: hir::Type<'db>, + ctx: &AssistContext<'db>, + edition: Edition, + ) -> Option> { if ty.is_unknown() { // We favour Result for `expr?` return 
Some(TryKind::Result { ty }); @@ -551,7 +555,7 @@ impl TryKind { } } -impl FlowKind { +impl<'db> FlowKind<'db> { fn make_result_handler(&self, expr: Option) -> ast::Expr { match self { FlowKind::Return(_) => make::expr_return(expr), @@ -567,7 +571,7 @@ impl FlowKind { } } - fn expr_ty(&self, ctx: &AssistContext<'_>) -> Option { + fn expr_ty(&self, ctx: &AssistContext<'db>) -> Option> { match self { FlowKind::Return(Some(expr)) | FlowKind::Break(_, Some(expr)) => { ctx.sema.type_of_expr(expr).map(TypeInfo::adjusted) @@ -876,11 +880,11 @@ impl FunctionBody { (res, self_param) } - fn analyze_container( + fn analyze_container<'db>( &self, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, edition: Edition, - ) -> Option<(ContainerInfo, bool)> { + ) -> Option<(ContainerInfo<'db>, bool)> { let mut ancestors = self.parent()?.ancestors(); let infer_expr_opt = |expr| sema.type_of_expr(&expr?).map(TypeInfo::adjusted); let mut parent_loop = None; @@ -985,7 +989,7 @@ impl FunctionBody { )) } - fn return_ty(&self, ctx: &AssistContext<'_>) -> Option { + fn return_ty<'db>(&self, ctx: &AssistContext<'db>) -> Option> { match self.tail_expr() { Some(expr) => ctx.sema.type_of_expr(&expr).map(TypeInfo::original).map(RetType::Expr), None => Some(RetType::Stmt), @@ -1006,11 +1010,11 @@ impl FunctionBody { } /// Analyses the function body for external control flow. 
- fn external_control_flow( + fn external_control_flow<'db>( &self, - ctx: &AssistContext<'_>, - container_info: &ContainerInfo, - ) -> Option { + ctx: &AssistContext<'db>, + container_info: &ContainerInfo<'db>, + ) -> Option> { let mut ret_expr = None; let mut try_expr = None; let mut break_expr = None; @@ -1096,12 +1100,12 @@ impl FunctionBody { /// find variables that should be extracted as params /// /// Computes additional info that affects param type and mutability - fn extracted_function_params( + fn extracted_function_params<'db>( &self, - ctx: &AssistContext<'_>, - container_info: &ContainerInfo, + ctx: &AssistContext<'db>, + container_info: &ContainerInfo<'db>, locals: FxIndexSet, - ) -> Vec { + ) -> Vec> { locals .into_iter() .sorted() @@ -1449,7 +1453,7 @@ fn fixup_call_site(builder: &mut SourceChangeBuilder, body: &FunctionBody) { } } -fn make_call(ctx: &AssistContext<'_>, fun: &Function, indent: IndentLevel) -> SyntaxNode { +fn make_call(ctx: &AssistContext<'_>, fun: &Function<'_>, indent: IndentLevel) -> SyntaxNode { let ret_ty = fun.return_type(ctx); let args = make::arg_list(fun.params.iter().map(|param| param.to_arg(ctx, fun.mods.edition))); @@ -1508,17 +1512,17 @@ fn make_call(ctx: &AssistContext<'_>, fun: &Function, indent: IndentLevel) -> Sy } } -enum FlowHandler { +enum FlowHandler<'db> { None, - Try { kind: TryKind }, - If { action: FlowKind }, - IfOption { action: FlowKind }, - MatchOption { none: FlowKind }, - MatchResult { err: FlowKind }, + Try { kind: TryKind<'db> }, + If { action: FlowKind<'db> }, + IfOption { action: FlowKind<'db> }, + MatchOption { none: FlowKind<'db> }, + MatchResult { err: FlowKind<'db> }, } -impl FlowHandler { - fn from_ret_ty(fun: &Function, ret_ty: &FunType) -> FlowHandler { +impl<'db> FlowHandler<'db> { + fn from_ret_ty(fun: &Function<'db>, ret_ty: &FunType<'db>) -> FlowHandler<'db> { if fun.contains_tail_expr { return FlowHandler::None; } @@ -1628,7 +1632,7 @@ fn path_expr_from_local(ctx: &AssistContext<'_>, 
var: Local, edition: Edition) - fn format_function( ctx: &AssistContext<'_>, module: hir::Module, - fun: &Function, + fun: &Function<'_>, old_indent: IndentLevel, ) -> ast::Fn { let fun_name = make::name(&fun.name.text()); @@ -1654,7 +1658,7 @@ fn format_function( fn make_generic_params_and_where_clause( ctx: &AssistContext<'_>, - fun: &Function, + fun: &Function<'_>, ) -> (Option, Option) { let used_type_params = fun.type_params(ctx); @@ -1666,7 +1670,7 @@ fn make_generic_params_and_where_clause( fn make_generic_param_list( ctx: &AssistContext<'_>, - fun: &Function, + fun: &Function<'_>, used_type_params: &[TypeParam], ) -> Option { let mut generic_params = fun @@ -1703,7 +1707,7 @@ fn param_is_required( fn make_where_clause( ctx: &AssistContext<'_>, - fun: &Function, + fun: &Function<'_>, used_type_params: &[TypeParam], ) -> Option { let mut predicates = fun @@ -1743,9 +1747,9 @@ fn resolved_type_param(ctx: &AssistContext<'_>, pred: &ast::WherePred) -> Option } } -impl Function { +impl<'db> Function<'db> { /// Collect all the `TypeParam`s used in the `body` and `params`. 
- fn type_params(&self, ctx: &AssistContext<'_>) -> Vec { + fn type_params(&self, ctx: &AssistContext<'db>) -> Vec { let type_params_in_descendant_paths = self.body.descendant_paths().filter_map(|it| match ctx.sema.resolve_path(&it) { Some(PathResolution::TypeParam(type_param)) => Some(type_param), @@ -1808,8 +1812,8 @@ impl Function { } } -impl FunType { - fn make_ty(&self, ctx: &AssistContext<'_>, module: hir::Module) -> ast::Type { +impl<'db> FunType<'db> { + fn make_ty(&self, ctx: &AssistContext<'db>, module: hir::Module) -> ast::Type { match self { FunType::Unit => make::ty_unit(), FunType::Single(ty) => make_ty(ty, ctx, module), @@ -1831,7 +1835,11 @@ impl FunType { } } -fn make_body(ctx: &AssistContext<'_>, old_indent: IndentLevel, fun: &Function) -> ast::BlockExpr { +fn make_body( + ctx: &AssistContext<'_>, + old_indent: IndentLevel, + fun: &Function<'_>, +) -> ast::BlockExpr { let ret_ty = fun.return_type(ctx); let handler = FlowHandler::from_ret_ty(fun, &ret_ty); @@ -2009,19 +2017,19 @@ fn with_tail_expr(block: ast::BlockExpr, tail_expr: ast::Expr) -> ast::BlockExpr make::hacky_block_expr(elements, Some(tail_expr)) } -fn format_type(ty: &hir::Type, ctx: &AssistContext<'_>, module: hir::Module) -> String { +fn format_type(ty: &hir::Type<'_>, ctx: &AssistContext<'_>, module: hir::Module) -> String { ty.display_source_code(ctx.db(), module.into(), true).ok().unwrap_or_else(|| "_".to_owned()) } -fn make_ty(ty: &hir::Type, ctx: &AssistContext<'_>, module: hir::Module) -> ast::Type { +fn make_ty(ty: &hir::Type<'_>, ctx: &AssistContext<'_>, module: hir::Module) -> ast::Type { let ty_str = format_type(ty, ctx, module); make::ty(&ty_str) } fn rewrite_body_segment( ctx: &AssistContext<'_>, - params: &[Param], - handler: &FlowHandler, + params: &[Param<'_>], + handler: &FlowHandler<'_>, syntax: &SyntaxNode, ) -> SyntaxNode { let syntax = fix_param_usages(ctx, params, syntax); @@ -2030,8 +2038,12 @@ fn rewrite_body_segment( } /// change all usages to account for 
added `&`/`&mut` for some params -fn fix_param_usages(ctx: &AssistContext<'_>, params: &[Param], syntax: &SyntaxNode) -> SyntaxNode { - let mut usages_for_param: Vec<(&Param, Vec)> = Vec::new(); +fn fix_param_usages( + ctx: &AssistContext<'_>, + params: &[Param<'_>], + syntax: &SyntaxNode, +) -> SyntaxNode { + let mut usages_for_param: Vec<(&Param<'_>, Vec)> = Vec::new(); let tm = TreeMutator::new(syntax); @@ -2085,7 +2097,7 @@ fn fix_param_usages(ctx: &AssistContext<'_>, params: &[Param], syntax: &SyntaxNo res } -fn update_external_control_flow(handler: &FlowHandler, syntax: &SyntaxNode) { +fn update_external_control_flow(handler: &FlowHandler<'_>, syntax: &SyntaxNode) { let mut nested_loop = None; let mut nested_scope = None; for event in syntax.preorder() { @@ -2146,7 +2158,10 @@ fn update_external_control_flow(handler: &FlowHandler, syntax: &SyntaxNode) { } } -fn make_rewritten_flow(handler: &FlowHandler, arg_expr: Option) -> Option { +fn make_rewritten_flow( + handler: &FlowHandler<'_>, + arg_expr: Option, +) -> Option { let value = match handler { FlowHandler::None | FlowHandler::Try { .. } => return None, FlowHandler::If { .. 
} => make::expr_call( diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/generate_function.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/generate_function.rs index 30084d23d1fb..78ae815dc87a 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/generate_function.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/generate_function.rs @@ -307,7 +307,7 @@ impl FunctionBuilder { ctx: &AssistContext<'_>, call: &ast::MethodCallExpr, name: &ast::NameRef, - receiver_ty: Type, + receiver_ty: Type<'_>, target_module: Module, target: GeneratedFunctionTarget, ) -> Option { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_call.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_call.rs index 6f028e58d0cd..b7b8bc604a51 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_call.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_call.rs @@ -283,11 +283,11 @@ impl CallInfo { } } -fn get_fn_params( - db: &dyn HirDatabase, +fn get_fn_params<'db>( + db: &'db dyn HirDatabase, function: hir::Function, param_list: &ast::ParamList, -) -> Option, hir::Param)>> { +) -> Option, hir::Param<'db>)>> { let mut assoc_fn_params = function.assoc_fn_params(db).into_iter(); let mut params = Vec::new(); @@ -316,7 +316,7 @@ fn inline( function_def_file_id: EditionedFileId, function: hir::Function, fn_body: &ast::BlockExpr, - params: &[(ast::Pat, Option, hir::Param)], + params: &[(ast::Pat, Option, hir::Param<'_>)], CallInfo { node, arguments, generic_arg_list, krate }: &CallInfo, ) -> ast::Expr { let file_id = sema.hir_file_for(fn_body.syntax()); diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_const_as_literal.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_const_as_literal.rs index e5ed04fdc7c9..b11d3792bc4c 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_const_as_literal.rs +++ 
b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/inline_const_as_literal.rs @@ -58,7 +58,7 @@ pub(crate) fn inline_const_as_literal(acc: &mut Assists, ctx: &AssistContext<'_> fn validate_type_recursively( ctx: &AssistContext<'_>, - ty_hir: Option<&hir::Type>, + ty_hir: Option<&hir::Type<'_>>, refed: bool, fuel: i32, ) -> Option<()> { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/merge_match_arms.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/merge_match_arms.rs index 42f35210b496..08170f81b283 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/merge_match_arms.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/merge_match_arms.rs @@ -105,7 +105,7 @@ fn contains_placeholder(a: &ast::MatchArm) -> bool { } fn are_same_types( - current_arm_types: &FxHashMap>, + current_arm_types: &FxHashMap>>, arm: &ast::MatchArm, ctx: &AssistContext<'_>, ) -> bool { @@ -121,15 +121,15 @@ fn are_same_types( true } -fn get_arm_types( - context: &AssistContext<'_>, +fn get_arm_types<'db>( + context: &AssistContext<'db>, arm: &ast::MatchArm, -) -> FxHashMap> { - let mut mapping: FxHashMap> = FxHashMap::default(); +) -> FxHashMap>> { + let mut mapping: FxHashMap>> = FxHashMap::default(); - fn recurse( - map: &mut FxHashMap>, - ctx: &AssistContext<'_>, + fn recurse<'db>( + map: &mut FxHashMap>>, + ctx: &AssistContext<'db>, pat: &Option, ) { if let Some(local_pat) = pat { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/qualify_path.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/qualify_path.rs index 07d2f52a34ee..8834ad97652e 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/qualify_path.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/qualify_path.rs @@ -217,7 +217,7 @@ fn item_as_trait(db: &RootDatabase, item: hir::ItemInNs) -> Option { } } -fn group_label(candidate: &ImportCandidate) -> GroupLabel { +fn group_label(candidate: 
&ImportCandidate<'_>) -> GroupLabel { let name = match candidate { ImportCandidate::Path(it) => &it.name, ImportCandidate::TraitAssocItem(it) | ImportCandidate::TraitMethod(it) => { @@ -230,7 +230,7 @@ fn group_label(candidate: &ImportCandidate) -> GroupLabel { fn label( db: &RootDatabase, - candidate: &ImportCandidate, + candidate: &ImportCandidate<'_>, import: &LocatedImport, edition: Edition, ) -> String { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/remove_unused_imports.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/remove_unused_imports.rs index 16debc4d7285..c38bdfdccf5b 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/remove_unused_imports.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/remove_unused_imports.rs @@ -117,7 +117,7 @@ pub(crate) fn remove_unused_imports(acc: &mut Assists, ctx: &AssistContext<'_>) if unused.peek().is_some() { acc.add( AssistId::quick_fix("remove_unused_imports"), - "Remove all the unused imports", + "Remove all unused imports", selected_el.text_range(), |builder| { let unused: Vec = unused.map(|x| builder.make_mut(x)).collect(); diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/term_search.rs b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/term_search.rs index 6af8e1482c24..019ddaf1441d 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/handlers/term_search.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/handlers/term_search.rs @@ -46,7 +46,7 @@ pub(crate) fn term_search(acc: &mut Assists, ctx: &AssistContext<'_>) -> Option< return None; } - let mut formatter = |_: &hir::Type| String::from("todo!()"); + let mut formatter = |_: &hir::Type<'_>| String::from("todo!()"); let edition = scope.krate().edition(ctx.db()); let paths = paths @@ -100,7 +100,9 @@ fn f() { let a: u128 = 1; let b: u128 = todo$0!() }"#, fn test_complete_todo_with_msg() { check_assist( term_search, - r#"//- minicore: todo, unimplemented + // 
FIXME: Since we are lacking of `super let`, term search fails due to borrowck failure. + // Should implement super let and remove `fmt_before_1_89_0` + r#"//- minicore: todo, unimplemented, fmt_before_1_89_0 fn f() { let a: u128 = 1; let b: u128 = todo$0!("asd") }"#, r#"fn f() { let a: u128 = 1; let b: u128 = a }"#, ) @@ -110,7 +112,9 @@ fn f() { let a: u128 = 1; let b: u128 = todo$0!("asd") }"#, fn test_complete_unimplemented_with_msg() { check_assist( term_search, - r#"//- minicore: todo, unimplemented + // FIXME: Since we are lacking of `super let`, term search fails due to borrowck failure. + // Should implement super let and remove `fmt_before_1_89_0` + r#"//- minicore: todo, unimplemented, fmt_before_1_89_0 fn f() { let a: u128 = 1; let b: u128 = todo$0!("asd") }"#, r#"fn f() { let a: u128 = 1; let b: u128 = a }"#, ) @@ -120,7 +124,9 @@ fn f() { let a: u128 = 1; let b: u128 = todo$0!("asd") }"#, fn test_complete_unimplemented() { check_assist( term_search, - r#"//- minicore: todo, unimplemented + // FIXME: Since we are lacking of `super let`, term search fails due to borrowck failure. 
+ // Should implement super let and remove `fmt_before_1_89_0` + r#"//- minicore: todo, unimplemented, fmt_before_1_89_0 fn f() { let a: u128 = 1; let b: u128 = todo$0!("asd") }"#, r#"fn f() { let a: u128 = 1; let b: u128 = a }"#, ) diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/tests.rs b/src/tools/rust-analyzer/crates/ide-assists/src/tests.rs index 5e6889792db6..cda2ad43278a 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/tests.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/tests.rs @@ -37,6 +37,7 @@ pub(crate) const TEST_CONFIG: AssistConfig = AssistConfig { term_search_borrowck: true, code_action_grouping: true, expr_fill_default: ExprFillDefaultMode::Todo, + prefer_self_ty: false, }; pub(crate) const TEST_CONFIG_NO_GROUPING: AssistConfig = AssistConfig { @@ -57,6 +58,7 @@ pub(crate) const TEST_CONFIG_NO_GROUPING: AssistConfig = AssistConfig { term_search_borrowck: true, code_action_grouping: false, expr_fill_default: ExprFillDefaultMode::Todo, + prefer_self_ty: false, }; pub(crate) const TEST_CONFIG_NO_SNIPPET_CAP: AssistConfig = AssistConfig { @@ -77,6 +79,7 @@ pub(crate) const TEST_CONFIG_NO_SNIPPET_CAP: AssistConfig = AssistConfig { term_search_borrowck: true, code_action_grouping: true, expr_fill_default: ExprFillDefaultMode::Todo, + prefer_self_ty: false, }; pub(crate) const TEST_CONFIG_IMPORT_ONE: AssistConfig = AssistConfig { @@ -97,6 +100,7 @@ pub(crate) const TEST_CONFIG_IMPORT_ONE: AssistConfig = AssistConfig { term_search_borrowck: true, code_action_grouping: true, expr_fill_default: ExprFillDefaultMode::Todo, + prefer_self_ty: false, }; pub(crate) fn with_single_file(text: &str) -> (RootDatabase, EditionedFileId) { @@ -113,6 +117,23 @@ pub(crate) fn check_assist( check(assist, ra_fixture_before, ExpectedResult::After(&ra_fixture_after), None); } +#[track_caller] +pub(crate) fn check_assist_with_config( + assist: Handler, + config: AssistConfig, + #[rust_analyzer::rust_fixture] ra_fixture_before: &str, + 
#[rust_analyzer::rust_fixture] ra_fixture_after: &str, +) { + let ra_fixture_after = trim_indent(ra_fixture_after); + check_with_config( + config, + assist, + ra_fixture_before, + ExpectedResult::After(&ra_fixture_after), + None, + ); +} + #[track_caller] pub(crate) fn check_assist_no_snippet_cap( assist: Handler, diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/utils.rs b/src/tools/rust-analyzer/crates/ide-assists/src/utils.rs index ef6914fda1d5..1a91053f93ca 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/utils.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/utils.rs @@ -405,7 +405,7 @@ pub(crate) fn does_pat_variant_nested_or_literal(ctx: &AssistContext<'_>, pat: & } fn check_pat_variant_from_enum(ctx: &AssistContext<'_>, pat: &ast::Pat) -> bool { - ctx.sema.type_of_pat(pat).is_none_or(|ty: hir::TypeInfo| { + ctx.sema.type_of_pat(pat).is_none_or(|ty: hir::TypeInfo<'_>| { ty.adjusted().as_adt().is_some_and(|adt| matches!(adt, hir::Adt::Enum(_))) }) } @@ -780,9 +780,9 @@ pub(crate) fn add_method_to_adt( } #[derive(Debug)] -pub(crate) struct ReferenceConversion { +pub(crate) struct ReferenceConversion<'db> { conversion: ReferenceConversionType, - ty: hir::Type, + ty: hir::Type<'db>, impls_deref: bool, } @@ -802,10 +802,10 @@ enum ReferenceConversionType { Result, } -impl ReferenceConversion { +impl<'db> ReferenceConversion<'db> { pub(crate) fn convert_type( &self, - db: &dyn HirDatabase, + db: &'db dyn HirDatabase, display_target: DisplayTarget, ) -> ast::Type { let ty = match self.conversion { @@ -878,11 +878,11 @@ impl ReferenceConversion { // FIXME: It should return a new hir::Type, but currently constructing new types is too cumbersome // and all users of this function operate on string type names, so they can do the conversion // itself themselves. 
-pub(crate) fn convert_reference_type( - ty: hir::Type, - db: &RootDatabase, - famous_defs: &FamousDefs<'_, '_>, -) -> Option { +pub(crate) fn convert_reference_type<'db>( + ty: hir::Type<'db>, + db: &'db RootDatabase, + famous_defs: &FamousDefs<'_, 'db>, +) -> Option> { handle_copy(&ty, db) .or_else(|| handle_as_ref_str(&ty, db, famous_defs)) .or_else(|| handle_as_ref_slice(&ty, db, famous_defs)) @@ -892,18 +892,21 @@ pub(crate) fn convert_reference_type( .map(|(conversion, impls_deref)| ReferenceConversion { ty, conversion, impls_deref }) } -fn could_deref_to_target(ty: &hir::Type, target: &hir::Type, db: &dyn HirDatabase) -> bool { +fn could_deref_to_target(ty: &hir::Type<'_>, target: &hir::Type<'_>, db: &dyn HirDatabase) -> bool { let ty_ref = ty.add_reference(hir::Mutability::Shared); let target_ref = target.add_reference(hir::Mutability::Shared); ty_ref.could_coerce_to(db, &target_ref) } -fn handle_copy(ty: &hir::Type, db: &dyn HirDatabase) -> Option<(ReferenceConversionType, bool)> { +fn handle_copy( + ty: &hir::Type<'_>, + db: &dyn HirDatabase, +) -> Option<(ReferenceConversionType, bool)> { ty.is_copy(db).then_some((ReferenceConversionType::Copy, true)) } fn handle_as_ref_str( - ty: &hir::Type, + ty: &hir::Type<'_>, db: &dyn HirDatabase, famous_defs: &FamousDefs<'_, '_>, ) -> Option<(ReferenceConversionType, bool)> { @@ -914,7 +917,7 @@ fn handle_as_ref_str( } fn handle_as_ref_slice( - ty: &hir::Type, + ty: &hir::Type<'_>, db: &dyn HirDatabase, famous_defs: &FamousDefs<'_, '_>, ) -> Option<(ReferenceConversionType, bool)> { @@ -928,7 +931,7 @@ fn handle_as_ref_slice( } fn handle_dereferenced( - ty: &hir::Type, + ty: &hir::Type<'_>, db: &dyn HirDatabase, famous_defs: &FamousDefs<'_, '_>, ) -> Option<(ReferenceConversionType, bool)> { @@ -941,7 +944,7 @@ fn handle_dereferenced( } fn handle_option_as_ref( - ty: &hir::Type, + ty: &hir::Type<'_>, db: &dyn HirDatabase, famous_defs: &FamousDefs<'_, '_>, ) -> Option<(ReferenceConversionType, bool)> { @@ -953,7 
+956,7 @@ fn handle_option_as_ref( } fn handle_result_as_ref( - ty: &hir::Type, + ty: &hir::Type<'_>, db: &dyn HirDatabase, famous_defs: &FamousDefs<'_, '_>, ) -> Option<(ReferenceConversionType, bool)> { diff --git a/src/tools/rust-analyzer/crates/ide-assists/src/utils/gen_trait_fn_body.rs b/src/tools/rust-analyzer/crates/ide-assists/src/utils/gen_trait_fn_body.rs index 4ea56dc46aaa..c58bdd9e8ede 100644 --- a/src/tools/rust-analyzer/crates/ide-assists/src/utils/gen_trait_fn_body.rs +++ b/src/tools/rust-analyzer/crates/ide-assists/src/utils/gen_trait_fn_body.rs @@ -17,7 +17,7 @@ pub(crate) fn gen_trait_fn_body( func: &ast::Fn, trait_path: &ast::Path, adt: &ast::Adt, - trait_ref: Option, + trait_ref: Option>, ) -> Option<()> { match trait_path.segment()?.name_ref()?.text().as_str() { "Clone" => gen_clone_impl(adt, func), @@ -405,7 +405,7 @@ fn gen_hash_impl(adt: &ast::Adt, func: &ast::Fn) -> Option<()> { } /// Generate a `PartialEq` impl based on the fields and members of the target type. 
-fn gen_partial_eq(adt: &ast::Adt, func: &ast::Fn, trait_ref: Option) -> Option<()> { +fn gen_partial_eq(adt: &ast::Adt, func: &ast::Fn, trait_ref: Option>) -> Option<()> { stdx::always!(func.name().is_some_and(|name| name.text() == "eq")); fn gen_eq_chain(expr: Option, cmp: ast::Expr) -> Option { match expr { @@ -599,7 +599,7 @@ fn gen_partial_eq(adt: &ast::Adt, func: &ast::Fn, trait_ref: Option) - Some(()) } -fn gen_partial_ord(adt: &ast::Adt, func: &ast::Fn, trait_ref: Option) -> Option<()> { +fn gen_partial_ord(adt: &ast::Adt, func: &ast::Fn, trait_ref: Option>) -> Option<()> { stdx::always!(func.name().is_some_and(|name| name.text() == "partial_cmp")); fn gen_partial_eq_match(match_target: ast::Expr) -> Option { let mut arms = vec![]; diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions.rs index 5d68aca9e615..65072d936f63 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions.rs @@ -161,7 +161,11 @@ impl Completions { item.add_to(self, ctx.db); } - pub(crate) fn add_expr(&mut self, ctx: &CompletionContext<'_>, expr: &hir::term_search::Expr) { + pub(crate) fn add_expr( + &mut self, + ctx: &CompletionContext<'_>, + expr: &hir::term_search::Expr<'_>, + ) { if let Some(item) = render_expr(ctx, expr) { item.add_to(self, ctx.db) } @@ -170,7 +174,7 @@ impl Completions { pub(crate) fn add_crate_roots( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, ) { ctx.process_all_names(&mut |name, res, doc_aliases| match res { ScopeDef::ModuleDef(hir::ModuleDef::Module(m)) if m.is_crate_root() => { @@ -183,7 +187,7 @@ impl Completions { pub(crate) fn add_path_resolution( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, local_name: hir::Name, resolution: hir::ScopeDef, doc_aliases: Vec, 
@@ -232,7 +236,7 @@ impl Completions { pub(crate) fn add_enum_variants( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, e: hir::Enum, ) { if !ctx.check_stability_and_hidden(e) { @@ -246,7 +250,7 @@ impl Completions { pub(crate) fn add_module( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, module: hir::Module, local_name: hir::Name, doc_aliases: Vec, @@ -263,7 +267,7 @@ impl Completions { pub(crate) fn add_macro( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, mac: hir::Macro, local_name: hir::Name, ) { @@ -286,7 +290,7 @@ impl Completions { pub(crate) fn add_function( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, func: hir::Function, local_name: Option, ) { @@ -312,7 +316,7 @@ impl Completions { pub(crate) fn add_method( &mut self, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, func: hir::Function, receiver: Option, local_name: Option, @@ -340,7 +344,7 @@ impl Completions { pub(crate) fn add_method_with_import( &mut self, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, func: hir::Function, import: LocatedImport, ) { @@ -407,7 +411,7 @@ impl Completions { pub(crate) fn add_qualified_enum_variant( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, variant: hir::Variant, path: hir::ModPath, ) { @@ -424,7 +428,7 @@ impl Completions { pub(crate) fn add_enum_variant( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, variant: hir::Variant, local_name: Option, ) { @@ -447,10 +451,10 @@ impl Completions { pub(crate) fn add_field( &mut self, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, receiver: Option, field: 
hir::Field, - ty: &hir::Type, + ty: &hir::Type<'_>, ) { let is_private_editable = match ctx.is_visible(&field) { Visible::Yes => false, @@ -471,7 +475,7 @@ impl Completions { pub(crate) fn add_struct_literal( &mut self, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, strukt: hir::Struct, path: Option, local_name: Option, @@ -518,7 +522,7 @@ impl Completions { ctx: &CompletionContext<'_>, receiver: Option, field: usize, - ty: &hir::Type, + ty: &hir::Type<'_>, ) { // Only used for (unnamed) tuples, whose all fields *are* stable. No need to check // stability here. @@ -550,7 +554,7 @@ impl Completions { &mut self, ctx: &CompletionContext<'_>, pattern_ctx: &PatternContext, - path_ctx: Option<&PathCompletionCtx>, + path_ctx: Option<&PathCompletionCtx<'_>>, variant: hir::Variant, local_name: Option, ) { @@ -704,7 +708,7 @@ pub(super) fn complete_name( pub(super) fn complete_name_ref( acc: &mut Completions, ctx: &CompletionContext<'_>, - NameRefContext { nameref, kind }: &NameRefContext, + NameRefContext { nameref, kind }: &NameRefContext<'_>, ) { match kind { NameRefKind::Path(path_ctx) => { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute.rs index 705402c785a2..c542e140df54 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute.rs @@ -86,7 +86,7 @@ pub(crate) fn complete_known_attribute_input( pub(crate) fn complete_attribute_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, .. 
}: &PathCompletionCtx<'_>, &AttrCtx { kind, annotated_item_kind, ref derive_helpers }: &AttrCtx, ) { let is_inner = kind == AttrKind::Inner; diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute/derive.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute/derive.rs index 2fc07e013828..267d92b6c090 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute/derive.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/attribute/derive.rs @@ -13,7 +13,7 @@ use crate::{ pub(crate) fn complete_derive_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx<'_>, existing_derives: &ExistingDerives, ) { let core = ctx.famous_defs().core(); diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/dot.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/dot.rs index 4f21136d214e..5340d65a142d 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/dot.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/dot.rs @@ -18,7 +18,7 @@ use crate::{ pub(crate) fn complete_dot( acc: &mut Completions, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, ) { let receiver_ty = match dot_access { DotAccess { receiver_ty: Some(receiver_ty), .. 
} => &receiver_ty.original, @@ -130,8 +130,8 @@ pub(crate) fn complete_dot( pub(crate) fn complete_undotted_self( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, - expr_ctx: &PathExprCtx, + path_ctx: &PathCompletionCtx<'_>, + expr_ctx: &PathExprCtx<'_>, ) { if !ctx.config.enable_self_on_the_fly { return; @@ -198,9 +198,9 @@ pub(crate) fn complete_undotted_self( fn complete_fields( acc: &mut Completions, ctx: &CompletionContext<'_>, - receiver: &hir::Type, - mut named_field: impl FnMut(&mut Completions, hir::Field, hir::Type), - mut tuple_index: impl FnMut(&mut Completions, usize, hir::Type), + receiver: &hir::Type<'_>, + mut named_field: impl FnMut(&mut Completions, hir::Field, hir::Type<'_>), + mut tuple_index: impl FnMut(&mut Completions, usize, hir::Type<'_>), is_field_access: bool, is_method_access_with_parens: bool, ) { @@ -230,7 +230,7 @@ fn complete_fields( fn complete_methods( ctx: &CompletionContext<'_>, - receiver: &hir::Type, + receiver: &hir::Type<'_>, traits_in_scope: &FxHashSet, f: impl FnMut(hir::Function), ) { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/expr.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/expr.rs index 7fbd1fbc1af4..2133291b1de1 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/expr.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/expr.rs @@ -47,8 +47,8 @@ where pub(crate) fn complete_expr_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, - expr_ctx: &PathExprCtx, + path_ctx @ PathCompletionCtx { qualified, .. 
}: &PathCompletionCtx<'_>, + expr_ctx: &PathExprCtx<'_>, ) { let _p = tracing::info_span!("complete_expr_path").entered(); if !ctx.qualifier_ctx.none() { @@ -145,10 +145,16 @@ pub(crate) fn complete_expr_path( }); match resolution { hir::PathResolution::Def(hir::ModuleDef::Module(module)) => { - // Set visible_from to None so private items are returned. - // They will be possibly filtered out in add_path_resolution() - // via def_is_visible(). - let module_scope = module.scope(ctx.db, None); + let visible_from = if ctx.config.enable_private_editable { + // Set visible_from to None so private items are returned. + // They will be possibly filtered out in add_path_resolution() + // via def_is_visible(). + None + } else { + Some(ctx.module) + }; + + let module_scope = module.scope(ctx.db, visible_from); for (name, def) in module_scope { if scope_def_applicable(def) { acc.add_path_resolution( diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/field.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/field.rs index 1441b0e3a01a..26afa9c8ad96 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/field.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/field.rs @@ -8,7 +8,7 @@ use crate::{ pub(crate) fn complete_field_list_tuple_variant( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, ) { if ctx.qualifier_ctx.vis_node.is_some() { } else if let PathCompletionCtx { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/flyimport.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/flyimport.rs index a74756138090..dad8a76de87d 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/flyimport.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/flyimport.rs @@ -111,7 +111,7 @@ use crate::{ pub(crate) fn import_on_the_fly_path( acc: &mut Completions, ctx: 
&CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, ) -> Option<()> { if !ctx.config.enable_imports_on_the_fly { return None; @@ -175,7 +175,7 @@ pub(crate) fn import_on_the_fly_pat( pub(crate) fn import_on_the_fly_dot( acc: &mut Completions, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, ) -> Option<()> { if !ctx.config.enable_imports_on_the_fly { return None; @@ -203,8 +203,8 @@ pub(crate) fn import_on_the_fly_dot( fn import_on_the_fly( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { kind, .. }: &PathCompletionCtx, - import_assets: ImportAssets, + path_ctx @ PathCompletionCtx { kind, .. }: &PathCompletionCtx<'_>, + import_assets: ImportAssets<'_>, position: SyntaxNode, potential_import_name: String, ) -> Option<()> { @@ -290,7 +290,7 @@ fn import_on_the_fly_pat_( acc: &mut Completions, ctx: &CompletionContext<'_>, pattern_ctx: &PatternContext, - import_assets: ImportAssets, + import_assets: ImportAssets<'_>, position: SyntaxNode, potential_import_name: String, ) -> Option<()> { @@ -335,8 +335,8 @@ fn import_on_the_fly_pat_( fn import_on_the_fly_method( acc: &mut Completions, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, - import_assets: ImportAssets, + dot_access: &DotAccess<'_>, + import_assets: ImportAssets<'_>, position: SyntaxNode, potential_import_name: String, ) -> Option<()> { @@ -400,11 +400,11 @@ fn import_name(ctx: &CompletionContext<'_>) -> String { if token_kind.is_any_identifier() { ctx.token.to_string() } else { String::new() } } -fn import_assets_for_path( - ctx: &CompletionContext<'_>, +fn import_assets_for_path<'db>( + ctx: &CompletionContext<'db>, potential_import_name: &str, qualifier: Option, -) -> Option { +) -> Option> { let _p = tracing::info_span!("import_assets_for_path", ?potential_import_name, ?qualifier).entered(); diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list.rs 
b/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list.rs index 893997cee473..6c001bd16bfe 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list.rs @@ -10,8 +10,8 @@ pub(crate) mod trait_impl; pub(crate) fn complete_item_list_in_expr( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, - expr_ctx: &PathExprCtx, + path_ctx: &PathCompletionCtx<'_>, + expr_ctx: &PathExprCtx<'_>, ) { if !expr_ctx.in_block_expr { return; @@ -25,7 +25,7 @@ pub(crate) fn complete_item_list_in_expr( pub(crate) fn complete_item_list( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx<'_>, kind: &ItemListKind, ) { let _p = tracing::info_span!("complete_item_list").entered(); diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list/trait_impl.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list/trait_impl.rs index 58aead73fd6f..092219a058a1 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list/trait_impl.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/item_list/trait_impl.rs @@ -122,7 +122,7 @@ fn complete_trait_impl_name( pub(crate) fn complete_trait_impl_item_by_name( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, name_ref: &Option, impl_: &Option, ) { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/pattern.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/pattern.rs index ea3511d31caf..62fae1cb2374 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/pattern.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/pattern.rs @@ -124,7 
+124,7 @@ pub(crate) fn complete_pattern( pub(crate) fn complete_pattern_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx<'_>, ) { match qualified { Qualified::With { resolution: Some(resolution), super_chain_len, .. } => { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/postfix.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/postfix.rs index 3cdf2112835d..d0023852acf9 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/postfix.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/postfix.rs @@ -11,6 +11,7 @@ use ide_db::{ text_edit::TextEdit, ty_filter::TryEnum, }; +use itertools::Either; use stdx::never; use syntax::{ SyntaxKind::{BLOCK_EXPR, EXPR_STMT, FOR_EXPR, IF_EXPR, LOOP_EXPR, STMT_LIST, WHILE_EXPR}, @@ -28,7 +29,7 @@ use crate::{ pub(crate) fn complete_postfix( acc: &mut Completions, ctx: &CompletionContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, ) { if !ctx.config.enable_postfix_completions { return; @@ -86,98 +87,10 @@ pub(crate) fn complete_postfix( } } - let try_enum = TryEnum::from_ty(&ctx.sema, &receiver_ty.strip_references()); - if let Some(try_enum) = &try_enum { - match try_enum { - TryEnum::Result => { - postfix_snippet( - "ifl", - "if let Ok {}", - &format!("if let Ok($1) = {receiver_text} {{\n $0\n}}"), - ) - .add_to(acc, ctx.db); - - postfix_snippet( - "lete", - "let Ok else {}", - &format!("let Ok($1) = {receiver_text} else {{\n $2\n}};\n$0"), - ) - .add_to(acc, ctx.db); - - postfix_snippet( - "while", - "while let Ok {}", - &format!("while let Ok($1) = {receiver_text} {{\n $0\n}}"), - ) - .add_to(acc, ctx.db); - } - TryEnum::Option => { - postfix_snippet( - "ifl", - "if let Some {}", - &format!("if let Some($1) = {receiver_text} {{\n $0\n}}"), - ) - .add_to(acc, ctx.db); - - postfix_snippet( 
- "lete", - "let Some else {}", - &format!("let Some($1) = {receiver_text} else {{\n $2\n}};\n$0"), - ) - .add_to(acc, ctx.db); - - postfix_snippet( - "while", - "while let Some {}", - &format!("while let Some($1) = {receiver_text} {{\n $0\n}}"), - ) - .add_to(acc, ctx.db); - } - } - } else if receiver_ty.is_bool() || receiver_ty.is_unknown() { - postfix_snippet("if", "if expr {}", &format!("if {receiver_text} {{\n $0\n}}")) - .add_to(acc, ctx.db); - postfix_snippet("while", "while expr {}", &format!("while {receiver_text} {{\n $0\n}}")) - .add_to(acc, ctx.db); - postfix_snippet("not", "!expr", &format!("!{receiver_text}")).add_to(acc, ctx.db); - } else if let Some(trait_) = ctx.famous_defs().core_iter_IntoIterator() { - if receiver_ty.impls_trait(ctx.db, trait_, &[]) { - postfix_snippet( - "for", - "for ele in expr {}", - &format!("for ele in {receiver_text} {{\n $0\n}}"), - ) - .add_to(acc, ctx.db); - } - } - postfix_snippet("ref", "&expr", &format!("&{receiver_text}")).add_to(acc, ctx.db); postfix_snippet("refm", "&mut expr", &format!("&mut {receiver_text}")).add_to(acc, ctx.db); postfix_snippet("deref", "*expr", &format!("*{receiver_text}")).add_to(acc, ctx.db); - let mut block_should_be_wrapped = true; - if dot_receiver.syntax().kind() == BLOCK_EXPR { - block_should_be_wrapped = false; - if let Some(parent) = dot_receiver.syntax().parent() { - if matches!(parent.kind(), IF_EXPR | WHILE_EXPR | LOOP_EXPR | FOR_EXPR) { - block_should_be_wrapped = true; - } - } - }; - let unsafe_completion_string = if block_should_be_wrapped { - format!("unsafe {{ {receiver_text} }}") - } else { - format!("unsafe {receiver_text}") - }; - postfix_snippet("unsafe", "unsafe {}", &unsafe_completion_string).add_to(acc, ctx.db); - - let const_completion_string = if block_should_be_wrapped { - format!("const {{ {receiver_text} }}") - } else { - format!("const {receiver_text}") - }; - postfix_snippet("const", "const {}", &const_completion_string).add_to(acc, ctx.db); - // The rest of the 
postfix completions create an expression that moves an argument, // so it's better to consider references now to avoid breaking the compilation @@ -195,37 +108,6 @@ pub(crate) fn complete_postfix( add_custom_postfix_completions(acc, ctx, &postfix_snippet, &receiver_text); } - match try_enum { - Some(try_enum) => match try_enum { - TryEnum::Result => { - postfix_snippet( - "match", - "match expr {}", - &format!("match {receiver_text} {{\n Ok(${{1:_}}) => {{$2}},\n Err(${{3:_}}) => {{$0}},\n}}"), - ) - .add_to(acc, ctx.db); - } - TryEnum::Option => { - postfix_snippet( - "match", - "match expr {}", - &format!( - "match {receiver_text} {{\n Some(${{1:_}}) => {{$2}},\n None => {{$0}},\n}}" - ), - ) - .add_to(acc, ctx.db); - } - }, - None => { - postfix_snippet( - "match", - "match expr {}", - &format!("match {receiver_text} {{\n ${{1:_}} => {{$0}},\n}}"), - ) - .add_to(acc, ctx.db); - } - } - postfix_snippet("box", "Box::new(expr)", &format!("Box::new({receiver_text})")) .add_to(acc, ctx.db); postfix_snippet("dbg", "dbg!(expr)", &format!("dbg!({receiver_text})")).add_to(acc, ctx.db); // fixme @@ -233,15 +115,183 @@ pub(crate) fn complete_postfix( postfix_snippet("call", "function(expr)", &format!("${{1}}({receiver_text})")) .add_to(acc, ctx.db); - if let Some(parent) = dot_receiver_including_refs.syntax().parent().and_then(|p| p.parent()) { - if matches!(parent.kind(), STMT_LIST | EXPR_STMT) { - postfix_snippet("let", "let", &format!("let $0 = {receiver_text};")) - .add_to(acc, ctx.db); - postfix_snippet("letm", "let mut", &format!("let mut $0 = {receiver_text};")) - .add_to(acc, ctx.db); + let try_enum = TryEnum::from_ty(&ctx.sema, &receiver_ty.strip_references()); + let mut is_in_cond = false; + if let Some(parent) = dot_receiver_including_refs.syntax().parent() { + if let Some(second_ancestor) = parent.parent() { + let sec_ancestor_kind = second_ancestor.kind(); + if let Some(expr) = >::cast(second_ancestor) { + is_in_cond = match expr { + Either::Left(it) => 
it.condition().is_some_and(|cond| *cond.syntax() == parent), + Either::Right(it) => { + it.condition().is_some_and(|cond| *cond.syntax() == parent) + } + } + } + match &try_enum { + Some(try_enum) if is_in_cond => match try_enum { + TryEnum::Result => { + postfix_snippet( + "let", + "let Ok(_)", + &format!("let Ok($0) = {receiver_text}"), + ) + .add_to(acc, ctx.db); + postfix_snippet( + "letm", + "let Ok(mut _)", + &format!("let Ok(mut $0) = {receiver_text}"), + ) + .add_to(acc, ctx.db); + } + TryEnum::Option => { + postfix_snippet( + "let", + "let Some(_)", + &format!("let Some($0) = {receiver_text}"), + ) + .add_to(acc, ctx.db); + postfix_snippet( + "letm", + "let Some(mut _)", + &format!("let Some(mut $0) = {receiver_text}"), + ) + .add_to(acc, ctx.db); + } + }, + _ if matches!(sec_ancestor_kind, STMT_LIST | EXPR_STMT) => { + postfix_snippet("let", "let", &format!("let $0 = {receiver_text};")) + .add_to(acc, ctx.db); + postfix_snippet("letm", "let mut", &format!("let mut $0 = {receiver_text};")) + .add_to(acc, ctx.db); + } + _ => (), + } } } + if !is_in_cond { + match try_enum { + Some(try_enum) => match try_enum { + TryEnum::Result => { + postfix_snippet( + "match", + "match expr {}", + &format!("match {receiver_text} {{\n Ok(${{1:_}}) => {{$2}},\n Err(${{3:_}}) => {{$0}},\n}}"), + ) + .add_to(acc, ctx.db); + } + TryEnum::Option => { + postfix_snippet( + "match", + "match expr {}", + &format!( + "match {receiver_text} {{\n Some(${{1:_}}) => {{$2}},\n None => {{$0}},\n}}" + ), + ) + .add_to(acc, ctx.db); + } + }, + None => { + postfix_snippet( + "match", + "match expr {}", + &format!("match {receiver_text} {{\n ${{1:_}} => {{$0}},\n}}"), + ) + .add_to(acc, ctx.db); + } + } + if let Some(try_enum) = &try_enum { + match try_enum { + TryEnum::Result => { + postfix_snippet( + "ifl", + "if let Ok {}", + &format!("if let Ok($1) = {receiver_text} {{\n $0\n}}"), + ) + .add_to(acc, ctx.db); + + postfix_snippet( + "lete", + "let Ok else {}", + &format!("let Ok($1) = 
{receiver_text} else {{\n $2\n}};\n$0"), + ) + .add_to(acc, ctx.db); + + postfix_snippet( + "while", + "while let Ok {}", + &format!("while let Ok($1) = {receiver_text} {{\n $0\n}}"), + ) + .add_to(acc, ctx.db); + } + TryEnum::Option => { + postfix_snippet( + "ifl", + "if let Some {}", + &format!("if let Some($1) = {receiver_text} {{\n $0\n}}"), + ) + .add_to(acc, ctx.db); + + postfix_snippet( + "lete", + "let Some else {}", + &format!("let Some($1) = {receiver_text} else {{\n $2\n}};\n$0"), + ) + .add_to(acc, ctx.db); + + postfix_snippet( + "while", + "while let Some {}", + &format!("while let Some($1) = {receiver_text} {{\n $0\n}}"), + ) + .add_to(acc, ctx.db); + } + } + } else if receiver_ty.is_bool() || receiver_ty.is_unknown() { + postfix_snippet("if", "if expr {}", &format!("if {receiver_text} {{\n $0\n}}")) + .add_to(acc, ctx.db); + postfix_snippet( + "while", + "while expr {}", + &format!("while {receiver_text} {{\n $0\n}}"), + ) + .add_to(acc, ctx.db); + postfix_snippet("not", "!expr", &format!("!{receiver_text}")).add_to(acc, ctx.db); + } else if let Some(trait_) = ctx.famous_defs().core_iter_IntoIterator() { + if receiver_ty.impls_trait(ctx.db, trait_, &[]) { + postfix_snippet( + "for", + "for ele in expr {}", + &format!("for ele in {receiver_text} {{\n $0\n}}"), + ) + .add_to(acc, ctx.db); + } + } + } + + let mut block_should_be_wrapped = true; + if dot_receiver.syntax().kind() == BLOCK_EXPR { + block_should_be_wrapped = false; + if let Some(parent) = dot_receiver.syntax().parent() { + if matches!(parent.kind(), IF_EXPR | WHILE_EXPR | LOOP_EXPR | FOR_EXPR) { + block_should_be_wrapped = true; + } + } + }; + { + let (open_brace, close_brace) = + if block_should_be_wrapped { ("{ ", " }") } else { ("", "") }; + let (open_paren, close_paren) = if is_in_cond { ("(", ")") } else { ("", "") }; + let unsafe_completion_string = + format!("{open_paren}unsafe {open_brace}{receiver_text}{close_brace}{close_paren}"); + postfix_snippet("unsafe", "unsafe {}", 
&unsafe_completion_string).add_to(acc, ctx.db); + + let const_completion_string = + format!("{open_paren}const {open_brace}{receiver_text}{close_brace}{close_paren}"); + postfix_snippet("const", "const {}", &const_completion_string).add_to(acc, ctx.db); + } + if let ast::Expr::Literal(literal) = dot_receiver_including_refs.clone() { if let Some(literal_text) = ast::String::cast(literal.token()) { add_format_like_completions(acc, ctx, &dot_receiver_including_refs, cap, &literal_text); @@ -567,6 +617,54 @@ fn main() { ); } + #[test] + fn option_iflet_cond() { + check( + r#" +//- minicore: option +fn main() { + let bar = Some(true); + if bar.$0 +} +"#, + expect![[r#" + me and(…) fn(self, Option) -> Option + me as_ref() const fn(&self) -> Option<&T> + me ok_or(…) const fn(self, E) -> Result + me unwrap() const fn(self) -> T + me unwrap_or(…) fn(self, T) -> T + sn box Box::new(expr) + sn call function(expr) + sn const const {} + sn dbg dbg!(expr) + sn dbgr dbg!(&expr) + sn deref *expr + sn let let Some(_) + sn letm let Some(mut _) + sn ref &expr + sn refm &mut expr + sn return return expr + sn unsafe unsafe {} + "#]], + ); + check_edit( + "let", + r#" +//- minicore: option +fn main() { + let bar = Some(true); + if bar.$0 +} +"#, + r#" +fn main() { + let bar = Some(true); + if let Some($0) = bar +} +"#, + ); + } + #[test] fn option_letelse() { check_edit( diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/record.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/record.rs index c18aab007b2c..36f38a70db63 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/record.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/record.rs @@ -88,7 +88,7 @@ pub(crate) fn complete_record_expr_fields( pub(crate) fn add_default_update( acc: &mut Completions, ctx: &CompletionContext<'_>, - ty: Option, + ty: Option>, ) { let default_trait = ctx.famous_defs().core_default_Default(); let impls_default_trait = 
default_trait @@ -117,7 +117,7 @@ pub(crate) fn add_default_update( fn complete_fields( acc: &mut Completions, ctx: &CompletionContext<'_>, - missing_fields: Vec<(hir::Field, hir::Type)>, + missing_fields: Vec<(hir::Field, hir::Type<'_>)>, ) { for (field, ty) in missing_fields { // This should call something else, we shouldn't be synthesizing a DotAccess here diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/snippet.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/snippet.rs index 31aae1167622..ead9852eff53 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/snippet.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/snippet.rs @@ -11,8 +11,8 @@ use crate::{ pub(crate) fn complete_expr_snippet( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, - &PathExprCtx { in_block_expr, .. }: &PathExprCtx, + path_ctx: &PathCompletionCtx<'_>, + &PathExprCtx { in_block_expr, .. }: &PathExprCtx<'_>, ) { if !matches!(path_ctx.qualified, Qualified::No) { return; @@ -51,7 +51,7 @@ macro_rules! $1 { pub(crate) fn complete_item_snippet( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, kind: &ItemListKind, ) { if !matches!(path_ctx.qualified, Qualified::No) { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/type.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/type.rs index 79db705af495..7c38c7d8ce44 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/type.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/type.rs @@ -12,7 +12,7 @@ use crate::{ pub(crate) fn complete_type_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, .. 
}: &PathCompletionCtx<'_>, location: &TypeLocation, ) { let _p = tracing::info_span!("complete_type_path").entered(); @@ -220,7 +220,7 @@ pub(crate) fn complete_type_path( pub(crate) fn complete_ascribed_type( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, ascription: &TypeAscriptionTarget, ) -> Option<()> { if !path_ctx.is_trivial_path() { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/use_.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/use_.rs index 4d6d0b758a38..d2ab193ec3df 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/use_.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/use_.rs @@ -13,7 +13,7 @@ use crate::{ pub(crate) fn complete_use_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, use_tree_parent, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, use_tree_parent, .. }: &PathCompletionCtx<'_>, name_ref: &Option, ) { match qualified { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/completions/vis.rs b/src/tools/rust-analyzer/crates/ide-completion/src/completions/vis.rs index d15c35ac8499..38761f77a2c5 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/completions/vis.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/completions/vis.rs @@ -8,7 +8,7 @@ use crate::{ pub(crate) fn complete_vis_path( acc: &mut Completions, ctx: &CompletionContext<'_>, - path_ctx @ PathCompletionCtx { qualified, .. }: &PathCompletionCtx, + path_ctx @ PathCompletionCtx { qualified, .. 
}: &PathCompletionCtx<'_>, &has_in_token: &bool, ) { match qualified { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/context.rs b/src/tools/rust-analyzer/crates/ide-completion/src/context.rs index 5287627790ae..cfd7f80d40b3 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/context.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/context.rs @@ -65,13 +65,13 @@ impl QualifierCtx { /// The state of the path we are currently completing. #[derive(Debug)] -pub(crate) struct PathCompletionCtx { +pub(crate) struct PathCompletionCtx<'db> { /// If this is a call with () already there (or {} in case of record patterns) pub(crate) has_call_parens: bool, /// If this has a macro call bang ! pub(crate) has_macro_bang: bool, /// The qualifier of the current path. - pub(crate) qualified: Qualified, + pub(crate) qualified: Qualified<'db>, /// The parent of the path we are completing. pub(crate) parent: Option, #[allow(dead_code)] @@ -79,14 +79,14 @@ pub(crate) struct PathCompletionCtx { pub(crate) path: ast::Path, /// The path of which we are completing the segment in the original file pub(crate) original_path: Option, - pub(crate) kind: PathKind, + pub(crate) kind: PathKind<'db>, /// Whether the path segment has type args or not. pub(crate) has_type_args: bool, /// Whether the qualifier comes from a use tree parent or not pub(crate) use_tree_parent: bool, } -impl PathCompletionCtx { +impl PathCompletionCtx<'_> { pub(crate) fn is_trivial_path(&self) -> bool { matches!( self, @@ -104,9 +104,9 @@ impl PathCompletionCtx { /// The kind of path we are completing right now. 
#[derive(Debug, PartialEq, Eq)] -pub(crate) enum PathKind { +pub(crate) enum PathKind<'db> { Expr { - expr_ctx: PathExprCtx, + expr_ctx: PathExprCtx<'db>, }, Type { location: TypeLocation, @@ -140,7 +140,7 @@ pub(crate) struct AttrCtx { } #[derive(Debug, PartialEq, Eq)] -pub(crate) struct PathExprCtx { +pub(crate) struct PathExprCtx<'db> { pub(crate) in_block_expr: bool, pub(crate) in_breakable: BreakableKind, pub(crate) after_if_expr: bool, @@ -152,7 +152,7 @@ pub(crate) struct PathExprCtx { /// The surrounding RecordExpression we are completing a functional update pub(crate) is_func_update: Option, pub(crate) self_param: Option, - pub(crate) innermost_ret_ty: Option, + pub(crate) innermost_ret_ty: Option>, pub(crate) impl_: Option, /// Whether this expression occurs in match arm guard position: before the /// fat arrow token @@ -241,7 +241,7 @@ pub(crate) enum ItemListKind { } #[derive(Debug)] -pub(crate) enum Qualified { +pub(crate) enum Qualified<'db> { No, With { path: ast::Path, @@ -260,7 +260,7 @@ pub(crate) enum Qualified { }, /// <_>:: TypeAnchor { - ty: Option, + ty: Option>, trait_: Option, }, /// Whether the path is an absolute path @@ -341,17 +341,17 @@ pub(crate) enum NameKind { /// The state of the NameRef we are completing. #[derive(Debug)] -pub(crate) struct NameRefContext { +pub(crate) struct NameRefContext<'db> { /// NameRef syntax in the original file pub(crate) nameref: Option, - pub(crate) kind: NameRefKind, + pub(crate) kind: NameRefKind<'db>, } /// The kind of the NameRef we are completing. #[derive(Debug)] -pub(crate) enum NameRefKind { - Path(PathCompletionCtx), - DotAccess(DotAccess), +pub(crate) enum NameRefKind<'db> { + Path(PathCompletionCtx<'db>), + DotAccess(DotAccess<'db>), /// Position where we are only interested in keyword completions Keyword(ast::Item), /// The record expression this nameref is a field of and whether a dot precedes the completion identifier. 
@@ -365,9 +365,9 @@ pub(crate) enum NameRefKind { /// The identifier we are currently completing. #[derive(Debug)] -pub(crate) enum CompletionAnalysis { +pub(crate) enum CompletionAnalysis<'db> { Name(NameContext), - NameRef(NameRefContext), + NameRef(NameRefContext<'db>), Lifetime(LifetimeContext), /// The string the cursor is currently inside String { @@ -386,9 +386,9 @@ pub(crate) enum CompletionAnalysis { /// Information about the field or method access we are completing. #[derive(Debug)] -pub(crate) struct DotAccess { +pub(crate) struct DotAccess<'db> { pub(crate) receiver: Option, - pub(crate) receiver_ty: Option, + pub(crate) receiver_ty: Option>, pub(crate) kind: DotAccessKind, pub(crate) ctx: DotAccessExprCtx, } @@ -457,7 +457,7 @@ pub(crate) struct CompletionContext<'a> { /// This is usually the parameter name of the function argument we are completing. pub(crate) expected_name: Option, /// The expected type of what we are completing. - pub(crate) expected_type: Option, + pub(crate) expected_type: Option>, pub(crate) qualifier_ctx: QualifierCtx, @@ -608,7 +608,7 @@ impl CompletionContext<'_> { pub(crate) fn iterate_path_candidates( &self, - ty: &hir::Type, + ty: &hir::Type<'_>, mut cb: impl FnMut(hir::AssocItem), ) { let mut seen = FxHashSet::default(); @@ -695,12 +695,12 @@ impl CompletionContext<'_> { } // CompletionContext construction -impl<'a> CompletionContext<'a> { +impl<'db> CompletionContext<'db> { pub(crate) fn new( - db: &'a RootDatabase, + db: &'db RootDatabase, position @ FilePosition { file_id, offset }: FilePosition, - config: &'a CompletionConfig<'a>, - ) -> Option<(CompletionContext<'a>, CompletionAnalysis)> { + config: &'db CompletionConfig<'db>, + ) -> Option<(CompletionContext<'db>, CompletionAnalysis<'db>)> { let _p = tracing::info_span!("CompletionContext::new").entered(); let sema = Semantics::new(db); diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/context/analysis.rs 
b/src/tools/rust-analyzer/crates/ide-completion/src/context/analysis.rs index 7a2230b3e361..6e3a76f346a8 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/context/analysis.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/context/analysis.rs @@ -38,9 +38,9 @@ struct ExpansionResult { derive_ctx: Option<(SyntaxNode, SyntaxNode, TextSize, ast::Attr)>, } -pub(super) struct AnalysisResult { - pub(super) analysis: CompletionAnalysis, - pub(super) expected: (Option, Option), +pub(super) struct AnalysisResult<'db> { + pub(super) analysis: CompletionAnalysis<'db>, + pub(super) expected: (Option>, Option), pub(super) qualifier_ctx: QualifierCtx, /// the original token of the expanded file pub(super) token: SyntaxToken, @@ -48,13 +48,13 @@ pub(super) struct AnalysisResult { pub(super) original_offset: TextSize, } -pub(super) fn expand_and_analyze( - sema: &Semantics<'_, RootDatabase>, +pub(super) fn expand_and_analyze<'db>( + sema: &Semantics<'db, RootDatabase>, original_file: InFile, speculative_file: SyntaxNode, offset: TextSize, original_token: &SyntaxToken, -) -> Option { +) -> Option> { // as we insert after the offset, right biased will *always* pick the identifier no matter // if there is an ident already typed or not let fake_ident_token = speculative_file.token_at_offset(offset).right_biased()?; @@ -432,12 +432,13 @@ fn expand( /// Fill the completion context, this is what does semantic reasoning about the surrounding context /// of the completion location. 
-fn analyze( - sema: &Semantics<'_, RootDatabase>, +fn analyze<'db>( + sema: &Semantics<'db, RootDatabase>, expansion_result: ExpansionResult, original_token: &SyntaxToken, self_token: &SyntaxToken, -) -> Option<(CompletionAnalysis, (Option, Option), QualifierCtx)> { +) -> Option<(CompletionAnalysis<'db>, (Option>, Option), QualifierCtx)> +{ let _p = tracing::info_span!("CompletionContext::analyze").entered(); let ExpansionResult { original_file, @@ -555,17 +556,17 @@ fn analyze( } /// Calculate the expected type and name of the cursor position. -fn expected_type_and_name( - sema: &Semantics<'_, RootDatabase>, +fn expected_type_and_name<'db>( + sema: &Semantics<'db, RootDatabase>, token: &SyntaxToken, name_like: &ast::NameLike, -) -> (Option, Option) { +) -> (Option>, Option) { let mut node = match token.parent() { Some(it) => it, None => return (None, None), }; - let strip_refs = |mut ty: Type| match name_like { + let strip_refs = |mut ty: Type<'db>| match name_like { ast::NameLike::NameRef(n) => { let p = match n.syntax().parent() { Some(it) => it, @@ -805,13 +806,13 @@ fn classify_name( Some(NameContext { name, kind }) } -fn classify_name_ref( - sema: &Semantics<'_, RootDatabase>, +fn classify_name_ref<'db>( + sema: &Semantics<'db, RootDatabase>, original_file: &SyntaxNode, name_ref: ast::NameRef, original_offset: TextSize, parent: SyntaxNode, -) -> Option<(NameRefContext, QualifierCtx)> { +) -> Option<(NameRefContext<'db>, QualifierCtx)> { let nameref = find_node_at_offset(original_file, original_offset); let make_res = |kind| (NameRefContext { nameref: nameref.clone(), kind }, Default::default()); diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/item.rs b/src/tools/rust-analyzer/crates/ide-completion/src/item.rs index 19cdef30bd96..dcaac3997b27 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/item.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/item.rs @@ -502,7 +502,7 @@ pub(crate) struct Builder { impl Builder { 
pub(crate) fn from_resolution( ctx: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, local_name: hir::Name, resolution: hir::ScopeDef, ) -> Self { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/render.rs b/src/tools/rust-analyzer/crates/ide-completion/src/render.rs index 00c0b470f987..c6b8af3c79a2 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/render.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/render.rs @@ -122,10 +122,10 @@ impl<'a> RenderContext<'a> { pub(crate) fn render_field( ctx: RenderContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, receiver: Option, field: hir::Field, - ty: &hir::Type, + ty: &hir::Type<'_>, ) -> CompletionItem { let db = ctx.db(); let is_deprecated = ctx.is_deprecated(field); @@ -204,7 +204,7 @@ pub(crate) fn render_tuple_field( ctx: RenderContext<'_>, receiver: Option, field: usize, - ty: &hir::Type, + ty: &hir::Type<'_>, ) -> CompletionItem { let mut item = CompletionItem::new( SymbolKind::Field, @@ -241,7 +241,7 @@ pub(crate) fn render_type_inference( pub(crate) fn render_path_resolution( ctx: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, local_name: hir::Name, resolution: ScopeDef, ) -> Builder { @@ -259,7 +259,7 @@ pub(crate) fn render_pattern_resolution( pub(crate) fn render_resolution_with_import( ctx: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, import_edit: LocatedImport, ) -> Option { let resolution = ScopeDef::from(import_edit.original_item); @@ -282,10 +282,10 @@ pub(crate) fn render_resolution_with_import_pat( pub(crate) fn render_expr( ctx: &CompletionContext<'_>, - expr: &hir::term_search::Expr, + expr: &hir::term_search::Expr<'_>, ) -> Option { let mut i = 1; - let mut snippet_formatter = |ty: &hir::Type| { + let mut snippet_formatter = |ty: &hir::Type<'_>| { let arg_name = ty .as_adt() .map(|adt| 
stdx::to_lower_snake_case(adt.name(ctx.db).as_str())) @@ -295,7 +295,7 @@ pub(crate) fn render_expr( res }; - let mut label_formatter = |ty: &hir::Type| { + let mut label_formatter = |ty: &hir::Type<'_>| { ty.as_adt() .map(|adt| stdx::to_lower_snake_case(adt.name(ctx.db).as_str())) .unwrap_or_else(|| String::from("...")) @@ -391,7 +391,7 @@ fn render_resolution_pat( fn render_resolution_path( ctx: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, local_name: hir::Name, import_to_add: Option, resolution: ScopeDef, @@ -460,7 +460,7 @@ fn render_resolution_path( } } - let mut set_item_relevance = |ty: Type| { + let mut set_item_relevance = |ty: Type<'_>| { if !ty.is_unknown() { item.detail(ty.display(db, krate).to_string()); } @@ -593,8 +593,8 @@ fn scope_def_is_deprecated(ctx: &RenderContext<'_>, resolution: ScopeDef) -> boo // FIXME: This checks types without possible coercions which some completions might want to do fn match_types( ctx: &CompletionContext<'_>, - ty1: &hir::Type, - ty2: &hir::Type, + ty1: &hir::Type<'_>, + ty2: &hir::Type<'_>, ) -> Option { if ty1 == ty2 { Some(CompletionRelevanceTypeMatch::Exact) @@ -607,7 +607,7 @@ fn match_types( fn compute_type_match( ctx: &CompletionContext<'_>, - completion_ty: &hir::Type, + completion_ty: &hir::Type<'_>, ) -> Option { let expected_type = ctx.expected_type.as_ref()?; @@ -626,7 +626,7 @@ fn compute_exact_name_match(ctx: &CompletionContext<'_>, completion_name: &str) fn compute_ref_match( ctx: &CompletionContext<'_>, - completion_ty: &hir::Type, + completion_ty: &hir::Type<'_>, ) -> Option { let expected_type = ctx.expected_type.as_ref()?; let expected_without_ref = expected_type.remove_ref(); @@ -658,8 +658,8 @@ fn compute_ref_match( fn path_ref_match( completion: &CompletionContext<'_>, - path_ctx: &PathCompletionCtx, - ty: &hir::Type, + path_ctx: &PathCompletionCtx<'_>, + ty: &hir::Type<'_>, item: &mut Builder, ) { if let Some(original_path) = &path_ctx.original_path { @@ 
-733,7 +733,7 @@ mod tests { ) { let mut actual = get_all_items(TEST_CONFIG, ra_fixture, None); actual.retain(|it| kinds.contains(&it.kind)); - actual.sort_by_key(|it| cmp::Reverse(it.relevance.score())); + actual.sort_by_key(|it| (cmp::Reverse(it.relevance.score()), it.label.primary.clone())); check_relevance_(actual, expect); } @@ -743,7 +743,7 @@ mod tests { actual.retain(|it| it.kind != CompletionItemKind::Snippet); actual.retain(|it| it.kind != CompletionItemKind::Keyword); actual.retain(|it| it.kind != CompletionItemKind::BuiltinType); - actual.sort_by_key(|it| cmp::Reverse(it.relevance.score())); + actual.sort_by_key(|it| (cmp::Reverse(it.relevance.score()), it.label.primary.clone())); check_relevance_(actual, expect); } @@ -824,9 +824,9 @@ fn main() { st dep::test_mod_b::Struct {…} dep::test_mod_b::Struct { } [type_could_unify] ex dep::test_mod_b::Struct { } [type_could_unify] st Struct Struct [type_could_unify+requires_import] + md dep [] fn main() fn() [] fn test(…) fn(Struct) [] - md dep [] st Struct Struct [requires_import] "#]], ); @@ -862,9 +862,9 @@ fn main() { "#, expect![[r#" un Union Union [type_could_unify+requires_import] + md dep [] fn main() fn() [] fn test(…) fn(Union) [] - md dep [] en Union Union [requires_import] "#]], ); @@ -900,9 +900,9 @@ fn main() { ev dep::test_mod_b::Enum::variant dep::test_mod_b::Enum::variant [type_could_unify] ex dep::test_mod_b::Enum::variant [type_could_unify] en Enum Enum [type_could_unify+requires_import] + md dep [] fn main() fn() [] fn test(…) fn(Enum) [] - md dep [] en Enum Enum [requires_import] "#]], ); @@ -937,9 +937,9 @@ fn main() { expect![[r#" ev dep::test_mod_b::Enum::Variant dep::test_mod_b::Enum::Variant [type_could_unify] ex dep::test_mod_b::Enum::Variant [type_could_unify] + md dep [] fn main() fn() [] fn test(…) fn(Enum) [] - md dep [] "#]], ); } @@ -967,9 +967,9 @@ fn main() { } "#, expect![[r#" + md dep [] fn main() fn() [] fn test(…) fn(fn(usize) -> i32) [] - md dep [] fn function fn(usize) 
-> i32 [requires_import] fn function(…) fn(isize) -> i32 [requires_import] "#]], @@ -1000,9 +1000,9 @@ fn main() { "#, expect![[r#" ct CONST i32 [type_could_unify+requires_import] + md dep [] fn main() fn() [] fn test(…) fn(i32) [] - md dep [] ct CONST i64 [requires_import] "#]], ); @@ -1032,9 +1032,9 @@ fn main() { "#, expect![[r#" sc STATIC i32 [type_could_unify+requires_import] + md dep [] fn main() fn() [] fn test(…) fn(i32) [] - md dep [] sc STATIC i64 [requires_import] "#]], ); @@ -1090,8 +1090,8 @@ fn func(input: Struct) { } "#, expect![[r#" - st Struct Struct [type] st Self Self [type] + st Struct Struct [type] sp Self Struct [type] st Struct Struct [type] ex Struct [type] @@ -1119,9 +1119,9 @@ fn main() { "#, expect![[r#" lc input bool [type+name+local] + ex false [type] ex input [type] ex true [type] - ex false [type] lc inputbad i32 [local] fn main() fn() [] fn test(…) fn(bool) [] @@ -2088,9 +2088,9 @@ fn f() { A { bar: b$0 }; } "#, expect![[r#" fn bar() fn() -> u8 [type+name] + ex bar() [type] fn baz() fn() -> u8 [type] ex baz() [type] - ex bar() [type] st A A [] fn f() fn() [] "#]], @@ -2199,8 +2199,8 @@ fn main() { lc s S [type+name+local] st S S [type] st S S [type] - ex s [type] ex S [type] + ex s [type] fn foo(…) fn(&mut S) [] fn main() fn() [] "#]], @@ -2218,8 +2218,8 @@ fn main() { st S S [type] lc ssss S [type+local] st S S [type] - ex ssss [type] ex S [type] + ex ssss [type] fn foo(…) fn(&mut S) [] fn main() fn() [] "#]], @@ -2252,11 +2252,11 @@ fn main() { ex Foo [type] lc foo &Foo [local] lc *foo [type+local] - fn bar(…) fn(Foo) [] - fn main() fn() [] - md core [] tt Clone [] tt Copy [] + fn bar(…) fn(Foo) [] + md core [] + fn main() fn() [] "#]], ); } @@ -2297,9 +2297,9 @@ fn main() { st &S [type] st T T [] st &T [type] + md core [] fn foo(…) fn(&S) [] fn main() fn() [] - md core [] "#]], ) } @@ -2346,9 +2346,9 @@ fn main() { st &mut S [type] st T T [] st &mut T [type] + md core [] fn foo(…) fn(&mut S) [] fn main() fn() [] - md core [] "#]], 
) } @@ -2364,8 +2364,8 @@ fn foo(bar: u32) { } "#, expect![[r#" - lc baz i32 [local] lc bar u32 [local] + lc baz i32 [local] fn foo(…) fn(u32) [] "#]], ); @@ -2449,9 +2449,9 @@ fn main() { st &T [type] fn bar() fn() -> T [] fn &bar() [type] + md core [] fn foo(…) fn(&S) [] fn main() fn() [] - md core [] "#]], ) } @@ -2702,8 +2702,8 @@ fn test() { fn fn_builder() fn() -> FooBuilder [type_could_unify] fn fn_ctr_wrapped() fn() -> Option> [type_could_unify] fn fn_ctr_wrapped_2() fn() -> Result, u32> [type_could_unify] - me fn_returns_unit(…) fn(&self) [type_could_unify] fn fn_other() fn() -> Option [type_could_unify] + me fn_returns_unit(…) fn(&self) [type_could_unify] "#]], ); } @@ -2965,12 +2965,12 @@ fn foo() { ev Foo::B Foo::B [type_could_unify] ev Foo::A(…) Foo::A(T) [type_could_unify] lc foo Foo [type+local] - ex foo [type] ex Foo::B [type] + ex foo [type] en Foo Foo<{unknown}> [type_could_unify] - fn foo() fn() [] fn bar() fn() -> Foo [] fn baz() fn() -> Foo [] + fn foo() fn() [] "#]], ); } @@ -3000,19 +3000,19 @@ fn main() { expect![[r#" sn not !expr [snippet] me not() fn(self) -> ::Output [type_could_unify+requires_import] - sn if if expr {} [] - sn while while expr {} [] - sn ref &expr [] - sn refm &mut expr [] - sn deref *expr [] - sn unsafe unsafe {} [] - sn const const {} [] - sn match match expr {} [] sn box Box::new(expr) [] + sn call function(expr) [] + sn const const {} [] sn dbg dbg!(expr) [] sn dbgr dbg!(&expr) [] - sn call function(expr) [] + sn deref *expr [] + sn if if expr {} [] + sn match match expr {} [] + sn ref &expr [] + sn refm &mut expr [] sn return return expr [] + sn unsafe unsafe {} [] + sn while while expr {} [] "#]], ); } @@ -3033,19 +3033,19 @@ fn main() { &[CompletionItemKind::Snippet, CompletionItemKind::SymbolKind(SymbolKind::Method)], expect![[r#" me f() fn(&self) [] - sn ref &expr [] - sn refm &mut expr [] - sn deref *expr [] - sn unsafe unsafe {} [] - sn const const {} [] - sn match match expr {} [] sn box Box::new(expr) [] + 
sn call function(expr) [] + sn const const {} [] sn dbg dbg!(expr) [] sn dbgr dbg!(&expr) [] - sn call function(expr) [] + sn deref *expr [] sn let let [] sn letm let mut [] + sn match match expr {} [] + sn ref &expr [] + sn refm &mut expr [] sn return return expr [] + sn unsafe unsafe {} [] "#]], ); } diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/render/function.rs b/src/tools/rust-analyzer/crates/ide-completion/src/render/function.rs index 2fe517fa8cd0..7669aec8f535 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/render/function.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/render/function.rs @@ -22,13 +22,13 @@ use crate::{ #[derive(Debug)] enum FuncKind<'ctx> { - Function(&'ctx PathCompletionCtx), - Method(&'ctx DotAccess, Option), + Function(&'ctx PathCompletionCtx<'ctx>), + Method(&'ctx DotAccess<'ctx>, Option), } pub(crate) fn render_fn( ctx: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, local_name: Option, func: hir::Function, ) -> Builder { @@ -38,7 +38,7 @@ pub(crate) fn render_fn( pub(crate) fn render_method( ctx: RenderContext<'_>, - dot_access: &DotAccess, + dot_access: &DotAccess<'_>, receiver: Option, local_name: Option, func: hir::Function, @@ -186,8 +186,8 @@ fn render( fn compute_return_type_match( db: &dyn HirDatabase, ctx: &RenderContext<'_>, - self_type: hir::Type, - ret_type: &hir::Type, + self_type: hir::Type<'_>, + ret_type: &hir::Type<'_>, ) -> CompletionRelevanceReturnType { if match_types(ctx.completion, &self_type, ret_type).is_some() { // fn([..]) -> Self @@ -217,8 +217,8 @@ pub(super) fn add_call_parens<'b>( name: SmolStr, escaped_name: SmolStr, self_param: Option, - params: Vec, - ret_type: &hir::Type, + params: Vec>, + ret_type: &hir::Type<'_>, ) -> &'b mut Builder { cov_mark::hit!(inserts_parens_for_function_calls); @@ -288,7 +288,7 @@ pub(super) fn add_call_parens<'b>( builder.label(SmolStr::from_iter([&name, label_suffix])).insert_snippet(cap, 
snippet) } -fn ref_of_param(ctx: &CompletionContext<'_>, arg: &str, ty: &hir::Type) -> &'static str { +fn ref_of_param(ctx: &CompletionContext<'_>, arg: &str, ty: &hir::Type<'_>) -> &'static str { if let Some(derefed_ty) = ty.remove_ref() { for (name, local) in ctx.locals.iter().sorted_by_key(|&(k, _)| k.clone()) { if name.as_str() == arg { @@ -369,12 +369,12 @@ fn params_display(ctx: &CompletionContext<'_>, detail: &mut String, func: hir::F } } -fn params( - ctx: &CompletionContext<'_>, +fn params<'db>( + ctx: &CompletionContext<'db>, func: hir::Function, func_kind: &FuncKind<'_>, has_dot_receiver: bool, -) -> Option<(Option, Vec)> { +) -> Option<(Option, Vec>)> { ctx.config.callable.as_ref()?; // Don't add parentheses if the expected type is a function reference with the same signature. diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/render/literal.rs b/src/tools/rust-analyzer/crates/ide-completion/src/render/literal.rs index 5a9e35a7290b..6c89e49f94e8 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/render/literal.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/render/literal.rs @@ -21,7 +21,7 @@ use crate::{ pub(crate) fn render_variant_lit( ctx: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, local_name: Option, variant: hir::Variant, path: Option, @@ -35,7 +35,7 @@ pub(crate) fn render_variant_lit( pub(crate) fn render_struct_literal( ctx: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, strukt: hir::Struct, path: Option, local_name: Option, @@ -49,7 +49,7 @@ pub(crate) fn render_struct_literal( fn render( ctx @ RenderContext { completion, .. 
}: RenderContext<'_>, - path_ctx: &PathCompletionCtx, + path_ctx: &PathCompletionCtx<'_>, thing: Variant, name: hir::Name, path: Option, @@ -194,7 +194,7 @@ impl Variant { } } - fn ty(self, db: &dyn HirDatabase) -> hir::Type { + fn ty(self, db: &dyn HirDatabase) -> hir::Type<'_> { match self { Variant::Struct(it) => it.ty(db), Variant::EnumVariant(it) => it.parent_enum(db).ty(db), diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/render/macro_.rs b/src/tools/rust-analyzer/crates/ide-completion/src/render/macro_.rs index 4674dae03144..35fe407b2e68 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/render/macro_.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/render/macro_.rs @@ -12,7 +12,7 @@ use crate::{ pub(crate) fn render_macro( ctx: RenderContext<'_>, - PathCompletionCtx { kind, has_macro_bang, has_call_parens, .. }: &PathCompletionCtx, + PathCompletionCtx { kind, has_macro_bang, has_call_parens, .. }: &PathCompletionCtx<'_>, name: hir::Name, macro_: hir::Macro, diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/render/pattern.rs b/src/tools/rust-analyzer/crates/ide-completion/src/render/pattern.rs index dcc51a86a8ed..60ec1128233e 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/render/pattern.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/render/pattern.rs @@ -46,7 +46,7 @@ pub(crate) fn render_struct_pat( pub(crate) fn render_variant_pat( ctx: RenderContext<'_>, pattern_ctx: &PatternContext, - path_ctx: Option<&PathCompletionCtx>, + path_ctx: Option<&PathCompletionCtx<'_>>, variant: hir::Variant, local_name: Option, path: Option<&hir::ModPath>, @@ -109,7 +109,7 @@ fn build_completion( lookup: SmolStr, pat: String, def: impl HasDocs + Copy, - adt_ty: hir::Type, + adt_ty: hir::Type<'_>, // Missing in context of match statement completions is_variant_missing: bool, ) -> CompletionItem { diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/tests/expression.rs 
b/src/tools/rust-analyzer/crates/ide-completion/src/tests/expression.rs index b46e4c32061b..b2d18b796f19 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/tests/expression.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/tests/expression.rs @@ -1474,20 +1474,18 @@ fn main() { } "#, expect![[r#" + me foo() fn(&self) sn box Box::new(expr) sn call function(expr) sn const const {} sn dbg dbg!(expr) sn dbgr dbg!(&expr) sn deref *expr - sn if if expr {} sn match match expr {} - sn not !expr sn ref &expr sn refm &mut expr sn return return expr sn unsafe unsafe {} - sn while while expr {} "#]], ); } diff --git a/src/tools/rust-analyzer/crates/ide-completion/src/tests/visibility.rs b/src/tools/rust-analyzer/crates/ide-completion/src/tests/visibility.rs index 4b5a0ac1c2b9..b404011dfe65 100644 --- a/src/tools/rust-analyzer/crates/ide-completion/src/tests/visibility.rs +++ b/src/tools/rust-analyzer/crates/ide-completion/src/tests/visibility.rs @@ -1,7 +1,7 @@ //! Completion tests for visibility modifiers. 
use expect_test::expect; -use crate::tests::{check, check_with_trigger_character}; +use crate::tests::{check, check_with_private_editable, check_with_trigger_character}; #[test] fn empty_pub() { @@ -78,3 +78,90 @@ mod bar {} "#]], ); } + +#[test] +fn use_inner_public_function() { + check( + r#" +//- /inner.rs crate:inner +pub fn inner_public() {} +fn inner_private() {} +//- /foo.rs crate:foo deps:inner +use inner::inner_public; +pub fn outer_public() {} +//- /lib.rs crate:lib deps:foo +fn x() { + foo::$0 +} + "#, + expect![[r#" + fn outer_public() fn() + "#]], + ); +} + +#[test] +fn pub_use_inner_public_function() { + check( + r#" +//- /inner.rs crate:inner +pub fn inner_public() {} +fn inner_private() {} +//- /foo.rs crate:foo deps:inner +pub use inner::inner_public; +pub fn outer_public() {} +//- /lib.rs crate:lib deps:foo +fn x() { + foo::$0 +} + "#, + expect![[r#" + fn inner_public() fn() + fn outer_public() fn() + "#]], + ); +} + +#[test] +fn use_inner_public_function_private_editable() { + check_with_private_editable( + r#" +//- /inner.rs crate:inner +pub fn inner_public() {} +fn inner_private() {} +//- /foo.rs crate:foo deps:inner +use inner::inner_public; +pub fn outer_public() {} +//- /lib.rs crate:lib deps:foo +fn x() { + foo::$0 +} + "#, + expect![[r#" + fn inner_public() fn() + fn outer_public() fn() + "#]], + ); +} + +#[test] +fn pub_use_inner_public_function_private_editable() { + check_with_private_editable( + r#" +//- /inner.rs crate:inner +pub fn inner_public() {} +fn inner_private() {} +//- /foo.rs crate:foo deps:inner +pub use inner::inner_public; +pub fn outer_public() {} +//- /lib.rs crate:lib deps:foo +fn x() { + foo::$0 +} + "#, + expect![[r#" + fn inner_public() fn() + fn outer_public() fn() + "#]], + ); +} diff --git a/src/tools/rust-analyzer/crates/ide-db/src/active_parameter.rs b/src/tools/rust-analyzer/crates/ide-db/src/active_parameter.rs index 7b5723f37f76..9edfc113f764 100644 --- 
a/src/tools/rust-analyzer/crates/ide-db/src/active_parameter.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/active_parameter.rs @@ -13,21 +13,21 @@ use syntax::{ use crate::RootDatabase; #[derive(Debug)] -pub struct ActiveParameter { - pub ty: Type, +pub struct ActiveParameter<'db> { + pub ty: Type<'db>, pub src: Option>>, } -impl ActiveParameter { +impl<'db> ActiveParameter<'db> { /// Returns information about the call argument this token is part of. - pub fn at_token(sema: &Semantics<'_, RootDatabase>, token: SyntaxToken) -> Option { + pub fn at_token(sema: &Semantics<'db, RootDatabase>, token: SyntaxToken) -> Option { let (signature, active_parameter) = callable_for_token(sema, token)?; Self::from_signature_and_active_parameter(sema, signature, active_parameter) } /// Returns information about the call argument this token is part of. pub fn at_arg( - sema: &Semantics<'_, RootDatabase>, + sema: &'db Semantics<'db, RootDatabase>, list: ast::ArgList, at: TextSize, ) -> Option { @@ -36,8 +36,8 @@ impl ActiveParameter { } fn from_signature_and_active_parameter( - sema: &Semantics<'_, RootDatabase>, - signature: hir::Callable, + sema: &Semantics<'db, RootDatabase>, + signature: hir::Callable<'db>, active_parameter: Option, ) -> Option { let idx = active_parameter?; @@ -63,10 +63,10 @@ impl ActiveParameter { } /// Returns a [`hir::Callable`] this token is a part of and its argument index of said callable. -pub fn callable_for_token( - sema: &Semantics<'_, RootDatabase>, +pub fn callable_for_token<'db>( + sema: &Semantics<'db, RootDatabase>, token: SyntaxToken, -) -> Option<(hir::Callable, Option)> { +) -> Option<(hir::Callable<'db>, Option)> { let offset = token.text_range().start(); // Find the calling expression and its NameRef let parent = token.parent()?; @@ -79,21 +79,21 @@ pub fn callable_for_token( } /// Returns a [`hir::Callable`] this token is a part of and its argument index of said callable. 
-pub fn callable_for_arg_list( - sema: &Semantics<'_, RootDatabase>, +pub fn callable_for_arg_list<'db>( + sema: &Semantics<'db, RootDatabase>, arg_list: ast::ArgList, at: TextSize, -) -> Option<(hir::Callable, Option)> { +) -> Option<(hir::Callable<'db>, Option)> { debug_assert!(arg_list.syntax().text_range().contains(at)); let callable = arg_list.syntax().parent().and_then(ast::CallableExpr::cast)?; callable_for_node(sema, &callable, at) } -pub fn callable_for_node( - sema: &Semantics<'_, RootDatabase>, +pub fn callable_for_node<'db>( + sema: &Semantics<'db, RootDatabase>, calling_node: &ast::CallableExpr, offset: TextSize, -) -> Option<(hir::Callable, Option)> { +) -> Option<(hir::Callable<'db>, Option)> { let callable = match calling_node { ast::CallableExpr::Call(call) => sema.resolve_expr_as_callable(&call.expr()?), ast::CallableExpr::MethodCall(call) => sema.resolve_method_call_as_callable(call), diff --git a/src/tools/rust-analyzer/crates/ide-db/src/defs.rs b/src/tools/rust-analyzer/crates/ide-db/src/defs.rs index d5db1c481b69..a4a140ec57aa 100644 --- a/src/tools/rust-analyzer/crates/ide-db/src/defs.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/defs.rs @@ -385,17 +385,17 @@ fn find_std_module( // FIXME: IdentClass as a name no longer fits #[derive(Debug)] -pub enum IdentClass { - NameClass(NameClass), - NameRefClass(NameRefClass), +pub enum IdentClass<'db> { + NameClass(NameClass<'db>), + NameRefClass(NameRefClass<'db>), Operator(OperatorClass), } -impl IdentClass { +impl<'db> IdentClass<'db> { pub fn classify_node( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, node: &SyntaxNode, - ) -> Option { + ) -> Option> { match_ast! 
{ match node { ast::Name(name) => NameClass::classify(sema, &name).map(IdentClass::NameClass), @@ -418,23 +418,23 @@ impl IdentClass { } pub fn classify_token( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, token: &SyntaxToken, - ) -> Option { + ) -> Option> { let parent = token.parent()?; Self::classify_node(sema, &parent) } pub fn classify_lifetime( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, lifetime: &ast::Lifetime, - ) -> Option { + ) -> Option> { NameRefClass::classify_lifetime(sema, lifetime) .map(IdentClass::NameRefClass) .or_else(|| NameClass::classify_lifetime(sema, lifetime).map(IdentClass::NameClass)) } - pub fn definitions(self) -> ArrayVec<(Definition, Option), 2> { + pub fn definitions(self) -> ArrayVec<(Definition, Option>), 2> { let mut res = ArrayVec::new(); match self { IdentClass::NameClass(NameClass::Definition(it) | NameClass::ConstReference(it)) => { @@ -518,7 +518,7 @@ impl IdentClass { /// /// A model special case is `None` constant in pattern. #[derive(Debug)] -pub enum NameClass { +pub enum NameClass<'db> { Definition(Definition), /// `None` in `if let None = Some(82) {}`. /// Syntactically, it is a name, but semantically it is a reference. @@ -528,11 +528,11 @@ pub enum NameClass { PatFieldShorthand { local_def: Local, field_ref: Field, - adt_subst: GenericSubstitution, + adt_subst: GenericSubstitution<'db>, }, } -impl NameClass { +impl<'db> NameClass<'db> { /// `Definition` defined by this name. 
pub fn defined(self) -> Option { let res = match self { @@ -545,7 +545,10 @@ impl NameClass { Some(res) } - pub fn classify(sema: &Semantics<'_, RootDatabase>, name: &ast::Name) -> Option { + pub fn classify( + sema: &Semantics<'db, RootDatabase>, + name: &ast::Name, + ) -> Option> { let _p = tracing::info_span!("NameClass::classify").entered(); let parent = name.syntax().parent()?; @@ -597,10 +600,10 @@ impl NameClass { Some(definition) } - fn classify_ident_pat( - sema: &Semantics<'_, RootDatabase>, + fn classify_ident_pat<'db>( + sema: &Semantics<'db, RootDatabase>, ident_pat: ast::IdentPat, - ) -> Option { + ) -> Option> { if let Some(def) = sema.resolve_bind_pat_to_const(&ident_pat) { return Some(NameClass::ConstReference(Definition::from(def))); } @@ -638,9 +641,9 @@ impl NameClass { } pub fn classify_lifetime( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, lifetime: &ast::Lifetime, - ) -> Option { + ) -> Option> { let _p = tracing::info_span!("NameClass::classify_lifetime", ?lifetime).entered(); let parent = lifetime.syntax().parent()?; @@ -723,12 +726,12 @@ impl OperatorClass { /// A model special case is field shorthand syntax, which uses a single /// reference to point to two different defs. #[derive(Debug)] -pub enum NameRefClass { - Definition(Definition, Option), +pub enum NameRefClass<'db> { + Definition(Definition, Option>), FieldShorthand { local_ref: Local, field_ref: Field, - adt_subst: GenericSubstitution, + adt_subst: GenericSubstitution<'db>, }, /// The specific situation where we have an extern crate decl without a rename /// Here we have both a declaration and a reference. @@ -741,13 +744,13 @@ pub enum NameRefClass { }, } -impl NameRefClass { +impl<'db> NameRefClass<'db> { // Note: we don't have unit-tests for this rather important function. // It is primarily exercised via goto definition tests in `ide`. 
pub fn classify( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, name_ref: &ast::NameRef, - ) -> Option { + ) -> Option> { let _p = tracing::info_span!("NameRefClass::classify", ?name_ref).entered(); let parent = name_ref.syntax().parent()?; @@ -866,9 +869,9 @@ impl NameRefClass { } pub fn classify_lifetime( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, lifetime: &ast::Lifetime, - ) -> Option { + ) -> Option> { let _p = tracing::info_span!("NameRefClass::classify_lifetime", ?lifetime).entered(); if lifetime.text() == "'static" { return Some(NameRefClass::Definition( diff --git a/src/tools/rust-analyzer/crates/ide-db/src/imports/import_assets.rs b/src/tools/rust-analyzer/crates/ide-db/src/imports/import_assets.rs index ac592dfe93cf..9f35988924b9 100644 --- a/src/tools/rust-analyzer/crates/ide-db/src/imports/import_assets.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/imports/import_assets.rs @@ -25,26 +25,26 @@ use crate::{ /// * assists /// * etc. #[derive(Debug)] -pub enum ImportCandidate { +pub enum ImportCandidate<'db> { /// A path, qualified (`std::collections::HashMap`) or not (`HashMap`). Path(PathImportCandidate), /// A trait associated function (with no self parameter) or an associated constant. /// For 'test_mod::TestEnum::test_function', `ty` is the `test_mod::TestEnum` expression type /// and `name` is the `test_function` - TraitAssocItem(TraitImportCandidate), + TraitAssocItem(TraitImportCandidate<'db>), /// A trait method with self parameter. /// For 'test_enum.test_method()', `ty` is the `test_enum` expression type /// and `name` is the `test_method` - TraitMethod(TraitImportCandidate), + TraitMethod(TraitImportCandidate<'db>), } /// A trait import needed for a given associated item access. /// For `some::path::SomeStruct::ASSOC_`, contains the /// type of `some::path::SomeStruct` and `ASSOC_` as the item name. 
#[derive(Debug)] -pub struct TraitImportCandidate { +pub struct TraitImportCandidate<'db> { /// A type of the item that has the associated item accessed at. - pub receiver_ty: Type, + pub receiver_ty: Type<'db>, /// The associated item name that the trait to import should contain. pub assoc_item_name: NameToImport, } @@ -100,16 +100,16 @@ impl NameToImport { /// A struct to find imports in the project, given a certain name (or its part) and the context. #[derive(Debug)] -pub struct ImportAssets { - import_candidate: ImportCandidate, +pub struct ImportAssets<'db> { + import_candidate: ImportCandidate<'db>, candidate_node: SyntaxNode, module_with_candidate: Module, } -impl ImportAssets { +impl<'db> ImportAssets<'db> { pub fn for_method_call( method_call: &ast::MethodCallExpr, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, ) -> Option { let candidate_node = method_call.syntax().clone(); Some(Self { @@ -121,7 +121,7 @@ impl ImportAssets { pub fn for_exact_path( fully_qualified_path: &ast::Path, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, ) -> Option { let candidate_node = fully_qualified_path.syntax().clone(); if let Some(use_tree) = candidate_node.ancestors().find_map(ast::UseTree::cast) { @@ -139,7 +139,7 @@ impl ImportAssets { }) } - pub fn for_ident_pat(sema: &Semantics<'_, RootDatabase>, pat: &ast::IdentPat) -> Option { + pub fn for_ident_pat(sema: &Semantics<'db, RootDatabase>, pat: &ast::IdentPat) -> Option { if !pat.is_simple_ident() { return None; } @@ -156,7 +156,7 @@ impl ImportAssets { module_with_candidate: Module, qualifier: Option, fuzzy_name: String, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, candidate_node: SyntaxNode, ) -> Option { Some(Self { @@ -168,7 +168,7 @@ impl ImportAssets { pub fn for_fuzzy_method_call( module_with_method_call: Module, - receiver_ty: Type, + receiver_ty: Type<'db>, fuzzy_method_name: String, candidate_node: SyntaxNode, ) -> Option 
{ @@ -229,14 +229,14 @@ impl LocatedImport { } } -impl ImportAssets { - pub fn import_candidate(&self) -> &ImportCandidate { +impl<'db> ImportAssets<'db> { + pub fn import_candidate(&self) -> &ImportCandidate<'db> { &self.import_candidate } pub fn search_for_imports( &self, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, cfg: ImportPathConfig, prefix_kind: PrefixKind, ) -> impl Iterator { @@ -247,7 +247,7 @@ impl ImportAssets { /// This may return non-absolute paths if a part of the returned path is already imported into scope. pub fn search_for_relative_paths( &self, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, cfg: ImportPathConfig, ) -> impl Iterator { let _p = tracing::info_span!("ImportAssets::search_for_relative_paths").entered(); @@ -286,7 +286,7 @@ impl ImportAssets { fn search_for( &self, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, prefixed: Option, cfg: ImportPathConfig, ) -> impl Iterator { @@ -533,11 +533,11 @@ fn item_for_path_search_assoc(db: &RootDatabase, assoc_item: AssocItem) -> Optio }) } -fn trait_applicable_items( - db: &RootDatabase, +fn trait_applicable_items<'db>( + db: &'db RootDatabase, current_crate: Crate, - scope: &SemanticsScope<'_>, - trait_candidate: &TraitImportCandidate, + scope: &SemanticsScope<'db>, + trait_candidate: &TraitImportCandidate<'db>, trait_assoc_item: bool, mod_path: impl Fn(ItemInNs) -> Option, scope_filter: impl Fn(hir::Trait) -> bool, @@ -709,9 +709,9 @@ fn get_mod_path( } } -impl ImportCandidate { +impl<'db> ImportCandidate<'db> { fn for_method_call( - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, method_call: &ast::MethodCallExpr, ) -> Option { match sema.resolve_method_call(method_call) { @@ -725,7 +725,7 @@ impl ImportCandidate { } } - fn for_regular_path(sema: &Semantics<'_, RootDatabase>, path: &ast::Path) -> Option { + fn for_regular_path(sema: &Semantics<'db, RootDatabase>, path: 
&ast::Path) -> Option { if sema.resolve_path(path).is_some() { return None; } @@ -736,7 +736,7 @@ impl ImportCandidate { ) } - fn for_name(sema: &Semantics<'_, RootDatabase>, name: &ast::Name) -> Option { + fn for_name(sema: &Semantics<'db, RootDatabase>, name: &ast::Name) -> Option { if sema .scope(name.syntax())? .speculative_resolve(&make::ext::ident_path(&name.text())) @@ -753,17 +753,17 @@ impl ImportCandidate { fn for_fuzzy_path( qualifier: Option, fuzzy_name: String, - sema: &Semantics<'_, RootDatabase>, + sema: &Semantics<'db, RootDatabase>, ) -> Option { path_import_candidate(sema, qualifier, NameToImport::fuzzy(fuzzy_name)) } } -fn path_import_candidate( - sema: &Semantics<'_, RootDatabase>, +fn path_import_candidate<'db>( + sema: &Semantics<'db, RootDatabase>, qualifier: Option, name: NameToImport, -) -> Option { +) -> Option> { Some(match qualifier { Some(qualifier) => match sema.resolve_path(&qualifier) { Some(PathResolution::Def(ModuleDef::BuiltinType(_))) | None => { diff --git a/src/tools/rust-analyzer/crates/ide-db/src/search.rs b/src/tools/rust-analyzer/crates/ide-db/src/search.rs index c5ad64ed5941..7d460f72492c 100644 --- a/src/tools/rust-analyzer/crates/ide-db/src/search.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/search.rs @@ -429,7 +429,7 @@ pub struct FindUsages<'a> { /// The container of our definition should it be an assoc item assoc_item_container: Option, /// whether to search for the `Self` type of the definition - include_self_kw_refs: Option, + include_self_kw_refs: Option>, /// whether to search for the `self` module search_self_mod: bool, } @@ -1087,12 +1087,12 @@ impl<'a> FindUsages<'a> { fn found_self_ty_name_ref( &self, - self_ty: &hir::Type, + self_ty: &hir::Type<'_>, name_ref: &ast::NameRef, sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool, ) -> bool { // See https://github.com/rust-lang/rust-analyzer/pull/15864/files/e0276dc5ddc38c65240edb408522bb869f15afb4#r1389848845 - let ty_eq = |ty: hir::Type| match 
(ty.as_adt(), self_ty.as_adt()) { + let ty_eq = |ty: hir::Type<'_>| match (ty.as_adt(), self_ty.as_adt()) { (Some(ty), Some(self_ty)) => ty == self_ty, (None, None) => ty == *self_ty, _ => false, @@ -1315,7 +1315,7 @@ impl<'a> FindUsages<'a> { } } -fn def_to_ty(sema: &Semantics<'_, RootDatabase>, def: &Definition) -> Option { +fn def_to_ty<'db>(sema: &Semantics<'db, RootDatabase>, def: &Definition) -> Option> { match def { Definition::Adt(adt) => Some(adt.ty(sema.db)), Definition::TypeAlias(it) => Some(it.ty(sema.db)), diff --git a/src/tools/rust-analyzer/crates/ide-db/src/syntax_helpers/suggest_name.rs b/src/tools/rust-analyzer/crates/ide-db/src/syntax_helpers/suggest_name.rs index 9b9f450bc734..995bf72dca16 100644 --- a/src/tools/rust-analyzer/crates/ide-db/src/syntax_helpers/suggest_name.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/syntax_helpers/suggest_name.rs @@ -151,10 +151,10 @@ impl NameGenerator { /// - If `ty` is an `impl Trait`, it will suggest the name of the first trait. /// /// If the suggested name conflicts with reserved keywords, it will return `None`. 
- pub fn for_type( + pub fn for_type<'db>( &mut self, - ty: &hir::Type, - db: &RootDatabase, + ty: &hir::Type<'db>, + db: &'db RootDatabase, edition: Edition, ) -> Option { let name = name_of_type(ty, db, edition)?; @@ -373,7 +373,11 @@ fn from_type(expr: &ast::Expr, sema: &Semantics<'_, RootDatabase>) -> Option Option { +fn name_of_type<'db>( + ty: &hir::Type<'db>, + db: &'db RootDatabase, + edition: Edition, +) -> Option { let name = if let Some(adt) = ty.as_adt() { let name = adt.name(db).display(db, edition).to_string(); @@ -407,7 +411,11 @@ fn name_of_type(ty: &hir::Type, db: &RootDatabase, edition: Edition) -> Option, db: &RootDatabase, edition: Edition) -> SmolStr { +fn sequence_name<'db>( + inner_ty: Option<&hir::Type<'db>>, + db: &'db RootDatabase, + edition: Edition, +) -> SmolStr { let items_str = SmolStr::new_static("items"); let Some(inner_ty) = inner_ty else { return items_str; diff --git a/src/tools/rust-analyzer/crates/ide-db/src/ty_filter.rs b/src/tools/rust-analyzer/crates/ide-db/src/ty_filter.rs index 63ce0ddbb8fc..095256d8294e 100644 --- a/src/tools/rust-analyzer/crates/ide-db/src/ty_filter.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/ty_filter.rs @@ -10,7 +10,7 @@ use syntax::ast::{self, Pat, make}; use crate::RootDatabase; /// Enum types that implement `std::ops::Try` trait. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum TryEnum { Result, Option, @@ -20,7 +20,7 @@ impl TryEnum { const ALL: [TryEnum; 2] = [TryEnum::Option, TryEnum::Result]; /// Returns `Some(..)` if the provided type is an enum that implements `std::ops::Try`. 
- pub fn from_ty(sema: &Semantics<'_, RootDatabase>, ty: &hir::Type) -> Option { + pub fn from_ty(sema: &Semantics<'_, RootDatabase>, ty: &hir::Type<'_>) -> Option { let enum_ = match ty.as_adt() { Some(hir::Adt::Enum(it)) => it, _ => return None, diff --git a/src/tools/rust-analyzer/crates/ide-db/src/use_trivial_constructor.rs b/src/tools/rust-analyzer/crates/ide-db/src/use_trivial_constructor.rs index a4a93e36f0e1..f63cd92694b3 100644 --- a/src/tools/rust-analyzer/crates/ide-db/src/use_trivial_constructor.rs +++ b/src/tools/rust-analyzer/crates/ide-db/src/use_trivial_constructor.rs @@ -11,7 +11,7 @@ use syntax::{ pub fn use_trivial_constructor( db: &crate::RootDatabase, path: Path, - ty: &hir::Type, + ty: &hir::Type<'_>, edition: Edition, ) -> Option { match ty.as_adt() { diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/expected_function.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/expected_function.rs index 7d2ac373dc08..afd1687ae073 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/expected_function.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/expected_function.rs @@ -7,7 +7,7 @@ use crate::{Diagnostic, DiagnosticCode, DiagnosticsContext}; // This diagnostic is triggered if a call is made on something that is not callable. pub(crate) fn expected_function( ctx: &DiagnosticsContext<'_>, - d: &hir::ExpectedFunction, + d: &hir::ExpectedFunction<'_>, ) -> Diagnostic { Diagnostic::new_with_syntax_node_ptr( ctx, diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/invalid_cast.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/invalid_cast.rs index 7a6e98fe1b54..a59077b757b1 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/invalid_cast.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/invalid_cast.rs @@ -18,7 +18,7 @@ macro_rules! 
format_ty { // Diagnostic: invalid-cast // // This diagnostic is triggered if the code contains an illegal cast -pub(crate) fn invalid_cast(ctx: &DiagnosticsContext<'_>, d: &hir::InvalidCast) -> Diagnostic { +pub(crate) fn invalid_cast(ctx: &DiagnosticsContext<'_>, d: &hir::InvalidCast<'_>) -> Diagnostic { let display_range = ctx.sema.diagnostics_display_range(d.expr.map(|it| it.into())); let (code, message) = match d.error { CastError::CastToBool => ( @@ -106,7 +106,10 @@ pub(crate) fn invalid_cast(ctx: &DiagnosticsContext<'_>, d: &hir::InvalidCast) - // Diagnostic: cast-to-unsized // // This diagnostic is triggered when casting to an unsized type -pub(crate) fn cast_to_unsized(ctx: &DiagnosticsContext<'_>, d: &hir::CastToUnsized) -> Diagnostic { +pub(crate) fn cast_to_unsized( + ctx: &DiagnosticsContext<'_>, + d: &hir::CastToUnsized<'_>, +) -> Diagnostic { let display_range = ctx.sema.diagnostics_display_range(d.expr.map(|it| it.into())); Diagnostic::new( DiagnosticCode::RustcHardError("E0620"), diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_fields.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_fields.rs index 2b76efb1965b..8a5d82b48c01 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_fields.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_fields.rs @@ -106,7 +106,7 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::MissingFields) -> Option| match ctx.config.expr_fill_default { ExprFillDefaultMode::Todo => make::ext::expr_todo(), ExprFillDefaultMode::Underscore => make::ext::expr_underscore(), ExprFillDefaultMode::Default => { @@ -180,7 +180,7 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::MissingFields) -> Option, db: &dyn HirDatabase, module: hir::Module, edition: Edition, @@ -198,7 +198,7 @@ fn make_ty( fn get_default_constructor( ctx: &DiagnosticsContext<'_>, d: &hir::MissingFields, - ty: &Type, + ty: &Type<'_>, ) -> Option { if let 
Some(builtin_ty) = ty.as_builtin() { if builtin_ty.is_int() || builtin_ty.is_uint() { diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_unsafe.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_unsafe.rs index 6bd5417b25d7..d8f6e813d800 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_unsafe.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/missing_unsafe.rs @@ -628,6 +628,17 @@ fn main() { #[test] fn orphan_unsafe_format_args() { // Checks that we don't place orphan arguments for formatting under an unsafe block. + check_diagnostics( + r#" +//- minicore: fmt_before_1_89_0 +fn foo() { + let p = 0xDEADBEEF as *const i32; + format_args!("", *p); + // ^^ error: dereference of raw pointer is unsafe and requires an unsafe function or block +} + "#, + ); + check_diagnostics( r#" //- minicore: fmt @@ -958,4 +969,18 @@ impl FooTrait for S2 { "#, ); } + + #[test] + fn no_false_positive_on_format_args_since_1_89_0() { + check_diagnostics( + r#" +//- minicore: fmt +fn test() { + let foo = 10; + let bar = true; + let _x = format_args!("{} {0} {} {last}", foo, bar, last = "!"); +} + "#, + ); + } } diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/moved_out_of_ref.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/moved_out_of_ref.rs index 01cf5e8fa522..0928262d22fa 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/moved_out_of_ref.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/moved_out_of_ref.rs @@ -4,7 +4,10 @@ use hir::HirDisplay; // Diagnostic: moved-out-of-ref // // This diagnostic is triggered on moving non copy things out of references. 
-pub(crate) fn moved_out_of_ref(ctx: &DiagnosticsContext<'_>, d: &hir::MovedOutOfRef) -> Diagnostic { +pub(crate) fn moved_out_of_ref( + ctx: &DiagnosticsContext<'_>, + d: &hir::MovedOutOfRef<'_>, +) -> Diagnostic { Diagnostic::new_with_syntax_node_ptr( ctx, DiagnosticCode::RustcHardError("E0507"), @@ -217,4 +220,23 @@ fn test() { "#, ) } + + #[test] + fn regression_18201() { + check_diagnostics( + r#" +//- minicore: copy +struct NotCopy; +struct S(NotCopy); +impl S { + fn f(&mut self) { + || { + if let ref mut _cb = self.0 { + } + }; + } +} +"#, + ) + } } diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/no_such_field.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/no_such_field.rs index ef42f2dc7448..0edab5e0b3b1 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/no_such_field.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/no_such_field.rs @@ -1,5 +1,4 @@ use either::Either; -use hir::{Field, HasCrate}; use hir::{HasSource, HirDisplay, Semantics, VariantId, db::ExpandDatabase}; use ide_db::text_edit::TextEdit; use ide_db::{EditionedFileId, RootDatabase, source_change::SourceChange}; @@ -8,7 +7,10 @@ use syntax::{ ast::{self, edit::IndentLevel, make}, }; -use crate::{Assist, Diagnostic, DiagnosticCode, DiagnosticsContext, fix}; +use crate::{ + Assist, Diagnostic, DiagnosticCode, DiagnosticsContext, fix, + handlers::private_field::field_is_private_fixes, +}; // Diagnostic: no-such-field // @@ -37,8 +39,8 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::NoSuchField) -> Option, d: &hir::NoSuchField) -> Option, - usage_file_id: EditionedFileId, - record_expr_field: &ast::RecordExprField, - private_field: Field, -) -> Option> { - let def_crate = private_field.krate(sema.db); - let usage_crate = sema.file_to_module_def(usage_file_id.file_id(sema.db))?.krate(); - let visibility = if usage_crate == def_crate { "pub(crate) " } else { "pub " }; - - let source = 
private_field.source(sema.db)?; - let (range, _) = source.syntax().original_file_range_opt(sema.db)?; - let source_change = SourceChange::from_text_edit( - range.file_id.file_id(sema.db), - TextEdit::insert(range.range.start(), visibility.into()), - ); - - Some(vec![fix( - "increase_field_visibility", - "Increase field visibility", - source_change, - sema.original_range(record_expr_field.syntax()).range, - )]) -} - fn missing_record_expr_field_fixes( sema: &Semantics<'_, RootDatabase>, usage_file_id: EditionedFileId, diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/private_field.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/private_field.rs index 5b4273a5a627..69cd0d27cb06 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/private_field.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/private_field.rs @@ -1,4 +1,8 @@ -use crate::{Diagnostic, DiagnosticCode, DiagnosticsContext}; +use hir::{EditionedFileId, FileRange, HasCrate, HasSource, Semantics}; +use ide_db::{RootDatabase, assists::Assist, source_change::SourceChange, text_edit::TextEdit}; +use syntax::{AstNode, TextRange, TextSize, ast::HasVisibility}; + +use crate::{Diagnostic, DiagnosticCode, DiagnosticsContext, fix}; // Diagnostic: private-field // @@ -16,11 +20,59 @@ pub(crate) fn private_field(ctx: &DiagnosticsContext<'_>, d: &hir::PrivateField) d.expr.map(|it| it.into()), ) .stable() + .with_fixes(field_is_private_fixes( + &ctx.sema, + d.expr.file_id.original_file(ctx.sema.db), + d.field, + ctx.sema.original_range(d.expr.to_node(ctx.sema.db).syntax()).range, + )) +} + +pub(crate) fn field_is_private_fixes( + sema: &Semantics<'_, RootDatabase>, + usage_file_id: EditionedFileId, + private_field: hir::Field, + fix_range: TextRange, +) -> Option> { + let def_crate = private_field.krate(sema.db); + let usage_crate = sema.file_to_module_def(usage_file_id.file_id(sema.db))?.krate(); + let mut visibility_text = if usage_crate == 
def_crate { "pub(crate) " } else { "pub " }; + + let source = private_field.source(sema.db)?; + let existing_visibility = match &source.value { + hir::FieldSource::Named(it) => it.visibility(), + hir::FieldSource::Pos(it) => it.visibility(), + }; + let range = match existing_visibility { + Some(visibility) => { + // If there is an existing visibility, don't insert whitespace after. + visibility_text = visibility_text.trim_end(); + source.with_value(visibility.syntax()).original_file_range_opt(sema.db)?.0 + } + None => { + let (range, _) = source.syntax().original_file_range_opt(sema.db)?; + FileRange { + file_id: range.file_id, + range: TextRange::at(range.range.start(), TextSize::new(0)), + } + } + }; + let source_change = SourceChange::from_text_edit( + range.file_id.file_id(sema.db), + TextEdit::replace(range.range, visibility_text.into()), + ); + + Some(vec![fix( + "increase_field_visibility", + "Increase field visibility", + source_change, + fix_range, + )]) } #[cfg(test)] mod tests { - use crate::tests::check_diagnostics; + use crate::tests::{check_diagnostics, check_fix}; #[test] fn private_field() { @@ -29,7 +81,7 @@ mod tests { mod module { pub struct Struct { field: u32 } } fn main(s: module::Struct) { s.field; - //^^^^^^^ error: field `field` of `Struct` is private + //^^^^^^^ 💡 error: field `field` of `Struct` is private } "#, ); @@ -42,7 +94,7 @@ fn main(s: module::Struct) { mod module { pub struct Struct(u32); } fn main(s: module::Struct) { s.0; - //^^^ error: field `0` of `Struct` is private + //^^^ 💡 error: field `0` of `Struct` is private } "#, ); @@ -113,4 +165,68 @@ fn main() { "#, ); } + + #[test] + fn change_visibility_fix() { + check_fix( + r#" +pub mod foo { + pub mod bar { + pub struct Struct { + field: i32, + } + } +} + +fn foo(v: foo::bar::Struct) { + v.field$0; +} + "#, + r#" +pub mod foo { + pub mod bar { + pub struct Struct { + pub(crate) field: i32, + } + } +} + +fn foo(v: foo::bar::Struct) { + v.field; +} + "#, + ); + } + + #[test] + 
fn change_visibility_with_existing_visibility() { + check_fix( + r#" +pub mod foo { + pub mod bar { + pub struct Struct { + pub(super) field: i32, + } + } +} + +fn foo(v: foo::bar::Struct) { + v.field$0; +} + "#, + r#" +pub mod foo { + pub mod bar { + pub struct Struct { + pub(crate) field: i32, + } + } +} + +fn foo(v: foo::bar::Struct) { + v.field; +} + "#, + ); + } } diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/type_mismatch.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/type_mismatch.rs index 076df1ab0f82..e2957fcaefb4 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/type_mismatch.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/type_mismatch.rs @@ -20,7 +20,7 @@ use crate::{Assist, Diagnostic, DiagnosticCode, DiagnosticsContext, adjusted_dis // // This diagnostic is triggered when the type of an expression or pattern does not match // the expected type. -pub(crate) fn type_mismatch(ctx: &DiagnosticsContext<'_>, d: &hir::TypeMismatch) -> Diagnostic { +pub(crate) fn type_mismatch(ctx: &DiagnosticsContext<'_>, d: &hir::TypeMismatch<'_>) -> Diagnostic { let display_range = adjusted_display_range(ctx, d.expr_or_pat, &|node| { let Either::Left(expr) = node else { return None }; let salient_token_range = match expr { @@ -39,7 +39,7 @@ pub(crate) fn type_mismatch(ctx: &DiagnosticsContext<'_>, d: &hir::TypeMismatch) cov_mark::hit!(type_mismatch_range_adjustment); Some(salient_token_range) }); - let mut diag = Diagnostic::new( + Diagnostic::new( DiagnosticCode::RustcHardError("E0308"), format!( "expected {}, found {}", @@ -52,14 +52,10 @@ pub(crate) fn type_mismatch(ctx: &DiagnosticsContext<'_>, d: &hir::TypeMismatch) ), display_range, ) - .with_fixes(fixes(ctx, d)); - if diag.fixes.is_some() { - diag.experimental = false; - } - diag + .with_fixes(fixes(ctx, d)) } -fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::TypeMismatch) -> Option> { +fn fixes(ctx: &DiagnosticsContext<'_>, d: 
&hir::TypeMismatch<'_>) -> Option> { let mut fixes = Vec::new(); if let Some(expr_ptr) = d.expr_or_pat.value.cast::() { @@ -76,7 +72,7 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::TypeMismatch) -> Option, - d: &hir::TypeMismatch, + d: &hir::TypeMismatch<'_>, expr_ptr: &InFile>, acc: &mut Vec, ) -> Option<()> { @@ -98,7 +94,7 @@ fn add_reference( fn add_missing_ok_or_some( ctx: &DiagnosticsContext<'_>, - d: &hir::TypeMismatch, + d: &hir::TypeMismatch<'_>, expr_ptr: &InFile>, acc: &mut Vec, ) -> Option<()> { @@ -188,7 +184,7 @@ fn add_missing_ok_or_some( fn remove_unnecessary_wrapper( ctx: &DiagnosticsContext<'_>, - d: &hir::TypeMismatch, + d: &hir::TypeMismatch<'_>, expr_ptr: &InFile>, acc: &mut Vec, ) -> Option<()> { @@ -271,7 +267,7 @@ fn remove_unnecessary_wrapper( fn remove_semicolon( ctx: &DiagnosticsContext<'_>, - d: &hir::TypeMismatch, + d: &hir::TypeMismatch<'_>, expr_ptr: &InFile>, acc: &mut Vec, ) -> Option<()> { @@ -301,7 +297,7 @@ fn remove_semicolon( fn str_ref_to_owned( ctx: &DiagnosticsContext<'_>, - d: &hir::TypeMismatch, + d: &hir::TypeMismatch<'_>, expr_ptr: &InFile>, acc: &mut Vec, ) -> Option<()> { diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/typed_hole.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/typed_hole.rs index 1915a88dd002..8d4277026905 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/typed_hole.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/typed_hole.rs @@ -20,7 +20,7 @@ use syntax::AstNode; // Diagnostic: typed-hole // // This diagnostic is triggered when an underscore expression is used in an invalid position. 
-pub(crate) fn typed_hole(ctx: &DiagnosticsContext<'_>, d: &hir::TypedHole) -> Diagnostic { +pub(crate) fn typed_hole(ctx: &DiagnosticsContext<'_>, d: &hir::TypedHole<'_>) -> Diagnostic { let display_range = ctx.sema.diagnostics_display_range(d.expr.map(|it| it.into())); let (message, fixes) = if d.expected.is_unknown() { ("`_` expressions may only appear on the left-hand side of an assignment".to_owned(), None) @@ -41,7 +41,7 @@ pub(crate) fn typed_hole(ctx: &DiagnosticsContext<'_>, d: &hir::TypedHole) -> Di .with_fixes(fixes) } -fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::TypedHole) -> Option> { +fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::TypedHole<'_>) -> Option> { let db = ctx.sema.db; let root = db.parse_or_expand(d.expr.file_id); let (original_range, _) = @@ -61,7 +61,7 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::TypedHole) -> Option }; let paths = term_search(&term_search_ctx); - let mut formatter = |_: &hir::Type| String::from("_"); + let mut formatter = |_: &hir::Type<'_>| String::from("_"); let assists: Vec = d .expected diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_field.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_field.rs index 0649c97f8205..690158989679 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_field.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_field.rs @@ -25,7 +25,7 @@ use crate::{Diagnostic, DiagnosticCode, DiagnosticsContext, adjusted_display_ran // This diagnostic is triggered if a field does not exist on a given type. 
pub(crate) fn unresolved_field( ctx: &DiagnosticsContext<'_>, - d: &hir::UnresolvedField, + d: &hir::UnresolvedField<'_>, ) -> Diagnostic { let method_suffix = if d.method_with_same_name_exists { ", but a method with a similar name exists" @@ -54,7 +54,7 @@ pub(crate) fn unresolved_field( .with_fixes(fixes(ctx, d)) } -fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedField) -> Option> { +fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedField<'_>) -> Option> { let mut fixes = Vec::new(); if d.method_with_same_name_exists { fixes.extend(method_fix(ctx, &d.expr)); @@ -64,7 +64,7 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedField) -> Option, d: &hir::UnresolvedField) -> Option { +fn field_fix(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedField<'_>) -> Option { // Get the FileRange of the invalid field access let root = ctx.sema.db.parse_or_expand(d.expr.file_id); let expr = d.expr.value.to_node(&root).left()?; diff --git a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_method.rs b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_method.rs index 00c2a8c4c468..1f2d671249de 100644 --- a/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_method.rs +++ b/src/tools/rust-analyzer/crates/ide-diagnostics/src/handlers/unresolved_method.rs @@ -18,7 +18,7 @@ use crate::{Diagnostic, DiagnosticCode, DiagnosticsContext, adjusted_display_ran // This diagnostic is triggered if a method does not exist on a given type. 
pub(crate) fn unresolved_method( ctx: &DiagnosticsContext<'_>, - d: &hir::UnresolvedMethodCall, + d: &hir::UnresolvedMethodCall<'_>, ) -> Diagnostic { let suffix = if d.field_with_same_name.is_some() { ", but a field with a similar name exists" @@ -49,7 +49,7 @@ pub(crate) fn unresolved_method( .with_fixes(fixes(ctx, d)) } -fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedMethodCall) -> Option> { +fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedMethodCall<'_>) -> Option> { let field_fix = if let Some(ty) = &d.field_with_same_name { field_fix(ctx, d, ty) } else { @@ -72,8 +72,8 @@ fn fixes(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedMethodCall) -> Option< fn field_fix( ctx: &DiagnosticsContext<'_>, - d: &hir::UnresolvedMethodCall, - ty: &hir::Type, + d: &hir::UnresolvedMethodCall<'_>, + ty: &hir::Type<'_>, ) -> Option { if !ty.impls_fnonce(ctx.sema.db) { return None; @@ -107,7 +107,10 @@ fn field_fix( }) } -fn assoc_func_fix(ctx: &DiagnosticsContext<'_>, d: &hir::UnresolvedMethodCall) -> Option { +fn assoc_func_fix( + ctx: &DiagnosticsContext<'_>, + d: &hir::UnresolvedMethodCall<'_>, +) -> Option { if let Some(f) = d.assoc_func_with_same_name { let db = ctx.sema.db; diff --git a/src/tools/rust-analyzer/crates/ide-ssr/src/lib.rs b/src/tools/rust-analyzer/crates/ide-ssr/src/lib.rs index 43c56ac8bec5..e4b20f3f1aad 100644 --- a/src/tools/rust-analyzer/crates/ide-ssr/src/lib.rs +++ b/src/tools/rust-analyzer/crates/ide-ssr/src/lib.rs @@ -110,7 +110,7 @@ pub struct SsrMatches { pub struct MatchFinder<'db> { /// Our source of information about the user's code. 
sema: Semantics<'db, ide_db::RootDatabase>, - rules: Vec, + rules: Vec>, resolution_scope: resolving::ResolutionScope<'db>, restrict_ranges: Vec, } diff --git a/src/tools/rust-analyzer/crates/ide-ssr/src/matching.rs b/src/tools/rust-analyzer/crates/ide-ssr/src/matching.rs index cff4eede0426..b350315ba548 100644 --- a/src/tools/rust-analyzer/crates/ide-ssr/src/matching.rs +++ b/src/tools/rust-analyzer/crates/ide-ssr/src/matching.rs @@ -84,12 +84,12 @@ pub(crate) struct MatchFailed { /// Checks if `code` matches the search pattern found in `search_scope`, returning information about /// the match, if it does. Since we only do matching in this module and searching is done by the /// parent module, we don't populate nested matches. -pub(crate) fn get_match( +pub(crate) fn get_match<'db>( debug_active: bool, - rule: &ResolvedRule, + rule: &ResolvedRule<'db>, code: &SyntaxNode, restrict_range: &Option, - sema: &Semantics<'_, ide_db::RootDatabase>, + sema: &Semantics<'db, ide_db::RootDatabase>, ) -> Result { record_match_fails_reasons_scope(debug_active, || { Matcher::try_match(rule, code, restrict_range, sema) @@ -102,7 +102,7 @@ struct Matcher<'db, 'sema> { /// If any placeholders come from anywhere outside of this range, then the match will be /// rejected. restrict_range: Option, - rule: &'sema ResolvedRule, + rule: &'sema ResolvedRule<'db>, } /// Which phase of matching we're currently performing. 
We do two phases because most attempted @@ -117,7 +117,7 @@ enum Phase<'a> { impl<'db, 'sema> Matcher<'db, 'sema> { fn try_match( - rule: &ResolvedRule, + rule: &ResolvedRule<'db>, code: &SyntaxNode, restrict_range: &Option, sema: &'sema Semantics<'db, ide_db::RootDatabase>, @@ -535,7 +535,7 @@ impl<'db, 'sema> Matcher<'db, 'sema> { fn attempt_match_ufcs_to_method_call( &self, phase: &mut Phase<'_>, - pattern_ufcs: &UfcsCallInfo, + pattern_ufcs: &UfcsCallInfo<'db>, code: &ast::MethodCallExpr, ) -> Result<(), MatchFailed> { use ast::HasArgList; @@ -597,7 +597,7 @@ impl<'db, 'sema> Matcher<'db, 'sema> { fn attempt_match_ufcs_to_ufcs( &self, phase: &mut Phase<'_>, - pattern_ufcs: &UfcsCallInfo, + pattern_ufcs: &UfcsCallInfo<'db>, code: &ast::CallExpr, ) -> Result<(), MatchFailed> { use ast::HasArgList; @@ -615,7 +615,7 @@ impl<'db, 'sema> Matcher<'db, 'sema> { /// times. Returns the number of times it needed to be dereferenced. fn check_expr_type( &self, - pattern_type: &hir::Type, + pattern_type: &hir::Type<'db>, expr: &ast::Expr, ) -> Result { use hir::HirDisplay; @@ -656,10 +656,10 @@ impl<'db, 'sema> Matcher<'db, 'sema> { } impl Match { - fn render_template_paths( + fn render_template_paths<'db>( &mut self, - template: &ResolvedPattern, - sema: &Semantics<'_, ide_db::RootDatabase>, + template: &ResolvedPattern<'db>, + sema: &Semantics<'db, ide_db::RootDatabase>, ) -> Result<(), MatchFailed> { let module = sema .scope(&self.matched_node) diff --git a/src/tools/rust-analyzer/crates/ide-ssr/src/replacing.rs b/src/tools/rust-analyzer/crates/ide-ssr/src/replacing.rs index 3c92697926f3..752edd6535a6 100644 --- a/src/tools/rust-analyzer/crates/ide-ssr/src/replacing.rs +++ b/src/tools/rust-analyzer/crates/ide-ssr/src/replacing.rs @@ -14,21 +14,21 @@ use crate::{Match, SsrMatches, fragments, resolving::ResolvedRule}; /// Returns a text edit that will replace each match in `matches` with its corresponding replacement /// template. 
Placeholders in the template will have been substituted with whatever they matched to /// in the original code. -pub(crate) fn matches_to_edit( - db: &dyn hir::db::ExpandDatabase, +pub(crate) fn matches_to_edit<'db>( + db: &'db dyn hir::db::ExpandDatabase, matches: &SsrMatches, file_src: &str, - rules: &[ResolvedRule], + rules: &[ResolvedRule<'db>], ) -> TextEdit { matches_to_edit_at_offset(db, matches, file_src, 0.into(), rules) } -fn matches_to_edit_at_offset( - db: &dyn hir::db::ExpandDatabase, +fn matches_to_edit_at_offset<'db>( + db: &'db dyn hir::db::ExpandDatabase, matches: &SsrMatches, file_src: &str, relative_start: TextSize, - rules: &[ResolvedRule], + rules: &[ResolvedRule<'db>], ) -> TextEdit { let mut edit_builder = TextEdit::builder(); for m in &matches.matches { @@ -40,12 +40,12 @@ fn matches_to_edit_at_offset( edit_builder.finish() } -struct ReplacementRenderer<'a> { - db: &'a dyn hir::db::ExpandDatabase, +struct ReplacementRenderer<'a, 'db> { + db: &'db dyn hir::db::ExpandDatabase, match_info: &'a Match, file_src: &'a str, - rules: &'a [ResolvedRule], - rule: &'a ResolvedRule, + rules: &'a [ResolvedRule<'db>], + rule: &'a ResolvedRule<'db>, out: String, // Map from a range within `out` to a token in `template` that represents a placeholder. 
This is // used to validate that the generated source code doesn't split any placeholder expansions (see @@ -58,11 +58,11 @@ struct ReplacementRenderer<'a> { edition: Edition, } -fn render_replace( - db: &dyn hir::db::ExpandDatabase, +fn render_replace<'db>( + db: &'db dyn hir::db::ExpandDatabase, match_info: &Match, file_src: &str, - rules: &[ResolvedRule], + rules: &[ResolvedRule<'db>], edition: Edition, ) -> String { let rule = &rules[match_info.rule_index]; @@ -89,7 +89,7 @@ fn render_replace( renderer.out } -impl ReplacementRenderer<'_> { +impl<'db> ReplacementRenderer<'_, 'db> { fn render_node_children(&mut self, node: &SyntaxNode) { for node_or_token in node.children_with_tokens() { self.render_node_or_token(&node_or_token); diff --git a/src/tools/rust-analyzer/crates/ide-ssr/src/resolving.rs b/src/tools/rust-analyzer/crates/ide-ssr/src/resolving.rs index a687db4bf58d..8f28a1cd3a62 100644 --- a/src/tools/rust-analyzer/crates/ide-ssr/src/resolving.rs +++ b/src/tools/rust-analyzer/crates/ide-ssr/src/resolving.rs @@ -15,18 +15,18 @@ pub(crate) struct ResolutionScope<'db> { node: SyntaxNode, } -pub(crate) struct ResolvedRule { - pub(crate) pattern: ResolvedPattern, - pub(crate) template: Option, +pub(crate) struct ResolvedRule<'db> { + pub(crate) pattern: ResolvedPattern<'db>, + pub(crate) template: Option>, pub(crate) index: usize, } -pub(crate) struct ResolvedPattern { +pub(crate) struct ResolvedPattern<'db> { pub(crate) placeholders_by_stand_in: FxHashMap, pub(crate) node: SyntaxNode, // Paths in `node` that we've resolved. 
pub(crate) resolved_paths: FxHashMap, - pub(crate) ufcs_function_calls: FxHashMap, + pub(crate) ufcs_function_calls: FxHashMap>, pub(crate) contains_self: bool, } @@ -36,18 +36,18 @@ pub(crate) struct ResolvedPath { pub(crate) depth: u32, } -pub(crate) struct UfcsCallInfo { +pub(crate) struct UfcsCallInfo<'db> { pub(crate) call_expr: ast::CallExpr, pub(crate) function: hir::Function, - pub(crate) qualifier_type: Option, + pub(crate) qualifier_type: Option>, } -impl ResolvedRule { +impl<'db> ResolvedRule<'db> { pub(crate) fn new( rule: parsing::ParsedRule, - resolution_scope: &ResolutionScope<'_>, + resolution_scope: &ResolutionScope<'db>, index: usize, - ) -> Result { + ) -> Result, SsrError> { let resolver = Resolver { resolution_scope, placeholders_by_stand_in: rule.placeholders_by_stand_in }; let resolved_template = match rule.template { @@ -74,8 +74,8 @@ struct Resolver<'a, 'db> { placeholders_by_stand_in: FxHashMap, } -impl Resolver<'_, '_> { - fn resolve_pattern_tree(&self, pattern: SyntaxNode) -> Result { +impl<'db> Resolver<'_, 'db> { + fn resolve_pattern_tree(&self, pattern: SyntaxNode) -> Result, SsrError> { use syntax::ast::AstNode; use syntax::{SyntaxElement, T}; let mut resolved_paths = FxHashMap::default(); @@ -250,7 +250,7 @@ impl<'db> ResolutionScope<'db> { } } - fn qualifier_type(&self, path: &SyntaxNode) -> Option { + fn qualifier_type(&self, path: &SyntaxNode) -> Option> { use syntax::ast::AstNode; if let Some(path) = ast::Path::cast(path.clone()) { if let Some(qualifier) = path.qualifier() { diff --git a/src/tools/rust-analyzer/crates/ide-ssr/src/search.rs b/src/tools/rust-analyzer/crates/ide-ssr/src/search.rs index 9afbedbb1ab4..99a98fb2a713 100644 --- a/src/tools/rust-analyzer/crates/ide-ssr/src/search.rs +++ b/src/tools/rust-analyzer/crates/ide-ssr/src/search.rs @@ -21,13 +21,13 @@ pub(crate) struct UsageCache { usages: Vec<(Definition, UsageSearchResult)>, } -impl MatchFinder<'_> { +impl<'db> MatchFinder<'db> { /// Adds all matches for 
`rule` to `matches_out`. Matches may overlap in ways that make /// replacement impossible, so further processing is required in order to properly nest matches /// and remove overlapping matches. This is done in the `nesting` module. pub(crate) fn find_matches_for_rule( &self, - rule: &ResolvedRule, + rule: &ResolvedRule<'db>, usage_cache: &mut UsageCache, matches_out: &mut Vec, ) { @@ -49,8 +49,8 @@ impl MatchFinder<'_> { fn find_matches_for_pattern_tree( &self, - rule: &ResolvedRule, - pattern: &ResolvedPattern, + rule: &ResolvedRule<'db>, + pattern: &ResolvedPattern<'db>, usage_cache: &mut UsageCache, matches_out: &mut Vec, ) { @@ -144,7 +144,7 @@ impl MatchFinder<'_> { SearchScope::files(&files) } - fn slow_scan(&self, rule: &ResolvedRule, matches_out: &mut Vec) { + fn slow_scan(&self, rule: &ResolvedRule<'db>, matches_out: &mut Vec) { self.search_files_do(|file_id| { let file = self.sema.parse_guess_edition(file_id); let code = file.syntax(); @@ -177,7 +177,7 @@ impl MatchFinder<'_> { fn slow_scan_node( &self, code: &SyntaxNode, - rule: &ResolvedRule, + rule: &ResolvedRule<'db>, restrict_range: &Option, matches_out: &mut Vec, ) { @@ -206,7 +206,7 @@ impl MatchFinder<'_> { fn try_add_match( &self, - rule: &ResolvedRule, + rule: &ResolvedRule<'db>, code: &SyntaxNode, restrict_range: &Option, matches_out: &mut Vec, @@ -274,7 +274,7 @@ impl UsageCache { /// Returns a path that's suitable for path resolution. We exclude builtin types, since they aren't /// something that we can find references to. We then somewhat arbitrarily pick the path that is the /// longest as this is hopefully more likely to be less common, making it faster to find. -fn pick_path_for_usages(pattern: &ResolvedPattern) -> Option<&ResolvedPath> { +fn pick_path_for_usages<'a>(pattern: &'a ResolvedPattern<'_>) -> Option<&'a ResolvedPath> { // FIXME: Take the scope of the resolved path into account. e.g. 
if there are any paths that are // private to the current module, then we definitely would want to pick them over say a path // from std. Possibly we should go further than this and intersect the search scopes for all diff --git a/src/tools/rust-analyzer/crates/ide/src/goto_implementation.rs b/src/tools/rust-analyzer/crates/ide/src/goto_implementation.rs index 1bc28f28b6f5..02d96a647328 100644 --- a/src/tools/rust-analyzer/crates/ide/src/goto_implementation.rs +++ b/src/tools/rust-analyzer/crates/ide/src/goto_implementation.rs @@ -83,7 +83,7 @@ pub(crate) fn goto_implementation( Some(RangeInfo { range, info: navs }) } -fn impls_for_ty(sema: &Semantics<'_, RootDatabase>, ty: hir::Type) -> Vec { +fn impls_for_ty(sema: &Semantics<'_, RootDatabase>, ty: hir::Type<'_>) -> Vec { Impl::all_for_type(sema.db, ty) .into_iter() .filter_map(|imp| imp.try_to_nav(sema.db)) diff --git a/src/tools/rust-analyzer/crates/ide/src/goto_type_definition.rs b/src/tools/rust-analyzer/crates/ide/src/goto_type_definition.rs index 9781e7116dec..86d72fefe05e 100644 --- a/src/tools/rust-analyzer/crates/ide/src/goto_type_definition.rs +++ b/src/tools/rust-analyzer/crates/ide/src/goto_type_definition.rs @@ -38,7 +38,7 @@ pub(crate) fn goto_type_definition( } } }; - let mut process_ty = |ty: hir::Type| { + let mut process_ty = |ty: hir::Type<'_>| { // collect from each `ty` into the `res` result vec let ty = ty.strip_references(); ty.walk(db, |t| { diff --git a/src/tools/rust-analyzer/crates/ide/src/hover.rs b/src/tools/rust-analyzer/crates/ide/src/hover.rs index 5404a9dc2cec..e4d6279759ed 100644 --- a/src/tools/rust-analyzer/crates/ide/src/hover.rs +++ b/src/tools/rust-analyzer/crates/ide/src/hover.rs @@ -426,7 +426,7 @@ pub(crate) fn hover_for_definition( sema: &Semantics<'_, RootDatabase>, file_id: FileId, def: Definition, - subst: Option, + subst: Option>, scope_node: &SyntaxNode, macro_arm: Option, render_extras: bool, @@ -483,10 +483,10 @@ pub(crate) fn hover_for_definition( } } -fn 
notable_traits( - db: &RootDatabase, - ty: &hir::Type, -) -> Vec<(hir::Trait, Vec<(Option, hir::Name)>)> { +fn notable_traits<'db>( + db: &'db RootDatabase, + ty: &hir::Type<'db>, +) -> Vec<(hir::Trait, Vec<(Option>, hir::Name)>)> { db.notable_traits_in_deps(ty.krate(db).into()) .iter() .flat_map(|it| &**it) @@ -567,8 +567,8 @@ fn runnable_action( fn goto_type_action_for_def( db: &RootDatabase, def: Definition, - notable_traits: &[(hir::Trait, Vec<(Option, hir::Name)>)], - subst_types: Option>, + notable_traits: &[(hir::Trait, Vec<(Option>, hir::Name)>)], + subst_types: Option)>>, edition: Edition, ) -> Option { let mut targets: Vec = Vec::new(); @@ -622,7 +622,7 @@ fn goto_type_action_for_def( fn walk_and_push_ty( db: &RootDatabase, - ty: &hir::Type, + ty: &hir::Type<'_>, push_new_def: &mut dyn FnMut(hir::ModuleDef), ) { ty.walk(db, |t| { diff --git a/src/tools/rust-analyzer/crates/ide/src/hover/render.rs b/src/tools/rust-analyzer/crates/ide/src/hover/render.rs index c24864a18bdf..670210d4998d 100644 --- a/src/tools/rust-analyzer/crates/ide/src/hover/render.rs +++ b/src/tools/rust-analyzer/crates/ide/src/hover/render.rs @@ -476,10 +476,10 @@ pub(super) fn definition( db: &RootDatabase, def: Definition, famous_defs: Option<&FamousDefs<'_, '_>>, - notable_traits: &[(Trait, Vec<(Option, Name)>)], + notable_traits: &[(Trait, Vec<(Option>, Name)>)], macro_arm: Option, render_extras: bool, - subst_types: Option<&Vec<(Symbol, Type)>>, + subst_types: Option<&Vec<(Symbol, Type<'_>)>>, config: &HoverConfig, edition: Edition, display_target: DisplayTarget, @@ -938,7 +938,7 @@ pub(super) fn literal( fn render_notable_trait( db: &RootDatabase, - notable_traits: &[(Trait, Vec<(Option, Name)>)], + notable_traits: &[(Trait, Vec<(Option>, Name)>)], edition: Edition, display_target: DisplayTarget, ) -> Option { @@ -979,7 +979,7 @@ fn render_notable_trait( fn type_info( sema: &Semantics<'_, RootDatabase>, config: &HoverConfig, - ty: TypeInfo, + ty: TypeInfo<'_>, edition: Edition, 
display_target: DisplayTarget, ) -> Option { @@ -1038,7 +1038,7 @@ fn type_info( fn closure_ty( sema: &Semantics<'_, RootDatabase>, config: &HoverConfig, - TypeInfo { original, adjusted }: &TypeInfo, + TypeInfo { original, adjusted }: &TypeInfo<'_>, edition: Edition, display_target: DisplayTarget, ) -> Option { diff --git a/src/tools/rust-analyzer/crates/ide/src/inlay_hints.rs b/src/tools/rust-analyzer/crates/ide/src/inlay_hints.rs index b094b098462f..19e5509681aa 100644 --- a/src/tools/rust-analyzer/crates/ide/src/inlay_hints.rs +++ b/src/tools/rust-analyzer/crates/ide/src/inlay_hints.rs @@ -722,14 +722,14 @@ impl InlayHintLabelBuilder<'_> { fn label_of_ty( famous_defs @ FamousDefs(sema, _): &FamousDefs<'_, '_>, config: &InlayHintsConfig, - ty: &hir::Type, + ty: &hir::Type<'_>, display_target: DisplayTarget, ) -> Option { fn rec( sema: &Semantics<'_, RootDatabase>, famous_defs: &FamousDefs<'_, '_>, mut max_length: Option, - ty: &hir::Type, + ty: &hir::Type<'_>, label_builder: &mut InlayHintLabelBuilder<'_>, config: &InlayHintsConfig, display_target: DisplayTarget, @@ -788,11 +788,11 @@ fn label_of_ty( } /// Checks if the type is an Iterator from std::iter and returns the iterator trait and the item type of the concrete iterator. 
-fn hint_iterator( - sema: &Semantics<'_, RootDatabase>, - famous_defs: &FamousDefs<'_, '_>, - ty: &hir::Type, -) -> Option<(hir::Trait, hir::TypeAlias, hir::Type)> { +fn hint_iterator<'db>( + sema: &Semantics<'db, RootDatabase>, + famous_defs: &FamousDefs<'_, 'db>, + ty: &hir::Type<'db>, +) -> Option<(hir::Trait, hir::TypeAlias, hir::Type<'db>)> { let db = sema.db; let strukt = ty.strip_references().as_adt()?; let krate = strukt.module(db).krate(); @@ -826,7 +826,7 @@ fn ty_to_text_edit( sema: &Semantics<'_, RootDatabase>, config: &InlayHintsConfig, node_for_hint: &SyntaxNode, - ty: &hir::Type, + ty: &hir::Type<'_>, offset_to_insert_ty: TextSize, additional_edits: &dyn Fn(&mut TextEditBuilder), prefix: impl Into, diff --git a/src/tools/rust-analyzer/crates/ide/src/inlay_hints/param_name.rs b/src/tools/rust-analyzer/crates/ide/src/inlay_hints/param_name.rs index 5ff9fee60abf..5174228466c0 100644 --- a/src/tools/rust-analyzer/crates/ide/src/inlay_hints/param_name.rs +++ b/src/tools/rust-analyzer/crates/ide/src/inlay_hints/param_name.rs @@ -87,10 +87,10 @@ pub(super) fn hints( Some(()) } -fn get_callable( - sema: &Semantics<'_, RootDatabase>, +fn get_callable<'db>( + sema: &Semantics<'db, RootDatabase>, expr: &ast::Expr, -) -> Option<(hir::Callable, ast::ArgList)> { +) -> Option<(hir::Callable<'db>, ast::ArgList)> { match expr { ast::Expr::CallExpr(expr) => { let descended = sema.descend_node_into_attributes(expr.clone()).pop(); diff --git a/src/tools/rust-analyzer/crates/ide/src/lib.rs b/src/tools/rust-analyzer/crates/ide/src/lib.rs index 82dbcde4c06f..b3b8deb61fc0 100644 --- a/src/tools/rust-analyzer/crates/ide/src/lib.rs +++ b/src/tools/rust-analyzer/crates/ide/src/lib.rs @@ -409,7 +409,7 @@ impl Analysis { self.with_db(|db| typing::on_enter(db, position)) } - pub const SUPPORTED_TRIGGER_CHARS: &'static str = typing::TRIGGER_CHARS; + pub const SUPPORTED_TRIGGER_CHARS: &[char] = typing::TRIGGER_CHARS; /// Returns an edit which should be applied after a character 
was typed. /// @@ -421,7 +421,7 @@ impl Analysis { char_typed: char, ) -> Cancellable> { // Fast path to not even parse the file. - if !typing::TRIGGER_CHARS.contains(char_typed) { + if !typing::TRIGGER_CHARS.contains(&char_typed) { return Ok(None); } diff --git a/src/tools/rust-analyzer/crates/ide/src/signature_help.rs b/src/tools/rust-analyzer/crates/ide/src/signature_help.rs index 0e17b3559074..e30a3ebefb98 100644 --- a/src/tools/rust-analyzer/crates/ide/src/signature_help.rs +++ b/src/tools/rust-analyzer/crates/ide/src/signature_help.rs @@ -278,7 +278,7 @@ fn signature_help_for_call( } res.signature.push(')'); - let mut render = |ret_type: hir::Type| { + let mut render = |ret_type: hir::Type<'_>| { if !ret_type.is_unit() { format_to!(res.signature, " -> {}", ret_type.display(db, display_target)); } @@ -597,11 +597,11 @@ fn signature_help_for_tuple_expr( Some(res) } -fn signature_help_for_record_( - sema: &Semantics<'_, RootDatabase>, +fn signature_help_for_record_<'db>( + sema: &Semantics<'db, RootDatabase>, field_list_children: SyntaxElementChildren, path: &ast::Path, - fields2: impl Iterator, + fields2: impl Iterator)>, token: SyntaxToken, edition: Edition, display_target: DisplayTarget, @@ -689,13 +689,13 @@ fn signature_help_for_record_( Some(res) } -fn signature_help_for_tuple_pat_ish( - db: &RootDatabase, +fn signature_help_for_tuple_pat_ish<'db>( + db: &'db RootDatabase, mut res: SignatureHelp, pat: &SyntaxNode, token: SyntaxToken, mut field_pats: AstChildren, - fields: impl ExactSizeIterator, + fields: impl ExactSizeIterator>, display_target: DisplayTarget, ) -> SignatureHelp { let rest_pat = field_pats.find(|it| matches!(it, ast::Pat::RestPat(_))); diff --git a/src/tools/rust-analyzer/crates/ide/src/typing.rs b/src/tools/rust-analyzer/crates/ide/src/typing.rs index 4df7e25223d9..ed55ac5bf04b 100644 --- a/src/tools/rust-analyzer/crates/ide/src/typing.rs +++ b/src/tools/rust-analyzer/crates/ide/src/typing.rs @@ -15,6 +15,7 @@ mod on_enter; +use 
either::Either; use hir::EditionedFileId; use ide_db::{FilePosition, RootDatabase, base_db::RootQueryDb}; use span::Edition; @@ -33,7 +34,7 @@ use crate::SourceChange; pub(crate) use on_enter::on_enter; // Don't forget to add new trigger characters to `server_capabilities` in `caps.rs`. -pub(crate) const TRIGGER_CHARS: &str = ".=<>{(|"; +pub(crate) const TRIGGER_CHARS: &[char] = &['.', '=', '<', '>', '{', '(', '|', '+']; struct ExtendedTextEdit { edit: TextEdit, @@ -66,7 +67,7 @@ pub(crate) fn on_char_typed( position: FilePosition, char_typed: char, ) -> Option { - if !stdx::always!(TRIGGER_CHARS.contains(char_typed)) { + if !TRIGGER_CHARS.contains(&char_typed) { return None; } // FIXME: We need to figure out the edition of the file here, but that means hitting the @@ -101,6 +102,7 @@ fn on_char_typed_( '>' => on_right_angle_typed(&file.tree(), offset), '{' | '(' | '<' => on_opening_delimiter_typed(file, offset, char_typed, edition), '|' => on_pipe_typed(&file.tree(), offset), + '+' => on_plus_typed(&file.tree(), offset), _ => None, } .map(conv) @@ -402,6 +404,28 @@ fn on_pipe_typed(file: &SourceFile, offset: TextSize) -> Option { Some(TextEdit::insert(after_lpipe, "|".to_owned())) } +fn on_plus_typed(file: &SourceFile, offset: TextSize) -> Option { + let plus_token = file.syntax().token_at_offset(offset).right_biased()?; + if plus_token.kind() != SyntaxKind::PLUS { + return None; + } + let mut ancestors = plus_token.parent_ancestors(); + ancestors.next().and_then(ast::TypeBoundList::cast)?; + let trait_type = + ancestors.next().and_then(>::cast)?; + let kind = ancestors.next()?.kind(); + + if ast::RefType::can_cast(kind) || ast::PtrType::can_cast(kind) || ast::RetType::can_cast(kind) + { + let mut builder = TextEdit::builder(); + builder.insert(trait_type.syntax().text_range().start(), "(".to_owned()); + builder.insert(trait_type.syntax().text_range().end(), ")".to_owned()); + Some(builder.finish()) + } else { + None + } +} + /// Adds a space after an arrow when 
`fn foo() { ... }` is turned into `fn foo() -> { ... }` fn on_right_angle_typed(file: &SourceFile, offset: TextSize) -> Option { let file_text = file.syntax().text(); @@ -1594,6 +1618,66 @@ fn foo() { fn foo() { let $0 } +"#, + ); + } + + #[test] + fn adds_parentheses_around_trait_object_in_ref_type() { + type_char( + '+', + r#" +fn foo(x: &dyn A$0) {} +"#, + r#" +fn foo(x: &(dyn A+)) {} +"#, + ); + type_char( + '+', + r#" +fn foo(x: &'static dyn A$0B) {} +"#, + r#" +fn foo(x: &'static (dyn A+B)) {} +"#, + ); + type_char_noop( + '+', + r#" +fn foo(x: &(dyn A$0)) {} +"#, + ); + type_char_noop( + '+', + r#" +fn foo(x: Box) {} +"#, + ); + } + + #[test] + fn adds_parentheses_around_trait_object_in_ptr_type() { + type_char( + '+', + r#" +fn foo(x: *const dyn A$0) {} +"#, + r#" +fn foo(x: *const (dyn A+)) {} +"#, + ); + } + + #[test] + fn adds_parentheses_around_trait_object_in_return_type() { + type_char( + '+', + r#" +fn foo(x: fn() -> dyn A$0) {} +"#, + r#" +fn foo(x: fn() -> (dyn A+)) {} "#, ); } diff --git a/src/tools/rust-analyzer/crates/ide/src/view_memory_layout.rs b/src/tools/rust-analyzer/crates/ide/src/view_memory_layout.rs index 140ae4265be7..63701a4d15e9 100644 --- a/src/tools/rust-analyzer/crates/ide/src/view_memory_layout.rs +++ b/src/tools/rust-analyzer/crates/ide/src/view_memory_layout.rs @@ -107,7 +107,7 @@ pub(crate) fn view_memory_layout( fn read_layout( nodes: &mut Vec, db: &RootDatabase, - ty: &Type, + ty: &Type<'_>, layout: &Layout, parent_idx: usize, display_target: DisplayTarget, diff --git a/src/tools/rust-analyzer/crates/intern/src/symbol/symbols.rs b/src/tools/rust-analyzer/crates/intern/src/symbol/symbols.rs index d5cbb7328c14..adc581309d15 100644 --- a/src/tools/rust-analyzer/crates/intern/src/symbol/symbols.rs +++ b/src/tools/rust-analyzer/crates/intern/src/symbol/symbols.rs @@ -496,6 +496,7 @@ define_symbols! 
{ vectorcall, wasm, win64, + args, array, boxed_slice, completions, diff --git a/src/tools/rust-analyzer/crates/load-cargo/src/lib.rs b/src/tools/rust-analyzer/crates/load-cargo/src/lib.rs index 89b8631cd25f..52f59679b587 100644 --- a/src/tools/rust-analyzer/crates/load-cargo/src/lib.rs +++ b/src/tools/rust-analyzer/crates/load-cargo/src/lib.rs @@ -42,7 +42,7 @@ pub fn load_workspace_at( root: &Path, cargo_config: &CargoConfig, load_config: &LoadCargoConfig, - progress: &dyn Fn(String), + progress: &(dyn Fn(String) + Sync), ) -> anyhow::Result<(RootDatabase, vfs::Vfs, Option)> { let root = AbsPathBuf::assert_utf8(std::env::current_dir()?.join(root)); let root = ProjectManifest::discover_single(&root)?; diff --git a/src/tools/rust-analyzer/crates/project-model/src/build_dependencies.rs b/src/tools/rust-analyzer/crates/project-model/src/build_dependencies.rs index e0c38ccf3331..4435376eab62 100644 --- a/src/tools/rust-analyzer/crates/project-model/src/build_dependencies.rs +++ b/src/tools/rust-analyzer/crates/project-model/src/build_dependencies.rs @@ -20,7 +20,9 @@ use toolchain::Tool; use crate::{ CargoConfig, CargoFeatures, CargoWorkspace, InvocationStrategy, ManifestPath, Package, Sysroot, - TargetKind, utf8_stdout, + TargetKind, + toolchain_info::{QueryConfig, version}, + utf8_stdout, }; /// Output of the build script and proc-macro building steps for a workspace. @@ -446,10 +448,30 @@ impl WorkspaceBuildScripts { } }; - if config.wrap_rustc_in_build_scripts { + // If [`--compile-time-deps` flag](https://github.com/rust-lang/cargo/issues/14434) is + // available in current toolchain's cargo, use it to build compile time deps only. 
+ const COMP_TIME_DEPS_MIN_TOOLCHAIN_VERSION: semver::Version = semver::Version { + major: 1, + minor: 90, + patch: 0, + pre: semver::Prerelease::EMPTY, + build: semver::BuildMetadata::EMPTY, + }; + + let query_config = QueryConfig::Cargo(sysroot, manifest_path); + let toolchain = version::get(query_config, &config.extra_env).ok().flatten(); + let cargo_comp_time_deps_available = + toolchain.is_some_and(|v| v >= COMP_TIME_DEPS_MIN_TOOLCHAIN_VERSION); + + if cargo_comp_time_deps_available { + cmd.env("__CARGO_TEST_CHANNEL_OVERRIDE_DO_NOT_USE_THIS", "nightly"); + cmd.arg("-Zunstable-options"); + cmd.arg("--compile-time-deps"); + } else if config.wrap_rustc_in_build_scripts { // Setup RUSTC_WRAPPER to point to `rust-analyzer` binary itself. We use // that to compile only proc macros and build scripts during the initial // `cargo check`. + // We don't need this if we are using `--compile-time-deps` flag. let myself = std::env::current_exe()?; cmd.env("RUSTC_WRAPPER", myself); cmd.env("RA_RUSTC_WRAPPER", "1"); diff --git a/src/tools/rust-analyzer/crates/project-model/src/cargo_workspace.rs b/src/tools/rust-analyzer/crates/project-model/src/cargo_workspace.rs index 1fade7b33233..58507418e4d3 100644 --- a/src/tools/rust-analyzer/crates/project-model/src/cargo_workspace.rs +++ b/src/tools/rust-analyzer/crates/project-model/src/cargo_workspace.rs @@ -7,16 +7,25 @@ use anyhow::Context; use base_db::Env; use cargo_metadata::{CargoOpt, MetadataCommand}; use la_arena::{Arena, Idx}; -use paths::{AbsPath, AbsPathBuf, Utf8PathBuf}; +use paths::{AbsPath, AbsPathBuf, Utf8Path, Utf8PathBuf}; use rustc_hash::{FxHashMap, FxHashSet}; use serde_derive::Deserialize; use serde_json::from_value; use span::Edition; +use stdx::process::spawn_with_streaming_output; use toolchain::Tool; use crate::{CfgOverrides, InvocationStrategy}; use crate::{ManifestPath, Sysroot}; +const MINIMUM_TOOLCHAIN_VERSION_SUPPORTING_LOCKFILE_PATH: semver::Version = semver::Version { + major: 1, + minor: 82, + patch: 
0, + pre: semver::Prerelease::EMPTY, + build: semver::BuildMetadata::EMPTY, +}; + /// [`CargoWorkspace`] represents the logical structure of, well, a Cargo /// workspace. It pretty closely mirrors `cargo metadata` output. /// @@ -290,6 +299,13 @@ pub struct CargoMetadataConfig { pub extra_args: Vec, /// Extra env vars to set when invoking the cargo command pub extra_env: FxHashMap>, + /// The target dir for this workspace load. + pub target_dir: Utf8PathBuf, + /// What kind of metadata are we fetching: workspace, rustc, or sysroot. + pub kind: &'static str, + /// The toolchain version, if known. + /// Used to conditionally enable unstable cargo features. + pub toolchain_version: Option, } // Deserialize helper for the cargo metadata @@ -382,28 +398,74 @@ impl CargoWorkspace { config.targets.iter().flat_map(|it| ["--filter-platform".to_owned(), it.clone()]), ); } - // The manifest is a rust file, so this means its a script manifest - if cargo_toml.is_rust_manifest() { - // Deliberately don't set up RUSTC_BOOTSTRAP or a nightly override here, the user should - // opt into it themselves. 
- other_options.push("-Zscript".to_owned()); - } - if locked { - other_options.push("--locked".to_owned()); - } if no_deps { other_options.push("--no-deps".to_owned()); } + + let mut using_lockfile_copy = false; + // The manifest is a rust file, so this means its a script manifest + if cargo_toml.is_rust_manifest() { + other_options.push("-Zscript".to_owned()); + } else if config + .toolchain_version + .as_ref() + .is_some_and(|v| *v >= MINIMUM_TOOLCHAIN_VERSION_SUPPORTING_LOCKFILE_PATH) + { + let lockfile = <_ as AsRef>::as_ref(cargo_toml).with_extension("lock"); + let target_lockfile = config + .target_dir + .join("rust-analyzer") + .join("metadata") + .join(config.kind) + .join("Cargo.lock"); + match std::fs::copy(&lockfile, &target_lockfile) { + Ok(_) => { + using_lockfile_copy = true; + other_options.push("--lockfile-path".to_owned()); + other_options.push(target_lockfile.to_string()); + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // There exists no lockfile yet + using_lockfile_copy = true; + other_options.push("--lockfile-path".to_owned()); + other_options.push(target_lockfile.to_string()); + } + Err(e) => { + tracing::warn!( + "Failed to copy lock file from `{lockfile}` to `{target_lockfile}`: {e}", + ); + } + } + } + if using_lockfile_copy { + other_options.push("-Zunstable-options".to_owned()); + meta.env("RUSTC_BOOTSTRAP", "1"); + } + // No need to lock it if we copied the lockfile, we won't modify the original after all/ + // This way cargo cannot error out on us if the lockfile requires updating. + if !using_lockfile_copy && locked { + other_options.push("--locked".to_owned()); + } meta.other_options(other_options); // FIXME: Fetching metadata is a slow process, as it might require // calling crates.io. We should be reporting progress here, but it's // unclear whether cargo itself supports it. 
- progress("metadata".to_owned()); + progress("cargo metadata: started".to_owned()); - (|| -> anyhow::Result<(_, _)> { - let output = meta.cargo_command().output()?; + let res = (|| -> anyhow::Result<(_, _)> { + let mut errored = false; + let output = + spawn_with_streaming_output(meta.cargo_command(), &mut |_| (), &mut |line| { + errored = errored || line.starts_with("error") || line.starts_with("warning"); + if errored { + progress("cargo metadata: ?".to_owned()); + return; + } + progress(format!("cargo metadata: {line}")); + })?; if !output.status.success() { + progress(format!("cargo metadata: failed {}", output.status)); let error = cargo_metadata::Error::CargoMetadata { stderr: String::from_utf8(output.stderr)?, } @@ -416,8 +478,8 @@ impl CargoWorkspace { current_dir, config, sysroot, - locked, true, + locked, progress, ) { return Ok((metadata, Some(error))); @@ -431,7 +493,9 @@ impl CargoWorkspace { .ok_or(cargo_metadata::Error::NoJson)?; Ok((cargo_metadata::MetadataCommand::parse(stdout)?, None)) })() - .with_context(|| format!("Failed to run `{:?}`", meta.cargo_command())) + .with_context(|| format!("Failed to run `{:?}`", meta.cargo_command())); + progress("cargo metadata: finished".to_owned()); + res } pub fn new( diff --git a/src/tools/rust-analyzer/crates/project-model/src/manifest_path.rs b/src/tools/rust-analyzer/crates/project-model/src/manifest_path.rs index 4f43be2f38fd..fba8cc9709d1 100644 --- a/src/tools/rust-analyzer/crates/project-model/src/manifest_path.rs +++ b/src/tools/rust-analyzer/crates/project-model/src/manifest_path.rs @@ -1,7 +1,7 @@ //! See [`ManifestPath`]. use std::{borrow::Borrow, fmt, ops}; -use paths::{AbsPath, AbsPathBuf}; +use paths::{AbsPath, AbsPathBuf, Utf8Path}; /// More or less [`AbsPathBuf`] with non-None parent. 
/// @@ -78,6 +78,12 @@ impl AsRef for ManifestPath { } } +impl AsRef for ManifestPath { + fn as_ref(&self) -> &Utf8Path { + self.file.as_ref() + } +} + impl Borrow for ManifestPath { fn borrow(&self) -> &AbsPath { self.file.borrow() diff --git a/src/tools/rust-analyzer/crates/project-model/src/sysroot.rs b/src/tools/rust-analyzer/crates/project-model/src/sysroot.rs index ebd86e3dc48a..4b34fc007112 100644 --- a/src/tools/rust-analyzer/crates/project-model/src/sysroot.rs +++ b/src/tools/rust-analyzer/crates/project-model/src/sysroot.rs @@ -4,6 +4,7 @@ //! but we can't process `.rlib` and need source code instead. The source code //! is typically installed with `rustup component add rust-src` command. +use core::fmt; use std::{env, fs, ops::Not, path::Path, process::Command}; use anyhow::{Result, format_err}; @@ -34,6 +35,19 @@ pub enum RustLibSrcWorkspace { Empty, } +impl fmt::Display for RustLibSrcWorkspace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RustLibSrcWorkspace::Workspace(ws) => write!(f, "workspace {}", ws.workspace_root()), + RustLibSrcWorkspace::Json(json) => write!(f, "json {}", json.manifest_or_root()), + RustLibSrcWorkspace::Stitched(stitched) => { + write!(f, "stitched with {} crates", stitched.crates.len()) + } + RustLibSrcWorkspace::Empty => write!(f, "empty"), + } + } +} + impl Sysroot { pub const fn empty() -> Sysroot { Sysroot { @@ -195,6 +209,8 @@ impl Sysroot { pub fn load_workspace( &self, sysroot_source_config: &RustSourceWorkspaceConfig, + current_dir: &AbsPath, + progress: &dyn Fn(String), ) -> Option { assert!(matches!(self.workspace, RustLibSrcWorkspace::Empty), "workspace already loaded"); let Self { root: _, rust_lib_src_root: Some(src_root), workspace: _, error: _ } = self @@ -204,10 +220,16 @@ impl Sysroot { if let RustSourceWorkspaceConfig::CargoMetadata(cargo_config) = sysroot_source_config { let library_manifest = ManifestPath::try_from(src_root.join("Cargo.toml")).unwrap(); if 
fs::metadata(&library_manifest).is_ok() { - if let Some(loaded) = - self.load_library_via_cargo(library_manifest, src_root, cargo_config) - { - return Some(loaded); + match self.load_library_via_cargo( + &library_manifest, + current_dir, + cargo_config, + progress, + ) { + Ok(loaded) => return Some(loaded), + Err(e) => { + tracing::error!("`cargo metadata` failed on `{library_manifest}` : {e}") + } } } tracing::debug!("Stitching sysroot library: {src_root}"); @@ -293,10 +315,11 @@ impl Sysroot { fn load_library_via_cargo( &self, - library_manifest: ManifestPath, - rust_lib_src_dir: &AbsPathBuf, + library_manifest: &ManifestPath, + current_dir: &AbsPath, cargo_config: &CargoMetadataConfig, - ) -> Option { + progress: &dyn Fn(String), + ) -> Result { tracing::debug!("Loading library metadata: {library_manifest}"); let mut cargo_config = cargo_config.clone(); // the sysroot uses `public-dependency`, so we make cargo think it's a nightly @@ -305,22 +328,16 @@ impl Sysroot { Some("nightly".to_owned()), ); - let (mut res, _) = match CargoWorkspace::fetch_metadata( - &library_manifest, - rust_lib_src_dir, + let (mut res, _) = CargoWorkspace::fetch_metadata( + library_manifest, + current_dir, &cargo_config, self, false, // Make sure we never attempt to write to the sysroot true, - &|_| (), - ) { - Ok(it) => it, - Err(e) => { - tracing::error!("`cargo metadata` failed on `{library_manifest}` : {e}"); - return None; - } - }; + progress, + )?; // Patch out `rustc-std-workspace-*` crates to point to the real crates. // This is done prior to `CrateGraph` construction to prevent de-duplication logic from failing. 
@@ -371,8 +388,9 @@ impl Sysroot { res.packages.remove(idx); }); - let cargo_workspace = CargoWorkspace::new(res, library_manifest, Default::default(), true); - Some(RustLibSrcWorkspace::Workspace(cargo_workspace)) + let cargo_workspace = + CargoWorkspace::new(res, library_manifest.clone(), Default::default(), true); + Ok(RustLibSrcWorkspace::Workspace(cargo_workspace)) } } diff --git a/src/tools/rust-analyzer/crates/project-model/src/tests.rs b/src/tools/rust-analyzer/crates/project-model/src/tests.rs index c69891b7463d..4f11af2d06cf 100644 --- a/src/tools/rust-analyzer/crates/project-model/src/tests.rs +++ b/src/tools/rust-analyzer/crates/project-model/src/tests.rs @@ -1,3 +1,5 @@ +use std::env::temp_dir; + use base_db::{CrateGraphBuilder, ProcMacroPaths}; use cargo_metadata::Metadata; use cfg::{CfgAtom, CfgDiff}; @@ -235,11 +237,18 @@ fn smoke_test_real_sysroot_cargo() { AbsPath::assert(Utf8Path::new(env!("CARGO_MANIFEST_DIR"))), &Default::default(), ); - let loaded_sysroot = sysroot.load_workspace(&RustSourceWorkspaceConfig::default_cargo()); + let cwd = AbsPathBuf::assert_utf8(temp_dir().join("smoke_test_real_sysroot_cargo")); + std::fs::create_dir_all(&cwd).unwrap(); + let loaded_sysroot = + sysroot.load_workspace(&RustSourceWorkspaceConfig::default_cargo(), &cwd, &|_| ()); if let Some(loaded_sysroot) = loaded_sysroot { sysroot.set_workspace(loaded_sysroot); } - assert!(matches!(sysroot.workspace(), RustLibSrcWorkspace::Workspace(_))); + assert!( + matches!(sysroot.workspace(), RustLibSrcWorkspace::Workspace(_)), + "got {}", + sysroot.workspace() + ); let project_workspace = ProjectWorkspace { kind: ProjectWorkspaceKind::Cargo { cargo: cargo_workspace, diff --git a/src/tools/rust-analyzer/crates/project-model/src/workspace.rs b/src/tools/rust-analyzer/crates/project-model/src/workspace.rs index eec0077ea6e2..a6743a32b142 100644 --- a/src/tools/rust-analyzer/crates/project-model/src/workspace.rs +++ 
b/src/tools/rust-analyzer/crates/project-model/src/workspace.rs @@ -12,7 +12,7 @@ use base_db::{ }; use cfg::{CfgAtom, CfgDiff, CfgOptions}; use intern::{Symbol, sym}; -use paths::{AbsPath, AbsPathBuf}; +use paths::{AbsPath, AbsPathBuf, Utf8PathBuf}; use rustc_hash::{FxHashMap, FxHashSet}; use semver::Version; use span::{Edition, FileId}; @@ -170,7 +170,7 @@ impl ProjectWorkspace { pub fn load( manifest: ProjectManifest, config: &CargoConfig, - progress: &dyn Fn(String), + progress: &(dyn Fn(String) + Sync), ) -> anyhow::Result { ProjectWorkspace::load_inner(&manifest, config, progress) .with_context(|| format!("Failed to load the project at {manifest}")) @@ -179,7 +179,7 @@ impl ProjectWorkspace { fn load_inner( manifest: &ProjectManifest, config: &CargoConfig, - progress: &dyn Fn(String), + progress: &(dyn Fn(String) + Sync), ) -> anyhow::Result { let res = match manifest { ProjectManifest::ProjectJson(project_json) => { @@ -206,9 +206,10 @@ impl ProjectWorkspace { fn load_cargo( cargo_toml: &ManifestPath, config: &CargoConfig, - progress: &dyn Fn(String), + progress: &(dyn Fn(String) + Sync), ) -> Result { progress("Discovering sysroot".to_owned()); + let workspace_dir = cargo_toml.parent(); let CargoConfig { features, rustc_source, @@ -224,15 +225,9 @@ impl ProjectWorkspace { .. 
} = config; let mut sysroot = match (sysroot, sysroot_src) { - (Some(RustLibSource::Discover), None) => { - Sysroot::discover(cargo_toml.parent(), extra_env) - } + (Some(RustLibSource::Discover), None) => Sysroot::discover(workspace_dir, extra_env), (Some(RustLibSource::Discover), Some(sysroot_src)) => { - Sysroot::discover_with_src_override( - cargo_toml.parent(), - extra_env, - sysroot_src.clone(), - ) + Sysroot::discover_with_src_override(workspace_dir, extra_env, sysroot_src.clone()) } (Some(RustLibSource::Path(path)), None) => { Sysroot::discover_rust_lib_src_dir(path.clone()) @@ -248,24 +243,23 @@ impl ProjectWorkspace { let toolchain_config = QueryConfig::Cargo(&sysroot, cargo_toml); let targets = target_tuple::get(toolchain_config, target.as_deref(), extra_env).unwrap_or_default(); + let toolchain = version::get(toolchain_config, extra_env) + .inspect_err(|e| { + tracing::error!(%e, + "failed fetching toolchain version for {cargo_toml:?} workspace" + ) + }) + .ok() + .flatten(); + + let target_dir = + config.target_dir.clone().unwrap_or_else(|| workspace_dir.join("target").into()); // We spawn a bunch of processes to query various information about the workspace's // toolchain and sysroot // We can speed up loading a bit by spawning all of these processes in parallel (especially // on systems were process spawning is delayed) let join = thread::scope(|s| { - let workspace_dir = cargo_toml.parent(); - let toolchain = s.spawn(|| { - version::get(toolchain_config, extra_env) - .inspect_err(|e| { - tracing::error!(%e, - "failed fetching toolchain version for {cargo_toml:?} workspace" - ) - }) - .ok() - .flatten() - }); - let rustc_cfg = s.spawn(|| { rustc_cfg::get(toolchain_config, targets.first().map(Deref::deref), extra_env) }); @@ -300,11 +294,14 @@ impl ProjectWorkspace { targets: targets.clone(), extra_args: extra_args.clone(), extra_env: extra_env.clone(), + target_dir: target_dir.clone(), + toolchain_version: toolchain.clone(), + kind: "rustc-dev" }, 
&sysroot, *no_deps, - false, - &|_| (), + true, + progress, ) { Ok((meta, _error)) => { let workspace = CargoWorkspace::new( @@ -343,22 +340,31 @@ impl ProjectWorkspace { targets: targets.clone(), extra_args: extra_args.clone(), extra_env: extra_env.clone(), + target_dir: target_dir.clone(), + toolchain_version: toolchain.clone(), + kind: "workspace", }, &sysroot, *no_deps, false, - &|_| (), + progress, ) }); let loaded_sysroot = s.spawn(|| { - sysroot.load_workspace(&RustSourceWorkspaceConfig::CargoMetadata( - sysroot_metadata_config(extra_env, &targets), - )) + sysroot.load_workspace( + &RustSourceWorkspaceConfig::CargoMetadata(sysroot_metadata_config( + config, + &targets, + toolchain.clone(), + target_dir.clone(), + )), + workspace_dir, + progress, + ) }); let cargo_config_extra_env = s.spawn(|| cargo_config_env(cargo_toml, extra_env, &sysroot)); thread::Result::Ok(( - toolchain.join()?, rustc_cfg.join()?, data_layout.join()?, rustc_dir.join()?, @@ -368,18 +374,11 @@ impl ProjectWorkspace { )) }); - let ( - toolchain, - rustc_cfg, - data_layout, - rustc, - loaded_sysroot, - cargo_metadata, - cargo_config_extra_env, - ) = match join { - Ok(it) => it, - Err(e) => std::panic::resume_unwind(e), - }; + let (rustc_cfg, data_layout, rustc, loaded_sysroot, cargo_metadata, cargo_config_extra_env) = + match join { + Ok(it) => it, + Err(e) => std::panic::resume_unwind(e), + }; let (meta, error) = cargo_metadata.with_context(|| { format!( @@ -388,6 +387,7 @@ impl ProjectWorkspace { })?; let cargo = CargoWorkspace::new(meta, cargo_toml.clone(), cargo_config_extra_env, false); if let Some(loaded_sysroot) = loaded_sysroot { + tracing::info!(src_root = ?sysroot.rust_lib_src_root(), root = %loaded_sysroot, "Loaded sysroot"); sysroot.set_workspace(loaded_sysroot); } @@ -411,7 +411,7 @@ impl ProjectWorkspace { pub fn load_inline( mut project_json: ProjectJson, config: &CargoConfig, - progress: &dyn Fn(String), + progress: &(dyn Fn(String) + Sync), ) -> ProjectWorkspace { 
progress("Discovering sysroot".to_owned()); let mut sysroot = @@ -423,14 +423,13 @@ impl ProjectWorkspace { let query_config = QueryConfig::Rustc(&sysroot, project_json.path().as_ref()); let targets = target_tuple::get(query_config, config.target.as_deref(), &config.extra_env) .unwrap_or_default(); + let toolchain = version::get(query_config, &config.extra_env).ok().flatten(); // We spawn a bunch of processes to query various information about the workspace's // toolchain and sysroot // We can speed up loading a bit by spawning all of these processes in parallel (especially // on systems were process spawning is delayed) let join = thread::scope(|s| { - let toolchain = - s.spawn(|| version::get(query_config, &config.extra_env).ok().flatten()); let rustc_cfg = s.spawn(|| { rustc_cfg::get(query_config, targets.first().map(Deref::deref), &config.extra_env) }); @@ -442,24 +441,35 @@ impl ProjectWorkspace { ) }); let loaded_sysroot = s.spawn(|| { + let project_root = project_json.project_root(); if let Some(sysroot_project) = sysroot_project { - sysroot.load_workspace(&RustSourceWorkspaceConfig::Json(*sysroot_project)) + sysroot.load_workspace( + &RustSourceWorkspaceConfig::Json(*sysroot_project), + project_root, + progress, + ) } else { - sysroot.load_workspace(&RustSourceWorkspaceConfig::CargoMetadata( - sysroot_metadata_config(&config.extra_env, &targets), - )) + let target_dir = config + .target_dir + .clone() + .unwrap_or_else(|| project_root.join("target").into()); + sysroot.load_workspace( + &RustSourceWorkspaceConfig::CargoMetadata(sysroot_metadata_config( + config, + &targets, + toolchain.clone(), + target_dir, + )), + project_root, + progress, + ) } }); - thread::Result::Ok(( - toolchain.join()?, - rustc_cfg.join()?, - data_layout.join()?, - loaded_sysroot.join()?, - )) + thread::Result::Ok((rustc_cfg.join()?, data_layout.join()?, loaded_sysroot.join()?)) }); - let (toolchain, rustc_cfg, target_layout, loaded_sysroot) = match join { + let (rustc_cfg, 
target_layout, loaded_sysroot) = match join { Ok(it) => it, Err(e) => std::panic::resume_unwind(e), }; @@ -497,9 +507,17 @@ impl ProjectWorkspace { .unwrap_or_default(); let rustc_cfg = rustc_cfg::get(query_config, None, &config.extra_env); let data_layout = target_data_layout::get(query_config, None, &config.extra_env); - let loaded_sysroot = sysroot.load_workspace(&RustSourceWorkspaceConfig::CargoMetadata( - sysroot_metadata_config(&config.extra_env, &targets), - )); + let target_dir = config.target_dir.clone().unwrap_or_else(|| dir.join("target").into()); + let loaded_sysroot = sysroot.load_workspace( + &RustSourceWorkspaceConfig::CargoMetadata(sysroot_metadata_config( + config, + &targets, + toolchain.clone(), + target_dir.clone(), + )), + dir, + &|_| (), + ); if let Some(loaded_sysroot) = loaded_sysroot { sysroot.set_workspace(loaded_sysroot); } @@ -512,6 +530,9 @@ impl ProjectWorkspace { targets, extra_args: config.extra_args.clone(), extra_env: config.extra_env.clone(), + target_dir, + toolchain_version: toolchain.clone(), + kind: "detached-file", }, &sysroot, config.no_deps, @@ -1804,13 +1825,18 @@ fn add_dep_inner(graph: &mut CrateGraphBuilder, from: CrateBuilderId, dep: Depen } fn sysroot_metadata_config( - extra_env: &FxHashMap>, + config: &CargoConfig, targets: &[String], + toolchain_version: Option, + target_dir: Utf8PathBuf, ) -> CargoMetadataConfig { CargoMetadataConfig { features: Default::default(), targets: targets.to_vec(), extra_args: Default::default(), - extra_env: extra_env.clone(), + extra_env: config.extra_env.clone(), + target_dir, + toolchain_version, + kind: "sysroot", } } diff --git a/src/tools/rust-analyzer/crates/query-group-macro/src/queries.rs b/src/tools/rust-analyzer/crates/query-group-macro/src/queries.rs index baac3e8bbfe7..c151cca07272 100644 --- a/src/tools/rust-analyzer/crates/query-group-macro/src/queries.rs +++ b/src/tools/rust-analyzer/crates/query-group-macro/src/queries.rs @@ -74,8 +74,8 @@ impl ToTokens for TrackedQuery 
{ quote! { #sig { #annotation - fn #shim( - db: &dyn #trait_name, + fn #shim<'db>( + db: &'db dyn #trait_name, _input: #input_struct_name, #(#pat_and_tys),* ) #ret @@ -88,8 +88,8 @@ impl ToTokens for TrackedQuery { quote! { #sig { #annotation - fn #shim( - db: &dyn #trait_name, + fn #shim<'db>( + db: &'db dyn #trait_name, #(#pat_and_tys),* ) #ret #invoke_block diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/analysis_stats.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/analysis_stats.rs index 12b393b80c0d..0ee01982fea2 100644 --- a/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/analysis_stats.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/analysis_stats.rs @@ -532,7 +532,7 @@ impl flags::AnalysisStats { } let todo = syntax::ast::make::ext::expr_todo().to_string(); - let mut formatter = |_: &hir::Type| todo.clone(); + let mut formatter = |_: &hir::Type<'_>| todo.clone(); let mut syntax_hit_found = false; for term in found_terms { let generated = term diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/rustc_tests.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/rustc_tests.rs index e3b372c91494..740fcd81ea98 100644 --- a/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/rustc_tests.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/rustc_tests.rs @@ -9,7 +9,6 @@ use hir::{ChangeWithProcMacros, Crate}; use ide::{AnalysisHost, DiagnosticCode, DiagnosticsConfig}; use ide_db::base_db; use itertools::Either; -use paths::Utf8PathBuf; use profile::StopWatch; use project_model::toolchain_info::{QueryConfig, target_data_layout}; use project_model::{ @@ -64,9 +63,9 @@ fn detect_errors_from_rustc_stderr_file(p: PathBuf) -> FxHashMap Result { - let mut path = std::env::temp_dir(); - path.push("ra-rustc-test.rs"); - let tmp_file = AbsPathBuf::try_from(Utf8PathBuf::from_path_buf(path).unwrap()).unwrap(); + let mut path = AbsPathBuf::assert_utf8(std::env::temp_dir()); + 
path.push("ra-rustc-test"); + let tmp_file = path.join("ra-rustc-test.rs"); std::fs::write(&tmp_file, "")?; let cargo_config = CargoConfig { sysroot: Some(RustLibSource::Discover), @@ -76,7 +75,8 @@ impl Tester { }; let mut sysroot = Sysroot::discover(tmp_file.parent().unwrap(), &cargo_config.extra_env); - let loaded_sysroot = sysroot.load_workspace(&RustSourceWorkspaceConfig::default_cargo()); + let loaded_sysroot = + sysroot.load_workspace(&RustSourceWorkspaceConfig::default_cargo(), &path, &|_| ()); if let Some(loaded_sysroot) = loaded_sysroot { sysroot.set_workspace(loaded_sysroot); } diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/config.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/config.rs index 762b63f54b09..05e1b832cd16 100644 --- a/src/tools/rust-analyzer/crates/rust-analyzer/src/config.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/config.rs @@ -452,6 +452,8 @@ config_data! { assist_emitMustUse: bool = false, /// Placeholder expression to use for missing expressions in assists. assist_expressionFillDefault: ExprFillDefaultDef = ExprFillDefaultDef::Todo, + /// When inserting a type (e.g. in "fill match arms" assist), prefer to use `Self` over the type name where possible. + assist_preferSelf: bool = false, /// Enable borrow checking for term search code assists. If set to false, also there will be more suggestions, but some of them may not borrow-check. assist_termSearch_borrowcheck: bool = true, /// Term search fuel in "units of work" for assists (Defaults to 1800). @@ -760,7 +762,11 @@ config_data! { /// though Cargo might be the eventual consumer. vfs_extraIncludes: Vec = vec![], - /// Exclude imports from symbol search. + /// Exclude all imports from workspace symbol search. + /// + /// In addition to regular imports (which are always excluded), + /// this option removes public imports (better known as re-exports) + /// and removes imports that rename the imported symbol. 
workspace_symbol_search_excludeImports: bool = false, /// Workspace symbol search kind. workspace_symbol_search_kind: WorkspaceSymbolSearchKindDef = WorkspaceSymbolSearchKindDef::OnlyTypes, @@ -1505,6 +1511,7 @@ impl Config { ExprFillDefaultDef::Default => ExprFillDefaultMode::Default, ExprFillDefaultDef::Underscore => ExprFillDefaultMode::Underscore, }, + prefer_self_ty: *self.assist_preferSelf(source_root), } } diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/handlers/notification.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/handlers/notification.rs index b7373f274f05..200e972e4289 100644 --- a/src/tools/rust-analyzer/crates/rust-analyzer/src/handlers/notification.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/handlers/notification.rs @@ -239,7 +239,7 @@ pub(crate) fn handle_did_change_configuration( let (config, e, _) = config.apply_change(change); this.config_errors = e.is_empty().not().then_some(e); - // Client config changes neccesitates .update_config method to be called. + // Client config changes necessitates .update_config method to be called. 
this.update_configuration(config); } } diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/lsp/capabilities.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/lsp/capabilities.rs index 418fe957590d..04e31f37fd2c 100644 --- a/src/tools/rust-analyzer/crates/rust-analyzer/src/lsp/capabilities.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/lsp/capabilities.rs @@ -77,7 +77,7 @@ pub fn server_capabilities(config: &Config) -> ServerCapabilities { _ => Some(OneOf::Left(false)), }, document_on_type_formatting_provider: Some({ - let mut chars = ide::Analysis::SUPPORTED_TRIGGER_CHARS.chars(); + let mut chars = ide::Analysis::SUPPORTED_TRIGGER_CHARS.iter(); DocumentOnTypeFormattingOptions { first_trigger_character: chars.next().unwrap().to_string(), more_trigger_character: Some(chars.map(|c| c.to_string()).collect()), diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/reload.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/reload.rs index 4677880daaf9..189d95ec7ed4 100644 --- a/src/tools/rust-analyzer/crates/rust-analyzer/src/reload.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/reload.rs @@ -114,6 +114,16 @@ impl GlobalState { Durability::HIGH, ); } + + if self.config.cargo(None) != old_config.cargo(None) { + let req = FetchWorkspaceRequest { path: None, force_crate_graph_reload: false }; + self.fetch_workspaces_queue.request_op("cargo config changed".to_owned(), req) + } + + if self.config.cfg_set_test(None) != old_config.cfg_set_test(None) { + let req = FetchWorkspaceRequest { path: None, force_crate_graph_reload: false }; + self.fetch_workspaces_queue.request_op("cfg_set_test config changed".to_owned(), req) + } } pub(crate) fn current_status(&self) -> lsp_ext::ServerStatusParams { diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/test_runner.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/test_runner.rs index 9c0bc33af646..e7528dbc9396 100644 --- 
a/src/tools/rust-analyzer/crates/rust-analyzer/src/test_runner.rs +++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/test_runner.rs @@ -103,6 +103,7 @@ impl CargoTestHandle { ) -> std::io::Result { let mut cmd = toolchain::command(Tool::Cargo.path(), root, &options.extra_env); cmd.env("RUSTC_BOOTSTRAP", "1"); + cmd.arg("--color=always"); cmd.arg("test"); cmd.arg("--package"); diff --git a/src/tools/rust-analyzer/crates/stdx/src/lib.rs b/src/tools/rust-analyzer/crates/stdx/src/lib.rs index 9a292eacd7f7..978c50d807bc 100644 --- a/src/tools/rust-analyzer/crates/stdx/src/lib.rs +++ b/src/tools/rust-analyzer/crates/stdx/src/lib.rs @@ -13,6 +13,7 @@ pub mod panic_context; pub mod process; pub mod rand; pub mod thread; +pub mod variance; pub use itertools; diff --git a/src/tools/rust-analyzer/crates/stdx/src/variance.rs b/src/tools/rust-analyzer/crates/stdx/src/variance.rs new file mode 100644 index 000000000000..8465d72bf371 --- /dev/null +++ b/src/tools/rust-analyzer/crates/stdx/src/variance.rs @@ -0,0 +1,270 @@ +//! This is a copy of [`std::marker::variance`]. + +use std::any::type_name; +use std::cmp::Ordering; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; + +macro_rules! first_token { + ($first:tt $($rest:tt)*) => { + $first + }; +} +macro_rules! phantom_type { + ($( + $(#[$attr:meta])* + pub struct $name:ident <$t:ident> ($($inner:tt)*); + )*) => {$( + $(#[$attr])* + pub struct $name<$t>($($inner)*) where T: ?Sized; + + impl $name + where T: ?Sized + { + /// Constructs a new instance of the variance marker. 
+ pub const fn new() -> Self { + Self(PhantomData) + } + } + + impl self::sealed::Sealed for $name where T: ?Sized { + const VALUE: Self = Self::new(); + } + + impl Variance for $name where T: ?Sized {} + + impl Default for $name + where T: ?Sized + { + fn default() -> Self { + Self(PhantomData) + } + } + + impl fmt::Debug for $name + where T: ?Sized + { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}<{}>", stringify!($name), type_name::()) + } + } + + impl Clone for $name + where T: ?Sized + { + fn clone(&self) -> Self { + *self + } + } + + impl Copy for $name where T: ?Sized {} + + impl PartialEq for $name + where T: ?Sized + { + fn eq(&self, _: &Self) -> bool { + true + } + } + + impl Eq for $name where T: ?Sized {} + + #[allow(clippy::non_canonical_partial_ord_impl)] + impl PartialOrd for $name + where T: ?Sized + { + fn partial_cmp(&self, _: &Self) -> Option { + Some(Ordering::Equal) + } + } + + impl Ord for $name + where T: ?Sized + { + fn cmp(&self, _: &Self) -> Ordering { + Ordering::Equal + } + } + + impl Hash for $name + where T: ?Sized + { + fn hash(&self, _: &mut H) {} + } + )*}; +} + +macro_rules! phantom_lifetime { + ($( + $(#[$attr:meta])* + pub struct $name:ident <$lt:lifetime> ($($inner:tt)*); + )*) => {$( + $(#[$attr])* + + #[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $name<$lt>($($inner)*); + + impl $name<'_> { + /// Constructs a new instance of the variance marker. + pub const fn new() -> Self { + Self(first_token!($($inner)*)(PhantomData)) + } + } + + impl self::sealed::Sealed for $name<'_> { + const VALUE: Self = Self::new(); + } + + impl Variance for $name<'_> {} + + impl fmt::Debug for $name<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", stringify!($name)) + } + } + )*}; +} + +phantom_lifetime! { + /// Zero-sized type used to mark a lifetime as covariant. + /// + /// Covariant lifetimes must live at least as long as declared. 
See [the reference][1] for more + /// information. + /// + /// [1]: https://doc.rust-lang.org/stable/reference/subtyping.html#variance + /// + /// ## Layout + /// + /// For all `'a`, the following are guaranteed: + /// * `size_of::>() == 0` + /// * `align_of::>() == 1` + + pub struct PhantomCovariantLifetime<'a>(PhantomCovariant<&'a ()>); + /// Zero-sized type used to mark a lifetime as contravariant. + /// + /// Contravariant lifetimes must live at most as long as declared. See [the reference][1] for + /// more information. + /// + /// [1]: https://doc.rust-lang.org/stable/reference/subtyping.html#variance + /// + /// ## Layout + /// + /// For all `'a`, the following are guaranteed: + /// * `size_of::>() == 0` + /// * `align_of::>() == 1` + + pub struct PhantomContravariantLifetime<'a>(PhantomContravariant<&'a ()>); + /// Zero-sized type used to mark a lifetime as invariant. + /// + /// Invariant lifetimes must be live for the exact length declared, neither shorter nor longer. + /// See [the reference][1] for more information. + /// + /// [1]: https://doc.rust-lang.org/stable/reference/subtyping.html#variance + /// + /// ## Layout + /// + /// For all `'a`, the following are guaranteed: + /// * `size_of::>() == 0` + /// * `align_of::>() == 1` + + pub struct PhantomInvariantLifetime<'a>(PhantomInvariant<&'a ()>); + +} + +phantom_type! { + /// Zero-sized type used to mark a type parameter as covariant. + /// + /// Types used as part of the return value from a function are covariant. If the type is _also_ + /// passed as a parameter then it is [invariant][PhantomInvariant]. See [the reference][1] for + /// more information. + /// + /// [1]: https://doc.rust-lang.org/stable/reference/subtyping.html#variance + /// + /// ## Layout + /// + /// For all `T`, the following are guaranteed: + /// * `size_of::>() == 0` + /// * `align_of::>() == 1` + + pub struct PhantomCovariant(PhantomData T>); + /// Zero-sized type used to mark a type parameter as contravariant. 
+ /// + /// Types passed as arguments to a function are contravariant. If the type is _also_ part of the + /// return value from a function then it is [invariant][PhantomInvariant]. See [the + /// reference][1] for more information. + /// + /// [1]: https://doc.rust-lang.org/stable/reference/subtyping.html#variance + /// + /// ## Layout + /// + /// For all `T`, the following are guaranteed: + /// * `size_of::>() == 0` + /// * `align_of::>() == 1` + + pub struct PhantomContravariant(PhantomData); + /// Zero-sized type used to mark a type parameter as invariant. + /// + /// Types that are both passed as an argument _and_ used as part of the return value from a + /// function are invariant. See [the reference][1] for more information. + /// + /// [1]: https://doc.rust-lang.org/stable/reference/subtyping.html#variance + /// + /// ## Layout + /// + /// For all `T`, the following are guaranteed: + /// * `size_of::>() == 0` + /// * `align_of::>() == 1` + + pub struct PhantomInvariant(PhantomData T>); + +} + +mod sealed { + + pub trait Sealed { + const VALUE: Self; + } +} +/// A marker trait for phantom variance types. +pub trait Variance: sealed::Sealed + Default {} +/// Construct a variance marker; equivalent to [`Default::default`]. +/// +/// This type can be any of the following. You generally should not need to explicitly name the +/// type, however. 
+/// +/// - [`PhantomCovariant`] +/// - [`PhantomContravariant`] +/// - [`PhantomInvariant`] +/// - [`PhantomCovariantLifetime`] +/// - [`PhantomContravariantLifetime`] +/// - [`PhantomInvariantLifetime`] +/// +/// # Example +/// +/// ```rust +/// #![feature(phantom_variance_markers)] +/// +/// use core::marker::{PhantomCovariant, variance}; +/// +/// struct BoundFn +/// where +/// F: Fn(P) -> R, +/// { +/// function: F, +/// parameter: P, +/// return_value: PhantomCovariant, +/// } +/// +/// let bound_fn = BoundFn { +/// function: core::convert::identity, +/// parameter: 5u8, +/// return_value: variance(), +/// }; +/// ``` +pub const fn variance() -> T +where + T: Variance, +{ + T::VALUE +} diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs index fab4cb287c3d..955aadaa25d3 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs @@ -134,6 +134,13 @@ pub fn name_ref(name_ref: &str) -> ast::NameRef { } } } +pub fn name_ref_self_ty() -> ast::NameRef { + quote! 
{ + NameRef { + [Self] + } + } +} fn raw_ident_esc(ident: &str) -> &'static str { if is_raw_identifier(ident, Edition::CURRENT) { "r#" } else { "" } } diff --git a/src/tools/rust-analyzer/crates/test-utils/src/fixture.rs b/src/tools/rust-analyzer/crates/test-utils/src/fixture.rs index 7240069753e8..1d821e96e550 100644 --- a/src/tools/rust-analyzer/crates/test-utils/src/fixture.rs +++ b/src/tools/rust-analyzer/crates/test-utils/src/fixture.rs @@ -412,22 +412,36 @@ impl MiniCore { } let mut active_regions = Vec::new(); + let mut inactive_regions = Vec::new(); let mut seen_regions = Vec::new(); for line in lines { let trimmed = line.trim(); if let Some(region) = trimmed.strip_prefix("// region:") { - active_regions.push(region); - continue; + if let Some(region) = region.strip_prefix('!') { + inactive_regions.push(region); + continue; + } else { + active_regions.push(region); + continue; + } } if let Some(region) = trimmed.strip_prefix("// endregion:") { - let prev = active_regions.pop().unwrap(); + let (prev, region) = if let Some(region) = region.strip_prefix('!') { + (inactive_regions.pop().unwrap(), region) + } else { + (active_regions.pop().unwrap(), region) + }; assert_eq!(prev, region, "unbalanced region pairs"); continue; } - let mut line_region = false; - if let Some(idx) = trimmed.find("// :") { - line_region = true; + let mut active_line_region = false; + let mut inactive_line_region = false; + if let Some(idx) = trimmed.find("// :!") { + inactive_line_region = true; + inactive_regions.push(&trimmed[idx + "// :!".len()..]); + } else if let Some(idx) = trimmed.find("// :") { + active_line_region = true; active_regions.push(&trimmed[idx + "// :".len()..]); } @@ -438,18 +452,30 @@ impl MiniCore { seen_regions.push(region); keep &= self.has_flag(region); } + for ®ion in &inactive_regions { + assert!(!region.starts_with(' '), "region marker starts with a space: {region:?}"); + self.assert_valid_flag(region); + seen_regions.push(region); + keep &= 
!self.has_flag(region); + } if keep { buf.push_str(line); } - if line_region { + if active_line_region { active_regions.pop().unwrap(); } + if inactive_line_region { + inactive_regions.pop().unwrap(); + } } if !active_regions.is_empty() { panic!("unclosed regions: {active_regions:?} Add an `endregion` comment"); } + if !inactive_regions.is_empty() { + panic!("unclosed regions: {inactive_regions:?} Add an `endregion` comment"); + } for flag in &self.valid_flags { if !seen_regions.iter().any(|it| it == flag) { diff --git a/src/tools/rust-analyzer/crates/test-utils/src/minicore.rs b/src/tools/rust-analyzer/crates/test-utils/src/minicore.rs index 4bdd791eb167..d13a81d287fa 100644 --- a/src/tools/rust-analyzer/crates/test-utils/src/minicore.rs +++ b/src/tools/rust-analyzer/crates/test-utils/src/minicore.rs @@ -31,6 +31,7 @@ //! eq: sized //! error: fmt //! fmt: option, result, transmute, coerce_unsized, copy, clone, derive +//! fmt_before_1_89_0: fmt //! fn: tuple //! from: sized, result //! 
future: pin @@ -1175,6 +1176,7 @@ pub mod fmt { } } + // region:fmt_before_1_89_0 #[lang = "format_unsafe_arg"] pub struct UnsafeArg { _private: (), @@ -1185,6 +1187,7 @@ pub mod fmt { UnsafeArg { _private: () } } } + // endregion:fmt_before_1_89_0 } #[derive(Copy, Clone)] @@ -1204,6 +1207,7 @@ pub mod fmt { Arguments { pieces, fmt: None, args: &[] } } + // region:fmt_before_1_89_0 pub fn new_v1_formatted( pieces: &'a [&'static str], args: &'a [rt::Argument<'a>], @@ -1212,6 +1216,17 @@ pub mod fmt { ) -> Arguments<'a> { Arguments { pieces, fmt: Some(fmt), args } } + // endregion:fmt_before_1_89_0 + + // region:!fmt_before_1_89_0 + pub unsafe fn new_v1_formatted( + pieces: &'a [&'static str], + args: &'a [rt::Argument<'a>], + fmt: &'a [rt::Placeholder], + ) -> Arguments<'a> { + Arguments { pieces, fmt: Some(fmt), args } + } + // endregion:!fmt_before_1_89_0 pub const fn as_str(&self) -> Option<&'static str> { match (self.pieces, self.args) { diff --git a/src/tools/rust-analyzer/docs/book/src/configuration_generated.md b/src/tools/rust-analyzer/docs/book/src/configuration_generated.md index 4eb9cfc4e5bd..9404b1454a08 100644 --- a/src/tools/rust-analyzer/docs/book/src/configuration_generated.md +++ b/src/tools/rust-analyzer/docs/book/src/configuration_generated.md @@ -13,6 +13,13 @@ Default: `"todo"` Placeholder expression to use for missing expressions in assists. +## rust-analyzer.assist.preferSelf {#assist.preferSelf} + +Default: `false` + +When inserting a type (e.g. in "fill match arms" assist), prefer to use `Self` over the type name where possible. + + ## rust-analyzer.assist.termSearch.borrowcheck {#assist.termSearch.borrowcheck} Default: `true` @@ -1535,7 +1542,11 @@ https://github.com/facebook/buck2/tree/main/integrations/rust-project. Default: `false` -Exclude imports from symbol search. +Exclude all imports from workspace symbol search. 
+ +In addition to regular imports (which are always excluded), +this option removes public imports (better known as re-exports) +and removes imports that rename the imported symbol. ## rust-analyzer.workspace.symbol.search.kind {#workspace.symbol.search.kind} diff --git a/src/tools/rust-analyzer/editors/code/package.json b/src/tools/rust-analyzer/editors/code/package.json index dcdb4fe30ee8..26a21c1468d8 100644 --- a/src/tools/rust-analyzer/editors/code/package.json +++ b/src/tools/rust-analyzer/editors/code/package.json @@ -680,6 +680,16 @@ } } }, + { + "title": "assist", + "properties": { + "rust-analyzer.assist.preferSelf": { + "markdownDescription": "When inserting a type (e.g. in \"fill match arms\" assist), prefer to use `Self` over the type name where possible.", + "default": false, + "type": "boolean" + } + } + }, { "title": "assist", "properties": { @@ -2895,7 +2905,7 @@ "title": "workspace", "properties": { "rust-analyzer.workspace.symbol.search.excludeImports": { - "markdownDescription": "Exclude imports from symbol search.", + "markdownDescription": "Exclude all imports from workspace symbol search.\n\nIn addition to regular imports (which are always excluded),\nthis option removes public imports (better known as re-exports)\nand removes imports that rename the imported symbol.", "default": false, "type": "boolean" } diff --git a/src/tools/rust-analyzer/editors/code/src/config.ts b/src/tools/rust-analyzer/editors/code/src/config.ts index f36e18a73da0..d2dc740c09b5 100644 --- a/src/tools/rust-analyzer/editors/code/src/config.ts +++ b/src/tools/rust-analyzer/editors/code/src/config.ts @@ -20,15 +20,9 @@ export class Config { configureLang: vscode.Disposable | undefined; readonly rootSection = "rust-analyzer"; - private readonly requiresServerReloadOpts = [ - "cargo", - "procMacro", - "serverPath", - "server", - "files", - "cfg", - "showSyntaxTree", - ].map((opt) => `${this.rootSection}.${opt}`); + private readonly requiresServerReloadOpts = ["server", 
"files", "showSyntaxTree"].map( + (opt) => `${this.rootSection}.${opt}`, + ); private readonly requiresWindowReloadOpts = ["testExplorer"].map( (opt) => `${this.rootSection}.${opt}`, @@ -208,7 +202,7 @@ export class Config { } get serverPath() { - return this.get("server.path") ?? this.get("serverPath"); + return this.get("server.path"); } get serverExtraEnv(): Env { diff --git a/src/tools/rust-analyzer/rust-version b/src/tools/rust-analyzer/rust-version index af0dd5c9acda..a454087b0cdc 100644 --- a/src/tools/rust-analyzer/rust-version +++ b/src/tools/rust-analyzer/rust-version @@ -1 +1 @@ -7c10378e1fee5ddc6573b916aeb884ab10e0de17 +27733d46d79f4eb92e240fbba502c43022665735 diff --git a/src/tools/rust-installer/install-template.sh b/src/tools/rust-installer/install-template.sh index f7f408be882e..337aaa95b9a2 100644 --- a/src/tools/rust-installer/install-template.sh +++ b/src/tools/rust-installer/install-template.sh @@ -160,7 +160,7 @@ valopt() { local doc="$*" if [ $HELP -eq 0 ] then - local uop=$(echo $op | tr 'a-z-' 'A-Z_') + local uop=$(echo $op | tr '[a-z]-' '[A-Z]_') local v="CFG_${uop}" eval $v="$default" for arg in $CFG_ARGS @@ -206,8 +206,8 @@ opt() { do if [ "$arg" = "--${flag}-${op}" ] then - op=$(echo $op | tr 'a-z-' 'A-Z_') - flag=$(echo $flag | tr 'a-z' 'A-Z') + op=$(echo $op | tr '[a-z]-' '[A-Z]_') + flag=$(echo $flag | tr '[a-z]' '[A-Z]') local v="CFG_${flag}_${op}" eval $v=1 putvar $v @@ -235,7 +235,7 @@ flag() { do if [ "$arg" = "--${op}" ] then - op=$(echo $op | tr 'a-z-' 'A-Z_') + op=$(echo $op | tr '[a-z]-' '[A-Z]_') local v="CFG_${op}" eval $v=1 putvar $v diff --git a/src/tools/rustfmt/src/expr.rs b/src/tools/rustfmt/src/expr.rs index be6b483bfff1..08aedff2b20d 100644 --- a/src/tools/rustfmt/src/expr.rs +++ b/src/tools/rustfmt/src/expr.rs @@ -2289,8 +2289,10 @@ fn rewrite_expr_addrof( ) -> RewriteResult { let operator_str = match (mutability, borrow_kind) { (ast::Mutability::Not, ast::BorrowKind::Ref) => "&", + (ast::Mutability::Not, 
ast::BorrowKind::Pin) => "&pin const ", (ast::Mutability::Not, ast::BorrowKind::Raw) => "&raw const ", (ast::Mutability::Mut, ast::BorrowKind::Ref) => "&mut ", + (ast::Mutability::Mut, ast::BorrowKind::Pin) => "&pin mut ", (ast::Mutability::Mut, ast::BorrowKind::Raw) => "&raw mut ", }; rewrite_unary_prefix(context, operator_str, expr, shape) diff --git a/src/tools/rustfmt/src/imports.rs b/src/tools/rustfmt/src/imports.rs index b741dd9b5da5..788fed013ad2 100644 --- a/src/tools/rustfmt/src/imports.rs +++ b/src/tools/rustfmt/src/imports.rs @@ -184,7 +184,7 @@ impl UseSegment { modsep: bool, ) -> Option { let name = rewrite_ident(context, path_seg.ident); - if name.is_empty() || name == "{{root}}" { + if name.is_empty() { return None; } let kind = match name { diff --git a/src/tools/rustfmt/src/parse/session.rs b/src/tools/rustfmt/src/parse/session.rs index afd847f95157..10e2809e58bf 100644 --- a/src/tools/rustfmt/src/parse/session.rs +++ b/src/tools/rustfmt/src/parse/session.rs @@ -5,7 +5,7 @@ use std::sync::atomic::{AtomicBool, Ordering}; use rustc_data_structures::sync::IntoDynSyncSend; use rustc_errors::emitter::{DynEmitter, Emitter, HumanEmitter, SilentEmitter, stderr_destination}; use rustc_errors::registry::Registry; -use rustc_errors::translation::Translate; +use rustc_errors::translation::Translator; use rustc_errors::{ColorConfig, Diag, DiagCtxt, DiagInner, Level as DiagnosticLevel}; use rustc_session::parse::ParseSess as RawParseSess; use rustc_span::{ @@ -47,16 +47,6 @@ impl SilentOnIgnoredFilesEmitter { } } -impl Translate for SilentOnIgnoredFilesEmitter { - fn fluent_bundle(&self) -> Option<&rustc_errors::FluentBundle> { - self.emitter.fluent_bundle() - } - - fn fallback_fluent_bundle(&self) -> &rustc_errors::FluentBundle { - self.emitter.fallback_fluent_bundle() - } -} - impl Emitter for SilentOnIgnoredFilesEmitter { fn source_map(&self) -> Option<&SourceMap> { None @@ -84,6 +74,10 @@ impl Emitter for SilentOnIgnoredFilesEmitter { } 
self.handle_non_ignoreable_error(diag, registry); } + + fn translator(&self) -> &Translator { + self.emitter.translator() + } } impl From for ColorConfig { @@ -110,23 +104,15 @@ fn default_dcx( ColorConfig::Never }; - let fallback_bundle = rustc_errors::fallback_fluent_bundle( - rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(), - false, - ); - let emitter = Box::new( - HumanEmitter::new(stderr_destination(emit_color), fallback_bundle) - .sm(Some(source_map.clone())), - ); + let translator = rustc_driver::default_translator(); - let emitter: Box = if !show_parse_errors { - Box::new(SilentEmitter { - fatal_emitter: emitter, - fatal_note: None, - emit_fatal_diagnostic: false, - }) + let emitter: Box = if show_parse_errors { + Box::new( + HumanEmitter::new(stderr_destination(emit_color), translator) + .sm(Some(source_map.clone())), + ) } else { - emitter + Box::new(SilentEmitter { translator }) }; DiagCtxt::new(Box::new(SilentOnIgnoredFilesEmitter { has_non_ignorable_parser_errors: false, @@ -205,7 +191,7 @@ impl ParseSess { } pub(crate) fn set_silent_emitter(&mut self) { - self.raw_psess.dcx().make_silent(None, false); + self.raw_psess.dcx().make_silent(); } pub(crate) fn span_to_filename(&self, span: Span) -> FileName { @@ -335,16 +321,6 @@ mod tests { num_emitted_errors: Arc, } - impl Translate for TestEmitter { - fn fluent_bundle(&self) -> Option<&rustc_errors::FluentBundle> { - None - } - - fn fallback_fluent_bundle(&self) -> &rustc_errors::FluentBundle { - panic!("test emitter attempted to translate a diagnostic"); - } - } - impl Emitter for TestEmitter { fn source_map(&self) -> Option<&SourceMap> { None @@ -353,6 +329,10 @@ mod tests { fn emit_diagnostic(&mut self, _diag: DiagInner, _registry: &Registry) { self.num_emitted_errors.fetch_add(1, Ordering::Release); } + + fn translator(&self) -> &Translator { + panic!("test emitter attempted to translate a diagnostic"); + } } fn build_diagnostic(level: DiagnosticLevel, span: Option) -> DiagInner { diff --git 
a/src/tools/rustfmt/tests/source/pin_sugar.rs b/src/tools/rustfmt/tests/source/pin_sugar.rs index 370dfbc196ae..e5b47339b928 100644 --- a/src/tools/rustfmt/tests/source/pin_sugar.rs +++ b/src/tools/rustfmt/tests/source/pin_sugar.rs @@ -18,3 +18,13 @@ impl Foo { mut self) {} fn i(&pin mut self) {} } + +fn borrows() { + let mut foo = 0_i32; + let x: Pin<&mut _> = & pin + mut foo; + + let x: Pin<&_> = & + pin const + foo; +} diff --git a/src/tools/rustfmt/tests/source/type.rs b/src/tools/rustfmt/tests/source/type.rs index 7a232f85198a..213fad7cb16b 100644 --- a/src/tools/rustfmt/tests/source/type.rs +++ b/src/tools/rustfmt/tests/source/type.rs @@ -142,18 +142,18 @@ type MyFn = fn(a: SomeLongComplexType, b: SomeOtherLongComplexType,) -> Box() -> i32 { ::CONST } +const fn not_quite_const() -> i32 { ::CONST } -impl ~ const T {} +impl const T for U {} -fn apit(_: impl ~ const T) {} +fn apit(_: impl [ const ] T) {} -fn rpit() -> impl ~ const T { S } +fn rpit() -> impl [ const] T { S } pub struct Foo(T); -impl Foo { +impl Foo { fn new(t: T) -> Self { Self(t) } diff --git a/src/tools/rustfmt/tests/target/pin_sugar.rs b/src/tools/rustfmt/tests/target/pin_sugar.rs index 7d04efb1b326..09ad23a5807f 100644 --- a/src/tools/rustfmt/tests/target/pin_sugar.rs +++ b/src/tools/rustfmt/tests/target/pin_sugar.rs @@ -16,3 +16,10 @@ impl Foo { fn h<'a>(&'a pin mut self) {} fn i(&pin mut self) {} } + +fn borrows() { + let mut foo = 0_i32; + let x: Pin<&mut _> = &pin mut foo; + + let x: Pin<&_> = &pin const foo; +} diff --git a/src/tools/rustfmt/tests/target/type.rs b/src/tools/rustfmt/tests/target/type.rs index 325adb52f3f9..93479f8b484c 100644 --- a/src/tools/rustfmt/tests/target/type.rs +++ b/src/tools/rustfmt/tests/target/type.rs @@ -147,22 +147,22 @@ type MyFn = fn( // Const bound -trait T: ~const Super {} +trait T: [const] Super {} -const fn not_quite_const() -> i32 { +const fn not_quite_const() -> i32 { ::CONST } -impl ~const T {} +impl const T for U {} -fn apit(_: impl ~const T) {} 
+fn apit(_: impl [const] T) {} -fn rpit() -> impl ~const T { +fn rpit() -> impl [const] T { S } pub struct Foo(T); -impl Foo { +impl Foo { fn new(t: T) -> Self { Self(t) } diff --git a/src/tools/tidy/src/alphabetical.rs b/src/tools/tidy/src/alphabetical.rs index a29286fa2c59..141083290c6c 100644 --- a/src/tools/tidy/src/alphabetical.rs +++ b/src/tools/tidy/src/alphabetical.rs @@ -19,7 +19,9 @@ //! If a line ends with an opening delimiter, we effectively join the following line to it before //! checking it. E.g. `foo(\nbar)` is treated like `foo(bar)`. +use std::cmp::Ordering; use std::fmt::Display; +use std::iter::Peekable; use std::path::Path; use crate::walk::{filter_dirs, walk}; @@ -99,9 +101,9 @@ fn check_section<'a>( continue; } - let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ').to_lowercase(); + let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' '); - if trimmed_line.to_lowercase() < prev_line_trimmed_lowercase { + if version_sort(&trimmed_line, &prev_line_trimmed_lowercase).is_lt() { tidy_error_ext!(err, bad, "{file}:{}: line not in alphabetical order", idx + 1); } @@ -143,3 +145,56 @@ pub fn check(path: &Path, bad: &mut bool) { check_lines(file, lines, &mut crate::tidy_error, bad) }); } + +fn consume_numeric_prefix>(it: &mut Peekable) -> String { + let mut result = String::new(); + + while let Some(&c) = it.peek() { + if !c.is_numeric() { + break; + } + + result.push(c); + it.next(); + } + + result +} + +// A sorting function that is case-sensitive, and sorts sequences of digits by their numeric value, +// so that `9` sorts before `12`. 
+fn version_sort(a: &str, b: &str) -> Ordering { + let mut it1 = a.chars().peekable(); + let mut it2 = b.chars().peekable(); + + while let (Some(x), Some(y)) = (it1.peek(), it2.peek()) { + match (x.is_numeric(), y.is_numeric()) { + (true, true) => { + let num1: String = consume_numeric_prefix(it1.by_ref()); + let num2: String = consume_numeric_prefix(it2.by_ref()); + + let int1: u64 = num1.parse().unwrap(); + let int2: u64 = num2.parse().unwrap(); + + // Compare strings when the numeric value is equal to handle "00" versus "0". + match int1.cmp(&int2).then_with(|| num1.cmp(&num2)) { + Ordering::Equal => continue, + different => return different, + } + } + (false, false) => match x.cmp(y) { + Ordering::Equal => { + it1.next(); + it2.next(); + continue; + } + different => return different, + }, + (false, true) | (true, false) => { + return x.cmp(y); + } + } + } + + it1.next().cmp(&it2.next()) +} diff --git a/src/tools/tidy/src/alphabetical/tests.rs b/src/tools/tidy/src/alphabetical/tests.rs index 29e89a693bfa..4d05bc33cedc 100644 --- a/src/tools/tidy/src/alphabetical/tests.rs +++ b/src/tools/tidy/src/alphabetical/tests.rs @@ -3,6 +3,7 @@ use std::str::from_utf8; use super::*; +#[track_caller] fn test(lines: &str, name: &str, expected_msg: &str, expected_bad: bool) { let mut actual_msg = Vec::new(); let mut actual_bad = false; @@ -15,10 +16,12 @@ fn test(lines: &str, name: &str, expected_msg: &str, expected_bad: bool) { assert_eq!(expected_bad, actual_bad); } +#[track_caller] fn good(lines: &str) { test(lines, "good", "", false); } +#[track_caller] fn bad(lines: &str, expected_msg: &str) { test(lines, "bad", expected_msg, true); } @@ -187,3 +190,147 @@ fn test_double_end() { "; bad(lines, "bad:5 found `tidy-alphabetical-end` expecting `tidy-alphabetical-start`"); } + +#[test] +fn test_numeric_good() { + good( + "\ + # tidy-alphabetical-start + rustc_ast = { path = \"../rustc_ast\" } + rustc_ast_lowering = { path = \"../rustc_ast_lowering\" } + # tidy-alphabetical-end 
+ ", + ); + + good( + "\ + # tidy-alphabetical-start + fp-armv8 + fp16 + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + item1 + item2 + item10 + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + foo + foo_ + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + foo-bar + foo_bar + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + sme-lutv2 + sme2 + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + v5te + v6 + v6k + v6t2 + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + zve64d + zve64f + # tidy-alphabetical-end + ", + ); + + // Case is significant. + good( + "\ + # tidy-alphabetical-start + _ZYXW + _abcd + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + v0 + v00 + v000 + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + w005s09t + w5s009t + # tidy-alphabetical-end + ", + ); + + good( + "\ + # tidy-alphabetical-start + v0s + v00t + # tidy-alphabetical-end + ", + ); +} + +#[test] +fn test_numeric_bad() { + let lines = "\ + # tidy-alphabetical-start + item1 + item10 + item2 + # tidy-alphabetical-end + "; + bad(lines, "bad:4: line not in alphabetical order"); + + let lines = "\ + # tidy-alphabetical-start + zve64f + zve64d + # tidy-alphabetical-end + "; + bad(lines, "bad:3: line not in alphabetical order"); + + let lines = "\ + # tidy-alphabetical-start + 000 + 00 + # tidy-alphabetical-end + "; + bad(lines, "bad:3: line not in alphabetical order"); +} diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 170dcd626a28..bf813d2131e8 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -206,6 +206,7 @@ const EXCEPTIONS_CRANELIFT: ExceptionList = &[ ("regalloc2", "Apache-2.0 WITH LLVM-exception"), ("target-lexicon", "Apache-2.0 WITH LLVM-exception"), ("wasmtime-jit-icache-coherence", "Apache-2.0 WITH 
LLVM-exception"), + ("wasmtime-math", "Apache-2.0 WITH LLVM-exception"), // tidy-alphabetical-end ]; @@ -356,6 +357,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "rand", "rand_chacha", "rand_core", + "rand_xorshift", // dependency for doc-tests in rustc_thread_pool "rand_xoshiro", "redox_syscall", "regex", @@ -364,7 +366,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "rustc-demangle", "rustc-hash", "rustc-literal-escaper", - "rustc-rayon-core", "rustc-stable-hash", "rustc_apfloat", "rustix", @@ -373,6 +374,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "scoped-tls", "scopeguard", "self_cell", + "semver", "serde", "serde_derive", "serde_json", @@ -537,6 +539,7 @@ const PERMITTED_CRANELIFT_DEPENDENCIES: &[&str] = &[ "indexmap", "libc", "libloading", + "libm", "log", "mach2", "memchr", @@ -554,6 +557,7 @@ const PERMITTED_CRANELIFT_DEPENDENCIES: &[&str] = &[ "target-lexicon", "unicode-ident", "wasmtime-jit-icache-coherence", + "wasmtime-math", "windows-sys", "windows-targets", "windows_aarch64_gnullvm", diff --git a/src/tools/tidy/src/error_codes.rs b/src/tools/tidy/src/error_codes.rs index e2d1b85797ff..bb61412f6788 100644 --- a/src/tools/tidy/src/error_codes.rs +++ b/src/tools/tidy/src/error_codes.rs @@ -43,9 +43,18 @@ macro_rules! verbose_print { }; } -pub fn check(root_path: &Path, search_paths: &[&Path], verbose: bool, bad: &mut bool) { +pub fn check( + root_path: &Path, + search_paths: &[&Path], + verbose: bool, + ci_info: &crate::CiInfo, + bad: &mut bool, +) { let mut errors = Vec::new(); + // Check that no error code explanation was removed. 
+ check_removed_error_code_explanation(ci_info, bad); + // Stage 1: create list let error_codes = extract_error_codes(root_path, &mut errors); if verbose { @@ -68,6 +77,27 @@ pub fn check(root_path: &Path, search_paths: &[&Path], verbose: bool, bad: &mut } } +fn check_removed_error_code_explanation(ci_info: &crate::CiInfo, bad: &mut bool) { + let Some(base_commit) = &ci_info.base_commit else { + eprintln!("Skipping error code explanation removal check"); + return; + }; + let Some(diff) = crate::git_diff(base_commit, "--name-status") else { + *bad = true; + eprintln!("removed error code explanation tidy check: Failed to run git diff"); + return; + }; + if diff.lines().any(|line| { + line.starts_with('D') && line.contains("compiler/rustc_error_codes/src/error_codes/") + }) { + *bad = true; + eprintln!("tidy check error: Error code explanations should never be removed!"); + eprintln!("Take a look at E0001 to see how to handle it."); + return; + } + println!("No error code explanation was removed!"); +} + /// Stage 1: Parses a list of error codes from `error_codes.rs`. 
fn extract_error_codes(root_path: &Path, errors: &mut Vec) -> Vec { let path = root_path.join(Path::new(ERROR_CODES_PATH)); diff --git a/src/tools/tidy/src/issues.txt b/src/tools/tidy/src/issues.txt index b3517b2e9da0..7e27e0258c2f 100644 --- a/src/tools/tidy/src/issues.txt +++ b/src/tools/tidy/src/issues.txt @@ -2070,7 +2070,6 @@ ui/issues/issue-32782.rs ui/issues/issue-32797.rs ui/issues/issue-32805.rs ui/issues/issue-3290.rs -ui/issues/issue-32950.rs ui/issues/issue-32995-2.rs ui/issues/issue-32995.rs ui/issues/issue-33202.rs @@ -2340,7 +2339,6 @@ ui/issues/issue-49934.rs ui/issues/issue-49955.rs ui/issues/issue-49973.rs ui/issues/issue-50187.rs -ui/issues/issue-50403.rs ui/issues/issue-50411.rs ui/issues/issue-50415.rs ui/issues/issue-50442.rs @@ -3921,7 +3919,6 @@ ui/traits/const-traits/issue-79450.rs ui/traits/const-traits/issue-88155.rs ui/traits/const-traits/issue-92111.rs ui/traits/const-traits/issue-92230-wf-super-trait-env.rs -ui/traits/const-traits/specialization/issue-95186-specialize-on-tilde-const.rs ui/traits/const-traits/specialization/issue-95187-same-trait-bound-different-constness.rs ui/traits/issue-103563.rs ui/traits/issue-104322.rs diff --git a/src/tools/tidy/src/lib.rs b/src/tools/tidy/src/lib.rs index e8a12d563358..237737f0f169 100644 --- a/src/tools/tidy/src/lib.rs +++ b/src/tools/tidy/src/lib.rs @@ -3,6 +3,12 @@ //! This library contains the tidy lints and exposes it //! to be used by tools. +use std::ffi::OsStr; +use std::process::Command; + +use build_helper::ci::CiEnv; +use build_helper::git::{GitConfig, get_closest_upstream_commit}; +use build_helper::stage0_parser::{Stage0Config, parse_stage0_file}; use termcolor::WriteColor; macro_rules! 
static_regex { @@ -63,6 +69,61 @@ fn tidy_error(args: &str) -> std::io::Result<()> { Ok(()) } +pub struct CiInfo { + pub git_merge_commit_email: String, + pub nightly_branch: String, + pub base_commit: Option, + pub ci_env: CiEnv, +} + +impl CiInfo { + pub fn new(bad: &mut bool) -> Self { + let stage0 = parse_stage0_file(); + let Stage0Config { nightly_branch, git_merge_commit_email, .. } = stage0.config; + + let mut info = Self { + nightly_branch, + git_merge_commit_email, + ci_env: CiEnv::current(), + base_commit: None, + }; + let base_commit = match get_closest_upstream_commit(None, &info.git_config(), info.ci_env) { + Ok(Some(commit)) => Some(commit), + Ok(None) => { + info.error_if_in_ci("no base commit found", bad); + None + } + Err(error) => { + info.error_if_in_ci(&format!("failed to retrieve base commit: {error}"), bad); + None + } + }; + info.base_commit = base_commit; + info + } + + pub fn git_config(&self) -> GitConfig<'_> { + GitConfig { + nightly_branch: &self.nightly_branch, + git_merge_commit_email: &self.git_merge_commit_email, + } + } + + pub fn error_if_in_ci(&self, msg: &str, bad: &mut bool) { + if self.ci_env.is_running_in_ci() { + *bad = true; + eprintln!("tidy check error: {msg}"); + } else { + eprintln!("tidy check warning: {msg}. 
Some checks will be skipped."); + } + } +} + +pub fn git_diff>(base_commit: &str, extra_arg: S) -> Option { + let output = Command::new("git").arg("diff").arg(base_commit).arg(extra_arg).output().ok()?; + Some(String::from_utf8_lossy(&output.stdout).into()) +} + pub mod alphabetical; pub mod bins; pub mod debug_artifacts; @@ -83,6 +144,7 @@ pub mod pal; pub mod rustdoc_css_themes; pub mod rustdoc_gui_tests; pub mod rustdoc_js; +pub mod rustdoc_json; pub mod rustdoc_templates; pub mod style; pub mod target_policy; diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs index 776f1bde2eb7..ef6ff5c9277a 100644 --- a/src/tools/tidy/src/main.rs +++ b/src/tools/tidy/src/main.rs @@ -48,7 +48,9 @@ fn main() { let extra_checks = cfg_args.iter().find(|s| s.starts_with("--extra-checks=")).map(String::as_str); - let bad = std::sync::Arc::new(AtomicBool::new(false)); + let mut bad = false; + let ci_info = CiInfo::new(&mut bad); + let bad = std::sync::Arc::new(AtomicBool::new(bad)); let drain_handles = |handles: &mut VecDeque>| { // poll all threads for completion before awaiting the oldest one @@ -110,11 +112,12 @@ fn main() { check!(rustdoc_css_themes, &librustdoc_path); check!(rustdoc_templates, &librustdoc_path); check!(rustdoc_js, &librustdoc_path, &tools_path, &src_path); + check!(rustdoc_json, &src_path, &ci_info); check!(known_bug, &crashes_path); check!(unknown_revision, &tests_path); // Checks that only make sense for the compiler. 
- check!(error_codes, &root_path, &[&compiler_path, &librustdoc_path], verbose); + check!(error_codes, &root_path, &[&compiler_path, &librustdoc_path], verbose, &ci_info); check!(fluent_alphabetical, &compiler_path, bless); check!(fluent_period, &compiler_path); check!(target_policy, &root_path); diff --git a/src/tools/tidy/src/rustdoc_js.rs b/src/tools/tidy/src/rustdoc_js.rs index 2517e2de12ce..720f0712ee03 100644 --- a/src/tools/tidy/src/rustdoc_js.rs +++ b/src/tools/tidy/src/rustdoc_js.rs @@ -62,6 +62,9 @@ pub fn check(librustdoc_path: &Path, tools_path: &Path, src_path: &Path, bad: &m return; } }; + // Having the correct `eslint` version installed via `npm` isn't strictly necessary, since we're invoking it via `npx`, + // but this check allows the vast majority that is not working on the rustdoc frontend to avoid the penalty of running + // `eslint` in tidy. See also: https://github.com/rust-lang/rust/pull/142851 match get_eslint_version() { Some(version) => { if version != eslint_version { diff --git a/src/tools/tidy/src/rustdoc_json.rs b/src/tools/tidy/src/rustdoc_json.rs new file mode 100644 index 000000000000..dfbb35d69f17 --- /dev/null +++ b/src/tools/tidy/src/rustdoc_json.rs @@ -0,0 +1,90 @@ +//! Tidy check to ensure that `FORMAT_VERSION` was correctly updated if `rustdoc-json-types` was +//! updated as well. + +use std::path::Path; +use std::str::FromStr; + +const RUSTDOC_JSON_TYPES: &str = "src/rustdoc-json-types"; + +pub fn check(src_path: &Path, ci_info: &crate::CiInfo, bad: &mut bool) { + println!("Checking tidy rustdoc_json..."); + let Some(base_commit) = &ci_info.base_commit else { + eprintln!("No base commit, skipping rustdoc_json check"); + return; + }; + + // First we check that `src/rustdoc-json-types` was modified. 
+ match crate::git_diff(&base_commit, "--name-status") { + Some(output) => { + if !output + .lines() + .any(|line| line.starts_with("M") && line.contains(RUSTDOC_JSON_TYPES)) + { + // `rustdoc-json-types` was not modified so nothing more to check here. + println!("`rustdoc-json-types` was not modified."); + return; + } + } + None => { + *bad = true; + eprintln!("error: failed to run `git diff` in rustdoc_json check"); + return; + } + } + // Then we check that if `FORMAT_VERSION` was updated, the `Latest feature:` was also updated. + match crate::git_diff(&base_commit, src_path.join("rustdoc-json-types")) { + Some(output) => { + let mut format_version_updated = false; + let mut latest_feature_comment_updated = false; + let mut new_version = None; + let mut old_version = None; + for line in output.lines() { + if line.starts_with("+pub const FORMAT_VERSION: u32 =") { + format_version_updated = true; + new_version = line + .split('=') + .nth(1) + .and_then(|s| s.trim().split(';').next()) + .and_then(|s| u32::from_str(s.trim()).ok()); + } else if line.starts_with("-pub const FORMAT_VERSION: u32 =") { + old_version = line + .split('=') + .nth(1) + .and_then(|s| s.trim().split(';').next()) + .and_then(|s| u32::from_str(s.trim()).ok()); + } else if line.starts_with("+// Latest feature:") { + latest_feature_comment_updated = true; + } + } + if format_version_updated != latest_feature_comment_updated { + *bad = true; + if latest_feature_comment_updated { + eprintln!( + "error in `rustdoc_json` tidy check: `Latest feature` comment was updated \ + whereas `FORMAT_VERSION` wasn't in `{RUSTDOC_JSON_TYPES}/lib.rs`" + ); + } else { + eprintln!( + "error in `rustdoc_json` tidy check: `Latest feature` comment was not \ + updated whereas `FORMAT_VERSION` was in `{RUSTDOC_JSON_TYPES}/lib.rs`" + ); + } + } + match (new_version, old_version) { + (Some(new_version), Some(old_version)) if new_version != old_version + 1 => { + *bad = true; + eprintln!( + "error in `rustdoc_json` tidy 
check: invalid `FORMAT_VERSION` increase in \ + `{RUSTDOC_JSON_TYPES}/lib.rs`, should be `{}`, found `{new_version}`", + old_version + 1, + ); + } + _ => {} + } + } + None => { + *bad = true; + eprintln!("error: failed to run `git diff` in rustdoc_json check"); + } + } +} diff --git a/src/tools/tidy/src/ui_tests.rs b/src/tools/tidy/src/ui_tests.rs index 8f9b07c49acb..53226fcb80e6 100644 --- a/src/tools/tidy/src/ui_tests.rs +++ b/src/tools/tidy/src/ui_tests.rs @@ -17,7 +17,7 @@ use ignore::Walk; const ENTRY_LIMIT: u32 = 901; // FIXME: The following limits should be reduced eventually. -const ISSUES_ENTRY_LIMIT: u32 = 1623; +const ISSUES_ENTRY_LIMIT: u32 = 1619; const EXPECTED_TEST_FILE_EXTENSIONS: &[&str] = &[ "rs", // test source files diff --git a/src/tools/wasm-component-ld/Cargo.toml b/src/tools/wasm-component-ld/Cargo.toml index 642d48b9952e..ce718902b29f 100644 --- a/src/tools/wasm-component-ld/Cargo.toml +++ b/src/tools/wasm-component-ld/Cargo.toml @@ -10,4 +10,4 @@ name = "wasm-component-ld" path = "src/main.rs" [dependencies] -wasm-component-ld = "0.5.13" +wasm-component-ld = "0.5.14" diff --git a/src/version b/src/version index 636ea711ad96..82e24bf241e5 100644 --- a/src/version +++ b/src/version @@ -1 +1 @@ -1.89.0 +1.90.0 diff --git a/tests/assembly/cmse.rs b/tests/assembly/cmse.rs index 2984df92225c..a68ee99eac67 100644 --- a/tests/assembly/cmse.rs +++ b/tests/assembly/cmse.rs @@ -6,7 +6,7 @@ //@ [hard] needs-llvm-components: arm //@ [soft] needs-llvm-components: arm #![crate_type = "lib"] -#![feature(abi_c_cmse_nonsecure_call, cmse_nonsecure_entry, no_core, lang_items)] +#![feature(abi_cmse_nonsecure_call, cmse_nonsecure_entry, no_core, lang_items)] #![no_core] extern crate minicore; @@ -53,7 +53,7 @@ use minicore::*; // Branch back to non-secure side // CHECK: bxns lr #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn entry_point() -> i64 { +pub extern "cmse-nonsecure-entry" fn entry_point() -> i64 { 0 } @@ -95,8 +95,6 @@ pub extern 
"C-cmse-nonsecure-entry" fn entry_point() -> i64 { // Call to non-secure // CHECK: blxns r12 #[no_mangle] -pub fn call_nonsecure( - f: unsafe extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u32) -> u64, -) -> u64 { +pub fn call_nonsecure(f: unsafe extern "cmse-nonsecure-call" fn(u32, u32, u32, u32) -> u64) -> u64 { unsafe { f(0, 1, 2, 3) } } diff --git a/tests/assembly/s390x-backchain-toggle.rs b/tests/assembly/s390x-backchain-toggle.rs index 83c7b82d0d4b..9bae15b7d11b 100644 --- a/tests/assembly/s390x-backchain-toggle.rs +++ b/tests/assembly/s390x-backchain-toggle.rs @@ -1,5 +1,5 @@ //@ add-core-stubs -//@ revisions: enable-backchain disable-backchain +//@ revisions: enable-backchain disable-backchain default-backchain //@ assembly-output: emit-asm //@ compile-flags: -Copt-level=3 --crate-type=lib --target=s390x-unknown-linux-gnu //@ needs-llvm-components: systemz @@ -26,6 +26,8 @@ extern "C" fn test_backchain() -> i32 { // enable-backchain: stg [[REG1]], 0(%r15) // disable-backchain: aghi %r15, -160 // disable-backchain-NOT: stg %r{{.*}}, 0(%r15) + // default-backchain: aghi %r15, -160 + // default-backchain-NOT: stg %r{{.*}}, 0(%r15) unsafe { extern_func(); } @@ -35,6 +37,7 @@ extern "C" fn test_backchain() -> i32 { // Make sure that the expected return value is written into %r2 (return register): // enable-backchain-NEXT: lghi %r2, 1 // disable-backchain: lghi %r2, 0 + // default-backchain: lghi %r2, 0 #[cfg(target_feature = "backchain")] { 1 diff --git a/tests/auxiliary/minicore.rs b/tests/auxiliary/minicore.rs index db11549382fc..3e9841b179cb 100644 --- a/tests/auxiliary/minicore.rs +++ b/tests/auxiliary/minicore.rs @@ -16,6 +16,7 @@ #![feature( no_core, + intrinsics, lang_items, auto_traits, freeze_impls, @@ -196,3 +197,9 @@ impl<'a, 'b: 'a, T: PointeeSized + Unsize, U: PointeeSized> CoerceUnsized<&'a trait Drop { fn drop(&mut self); } + +pub mod mem { + #[rustc_nounwind] + #[rustc_intrinsic] + pub unsafe fn transmute(src: Src) -> Dst; +} diff --git 
a/tests/codegen-units/item-collection/drop-glue-noop.rs b/tests/codegen-units/item-collection/drop-glue-noop.rs new file mode 100644 index 000000000000..604ba883bb28 --- /dev/null +++ b/tests/codegen-units/item-collection/drop-glue-noop.rs @@ -0,0 +1,23 @@ +//@ compile-flags:-Clink-dead-code -Zmir-opt-level=0 + +#![deny(dead_code)] +#![crate_type = "lib"] + +//~ MONO_ITEM fn start +#[no_mangle] +pub fn start(_: isize, _: *const *const u8) -> isize { + // No item produced for this, it's a no-op drop and so is removed. + unsafe { + std::ptr::drop_in_place::(&mut 0); + } + + // No choice but to codegen for indirect drop as a function pointer, since we have to produce a + // function with the right signature. In vtables we can avoid that (tested in + // instantiation-through-vtable.rs) because we special case null pointer for drop glue since + // #122662. + // + //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(None) @@ drop_glue_noop-cgu.0[External] + std::ptr::drop_in_place:: as unsafe fn(*mut u64); + + 0 +} diff --git a/tests/codegen-units/item-collection/instantiation-through-vtable.rs b/tests/codegen-units/item-collection/instantiation-through-vtable.rs index 8f13fd558083..7882a526b682 100644 --- a/tests/codegen-units/item-collection/instantiation-through-vtable.rs +++ b/tests/codegen-units/item-collection/instantiation-through-vtable.rs @@ -24,7 +24,6 @@ impl Trait for Struct { pub fn start(_: isize, _: *const *const u8) -> isize { let s1 = Struct { _a: 0u32 }; - //~ MONO_ITEM fn std::ptr::drop_in_place::> - shim(None) @@ instantiation_through_vtable-cgu.0[External] //~ MONO_ITEM fn as Trait>::foo //~ MONO_ITEM fn as Trait>::bar let r1 = &s1 as &Trait; @@ -32,7 +31,6 @@ pub fn start(_: isize, _: *const *const u8) -> isize { r1.bar(); let s1 = Struct { _a: 0u64 }; - //~ MONO_ITEM fn std::ptr::drop_in_place::> - shim(None) @@ instantiation_through_vtable-cgu.0[External] //~ MONO_ITEM fn as Trait>::foo //~ MONO_ITEM fn as Trait>::bar let _ = &s1 as &Trait; diff 
--git a/tests/codegen-units/item-collection/non-generic-closures.rs b/tests/codegen-units/item-collection/non-generic-closures.rs index 124fe7e3b69a..2d9c461e6fd2 100644 --- a/tests/codegen-units/item-collection/non-generic-closures.rs +++ b/tests/codegen-units/item-collection/non-generic-closures.rs @@ -1,4 +1,4 @@ -//@ compile-flags:-Clink-dead-code -Zinline-mir=no +//@ compile-flags:-Clink-dead-code -Zinline-mir=no -O #![deny(dead_code)] #![crate_type = "lib"] @@ -22,9 +22,8 @@ fn assigned_to_variable_but_not_executed() { //~ MONO_ITEM fn assigned_to_variable_executed_indirectly @@ non_generic_closures-cgu.0[External] fn assigned_to_variable_executed_indirectly() { //~ MONO_ITEM fn assigned_to_variable_executed_indirectly::{closure#0} @@ non_generic_closures-cgu.0[External] - //~ MONO_ITEM fn <{closure@TEST_PATH:28:13: 28:21} as std::ops::FnOnce<(i32,)>>::call_once - shim @@ non_generic_closures-cgu.0[External] - //~ MONO_ITEM fn <{closure@TEST_PATH:28:13: 28:21} as std::ops::FnOnce<(i32,)>>::call_once - shim(vtable) @@ non_generic_closures-cgu.0[External] - //~ MONO_ITEM fn std::ptr::drop_in_place::<{closure@TEST_PATH:28:13: 28:21}> - shim(None) @@ non_generic_closures-cgu.0[External] + //~ MONO_ITEM fn <{closure@TEST_PATH:27:13: 27:21} as std::ops::FnOnce<(i32,)>>::call_once - shim @@ non_generic_closures-cgu.0[External] + //~ MONO_ITEM fn <{closure@TEST_PATH:27:13: 27:21} as std::ops::FnOnce<(i32,)>>::call_once - shim(vtable) @@ non_generic_closures-cgu.0[External] let f = |a: i32| { let _ = a + 2; }; @@ -40,6 +39,20 @@ fn assigned_to_variable_executed_directly() { f(4); } +// Make sure we generate mono items for stateful closures that need dropping +//~ MONO_ITEM fn with_drop @@ non_generic_closures-cgu.0[External] +fn with_drop(v: PresentDrop) { + //~ MONO_ITEM fn with_drop::{closure#0} @@ non_generic_closures-cgu.0[External] + //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(Some(PresentDrop)) @@ non_generic_closures-cgu.0[Internal] + //~ MONO_ITEM fn 
std::ptr::drop_in_place::<{closure@TEST_PATH:49:14: 49:24}> - shim(Some({closure@TEST_PATH:49:14: 49:24})) @@ non_generic_closures-cgu.0[Internal] + + let _f = |a: usize| { + let _ = a + 2; + //~ MONO_ITEM fn std::mem::drop:: @@ non_generic_closures-cgu.0[External] + drop(v); + }; +} + //~ MONO_ITEM fn start @@ non_generic_closures-cgu.0[External] #[no_mangle] pub fn start(_: isize, _: *const *const u8) -> isize { @@ -47,6 +60,7 @@ pub fn start(_: isize, _: *const *const u8) -> isize { assigned_to_variable_but_not_executed(); assigned_to_variable_executed_directly(); assigned_to_variable_executed_indirectly(); + with_drop(PresentDrop); 0 } @@ -55,3 +69,10 @@ pub fn start(_: isize, _: *const *const u8) -> isize { fn run_closure(f: &Fn(i32)) { f(3); } + +struct PresentDrop; + +impl Drop for PresentDrop { + //~ MONO_ITEM fn ::drop @@ non_generic_closures-cgu.0[External] + fn drop(&mut self) {} +} diff --git a/tests/codegen-units/item-collection/unsizing.rs b/tests/codegen-units/item-collection/unsizing.rs index 15e42bce2495..b751d2153a94 100644 --- a/tests/codegen-units/item-collection/unsizing.rs +++ b/tests/codegen-units/item-collection/unsizing.rs @@ -1,4 +1,4 @@ -//@ compile-flags:-Zmir-opt-level=0 +//@ compile-flags:-Zmir-opt-level=0 -O #![deny(dead_code)] #![feature(coerce_unsized)] @@ -42,33 +42,47 @@ struct Wrapper(#[allow(dead_code)] *const T); impl, U: ?Sized> CoerceUnsized> for Wrapper {} +struct PresentDrop; + +impl Drop for PresentDrop { + fn drop(&mut self) {} +} + +// Custom Coercion Case +impl Trait for PresentDrop { + fn foo(&self) {} +} + //~ MONO_ITEM fn start #[no_mangle] pub fn start(_: isize, _: *const *const u8) -> isize { // simple case let bool_sized = &true; - //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(None) @@ unsizing-cgu.0[Internal] //~ MONO_ITEM fn ::foo let _bool_unsized = bool_sized as &Trait; let char_sized = &'a'; - //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(None) @@ unsizing-cgu.0[Internal] //~ MONO_ITEM fn ::foo let 
_char_unsized = char_sized as &Trait; // struct field let struct_sized = &Struct { _a: 1, _b: 2, _c: 3.0f64 }; - //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(None) @@ unsizing-cgu.0[Internal] //~ MONO_ITEM fn ::foo let _struct_unsized = struct_sized as &Struct; // custom coercion let wrapper_sized = Wrapper(&0u32); - //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(None) @@ unsizing-cgu.0[Internal] //~ MONO_ITEM fn ::foo let _wrapper_sized = wrapper_sized as Wrapper; + // with drop + let droppable = &PresentDrop; + //~ MONO_ITEM fn ::drop @@ unsizing-cgu.0[Internal] + //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(Some(PresentDrop)) @@ unsizing-cgu.0[Internal] + //~ MONO_ITEM fn ::foo + let droppable = droppable as &dyn Trait; + false.foo(); 0 diff --git a/tests/codegen-units/partitioning/vtable-through-const.rs b/tests/codegen-units/partitioning/vtable-through-const.rs index aad9ccb634b0..7a070728843c 100644 --- a/tests/codegen-units/partitioning/vtable-through-const.rs +++ b/tests/codegen-units/partitioning/vtable-through-const.rs @@ -35,7 +35,6 @@ mod mod1 { } } - //~ MONO_ITEM fn mod1::id:: @@ vtable_through_const-mod1.volatile[Internal] fn id(x: T) -> T { x } @@ -50,8 +49,6 @@ mod mod1 { fn do_something_else(&self) {} } - //~ MONO_ITEM fn ::do_something @@ vtable_through_const-mod1.volatile[External] - //~ MONO_ITEM fn ::do_something_else @@ vtable_through_const-mod1.volatile[External] impl Trait2 for NeedsDrop {} pub trait Trait2Gen { @@ -93,8 +90,6 @@ pub fn main() { // Same as above //~ MONO_ITEM fn >::do_something @@ vtable_through_const-mod1.volatile[External] //~ MONO_ITEM fn >::do_something_else @@ vtable_through_const-mod1.volatile[External] - //~ MONO_ITEM fn >::do_something @@ vtable_through_const-mod1.volatile[External] - //~ MONO_ITEM fn >::do_something_else @@ vtable_through_const-mod1.volatile[External] mod1::TRAIT1_GEN_REF.do_something(0u8); //~ MONO_ITEM fn mod1::id:: @@ vtable_through_const-mod1.volatile[External] diff --git 
a/tests/codegen/abi-x86-interrupt.rs b/tests/codegen/abi-x86-interrupt.rs index 255ccba2c111..9a1ded2c9e37 100644 --- a/tests/codegen/abi-x86-interrupt.rs +++ b/tests/codegen/abi-x86-interrupt.rs @@ -13,8 +13,6 @@ extern crate minicore; use minicore::*; -// CHECK: define x86_intrcc i64 @has_x86_interrupt_abi +// CHECK: define x86_intrcc void @has_x86_interrupt_abi #[no_mangle] -pub extern "x86-interrupt" fn has_x86_interrupt_abi(a: i64) -> i64 { - a -} +pub extern "x86-interrupt" fn has_x86_interrupt_abi() {} diff --git a/tests/codegen/asm/critical.rs b/tests/codegen/asm/critical.rs index 8c039900cab3..0f29d7c69b46 100644 --- a/tests/codegen/asm/critical.rs +++ b/tests/codegen/asm/critical.rs @@ -1,6 +1,5 @@ //@ only-x86_64 //@ compile-flags: -C no-prepopulate-passes -#![feature(asm_goto)] #![feature(asm_goto_with_outputs)] #![crate_type = "lib"] use std::arch::asm; diff --git a/tests/codegen/min-function-alignment.rs b/tests/codegen/min-function-alignment.rs index 75f845572a4a..78989ec5df23 100644 --- a/tests/codegen/min-function-alignment.rs +++ b/tests/codegen/min-function-alignment.rs @@ -1,17 +1,19 @@ //@ revisions: align16 align1024 -//@ compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0 +//@ compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0 -Clink-dead-code //@ [align16] compile-flags: -Zmin-function-alignment=16 //@ [align1024] compile-flags: -Zmin-function-alignment=1024 #![crate_type = "lib"] #![feature(fn_align)] -// functions without explicit alignment use the global minimum +// Functions without explicit alignment use the global minimum. // -// CHECK-LABEL: @no_explicit_align +// NOTE: this function deliberately has zero (0) attributes! That is to make sure that +// `-Zmin-function-alignment` is applied regardless of whether attributes are used. 
+// +// CHECK-LABEL: no_explicit_align // align16: align 16 // align1024: align 1024 -#[no_mangle] pub fn no_explicit_align() {} // CHECK-LABEL: @lower_align diff --git a/tests/codegen/naked-asan.rs b/tests/codegen/naked-asan.rs index 223c41b15bb3..46218cf79d6d 100644 --- a/tests/codegen/naked-asan.rs +++ b/tests/codegen/naked-asan.rs @@ -1,22 +1,28 @@ +//@ add-core-stubs +//@ needs-llvm-components: x86 +//@ compile-flags: --target x86_64-unknown-linux-gnu -Zsanitizer=address -Ctarget-feature=-crt-static + // Make sure we do not request sanitizers for naked functions. -//@ only-x86_64 -//@ needs-sanitizer-address -//@ compile-flags: -Zsanitizer=address -Ctarget-feature=-crt-static - #![crate_type = "lib"] +#![feature(no_core)] #![no_std] +#![no_core] #![feature(abi_x86_interrupt)] +extern crate minicore; +use minicore::*; + +#[no_mangle] pub fn caller() { - page_fault_handler(1, 2); + unsafe { asm!("call {}", sym page_fault_handler) } } -// CHECK: declare x86_intrcc void @page_fault_handler(ptr {{.*}}, i64{{.*}}){{.*}}#[[ATTRS:[0-9]+]] +// CHECK: declare x86_intrcc void @page_fault_handler(){{.*}}#[[ATTRS:[0-9]+]] #[unsafe(naked)] #[no_mangle] -pub extern "x86-interrupt" fn page_fault_handler(_: u64, _: u64) { - core::arch::naked_asm!("ud2") +pub extern "x86-interrupt" fn page_fault_handler() { + naked_asm!("ud2") } // CHECK: #[[ATTRS]] = diff --git a/tests/codegen/sanitizer/cfi/emit-type-metadata-id-itanium-cxx-abi-drop-in-place.rs b/tests/codegen/sanitizer/cfi/emit-type-metadata-id-itanium-cxx-abi-drop-in-place.rs index 2a7eca6fc196..8fec275fd064 100644 --- a/tests/codegen/sanitizer/cfi/emit-type-metadata-id-itanium-cxx-abi-drop-in-place.rs +++ b/tests/codegen/sanitizer/cfi/emit-type-metadata-id-itanium-cxx-abi-drop-in-place.rs @@ -1,5 +1,9 @@ // Verifies that type metadata identifiers for drop functions are emitted correctly. // +// Non needs_drop drop glue isn't codegen'd at all, so we don't try to check the IDs there. 
But we +// do check it's not emitted which should help catch bugs if we do start generating it again in the +// future. +// //@ needs-sanitizer-cfi //@ compile-flags: -Clto -Cno-prepopulate-passes -Copt-level=0 -Zsanitizer=cfi -Ctarget-feature=-crt-static @@ -10,18 +14,18 @@ // CHECK: call i1 @llvm.type.test(ptr {{%.+}}, metadata !"_ZTSFvPu3dynIu{{[0-9]+}}NtNtNtC{{[[:print:]]+}}_4core3ops4drop4Dropu6regionEE") struct EmptyDrop; -// CHECK: define{{.*}}4core3ptr{{[0-9]+}}drop_in_place$LT${{.*}}EmptyDrop$GT${{.*}}!type ![[TYPE1]] !type !{{[0-9]+}} !type !{{[0-9]+}} !type !{{[0-9]+}} +// CHECK-NOT: define{{.*}}4core3ptr{{[0-9]+}}drop_in_place$LT${{.*}}EmptyDrop$GT${{.*}}!type ![[TYPE1]] !type !{{[0-9]+}} !type !{{[0-9]+}} !type !{{[0-9]+}} -struct NonEmptyDrop; +struct PresentDrop; -impl Drop for NonEmptyDrop { +impl Drop for PresentDrop { fn drop(&mut self) {} - // CHECK: define{{.*}}4core3ptr{{[0-9]+}}drop_in_place$LT${{.*}}NonEmptyDrop$GT${{.*}}!type ![[TYPE1]] !type !{{[0-9]+}} !type !{{[0-9]+}} !type !{{[0-9]+}} + // CHECK: define{{.*}}4core3ptr{{[0-9]+}}drop_in_place$LT${{.*}}PresentDrop$GT${{.*}}!type ![[TYPE1]] !type !{{[0-9]+}} !type !{{[0-9]+}} !type !{{[0-9]+}} } pub fn foo() { let _ = Box::new(EmptyDrop) as Box; - let _ = Box::new(NonEmptyDrop) as Box; + let _ = Box::new(PresentDrop) as Box; } // CHECK: ![[TYPE1]] = !{i64 0, !"_ZTSFvPu3dynIu{{[0-9]+}}NtNtNtC{{[[:print:]]+}}_4core3ops4drop4Dropu6regionEE"} diff --git a/tests/codegen/target-feature-negative-implication.rs b/tests/codegen/target-feature-negative-implication.rs new file mode 100644 index 000000000000..36cd82dd8cf5 --- /dev/null +++ b/tests/codegen/target-feature-negative-implication.rs @@ -0,0 +1,20 @@ +//@ add-core-stubs +//@ needs-llvm-components: x86 +//@ compile-flags: --target=x86_64-unknown-linux-gnu +//@ compile-flags: -Ctarget-feature=-avx2 + +#![feature(no_core, lang_items)] +#![crate_type = "lib"] +#![no_core] + +extern crate minicore; +use minicore::*; + +#[no_mangle] +pub unsafe fn 
banana() { + // CHECK-LABEL: @banana() + // CHECK-SAME: [[BANANAATTRS:#[0-9]+]] { +} + +// CHECK: attributes [[BANANAATTRS]] +// CHECK-SAME: -avx512 diff --git a/tests/codegen/target-feature-overrides.rs b/tests/codegen/target-feature-overrides.rs index 0fc1e0136b3f..eb19b0de2fa8 100644 --- a/tests/codegen/target-feature-overrides.rs +++ b/tests/codegen/target-feature-overrides.rs @@ -1,3 +1,4 @@ +// ignore-tidy-linelength //@ add-core-stubs //@ revisions: COMPAT INCOMPAT //@ needs-llvm-components: x86 @@ -39,7 +40,7 @@ pub unsafe fn banana() -> u32 { // CHECK: attributes [[APPLEATTRS]] // COMPAT-SAME: "target-features"="+avx,+avx2,{{.*}}" -// INCOMPAT-SAME: "target-features"="-avx2,-avx,+avx,{{.*}}" +// INCOMPAT-SAME: "target-features"="{{(-[^,]+,)*}}-avx2{{(,-[^,]+)*}},-avx{{(,-[^,]+)*}},+avx{{(,\+[^,]+)*}}" // CHECK: attributes [[BANANAATTRS]] // COMPAT-SAME: "target-features"="+avx,+avx2,{{.*}}" -// INCOMPAT-SAME: "target-features"="-avx2,-avx" +// INCOMPAT-SAME: "target-features"="{{(-[^,]+,)*}}-avx2{{(,-[^,]+)*}},-avx{{(,-[^,]+)*}}" diff --git a/tests/codegen/tied-features-strength.rs b/tests/codegen/tied-features-strength.rs index 6be0e21e0ef3..81499c070d19 100644 --- a/tests/codegen/tied-features-strength.rs +++ b/tests/codegen/tied-features-strength.rs @@ -4,14 +4,23 @@ //@ compile-flags: --crate-type=rlib --target=aarch64-unknown-linux-gnu //@ needs-llvm-components: aarch64 +// Rust made SVE require neon. //@ [ENABLE_SVE] compile-flags: -C target-feature=+sve -Copt-level=0 -// ENABLE_SVE: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)|(\+sve,?)|(\+neon,?)|(\+fp-armv8,?))*}}" } +// ENABLE_SVE: attributes #0 +// ENABLE_SVE-SAME: +neon +// ENABLE_SVE-SAME: +sve +// However, disabling SVE does not disable neon. 
//@ [DISABLE_SVE] compile-flags: -C target-feature=-sve -Copt-level=0 -// DISABLE_SVE: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)|(-sve,?)|(\+neon,?))*}}" } +// DISABLE_SVE: attributes #0 +// DISABLE_SVE-NOT: -neon +// DISABLE_SVE-SAME: -sve +// OTOH, neon fn `fp-armv8` are fully tied; toggling neon must toggle `fp-armv8` the same way. //@ [DISABLE_NEON] compile-flags: -C target-feature=-neon -Copt-level=0 -// DISABLE_NEON: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)|(-fp-armv8,?)|(-neon,?))*}}" } +// DISABLE_NEON: attributes #0 +// DISABLE_NEON-SAME: -neon +// DISABLE_NEON-SAME: -fp-armv8 //@ [ENABLE_NEON] compile-flags: -C target-feature=+neon -Copt-level=0 // ENABLE_NEON: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)|(\+fp-armv8,?)|(\+neon,?))*}}" } diff --git a/tests/codegen/transmute-scalar.rs b/tests/codegen/transmute-scalar.rs index c080259a9172..c57ade58c30e 100644 --- a/tests/codegen/transmute-scalar.rs +++ b/tests/codegen/transmute-scalar.rs @@ -55,3 +55,48 @@ pub fn ptr_to_int(p: *mut u16) -> usize { pub fn int_to_ptr(i: usize) -> *mut u16 { unsafe { std::mem::transmute(i) } } + +// This is the one case where signedness matters to transmuting: +// the LLVM type is `i8` here because of `repr(i8)`, +// whereas below with the `repr(u8)` it's `i1` in LLVM instead. 
+#[repr(i8)] +pub enum FakeBoolSigned { + False = 0, + True = 1, +} + +// CHECK-LABEL: define{{.*}}i8 @bool_to_fake_bool_signed(i1 zeroext %b) +// CHECK: %_0 = zext i1 %b to i8 +// CHECK-NEXT: ret i8 %_0 +#[no_mangle] +pub fn bool_to_fake_bool_signed(b: bool) -> FakeBoolSigned { + unsafe { std::mem::transmute(b) } +} + +// CHECK-LABEL: define{{.*}}i1 @fake_bool_signed_to_bool(i8 %b) +// CHECK: %_0 = trunc nuw i8 %b to i1 +// CHECK-NEXT: ret i1 %_0 +#[no_mangle] +pub fn fake_bool_signed_to_bool(b: FakeBoolSigned) -> bool { + unsafe { std::mem::transmute(b) } +} + +#[repr(u8)] +pub enum FakeBoolUnsigned { + False = 0, + True = 1, +} + +// CHECK-LABEL: define{{.*}}i1 @bool_to_fake_bool_unsigned(i1 zeroext %b) +// CHECK: ret i1 %b +#[no_mangle] +pub fn bool_to_fake_bool_unsigned(b: bool) -> FakeBoolUnsigned { + unsafe { std::mem::transmute(b) } +} + +// CHECK-LABEL: define{{.*}}i1 @fake_bool_unsigned_to_bool(i1 zeroext %b) +// CHECK: ret i1 %b +#[no_mangle] +pub fn fake_bool_unsigned_to_bool(b: FakeBoolUnsigned) -> bool { + unsafe { std::mem::transmute(b) } +} diff --git a/tests/crashes/126269.rs b/tests/crashes/126269.rs deleted file mode 100644 index ca4b76eb930d..000000000000 --- a/tests/crashes/126269.rs +++ /dev/null @@ -1,12 +0,0 @@ -//@ known-bug: rust-lang/rust#126269 -#![feature(coerce_unsized)] - -pub enum Foo { - Bar([T; usize::MAX]), -} - -use std::ops::CoerceUnsized; - -impl CoerceUnsized for T {} - -fn main() {} diff --git a/tests/crashes/126982.rs b/tests/crashes/126982.rs deleted file mode 100644 index 8522d9415eb8..000000000000 --- a/tests/crashes/126982.rs +++ /dev/null @@ -1,18 +0,0 @@ -//@ known-bug: rust-lang/rust#126982 - -#![feature(coerce_unsized)] -use std::ops::CoerceUnsized; - -struct Foo { - a: T, -} - -impl CoerceUnsized for Foo {} - -union U { - a: usize, -} - -const C: U = Foo { a: 10 }; - -fn main() {} diff --git a/tests/crashes/130104.rs b/tests/crashes/130104.rs index 0ffc21ad3604..b961108c9233 100644 --- a/tests/crashes/130104.rs +++ 
b/tests/crashes/130104.rs @@ -2,5 +2,5 @@ fn main() { let non_secure_function = - core::mem::transmute:: _, extern "C-cmse-nonsecure-call" fn() -> _>; + core::mem::transmute:: _, extern "cmse-nonsecure-call" fn() -> _>; } diff --git a/tests/crashes/131048.rs b/tests/crashes/131048.rs deleted file mode 100644 index d57e9921a8ab..000000000000 --- a/tests/crashes/131048.rs +++ /dev/null @@ -1,7 +0,0 @@ -//@ known-bug: #131048 - -impl std::ops::CoerceUnsized for A {} - -fn main() { - format_args!("Hello, world!"); -} diff --git a/tests/crashes/132142.rs b/tests/crashes/132142.rs index 9a026f3bca71..813bf0bf0a8e 100644 --- a/tests/crashes/132142.rs +++ b/tests/crashes/132142.rs @@ -1,3 +1,3 @@ //@ known-bug: #132142 -async extern "C-cmse-nonsecure-entry" fn fun(...) {} +async extern "cmse-nonsecure-entry" fn fun(...) {} diff --git a/tests/crashes/132430.rs b/tests/crashes/132430.rs deleted file mode 100644 index 81c8c6d6f7d3..000000000000 --- a/tests/crashes/132430.rs +++ /dev/null @@ -1,10 +0,0 @@ -//@ known-bug: #132430 - -//@ compile-flags: --crate-type=lib -//@ edition: 2018 -#![feature(cmse_nonsecure_entry)] -struct Test; - -impl Test { - pub async unsafe extern "C-cmse-nonsecure-entry" fn test(val: &str) {} -} diff --git a/tests/crashes/133808.rs b/tests/crashes/133808.rs deleted file mode 100644 index 9c6a23d1e35b..000000000000 --- a/tests/crashes/133808.rs +++ /dev/null @@ -1,15 +0,0 @@ -//@ known-bug: #133808 - -#![feature(generic_const_exprs, transmutability)] - -mod assert { - use std::mem::TransmuteFrom; - - pub fn is_transmutable() - where - Dst: TransmuteFrom, - { - } -} - -pub fn main() {} diff --git a/tests/crashes/134217.rs b/tests/crashes/134217.rs deleted file mode 100644 index 1b14c660e8b4..000000000000 --- a/tests/crashes/134217.rs +++ /dev/null @@ -1,9 +0,0 @@ -//@ known-bug: #134217 - -impl std::ops::CoerceUnsized for A {} - -fn main() { - if let _ = true - && true - {} -} diff --git a/tests/crashes/138265.rs b/tests/crashes/138265.rs deleted file 
mode 100644 index f6c8ea748895..000000000000 --- a/tests/crashes/138265.rs +++ /dev/null @@ -1,12 +0,0 @@ -//@ known-bug: #138265 - -#![feature(coerce_unsized)] -#![crate_type = "lib"] -impl std::ops::CoerceUnsized for A {} -pub fn f() { - [0; { - let mut c = &0; - c = &0; - 0 - }] -} diff --git a/tests/crashes/138738.rs b/tests/crashes/138738.rs deleted file mode 100644 index 74e5effa56f5..000000000000 --- a/tests/crashes/138738.rs +++ /dev/null @@ -1,7 +0,0 @@ -//@ known-bug: #138738 -//@ only-x86_64 - -#![feature(abi_ptx)] -fn main() { - let a = unsafe { core::mem::transmute::(4) }(2); -} diff --git a/tests/crashes/139905.rs b/tests/crashes/139905.rs deleted file mode 100644 index 7da622aaabac..000000000000 --- a/tests/crashes/139905.rs +++ /dev/null @@ -1,6 +0,0 @@ -//@ known-bug: #139905 -trait a {} -impl a<{}> for () {} -trait c {} -impl c for () where (): a {} -impl c for () {} diff --git a/tests/crashes/140333.rs b/tests/crashes/140333.rs deleted file mode 100644 index cec1100e6ada..000000000000 --- a/tests/crashes/140333.rs +++ /dev/null @@ -1,9 +0,0 @@ -//@ known-bug: #140333 -fn a() -> impl b< - [c; { - struct d { - #[a] - bar: e, - } - }], ->; diff --git a/tests/incremental/issue-61323.rs b/tests/incremental/issue-61323.rs index b7423c81fc16..4845648d49c8 100644 --- a/tests/incremental/issue-61323.rs +++ b/tests/incremental/issue-61323.rs @@ -1,7 +1,7 @@ //@ revisions: rpass cfail enum A { - //[cfail]~^ ERROR 3:1: 3:7: recursive types `A` and `C` have infinite size [E0072] + //[cfail]~^ ERROR recursive types `A` and `C` have infinite size [E0072] B(C), } diff --git a/tests/incremental/track-deps-in-new-solver.rs b/tests/incremental/track-deps-in-new-solver.rs index fb013b2b24a7..51cd6b89e37e 100644 --- a/tests/incremental/track-deps-in-new-solver.rs +++ b/tests/incremental/track-deps-in-new-solver.rs @@ -3,6 +3,8 @@ //@ compile-flags: -Znext-solver //@ check-pass +#![allow(dead_code)] + pub trait Future { type Error; fn poll() -> Self::Error; diff --git 
a/tests/mir-opt/copy-prop/write_to_borrowed.main.CopyProp.diff b/tests/mir-opt/copy-prop/write_to_borrowed.main.CopyProp.diff new file mode 100644 index 000000000000..eab06b1ba1e7 --- /dev/null +++ b/tests/mir-opt/copy-prop/write_to_borrowed.main.CopyProp.diff @@ -0,0 +1,30 @@ +- // MIR for `main` before CopyProp ++ // MIR for `main` after CopyProp + + fn main() -> () { + let mut _0: (); + let mut _1: *const char; + let mut _2: char; + let mut _3: char; + let mut _4: char; + let mut _5: char; + let mut _6: &char; + let mut _7: (); + + bb0: { + _1 = &raw const _2; + _3 = const 'b'; + _5 = copy _3; + _6 = &_3; +- _4 = copy _5; + (*_1) = copy (*_6); + _6 = &_5; +- _7 = dump_var::(copy _4) -> [return: bb1, unwind unreachable]; ++ _7 = dump_var::(copy _5) -> [return: bb1, unwind unreachable]; + } + + bb1: { + return; + } + } + diff --git a/tests/mir-opt/copy-prop/write_to_borrowed.rs b/tests/mir-opt/copy-prop/write_to_borrowed.rs new file mode 100644 index 000000000000..58809749103e --- /dev/null +++ b/tests/mir-opt/copy-prop/write_to_borrowed.rs @@ -0,0 +1,45 @@ +//@ test-mir-pass: CopyProp + +#![feature(custom_mir, core_intrinsics)] +#![allow(internal_features)] + +use std::intrinsics::mir::*; + +#[custom_mir(dialect = "runtime")] +fn main() { + mir! { + // Both _3 and _5 are borrowed, check that we do not unify them, and that we do not + // introduce a write to any of them. 
+ let _1; + let _2; + let _3; + let _4; + let _5; + let _6; + let _7; + // CHECK: bb0: { + { + // CHECK-NEXT: _1 = &raw const _2; + _1 = core::ptr::addr_of!(_2); + // CHECK-NEXT: _3 = const 'b'; + _3 = 'b'; + // CHECK-NEXT: _5 = copy _3; + _5 = _3; + // CHECK-NEXT: _6 = &_3; + _6 = &_3; + // CHECK-NOT: {{_.*}} = {{_.*}}; + _4 = _5; + // CHECK-NEXT: (*_1) = copy (*_6); + *_1 = *_6; + // CHECK-NEXT: _6 = &_5; + _6 = &_5; + // CHECK-NEXT: _7 = dump_var::(copy _5) + Call(_7 = dump_var(_4), ReturnTo(bb1), UnwindUnreachable()) + } + bb1 = { Return() } + } +} + +fn dump_var(_: T) {} + +// EMIT_MIR write_to_borrowed.main.CopyProp.diff diff --git a/tests/pretty/pin-ergonomics-hir.pp b/tests/pretty/pin-ergonomics-hir.pp new file mode 100644 index 000000000000..212e0e174dae --- /dev/null +++ b/tests/pretty/pin-ergonomics-hir.pp @@ -0,0 +1,44 @@ +//@ pretty-compare-only +//@ pretty-mode:hir +//@ pp-exact:pin-ergonomics-hir.pp + +#![feature(pin_ergonomics)] +#![allow(dead_code, incomplete_features)] +#[prelude_import] +use ::std::prelude::rust_2015::*; +#[macro_use] +extern crate std; + +use std::pin::Pin; + +struct Foo; + +impl Foo { + fn baz(&mut self) { } + + fn baz_const(&self) { } + + fn baz_lt<'a>(&mut self) { } + + fn baz_const_lt(&self) { } +} + +fn foo(_: Pin<&'_ mut Foo>) { } +fn foo_lt<'a>(_: Pin<&'a mut Foo>) { } + +fn foo_const(_: Pin<&'_ Foo>) { } +fn foo_const_lt(_: Pin<&'_ Foo>) { } + +fn bar() { + let mut x: Pin<&mut _> = &pin mut Foo; + foo(x.as_mut()); + foo(x.as_mut()); + foo_const(x); + + let x: Pin<&_> = &pin const Foo; + + foo_const(x); + foo_const(x); +} + +fn main() { } diff --git a/tests/pretty/pin-ergonomics-hir.rs b/tests/pretty/pin-ergonomics-hir.rs new file mode 100644 index 000000000000..5f2158258f07 --- /dev/null +++ b/tests/pretty/pin-ergonomics-hir.rs @@ -0,0 +1,40 @@ +//@ pretty-compare-only +//@ pretty-mode:hir +//@ pp-exact:pin-ergonomics-hir.pp + +#![feature(pin_ergonomics)] +#![allow(dead_code, incomplete_features)] + +use std::pin::Pin; + 
+struct Foo; + +impl Foo { + fn baz(&mut self) { } + + fn baz_const(&self) { } + + fn baz_lt<'a>(&mut self) { } + + fn baz_const_lt(&self) { } +} + +fn foo(_: Pin<&'_ mut Foo>) { } +fn foo_lt<'a>(_: Pin<&'a mut Foo>) { } + +fn foo_const(_: Pin<&'_ Foo>) { } +fn foo_const_lt(_: Pin<&'_ Foo>) { } + +fn bar() { + let mut x: Pin<&mut _> = &pin mut Foo; + foo(x.as_mut()); + foo(x.as_mut()); + foo_const(x); + + let x: Pin<&_> = &pin const Foo; + + foo_const(x); + foo_const(x); +} + +fn main() { } diff --git a/tests/pretty/pin-ergonomics.rs b/tests/pretty/pin-ergonomics.rs index 47ffc97b1183..8e8ced791b13 100644 --- a/tests/pretty/pin-ergonomics.rs +++ b/tests/pretty/pin-ergonomics.rs @@ -3,6 +3,8 @@ #![feature(pin_ergonomics)] #![allow(dead_code, incomplete_features)] +use std::pin::Pin; + struct Foo; impl Foo { @@ -21,4 +23,15 @@ fn foo_lt<'a>(_: &'a pin mut Foo) {} fn foo_const(_: &pin const Foo) {} fn foo_const_lt(_: &'_ pin const Foo) {} +fn bar() { + let mut x: Pin<&mut _> = &pin mut Foo; + foo(x.as_mut()); + foo(x.as_mut()); + foo_const(x); + + let x: Pin<&_> = &pin const Foo; + foo_const(x); + foo_const(x); +} + fn main() {} diff --git a/tests/run-make/arm64ec-import-export-static/export.rs b/tests/run-make/arm64ec-import-export-static/export.rs new file mode 100644 index 000000000000..ca6ccf00ca17 --- /dev/null +++ b/tests/run-make/arm64ec-import-export-static/export.rs @@ -0,0 +1,27 @@ +#![crate_type = "dylib"] +#![allow(internal_features)] +#![feature(no_core, lang_items)] +#![no_core] +#![no_std] + +// This is needed because of #![no_core]: +#[lang = "pointee_sized"] +pub trait PointeeSized {} +#[lang = "meta_sized"] +pub trait MetaSized: PointeeSized {} +#[lang = "sized"] +pub trait Sized: MetaSized {} +#[lang = "sync"] +trait Sync {} +impl Sync for i32 {} +#[lang = "copy"] +pub trait Copy {} +impl Copy for i32 {} +#[lang = "drop_in_place"] +pub unsafe fn drop_in_place(_: *mut T) {} +#[no_mangle] +extern "system" fn _DllMainCRTStartup(_: *const u8, _: u32, _: 
*const u8) -> u32 { + 1 +} + +pub static VALUE: i32 = 42; diff --git a/tests/run-make/arm64ec-import-export-static/import.rs b/tests/run-make/arm64ec-import-export-static/import.rs new file mode 100644 index 000000000000..9d52db251250 --- /dev/null +++ b/tests/run-make/arm64ec-import-export-static/import.rs @@ -0,0 +1,12 @@ +#![crate_type = "cdylib"] +#![allow(internal_features)] +#![feature(no_core)] +#![no_std] +#![no_core] + +extern crate export; + +#[no_mangle] +pub extern "C" fn func() -> i32 { + export::VALUE +} diff --git a/tests/run-make/arm64ec-import-export-static/rmake.rs b/tests/run-make/arm64ec-import-export-static/rmake.rs new file mode 100644 index 000000000000..7fa31144810d --- /dev/null +++ b/tests/run-make/arm64ec-import-export-static/rmake.rs @@ -0,0 +1,15 @@ +// Test that a static can be exported from one crate and imported into another. +// +// This was broken for Arm64EC as only functions, not variables, should be +// decorated with `#`. +// See https://github.com/rust-lang/rust/issues/138541 + +//@ needs-llvm-components: aarch64 +//@ only-windows + +use run_make_support::rustc; + +fn main() { + rustc().input("export.rs").target("aarch64-pc-windows-msvc").panic("abort").run(); + rustc().input("import.rs").target("aarch64-pc-windows-msvc").panic("abort").run(); +} diff --git a/tests/run-make/bin-emit-no-symbols/app.rs b/tests/run-make/bin-emit-no-symbols/app.rs index e9dc1e9744fb..ad74fcc43dce 100644 --- a/tests/run-make/bin-emit-no-symbols/app.rs +++ b/tests/run-make/bin-emit-no-symbols/app.rs @@ -12,7 +12,15 @@ fn panic(_: &PanicInfo) -> ! { } #[lang = "eh_personality"] -fn eh() {} +fn eh( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { + loop {} +} #[alloc_error_handler] fn oom(_: Layout) -> ! 
{ diff --git a/tests/run-make/c-link-to-rust-va-list-fn/rmake.rs b/tests/run-make/c-link-to-rust-va-list-fn/rmake.rs index 63904bea6227..cca528c42526 100644 --- a/tests/run-make/c-link-to-rust-va-list-fn/rmake.rs +++ b/tests/run-make/c-link-to-rust-va-list-fn/rmake.rs @@ -3,7 +3,9 @@ // prevent the creation of a functional binary. // See https://github.com/rust-lang/rust/pull/49878 -//@ ignore-cross-compile +//@ needs-target-std +//@ ignore-android: FIXME(#142855) +//@ ignore-sgx: (x86 machine code cannot be directly executed) use run_make_support::{cc, extra_c_flags, run, rustc, static_lib_name}; diff --git a/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-disabled.stderr b/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-disabled.stderr index 82f57864d859..be3de5809832 100644 --- a/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-disabled.stderr +++ b/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-disabled.stderr @@ -1,10 +1,10 @@ -error: `~const` is not allowed here +error: `[const]` is not allowed here --> const-super-trait.rs:7:12 | LL | trait Bar: ~const Foo {} | ^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> const-super-trait.rs:7:1 | LL | trait Bar: ~const Foo {} @@ -30,7 +30,7 @@ LL | const fn foo(x: &T) { = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:7:12 | LL | trait Bar: ~const Foo {} @@ -41,7 +41,7 @@ help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[ LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be 
applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:9:17 | LL | const fn foo(x: &T) { diff --git a/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-enabled.stderr b/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-enabled.stderr index 8f4c78ccfa4c..ef764a62b066 100644 --- a/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-enabled.stderr +++ b/tests/run-make/const-trait-stable-toolchain/const-super-trait-nightly-enabled.stderr @@ -1,16 +1,16 @@ -error: `~const` is not allowed here +error: `[const]` is not allowed here --> const-super-trait.rs:7:12 | LL | trait Bar: ~const Foo {} | ^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> const-super-trait.rs:7:1 | LL | trait Bar: ~const Foo {} | ^^^^^^^^^^^^^^^^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:7:12 | LL | trait Bar: ~const Foo {} @@ -21,7 +21,7 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:9:17 | LL | const fn foo(x: &T) { diff --git a/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-disabled.stderr b/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-disabled.stderr index b7cd7097f444..a23793580f7a 100644 --- a/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-disabled.stderr +++ b/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-disabled.stderr @@ -1,10 +1,10 @@ -error: `~const` is not allowed here +error: `[const]` 
is not allowed here --> const-super-trait.rs:7:12 | 7 | trait Bar: ~const Foo {} | ^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> const-super-trait.rs:7:1 | 7 | trait Bar: ~const Foo {} @@ -26,25 +26,25 @@ error[E0658]: const trait impls are experimental | = note: see issue #67792 for more information -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:7:12 | 7 | trait Bar: ~const Foo {} | ^^^^^^ can't be applied to `Foo` | -note: `Foo` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Foo` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> const-super-trait.rs:3:1 | 3 | trait Foo { | ^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:9:17 | 9 | const fn foo(x: &T) { | ^^^^^^ can't be applied to `Bar` | -note: `Bar` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Bar` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> const-super-trait.rs:7:1 | 7 | trait Bar: ~const Foo {} diff --git a/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-enabled.stderr b/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-enabled.stderr index 4c59d870671d..2cdeb277ca4a 100644 --- a/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-enabled.stderr +++ b/tests/run-make/const-trait-stable-toolchain/const-super-trait-stable-enabled.stderr @@ -1,10 +1,10 @@ -error: `~const` is not allowed here +error: `[const]` is not allowed here --> const-super-trait.rs:7:12 | 7 | trait Bar: ~const Foo {} | ^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot 
have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> const-super-trait.rs:7:1 | 7 | trait Bar: ~const Foo {} @@ -16,25 +16,25 @@ error[E0554]: `#![feature]` may not be used on the NIGHTLY release channel 1 | #![cfg_attr(feature_enabled, feature(const_trait_impl))] | ^^^^^^^^^^^^^^^^^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:7:12 | 7 | trait Bar: ~const Foo {} | ^^^^^^ can't be applied to `Foo` | -note: `Foo` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Foo` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> const-super-trait.rs:3:1 | 3 | trait Foo { | ^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits +error: `[const]` can only be applied to `#[const_trait]` traits --> const-super-trait.rs:9:17 | 9 | const fn foo(x: &T) { | ^^^^^^ can't be applied to `Bar` | -note: `Bar` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Bar` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> const-super-trait.rs:7:1 | 7 | trait Bar: ~const Foo {} diff --git a/tests/run-make/crate-circular-deps-link/a.rs b/tests/run-make/crate-circular-deps-link/a.rs index a54f429550e7..6deb449d873a 100644 --- a/tests/run-make/crate-circular-deps-link/a.rs +++ b/tests/run-make/crate-circular-deps-link/a.rs @@ -1,6 +1,7 @@ #![crate_type = "rlib"] #![feature(lang_items)] #![feature(panic_unwind)] +#![feature(rustc_attrs)] #![no_std] extern crate panic_unwind; @@ -10,17 +11,23 @@ pub fn panic_handler(_: &core::panic::PanicInfo) -> ! { loop {} } -#[no_mangle] +#[rustc_std_internal_symbol] extern "C" fn __rust_drop_panic() -> ! { loop {} } -#[no_mangle] +#[rustc_std_internal_symbol] extern "C" fn __rust_foreign_exception() -> ! 
{ loop {} } #[lang = "eh_personality"] -fn eh_personality() { +fn eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { loop {} } diff --git a/tests/run-make/fmt-write-bloat/rmake.rs b/tests/run-make/fmt-write-bloat/rmake.rs index 6875ef9ddc05..3348651d501f 100644 --- a/tests/run-make/fmt-write-bloat/rmake.rs +++ b/tests/run-make/fmt-write-bloat/rmake.rs @@ -15,14 +15,9 @@ //! `NO_DEBUG_ASSERTIONS=1`). If debug assertions are disabled, then we can check for the absence of //! additional `usize` formatting and padding related symbols. -//@ ignore-windows -// Reason: -// - MSVC targets really need to parse the .pdb file (aka the debug information). -// On Windows there's an API for that (dbghelp) which maybe we can use -// - MinGW targets have a lot of symbols included in their runtime which we can't avoid. -// We would need to make the symbols we're looking for more specific for this test to work. //@ ignore-cross-compile +use run_make_support::artifact_names::bin_name; use run_make_support::env::no_debug_assertions; use run_make_support::rustc; use run_make_support::symbols::any_symbol_contains; @@ -36,5 +31,5 @@ fn main() { // otherwise, add them to the list of symbols to deny. 
panic_syms.extend_from_slice(&["panicking", "panic_fmt", "pad_integral", "Display"]); } - assert!(!any_symbol_contains("main", &panic_syms)); + assert!(!any_symbol_contains(bin_name("main"), &panic_syms)); } diff --git a/tests/run-make/linker-warning/rmake.rs b/tests/run-make/linker-warning/rmake.rs index 344b880faab9..26d03fd2d70e 100644 --- a/tests/run-make/linker-warning/rmake.rs +++ b/tests/run-make/linker-warning/rmake.rs @@ -57,7 +57,8 @@ fn main() { diff() .expected_file("short-error.txt") .actual_text("(linker error)", out.stderr()) - .normalize(r#"/rustc[^/]*/"#, "/rustc/") + .normalize(r#"/rustc[^/_-]*/"#, "/rustc/") + .normalize("libpanic_abort", "libpanic_unwind") .normalize( regex::escape(run_make_support::build_root().to_str().unwrap()), "/build-root", diff --git a/tests/run-make/no-alloc-shim/foo.rs b/tests/run-make/no-alloc-shim/foo.rs index b5d0d394d2b9..a22307f41b39 100644 --- a/tests/run-make/no-alloc-shim/foo.rs +++ b/tests/run-make/no-alloc-shim/foo.rs @@ -12,7 +12,13 @@ fn panic_handler(_: &core::panic::PanicInfo) -> ! { } #[no_mangle] -extern "C" fn rust_eh_personality() { +extern "C" fn rust_eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { loop {} } diff --git a/tests/run-make/reproducible-build-2/rmake.rs b/tests/run-make/reproducible-build-2/rmake.rs index 5971fa01f920..1de5ca1e6f7f 100644 --- a/tests/run-make/reproducible-build-2/rmake.rs +++ b/tests/run-make/reproducible-build-2/rmake.rs @@ -7,22 +7,36 @@ // See https://github.com/rust-lang/rust/issues/34902 //@ ignore-cross-compile -//@ ignore-windows -// Reasons: -// 1. The object files are reproducible, but their paths are not, which causes -// the first assertion in the test to fail. -// 2. When the sysroot gets copied, some symlinks must be re-created, -// which is a privileged action on Windows. 
-use run_make_support::{rfs, rust_lib_name, rustc}; +//@ ignore-windows-gnu +// GNU Linker for Windows is non-deterministic. + +use run_make_support::{bin_name, is_windows_msvc, rfs, rust_lib_name, rustc}; fn main() { // test 1: fat lto rustc().input("reproducible-build-aux.rs").run(); - rustc().input("reproducible-build.rs").arg("-Clto=fat").output("reproducible-build").run(); - rfs::rename("reproducible-build", "reproducible-build-a"); - rustc().input("reproducible-build.rs").arg("-Clto=fat").output("reproducible-build").run(); - assert_eq!(rfs::read("reproducible-build"), rfs::read("reproducible-build-a")); + let make_reproducible_build = || { + let mut reproducible_build = rustc(); + reproducible_build + .input("reproducible-build.rs") + .arg("-Clto=fat") + .output(bin_name("reproducible-build")); + if is_windows_msvc() { + // Avoids timestamps, etc. when linking. + reproducible_build.arg("-Clink-arg=/Brepro"); + } + reproducible_build.run(); + }; + make_reproducible_build(); + rfs::rename(bin_name("reproducible-build"), "reproducible-build-a"); + if is_windows_msvc() { + // Linker acts differently if there is already a PDB file with the same + // name. 
+ rfs::remove_file("reproducible-build.pdb"); + } + make_reproducible_build(); + assert_eq!(rfs::read(bin_name("reproducible-build")), rfs::read("reproducible-build-a")); // test 2: sysroot let sysroot = rustc().print("sysroot").run().stdout_utf8(); diff --git a/tests/run-make/sanitizer-dylib-link/program.rs b/tests/run-make/sanitizer-dylib-link/program.rs index 1026c7f89ba9..dbf885d343fe 100644 --- a/tests/run-make/sanitizer-dylib-link/program.rs +++ b/tests/run-make/sanitizer-dylib-link/program.rs @@ -1,4 +1,4 @@ -#[cfg_attr(windows, link(name = "library.dll.lib", modifiers = "+verbatim"))] +#[cfg_attr(windows, link(name = "library", kind = "raw-dylib"))] #[cfg_attr(not(windows), link(name = "library"))] extern "C" { fn overflow(); diff --git a/tests/run-make/short-ice/rmake.rs b/tests/run-make/short-ice/rmake.rs index 8377954f4672..483def62fc79 100644 --- a/tests/run-make/short-ice/rmake.rs +++ b/tests/run-make/short-ice/rmake.rs @@ -5,8 +5,9 @@ // See https://github.com/rust-lang/rust/issues/107910 //@ needs-target-std -//@ ignore-windows -// Reason: the assert_eq! on line 32 fails, as error output on Windows is different. +//@ ignore-i686-pc-windows-msvc +// Reason: the assert_eq! on line 37 fails, almost seems like it missing debug info? +// Haven't been able to reproduce locally, but it happens on CI. 
use run_make_support::rustc; @@ -29,10 +30,16 @@ fn main() { let rustc_query_count_full = count_lines_with(rust_test_log_2, "rustc_query_"); - assert!(rust_test_log_1.lines().count() < rust_test_log_2.lines().count()); + assert!( + rust_test_log_1.lines().count() < rust_test_log_2.lines().count(), + "Short backtrace should be shorter than full backtrace.\nShort backtrace:\n\ + {rust_test_log_1}\nFull backtrace:\n{rust_test_log_2}" + ); assert_eq!( count_lines_with(rust_test_log_2, "__rust_begin_short_backtrace"), - count_lines_with(rust_test_log_2, "__rust_end_short_backtrace") + count_lines_with(rust_test_log_2, "__rust_end_short_backtrace"), + "Full backtrace should contain the short backtrace markers.\nFull backtrace:\n\ + {rust_test_log_2}" ); assert!(count_lines_with(rust_test_log_1, "rustc_query_") + 5 < rustc_query_count_full); assert!(rustc_query_count_full > 5); diff --git a/tests/run-make/textrel-on-minimal-lib/rmake.rs b/tests/run-make/textrel-on-minimal-lib/rmake.rs index 625ded70ad62..08e2b45a75f4 100644 --- a/tests/run-make/textrel-on-minimal-lib/rmake.rs +++ b/tests/run-make/textrel-on-minimal-lib/rmake.rs @@ -6,25 +6,23 @@ // See https://github.com/rust-lang/rust/issues/68794 //@ ignore-cross-compile -//@ ignore-windows -// Reason: There is no `bar.dll` produced by CC to run readobj on use run_make_support::{ - cc, dynamic_lib_name, extra_c_flags, extra_cxx_flags, llvm_readobj, rustc, static_lib_name, + bin_name, cc, extra_c_flags, extra_cxx_flags, llvm_readobj, rustc, static_lib_name, }; fn main() { rustc().input("foo.rs").run(); cc().input("bar.c") .input(static_lib_name("foo")) - .out_exe(&dynamic_lib_name("bar")) + .out_exe(&bin_name("bar")) .arg("-fPIC") .arg("-shared") .args(extra_c_flags()) .args(extra_cxx_flags()) .run(); llvm_readobj() - .input(dynamic_lib_name("bar")) + .input(bin_name("bar")) .arg("--dynamic") .run() .assert_stdout_not_contains("TEXTREL"); diff --git a/tests/rustdoc-js-std/doc-alias-use.js 
b/tests/rustdoc-js-std/doc-alias-use.js new file mode 100644 index 000000000000..e08d94533a93 --- /dev/null +++ b/tests/rustdoc-js-std/doc-alias-use.js @@ -0,0 +1,12 @@ +// AsciiChar has a doc alias on its reexport and we +// want to make sure that actually works correctly, +// since apperently there are no other tests for this. + +const EXPECTED = [ + { + 'query': 'AsciiChar', + 'others': [ + { 'path': 'core::ascii', 'name': 'Char' }, + ], + }, +]; diff --git a/tests/rustdoc-js/big-result.rs b/tests/rustdoc-js/big-result.rs index 4dfecd6aaadd..c7a52aac1a24 100644 --- a/tests/rustdoc-js/big-result.rs +++ b/tests/rustdoc-js/big-result.rs @@ -1,4 +1,3 @@ -#![feature(concat_idents)] #![allow(nonstandard_style)] /// Generate 250 items that all match the query, starting with the longest. /// Those long items should be dropped from the result set, and the short ones diff --git a/tests/rustdoc-json/attrs/cold.rs b/tests/rustdoc-json/attrs/cold.rs new file mode 100644 index 000000000000..e219345d669c --- /dev/null +++ b/tests/rustdoc-json/attrs/cold.rs @@ -0,0 +1,3 @@ +//@ is "$.index[?(@.name=='cold_fn')].attrs" '["#[attr = Cold]"]' +#[cold] +pub fn cold_fn() {} diff --git a/tests/rustdoc-json/attrs/must_use.rs b/tests/rustdoc-json/attrs/must_use.rs index 64df8e5f509f..3ca6f5a75a5a 100644 --- a/tests/rustdoc-json/attrs/must_use.rs +++ b/tests/rustdoc-json/attrs/must_use.rs @@ -1,9 +1,9 @@ #![no_std] -//@ is "$.index[?(@.name=='example')].attrs" '["#[must_use]"]' +//@ is "$.index[?(@.name=='example')].attrs" '["#[attr = MustUse]"]' #[must_use] pub fn example() -> impl Iterator {} -//@ is "$.index[?(@.name=='explicit_message')].attrs" '["#[must_use = \"does nothing if you do not use it\"]"]' +//@ is "$.index[?(@.name=='explicit_message')].attrs" '["#[attr = MustUse {reason: \"does nothing if you do not use it\"}]"]' #[must_use = "does nothing if you do not use it"] pub fn explicit_message() -> impl Iterator {} diff --git a/tests/rustdoc-json/attrs/optimize.rs 
b/tests/rustdoc-json/attrs/optimize.rs new file mode 100644 index 000000000000..0bed0ad18c31 --- /dev/null +++ b/tests/rustdoc-json/attrs/optimize.rs @@ -0,0 +1,13 @@ +#![feature(optimize_attribute)] + +//@ is "$.index[?(@.name=='speed')].attrs" '["#[attr = Optimize(Speed)]"]' +#[optimize(speed)] +pub fn speed() {} + +//@ is "$.index[?(@.name=='size')].attrs" '["#[attr = Optimize(Size)]"]' +#[optimize(size)] +pub fn size() {} + +//@ is "$.index[?(@.name=='none')].attrs" '["#[attr = Optimize(DoNotOptimize)]"]' +#[optimize(none)] +pub fn none() {} diff --git a/tests/rustdoc-json/generic-args.rs b/tests/rustdoc-json/generic-args.rs new file mode 100644 index 000000000000..b4a73a046b50 --- /dev/null +++ b/tests/rustdoc-json/generic-args.rs @@ -0,0 +1,23 @@ +pub struct MyStruct(u32); + +pub trait MyTrait { + type MyType; + fn my_fn(&self); +} + +impl MyTrait for MyStruct { + type MyType = u32; + fn my_fn(&self) {} +} + +//@ is "$.index[?(@.name=='my_fn1')].inner.function.sig.inputs[0][1].qualified_path.args" null +//@ is "$.index[?(@.name=='my_fn1')].inner.function.sig.inputs[0][1].qualified_path.self_type.resolved_path.args" null +pub fn my_fn1(_: ::MyType) {} + +//@ is "$.index[?(@.name=='my_fn2')].inner.function.sig.inputs[0][1].dyn_trait.traits[0].trait.args.angle_bracketed.constraints[0].args" null +pub fn my_fn2(_: IntoIterator) {} + +//@ is "$.index[?(@.name=='my_fn3')].inner.function.sig.inputs[0][1].impl_trait[0].trait_bound.trait.args.parenthesized.inputs" [] +pub fn my_fn3(f: impl FnMut()) {} + +fn main() {} diff --git a/tests/rustdoc-ui/lints/redundant_explicit_links-expansion.rs b/tests/rustdoc-ui/lints/redundant_explicit_links-expansion.rs new file mode 100644 index 000000000000..2e42a0a5c5d6 --- /dev/null +++ b/tests/rustdoc-ui/lints/redundant_explicit_links-expansion.rs @@ -0,0 +1,40 @@ +// This is a regression test for . +// If the link is generated from expansion, we should not emit the lint. 
+ +#![deny(rustdoc::redundant_explicit_links)] + +macro_rules! mac1 { + () => { + "provided by a [`BufferProvider`](crate::BufferProvider)." + }; +} + +macro_rules! mac2 { + () => { + #[doc = mac1!()] + pub struct BufferProvider; + } +} + +macro_rules! mac3 { + () => { + "Provided by" + }; +} + +// Should not lint. +#[doc = mac1!()] +pub struct Foo; + +// Should not lint. +mac2!{} + +#[doc = "provided by a [`BufferProvider`](crate::BufferProvider)."] +/// bla +//~^^ ERROR: redundant_explicit_links +pub struct Bla; + +#[doc = mac3!()] +/// a [`BufferProvider`](crate::BufferProvider). +//~^ ERROR: redundant_explicit_links +pub fn f() {} diff --git a/tests/rustdoc-ui/lints/redundant_explicit_links-expansion.stderr b/tests/rustdoc-ui/lints/redundant_explicit_links-expansion.stderr new file mode 100644 index 000000000000..a81931fb0732 --- /dev/null +++ b/tests/rustdoc-ui/lints/redundant_explicit_links-expansion.stderr @@ -0,0 +1,39 @@ +error: redundant explicit link target + --> $DIR/redundant_explicit_links-expansion.rs:32:43 + | +LL | #[doc = "provided by a [`BufferProvider`](crate::BufferProvider)."] + | ---------------- ^^^^^^^^^^^^^^^^^^^^^ explicit target is redundant + | | + | because label contains path that resolves to same destination + | + = note: when a link's destination is not specified, + the label is used to resolve intra-doc links +note: the lint level is defined here + --> $DIR/redundant_explicit_links-expansion.rs:4:9 + | +LL | #![deny(rustdoc::redundant_explicit_links)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +help: remove explicit link target + | +LL - #[doc = "provided by a [`BufferProvider`](crate::BufferProvider)."] +LL + #[doc = "provided by a [`BufferProvider`]."] + | + +error: redundant explicit link target + --> $DIR/redundant_explicit_links-expansion.rs:38:26 + | +LL | /// a [`BufferProvider`](crate::BufferProvider). 
+ | ---------------- ^^^^^^^^^^^^^^^^^^^^^ explicit target is redundant + | | + | because label contains path that resolves to same destination + | + = note: when a link's destination is not specified, + the label is used to resolve intra-doc links +help: remove explicit link target + | +LL - /// a [`BufferProvider`](crate::BufferProvider). +LL + /// a [`BufferProvider`]. + | + +error: aborting due to 2 previous errors + diff --git a/tests/rustdoc/constant/const-effect-param.rs b/tests/rustdoc/constant/const-effect-param.rs index cceb0adac309..3dc63fb3d30f 100644 --- a/tests/rustdoc/constant/const-effect-param.rs +++ b/tests/rustdoc/constant/const-effect-param.rs @@ -11,4 +11,4 @@ pub trait Tr { //@ has foo/fn.g.html //@ has - '//pre[@class="rust item-decl"]' 'pub const fn g()' /// foo -pub const fn g() {} +pub const fn g() {} diff --git a/tests/rustdoc/constant/rfc-2632-const-trait-impl.rs b/tests/rustdoc/constant/rfc-2632-const-trait-impl.rs index 8a86e3e5e978..e304eff14e8c 100644 --- a/tests/rustdoc/constant/rfc-2632-const-trait-impl.rs +++ b/tests/rustdoc/constant/rfc-2632-const-trait-impl.rs @@ -1,12 +1,12 @@ -// Test that we do not currently display `~const` in rustdoc -// as that syntax is currently provisional; `~const Destruct` has +// Test that we do not currently display `[const]` in rustdoc +// as that syntax is currently provisional; `[const] Destruct` has // no effect on stable code so it should be hidden as well. // // To future blessers: make sure that `const_trait_impl` is // stabilized when changing `@!has` to `@has`, and please do // not remove this test. 
// -// FIXME(const_trait_impl) add `const_trait` to `Fn` so we use `~const` +// FIXME(const_trait_impl) add `const_trait` to `Fn` so we use `[const]` // FIXME(const_trait_impl) restore `const_trait` to `Destruct` #![feature(const_trait_impl)] #![crate_name = "foo"] @@ -15,58 +15,58 @@ use std::marker::Destruct; pub struct S(T); -//@ !has foo/trait.Tr.html '//pre[@class="rust item-decl"]/code/a[@class="trait"]' '~const' +//@ !has foo/trait.Tr.html '//pre[@class="rust item-decl"]/code/a[@class="trait"]' '[const]' //@ has - '//pre[@class="rust item-decl"]/code/a[@class="trait"]' 'Fn' -//@ !has - '//pre[@class="rust item-decl"]/code/span[@class="where"]' '~const' +//@ !has - '//pre[@class="rust item-decl"]/code/span[@class="where"]' '[const]' //@ has - '//pre[@class="rust item-decl"]/code/span[@class="where"]' ': Fn' #[const_trait] pub trait Tr { - //@ !has - '//section[@id="method.a"]/h4[@class="code-header"]' '~const' + //@ !has - '//section[@id="method.a"]/h4[@class="code-header"]' '[const]' //@ has - '//section[@id="method.a"]/h4[@class="code-header"]/a[@class="trait"]' 'Fn' - //@ !has - '//section[@id="method.a"]/h4[@class="code-header"]/span[@class="where"]' '~const' + //@ !has - '//section[@id="method.a"]/h4[@class="code-header"]/span[@class="where"]' '[const]' //@ has - '//section[@id="method.a"]/h4[@class="code-header"]/div[@class="where"]' ': Fn' - fn a() + fn a() where - Option: /* ~const */ Fn() /* + ~const Destruct */, + Option: /* [const] */ Fn() /* + [const] Destruct */, { } } //@ has - '//section[@id="impl-Tr%3CT%3E-for-T"]' '' -//@ !has - '//section[@id="impl-Tr%3CT%3E-for-T"]/h3[@class="code-header"]' '~const' +//@ !has - '//section[@id="impl-Tr%3CT%3E-for-T"]/h3[@class="code-header"]' '[const]' //@ has - '//section[@id="impl-Tr%3CT%3E-for-T"]/h3[@class="code-header"]/a[@class="trait"]' 'Fn' -//@ !has - '//section[@id="impl-Tr%3CT%3E-for-T"]/h3[@class="code-header"]/span[@class="where"]' '~const' +//@ !has - 
'//section[@id="impl-Tr%3CT%3E-for-T"]/h3[@class="code-header"]/span[@class="where"]' '[const]' //@ has - '//section[@id="impl-Tr%3CT%3E-for-T"]/h3[@class="code-header"]/div[@class="where"]' ': Fn' -impl const Tr for T +impl const Tr for T where - Option: /* ~const */ Fn() /* + ~const Destruct */, + Option: /* [const] */ Fn() /* + [const] Destruct */, { - fn a() + fn a() where - Option: /* ~const */ Fn() /* + ~const Destruct */, + Option: /* [const] */ Fn() /* + [const] Destruct */, { } } -//@ !has foo/fn.foo.html '//pre[@class="rust item-decl"]/code/a[@class="trait"]' '~const' +//@ !has foo/fn.foo.html '//pre[@class="rust item-decl"]/code/a[@class="trait"]' '[const]' //@ has - '//pre[@class="rust item-decl"]/code/a[@class="trait"]' 'Fn' -//@ !has - '//pre[@class="rust item-decl"]/code/div[@class="where"]' '~const' +//@ !has - '//pre[@class="rust item-decl"]/code/div[@class="where"]' '[const]' //@ has - '//pre[@class="rust item-decl"]/code/div[@class="where"]' ': Fn' -pub const fn foo() +pub const fn foo() where - Option: /* ~const */ Fn() /* + ~const Destruct */, + Option: /* [const] */ Fn() /* + [const] Destruct */, { F::a() } impl S { - //@ !has foo/struct.S.html '//section[@id="method.foo"]/h4[@class="code-header"]' '~const' + //@ !has foo/struct.S.html '//section[@id="method.foo"]/h4[@class="code-header"]' '[const]' //@ has - '//section[@id="method.foo"]/h4[@class="code-header"]/a[@class="trait"]' 'Fn' - //@ !has - '//section[@id="method.foo"]/h4[@class="code-header"]/span[@class="where"]' '~const' + //@ !has - '//section[@id="method.foo"]/h4[@class="code-header"]/span[@class="where"]' '[const]' //@ has - '//section[@id="method.foo"]/h4[@class="code-header"]/div[@class="where"]' ': Fn' - pub const fn foo() + pub const fn foo() where - B: /* ~const */ Fn() /* + ~const Destruct */, + B: /* [const] */ Fn() /* + [const] Destruct */, { B::a() } diff --git a/tests/rustdoc/inline_cross/auxiliary/const-effect-param.rs 
b/tests/rustdoc/inline_cross/auxiliary/const-effect-param.rs index db198e0fce99..d7d7b32e2b8b 100644 --- a/tests/rustdoc/inline_cross/auxiliary/const-effect-param.rs +++ b/tests/rustdoc/inline_cross/auxiliary/const-effect-param.rs @@ -4,7 +4,7 @@ #[const_trait] pub trait Resource {} -pub const fn load() -> i32 { +pub const fn load() -> i32 { 0 } diff --git a/tests/rustdoc/target-feature.rs b/tests/rustdoc/target-feature.rs new file mode 100644 index 000000000000..59a08a0ca949 --- /dev/null +++ b/tests/rustdoc/target-feature.rs @@ -0,0 +1,38 @@ +#![crate_name = "foo"] + +//@ has 'foo/index.html' + +//@ has - '//dl[@class="item-table"]/dt[1]//a' 'f1_safe' +//@ has - '//dl[@class="item-table"]/dt[1]//code' 'popcnt' +//@ count - '//dl[@class="item-table"]/dt[1]//sup' 0 +//@ has - '//dl[@class="item-table"]/dt[2]//a' 'f2_not_safe' +//@ has - '//dl[@class="item-table"]/dt[2]//code' 'avx2' +//@ count - '//dl[@class="item-table"]/dt[2]//sup' 1 +//@ has - '//dl[@class="item-table"]/dt[2]//sup' '⚠' + +#[target_feature(enable = "popcnt")] +//@ has 'foo/fn.f1_safe.html' +//@ matches - '//pre[@class="rust item-decl"]' '^pub fn f1_safe' +//@ has - '//*[@id="main-content"]/*[@class="item-info"]/*[@class="stab portability"]' \ +// 'Available with target feature popcnt only.' +pub fn f1_safe() {} + +//@ has 'foo/fn.f2_not_safe.html' +//@ matches - '//pre[@class="rust item-decl"]' '^pub unsafe fn f2_not_safe()' +//@ has - '//*[@id="main-content"]/*[@class="item-info"]/*[@class="stab portability"]' \ +// 'Available with target feature avx2 only.' +#[target_feature(enable = "avx2")] +pub unsafe fn f2_not_safe() {} + +//@ has 'foo/fn.f3_multifeatures_in_attr.html' +//@ has - '//*[@id="main-content"]/*[@class="item-info"]/*[@class="stab portability"]' \ +// 'Available on target features popcnt and avx2 only.' 
+#[target_feature(enable = "popcnt", enable = "avx2")] +pub fn f3_multifeatures_in_attr() {} + +//@ has 'foo/fn.f4_multi_attrs.html' +//@ has - '//*[@id="main-content"]/*[@class="item-info"]/*[@class="stab portability"]' \ +// 'Available on target features popcnt and avx2 only.' +#[target_feature(enable = "popcnt")] +#[target_feature(enable = "avx2")] +pub fn f4_multi_attrs() {} diff --git a/tests/ui-fulldeps/pprust-parenthesis-insertion.rs b/tests/ui-fulldeps/pprust-parenthesis-insertion.rs index 90e07bed40e7..72b5cfb90630 100644 --- a/tests/ui-fulldeps/pprust-parenthesis-insertion.rs +++ b/tests/ui-fulldeps/pprust-parenthesis-insertion.rs @@ -92,6 +92,21 @@ static EXPRS: &[&str] = &[ "#[attr] loop {}.field", "(#[attr] loop {}).field", "loop { #![attr] }.field", + // Attributes on a Binary, Cast, Assign, AssignOp, and Range expression + // require parentheses. Without parentheses `#[attr] lo..hi` means + // `(#[attr] lo)..hi`, and `#[attr] ..hi` is invalid syntax. + "#[attr] (1 + 1)", + "#[attr] (1 as T)", + "#[attr] (x = 1)", + "#[attr] (x += 1)", + "#[attr] (lo..hi)", + "#[attr] (..hi)", + // If the attribute were not present on the binary operation, it would be + // legal to render this without not just the inner parentheses, but also the + // outer ones. `return x + .. .field` (Yes, really.) Currently the + // pretty-printer does not take advantage of this edge case. + "(return #[attr] (x + ..)).field", + "(return x + ..).field", // Grammar restriction: break value starting with a labeled loop is not // allowed, except if the break is also labeled. 
"break 'outer 'inner: loop {} + 2", @@ -158,7 +173,12 @@ struct Unparenthesize; impl MutVisitor for Unparenthesize { fn visit_expr(&mut self, e: &mut Expr) { while let ExprKind::Paren(paren) = &mut e.kind { + let paren_attrs = mem::take(&mut e.attrs); *e = mem::replace(paren, Expr::dummy()); + if !paren_attrs.is_empty() { + assert!(e.attrs.is_empty()); + e.attrs = paren_attrs; + } } mut_visit::walk_expr(self, e); } diff --git a/tests/ui-fulldeps/run-compiler-twice.rs b/tests/ui-fulldeps/run-compiler-twice.rs index fa651baa7bc8..87504b8301f0 100644 --- a/tests/ui-fulldeps/run-compiler-twice.rs +++ b/tests/ui-fulldeps/run-compiler-twice.rs @@ -18,7 +18,7 @@ extern crate rustc_span; use std::path::{Path, PathBuf}; use rustc_interface::{Linker, interface}; -use rustc_session::config::{Input, Options, OutFileName, OutputType, OutputTypes}; +use rustc_session::config::{Input, Options, OutFileName, OutputType, OutputTypes, Sysroot}; use rustc_span::FileName; fn main() { @@ -32,7 +32,7 @@ fn main() { panic!("expected sysroot (and optional linker)"); } - let sysroot = PathBuf::from(&args[1]); + let sysroot = Sysroot::new(Some(PathBuf::from(&args[1]))); let linker = args.get(2).map(PathBuf::from); // compiletest sets the current dir to `output_base_dir` when running. 
@@ -43,7 +43,7 @@ fn main() { compile(src.to_string(), tmpdir.join("out"), sysroot.clone(), linker.as_deref()); } -fn compile(code: String, output: PathBuf, sysroot: PathBuf, linker: Option<&Path>) { +fn compile(code: String, output: PathBuf, sysroot: Sysroot, linker: Option<&Path>) { let mut opts = Options::default(); opts.output_types = OutputTypes::new(&[(OutputType::Exe, None)]); opts.sysroot = sysroot; diff --git a/tests/ui-fulldeps/rustc-dev-remap.only-remap.stderr b/tests/ui-fulldeps/rustc-dev-remap.only-remap.stderr index f54b6803b346..0c969b9c6d82 100644 --- a/tests/ui-fulldeps/rustc-dev-remap.only-remap.stderr +++ b/tests/ui-fulldeps/rustc-dev-remap.only-remap.stderr @@ -9,6 +9,7 @@ LL | type Result = NotAValidResultType; ControlFlow note: required by a bound in `rustc_ast::visit::Visitor::Result` --> /rustc-dev/xyz/compiler/rustc_ast/src/visit.rs:LL:COL + = note: this error originates in the macro `common_visitor_and_walkers` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 1 previous error diff --git a/tests/ui-fulldeps/rustc-dev-remap.remap-unremap.stderr b/tests/ui-fulldeps/rustc-dev-remap.remap-unremap.stderr index 438c23458e2f..6ac8c3046f62 100644 --- a/tests/ui-fulldeps/rustc-dev-remap.remap-unremap.stderr +++ b/tests/ui-fulldeps/rustc-dev-remap.remap-unremap.stderr @@ -10,8 +10,9 @@ LL | type Result = NotAValidResultType; note: required by a bound in `rustc_ast::visit::Visitor::Result` --> $COMPILER_DIR_REAL/rustc_ast/src/visit.rs:LL:COL | -LL | type Result: VisitorResult = (); - | ^^^^^^^^^^^^^ required by this bound in `Visitor::Result` +LL | common_visitor_and_walkers!(Visitor<'a>); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ required by this bound in `Visitor::Result` + = note: this error originates in the macro `common_visitor_and_walkers` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 1 previous error diff --git a/tests/ui-fulldeps/stable-mir/check_coroutine_body.rs 
b/tests/ui-fulldeps/stable-mir/check_coroutine_body.rs new file mode 100644 index 000000000000..677734929589 --- /dev/null +++ b/tests/ui-fulldeps/stable-mir/check_coroutine_body.rs @@ -0,0 +1,105 @@ +//@ run-pass +//! Tests stable mir API for retrieving the body of a coroutine. + +//@ ignore-stage1 +//@ ignore-cross-compile +//@ ignore-remote +//@ edition: 2024 + +#![feature(rustc_private)] +#![feature(assert_matches)] + +extern crate rustc_middle; +#[macro_use] +extern crate rustc_smir; +extern crate rustc_driver; +extern crate rustc_interface; +extern crate stable_mir; + +use std::io::Write; +use std::ops::ControlFlow; + +use stable_mir::mir::Body; +use stable_mir::ty::{RigidTy, TyKind}; + +const CRATE_NAME: &str = "crate_coroutine_body"; + +fn test_coroutine_body() -> ControlFlow<()> { + let crate_items = stable_mir::all_local_items(); + if let Some(body) = crate_items.iter().find_map(|item| { + let item_ty = item.ty(); + if let TyKind::RigidTy(RigidTy::Coroutine(def, ..)) = &item_ty.kind() { + if def.0.name() == "gbc::{closure#0}".to_string() { + def.body() + } else { + None + } + } else { + None + } + }) { + check_coroutine_body(body); + } else { + panic!("Cannot find `gbc::{{closure#0}}`. 
All local items are: {:#?}", crate_items); + } + + ControlFlow::Continue(()) +} + +fn check_coroutine_body(body: Body) { + let ret_ty = &body.locals()[0].ty; + let local_3 = &body.locals()[3].ty; + let local_4 = &body.locals()[4].ty; + + let TyKind::RigidTy(RigidTy::Adt(def, ..)) = &ret_ty.kind() + else { + panic!("Expected RigidTy::Adt, got: {:#?}", ret_ty); + }; + + assert_eq!("std::task::Poll", def.0.name()); + + let TyKind::RigidTy(RigidTy::Coroutine(def, ..)) = &local_3.kind() + else { + panic!("Expected RigidTy::Coroutine, got: {:#?}", local_3); + }; + + assert_eq!("gbc::{closure#0}::{closure#0}", def.0.name()); + + let TyKind::RigidTy(RigidTy::Coroutine(def, ..)) = &local_4.kind() + else { + panic!("Expected RigidTy::Coroutine, got: {:#?}", local_4); + }; + + assert_eq!("gbc::{closure#0}::{closure#0}", def.0.name()); +} + +fn main() { + let path = "coroutine_body.rs"; + generate_input(&path).unwrap(); + let args = &[ + "rustc".to_string(), + "-Cpanic=abort".to_string(), + "--edition".to_string(), + "2024".to_string(), + "--crate-name".to_string(), + CRATE_NAME.to_string(), + path.to_string(), + ]; + run!(args, test_coroutine_body).unwrap(); +} + +fn generate_input(path: &str) -> std::io::Result<()> { + let mut file = std::fs::File::create(path)?; + write!( + file, + r#" + async fn gbc() -> i32 {{ + let a = async {{ 1 }}.await; + a + }} + + fn main() {{}} + "# + )?; + Ok(()) +} diff --git a/tests/ui/SUMMARY.md b/tests/ui/SUMMARY.md index d807e38dab28..72b673dd50a2 100644 --- a/tests/ui/SUMMARY.md +++ b/tests/ui/SUMMARY.md @@ -8,6 +8,8 @@ For now, only immediate subdirectories under `tests/ui/` are described, but thes These tests deal with *Application Binary Interfaces* (ABI), mostly relating to function name mangling (and the `#[no_mangle]` attribute), calling conventions, or compiler flags which affect ABI. +Tests for unsupported ABIs can be made cross-platform by using the `extern "rust-invalid"` ABI, which is considered unsupported on every platform. 
+ ## `tests/ui/allocator` These tests exercise `#![feature(allocator_api)]` and the `#[global_allocator]` attribute. @@ -208,14 +210,14 @@ Tests targeted at how we deduce the types of closure arguments. This process is Any closure-focused tests that does not fit in the other more specific closure subdirectories belong here. E.g. syntax, `move`, lifetimes. -## `tests/ui/cmse-nonsecure/`: `C-cmse-nonsecure` ABIs +## `tests/ui/cmse-nonsecure/`: `cmse-nonsecure` ABIs -Tests for `cmse_nonsecure_entry` and `abi_c_cmse_nonsecure_call` ABIs. Used specifically for the Armv8-M architecture, the former marks Secure functions with additional behaviours, such as adding a special symbol and constraining the number of parameters, while the latter alters function pointers to indicate they are non-secure and to handle them differently than usual. +Tests for `extern "cmse-nonsecure-call"` and `extern "cmse-nonsecure-entry"` functions. Used specifically for the Armv8-M architecture, the former marks Secure functions with additional behaviours, such as adding a special symbol and constraining the number of parameters, while the latter alters function pointers to indicate they are non-secure and to handle them differently than usual. 
See: -- [`cmse_nonsecure_entry` | The Unstable book](https://doc.rust-lang.org/unstable-book/language-features/cmse-nonsecure-entry.html) -- [`abi_c_cmse_nonsecure_call` | The Unstable book](https://doc.rust-lang.org/beta/unstable-book/language-features/abi-c-cmse-nonsecure-call.html) +- [`cmse_nonsecure_entry` | The Unstable book](https://doc.rust-lang.org/nightly/unstable-book/language-features/cmse-nonsecure-entry.html) +- [`abi_cmse_nonsecure_call` | The Unstable book](https://doc.rust-lang.org/nightly/unstable-book/language-features/abi-cmse-nonsecure-call.html) ## `tests/ui/codegen/`: Code Generation diff --git a/tests/ui/abi/bad-custom.rs b/tests/ui/abi/bad-custom.rs index e792f0955b91..7c881134ccb4 100644 --- a/tests/ui/abi/bad-custom.rs +++ b/tests/ui/abi/bad-custom.rs @@ -5,7 +5,7 @@ #[unsafe(naked)] extern "custom" fn must_be_unsafe(a: i64) -> i64 { - //~^ ERROR functions with the `"custom"` ABI must be unsafe + //~^ ERROR functions with the "custom" ABI must be unsafe //~| ERROR invalid signature for `extern "custom"` function std::arch::naked_asm!("") } @@ -23,7 +23,7 @@ unsafe extern "custom" fn no_return_type() -> i64 { } unsafe extern "custom" fn double(a: i64) -> i64 { - //~^ ERROR items with the `"custom"` ABI can only be declared externally or defined via naked functions + //~^ ERROR items with the "custom" ABI can only be declared externally or defined via naked functions //~| ERROR invalid signature for `extern "custom"` function unimplemented!() } @@ -32,7 +32,7 @@ struct Thing(i64); impl Thing { unsafe extern "custom" fn is_even(self) -> bool { - //~^ ERROR items with the `"custom"` ABI can only be declared externally or defined via naked functions + //~^ ERROR items with the "custom" ABI can only be declared externally or defined via naked functions //~| ERROR invalid signature for `extern "custom"` function unimplemented!() } @@ -40,7 +40,7 @@ impl Thing { trait BitwiseNot { unsafe extern "custom" fn bitwise_not(a: i64) -> i64 { - //~^ 
ERROR items with the `"custom"` ABI can only be declared externally or defined via naked functions + //~^ ERROR items with the "custom" ABI can only be declared externally or defined via naked functions //~| ERROR invalid signature for `extern "custom"` function unimplemented!() } @@ -50,14 +50,14 @@ impl BitwiseNot for Thing {} trait Negate { extern "custom" fn negate(a: i64) -> i64; - //~^ ERROR functions with the `"custom"` ABI must be unsafe + //~^ ERROR functions with the "custom" ABI must be unsafe //~| ERROR invalid signature for `extern "custom"` function } impl Negate for Thing { extern "custom" fn negate(a: i64) -> i64 { - //~^ ERROR items with the `"custom"` ABI can only be declared externally or defined via naked functions - //~| ERROR functions with the `"custom"` ABI must be unsafe + //~^ ERROR items with the "custom" ABI can only be declared externally or defined via naked functions + //~| ERROR functions with the "custom" ABI must be unsafe //~| ERROR invalid signature for `extern "custom"` function -a } @@ -68,24 +68,24 @@ unsafe extern "custom" { //~^ ERROR invalid signature for `extern "custom"` function safe fn extern_cannot_be_safe(); - //~^ ERROR foreign functions with the `"custom"` ABI cannot be safe + //~^ ERROR foreign functions with the "custom" ABI cannot be safe } fn caller(f: unsafe extern "custom" fn(i64) -> i64, mut x: i64) -> i64 { unsafe { f(x) } - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called } fn caller_by_ref(f: &unsafe extern "custom" fn(i64) -> i64, mut x: i64) -> i64 { unsafe { f(x) } - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called } type Custom = unsafe extern "custom" fn(i64) -> i64; fn caller_alias(f: Custom, mut x: i64) -> i64 { unsafe { f(x) } - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called 
} #[unsafe(naked)] @@ -95,8 +95,8 @@ const unsafe extern "custom" fn no_const_fn() { } async unsafe extern "custom" fn no_async_fn() { - //~^ ERROR items with the `"custom"` ABI can only be declared externally or defined via naked functions - //~| ERROR functions with the `"custom"` ABI cannot be `async` + //~^ ERROR items with the "custom" ABI can only be declared externally or defined via naked functions + //~| ERROR functions with the "custom" ABI cannot be `async` } fn no_promotion_to_fn_trait(f: unsafe extern "custom" fn()) -> impl Fn() { @@ -107,15 +107,15 @@ fn no_promotion_to_fn_trait(f: unsafe extern "custom" fn()) -> impl Fn() { pub fn main() { unsafe { assert_eq!(double(21), 42); - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called assert_eq!(unsafe { increment(41) }, 42); - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called assert!(Thing(41).is_even()); - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called assert_eq!(Thing::bitwise_not(42), !42); - //~^ ERROR functions with the `"custom"` ABI cannot be called + //~^ ERROR functions with the "custom" ABI cannot be called } } diff --git a/tests/ui/abi/bad-custom.stderr b/tests/ui/abi/bad-custom.stderr index ec0f11af8980..893382875a22 100644 --- a/tests/ui/abi/bad-custom.stderr +++ b/tests/ui/abi/bad-custom.stderr @@ -1,4 +1,4 @@ -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/bad-custom.rs:7:1 | LL | extern "custom" fn must_be_unsafe(a: i64) -> i64 { @@ -15,7 +15,7 @@ error: invalid signature for `extern "custom"` function LL | extern "custom" fn must_be_unsafe(a: i64) -> i64 { | ^^^^^^ ^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI 
cannot have any parameters or return type help: remove the parameters and return type | LL - extern "custom" fn must_be_unsafe(a: i64) -> i64 { @@ -28,7 +28,7 @@ error: invalid signature for `extern "custom"` function LL | unsafe extern "custom" fn no_parameters(a: i64) { | ^^^^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - unsafe extern "custom" fn no_parameters(a: i64) { @@ -41,7 +41,7 @@ error: invalid signature for `extern "custom"` function LL | unsafe extern "custom" fn no_return_type() -> i64 { | ^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - unsafe extern "custom" fn no_return_type() -> i64 { @@ -54,7 +54,7 @@ error: invalid signature for `extern "custom"` function LL | unsafe extern "custom" fn double(a: i64) -> i64 { | ^^^^^^ ^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - unsafe extern "custom" fn double(a: i64) -> i64 { @@ -67,7 +67,7 @@ error: invalid signature for `extern "custom"` function LL | unsafe extern "custom" fn is_even(self) -> bool { | ^^^^ ^^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - unsafe extern "custom" fn is_even(self) -> bool { @@ -80,14 +80,14 @@ error: invalid signature for `extern "custom"` function LL | unsafe extern "custom" fn bitwise_not(a: i64) -> i64 { | ^^^^^^ ^^^ | - = note: functions with the `"custom"` ABI cannot 
have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - unsafe extern "custom" fn bitwise_not(a: i64) -> i64 { LL + unsafe extern "custom" fn bitwise_not() { | -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/bad-custom.rs:52:5 | LL | extern "custom" fn negate(a: i64) -> i64; @@ -104,14 +104,14 @@ error: invalid signature for `extern "custom"` function LL | extern "custom" fn negate(a: i64) -> i64; | ^^^^^^ ^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - extern "custom" fn negate(a: i64) -> i64; LL + extern "custom" fn negate(); | -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/bad-custom.rs:58:5 | LL | extern "custom" fn negate(a: i64) -> i64 { @@ -128,7 +128,7 @@ error: invalid signature for `extern "custom"` function LL | extern "custom" fn negate(a: i64) -> i64 { | ^^^^^^ ^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - extern "custom" fn negate(a: i64) -> i64 { @@ -141,14 +141,14 @@ error: invalid signature for `extern "custom"` function LL | fn increment(a: i64) -> i64; | ^^^^^^ ^^^ | - = note: functions with the `"custom"` ABI cannot have any parameters or return type + = note: functions with the "custom" ABI cannot have any parameters or return type help: remove the parameters and return type | LL - fn increment(a: i64) -> i64; LL + fn increment(); | -error: foreign functions with the `"custom"` ABI cannot be safe +error: foreign functions with the 
"custom" ABI cannot be safe --> $DIR/bad-custom.rs:70:5 | LL | safe fn extern_cannot_be_safe(); @@ -160,7 +160,7 @@ LL - safe fn extern_cannot_be_safe(); LL + fn extern_cannot_be_safe(); | -error: functions with the `"custom"` ABI cannot be `async` +error: functions with the "custom" ABI cannot be `async` --> $DIR/bad-custom.rs:97:1 | LL | async unsafe extern "custom" fn no_async_fn() { @@ -172,7 +172,7 @@ LL - async unsafe extern "custom" fn no_async_fn() { LL + unsafe extern "custom" fn no_async_fn() { | -error: items with the `"custom"` ABI can only be declared externally or defined via naked functions +error: items with the "custom" ABI can only be declared externally or defined via naked functions --> $DIR/bad-custom.rs:97:1 | LL | async unsafe extern "custom" fn no_async_fn() { @@ -197,7 +197,7 @@ LL | f = note: unsafe function cannot be called generically without an unsafe block = note: wrap the `unsafe extern "custom" fn()` in a closure with no arguments: `|| { /* code */ }` -error: items with the `"custom"` ABI can only be declared externally or defined via naked functions +error: items with the "custom" ABI can only be declared externally or defined via naked functions --> $DIR/bad-custom.rs:25:1 | LL | unsafe extern "custom" fn double(a: i64) -> i64 { @@ -209,7 +209,7 @@ LL + #[unsafe(naked)] LL | unsafe extern "custom" fn double(a: i64) -> i64 { | -error: items with the `"custom"` ABI can only be declared externally or defined via naked functions +error: items with the "custom" ABI can only be declared externally or defined via naked functions --> $DIR/bad-custom.rs:34:5 | LL | unsafe extern "custom" fn is_even(self) -> bool { @@ -221,7 +221,7 @@ LL + #[unsafe(naked)] LL | unsafe extern "custom" fn is_even(self) -> bool { | -error: items with the `"custom"` ABI can only be declared externally or defined via naked functions +error: items with the "custom" ABI can only be declared externally or defined via naked functions --> $DIR/bad-custom.rs:42:5 | LL 
| unsafe extern "custom" fn bitwise_not(a: i64) -> i64 { @@ -233,7 +233,7 @@ LL + #[unsafe(naked)] LL | unsafe extern "custom" fn bitwise_not(a: i64) -> i64 { | -error: items with the `"custom"` ABI can only be declared externally or defined via naked functions +error: items with the "custom" ABI can only be declared externally or defined via naked functions --> $DIR/bad-custom.rs:58:5 | LL | extern "custom" fn negate(a: i64) -> i64 { @@ -245,43 +245,85 @@ LL + #[unsafe(naked)] LL | extern "custom" fn negate(a: i64) -> i64 { | -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> $DIR/bad-custom.rs:75:14 + | +LL | unsafe { f(x) } + | ^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:75:14 | LL | unsafe { f(x) } | ^^^^ -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> $DIR/bad-custom.rs:80:14 + | +LL | unsafe { f(x) } + | ^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:80:14 | LL | unsafe { f(x) } | ^^^^ -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> $DIR/bad-custom.rs:87:14 + | +LL | unsafe { f(x) } + | ^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:87:14 | LL | unsafe { f(x) } | ^^^^ -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> $DIR/bad-custom.rs:109:20 + | +LL | assert_eq!(double(21), 42); + | ^^^^^^^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:109:20 | LL | assert_eq!(double(21), 42); | ^^^^^^^^^^ -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> 
$DIR/bad-custom.rs:112:29 + | +LL | assert_eq!(unsafe { increment(41) }, 42); + | ^^^^^^^^^^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:112:29 | LL | assert_eq!(unsafe { increment(41) }, 42); | ^^^^^^^^^^^^^ -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> $DIR/bad-custom.rs:115:17 + | +LL | assert!(Thing(41).is_even()); + | ^^^^^^^^^^^^^^^^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:115:17 | LL | assert!(Thing(41).is_even()); | ^^^^^^^^^^^^^^^^^^^ -error: functions with the `"custom"` ABI cannot be called +error: functions with the "custom" ABI cannot be called + --> $DIR/bad-custom.rs:118:20 + | +LL | assert_eq!(Thing::bitwise_not(42), !42); + | ^^^^^^^^^^^^^^^^^^^^^^ + | +note: an `extern "custom"` function can only be called using inline assembly --> $DIR/bad-custom.rs:118:20 | LL | assert_eq!(Thing::bitwise_not(42), !42); diff --git a/tests/ui/abi/cannot-be-called.avr.stderr b/tests/ui/abi/cannot-be-called.avr.stderr new file mode 100644 index 000000000000..1129893cbfaf --- /dev/null +++ b/tests/ui/abi/cannot-be-called.avr.stderr @@ -0,0 +1,75 @@ +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:37:8 + | +LL | extern "msp430-interrupt" fn msp430() {} + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:41:8 + | +LL | extern "riscv-interrupt-m" fn riscv_m() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:43:8 + | +LL | extern "riscv-interrupt-s" fn riscv_s() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:45:8 + | +LL | extern 
"x86-interrupt" fn x86() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:70:25 + | +LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:76:26 + | +LL | fn riscv_m_ptr(f: extern "riscv-interrupt-m" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:82:26 + | +LL | fn riscv_s_ptr(f: extern "riscv-interrupt-s" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:88:22 + | +LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error: functions with the "avr-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:50:5 + | +LL | avr(); + | ^^^^^ + | +note: an `extern "avr-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:50:5 + | +LL | avr(); + | ^^^^^ + +error: functions with the "avr-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:66:5 + | +LL | f() + | ^^^ + | +note: an `extern "avr-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:66:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. 
diff --git a/tests/ui/abi/cannot-be-called.i686.stderr b/tests/ui/abi/cannot-be-called.i686.stderr new file mode 100644 index 000000000000..024d5e2e93d6 --- /dev/null +++ b/tests/ui/abi/cannot-be-called.i686.stderr @@ -0,0 +1,75 @@ +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:37:8 + | +LL | extern "msp430-interrupt" fn msp430() {} + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:39:8 + | +LL | extern "avr-interrupt" fn avr() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:41:8 + | +LL | extern "riscv-interrupt-m" fn riscv_m() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:43:8 + | +LL | extern "riscv-interrupt-s" fn riscv_s() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:64:22 + | +LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:70:25 + | +LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:76:26 + | +LL | fn riscv_m_ptr(f: extern "riscv-interrupt-m" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:82:26 + | +LL | fn riscv_s_ptr(f: extern "riscv-interrupt-s" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error: functions with the "x86-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:58:5 + | +LL | x86(); + | ^^^^^ + | +note: an `extern "x86-interrupt"` function can only be called 
using inline assembly + --> $DIR/cannot-be-called.rs:58:5 + | +LL | x86(); + | ^^^^^ + +error: functions with the "x86-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:90:5 + | +LL | f() + | ^^^ + | +note: an `extern "x86-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:90:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/cannot-be-called.msp430.stderr b/tests/ui/abi/cannot-be-called.msp430.stderr new file mode 100644 index 000000000000..52d7d792510e --- /dev/null +++ b/tests/ui/abi/cannot-be-called.msp430.stderr @@ -0,0 +1,75 @@ +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:39:8 + | +LL | extern "avr-interrupt" fn avr() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:41:8 + | +LL | extern "riscv-interrupt-m" fn riscv_m() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:43:8 + | +LL | extern "riscv-interrupt-s" fn riscv_s() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:45:8 + | +LL | extern "x86-interrupt" fn x86() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:64:22 + | +LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:76:26 + | +LL | fn riscv_m_ptr(f: extern "riscv-interrupt-m" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:82:26 + | +LL | fn 
riscv_s_ptr(f: extern "riscv-interrupt-s" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:88:22 + | +LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error: functions with the "msp430-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:52:5 + | +LL | msp430(); + | ^^^^^^^^ + | +note: an `extern "msp430-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:52:5 + | +LL | msp430(); + | ^^^^^^^^ + +error: functions with the "msp430-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:72:5 + | +LL | f() + | ^^^ + | +note: an `extern "msp430-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:72:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/cannot-be-called.riscv32.stderr b/tests/ui/abi/cannot-be-called.riscv32.stderr new file mode 100644 index 000000000000..119d93bd58e9 --- /dev/null +++ b/tests/ui/abi/cannot-be-called.riscv32.stderr @@ -0,0 +1,87 @@ +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:37:8 + | +LL | extern "msp430-interrupt" fn msp430() {} + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:39:8 + | +LL | extern "avr-interrupt" fn avr() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:45:8 + | +LL | extern "x86-interrupt" fn x86() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:64:22 + | +LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error[E0570]: "msp430-interrupt" is not a supported ABI 
for the current target + --> $DIR/cannot-be-called.rs:70:25 + | +LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:88:22 + | +LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error: functions with the "riscv-interrupt-m" ABI cannot be called + --> $DIR/cannot-be-called.rs:54:5 + | +LL | riscv_m(); + | ^^^^^^^^^ + | +note: an `extern "riscv-interrupt-m"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:54:5 + | +LL | riscv_m(); + | ^^^^^^^^^ + +error: functions with the "riscv-interrupt-s" ABI cannot be called + --> $DIR/cannot-be-called.rs:56:5 + | +LL | riscv_s(); + | ^^^^^^^^^ + | +note: an `extern "riscv-interrupt-s"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:56:5 + | +LL | riscv_s(); + | ^^^^^^^^^ + +error: functions with the "riscv-interrupt-m" ABI cannot be called + --> $DIR/cannot-be-called.rs:78:5 + | +LL | f() + | ^^^ + | +note: an `extern "riscv-interrupt-m"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:78:5 + | +LL | f() + | ^^^ + +error: functions with the "riscv-interrupt-s" ABI cannot be called + --> $DIR/cannot-be-called.rs:84:5 + | +LL | f() + | ^^^ + | +note: an `extern "riscv-interrupt-s"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:84:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. 
diff --git a/tests/ui/abi/cannot-be-called.riscv64.stderr b/tests/ui/abi/cannot-be-called.riscv64.stderr new file mode 100644 index 000000000000..119d93bd58e9 --- /dev/null +++ b/tests/ui/abi/cannot-be-called.riscv64.stderr @@ -0,0 +1,87 @@ +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:37:8 + | +LL | extern "msp430-interrupt" fn msp430() {} + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:39:8 + | +LL | extern "avr-interrupt" fn avr() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:45:8 + | +LL | extern "x86-interrupt" fn x86() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:64:22 + | +LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:70:25 + | +LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:88:22 + | +LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error: functions with the "riscv-interrupt-m" ABI cannot be called + --> $DIR/cannot-be-called.rs:54:5 + | +LL | riscv_m(); + | ^^^^^^^^^ + | +note: an `extern "riscv-interrupt-m"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:54:5 + | +LL | riscv_m(); + | ^^^^^^^^^ + +error: functions with the "riscv-interrupt-s" ABI cannot be called + --> $DIR/cannot-be-called.rs:56:5 + | +LL | riscv_s(); + | ^^^^^^^^^ + | +note: an `extern "riscv-interrupt-s"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:56:5 + | +LL | riscv_s(); + | ^^^^^^^^^ + +error: 
functions with the "riscv-interrupt-m" ABI cannot be called + --> $DIR/cannot-be-called.rs:78:5 + | +LL | f() + | ^^^ + | +note: an `extern "riscv-interrupt-m"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:78:5 + | +LL | f() + | ^^^ + +error: functions with the "riscv-interrupt-s" ABI cannot be called + --> $DIR/cannot-be-called.rs:84:5 + | +LL | f() + | ^^^ + | +note: an `extern "riscv-interrupt-s"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:84:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/cannot-be-called.rs b/tests/ui/abi/cannot-be-called.rs new file mode 100644 index 000000000000..af979d65d334 --- /dev/null +++ b/tests/ui/abi/cannot-be-called.rs @@ -0,0 +1,92 @@ +/*! Tests entry-point ABIs cannot be called + +Interrupt ABIs share similar semantics, in that they are special entry-points unusable by Rust. +So we test that they error in essentially all of the same places. 
+*/ +//@ add-core-stubs +//@ revisions: x64 x64_win i686 riscv32 riscv64 avr msp430 +// +//@ [x64] needs-llvm-components: x86 +//@ [x64] compile-flags: --target=x86_64-unknown-linux-gnu --crate-type=rlib +//@ [x64_win] needs-llvm-components: x86 +//@ [x64_win] compile-flags: --target=x86_64-pc-windows-msvc --crate-type=rlib +//@ [i686] needs-llvm-components: x86 +//@ [i686] compile-flags: --target=i686-unknown-linux-gnu --crate-type=rlib +//@ [riscv32] needs-llvm-components: riscv +//@ [riscv32] compile-flags: --target=riscv32i-unknown-none-elf --crate-type=rlib +//@ [riscv64] needs-llvm-components: riscv +//@ [riscv64] compile-flags: --target=riscv64gc-unknown-none-elf --crate-type=rlib +//@ [avr] needs-llvm-components: avr +//@ [avr] compile-flags: --target=avr-none -C target-cpu=atmega328p --crate-type=rlib +//@ [msp430] needs-llvm-components: msp430 +//@ [msp430] compile-flags: --target=msp430-none-elf --crate-type=rlib +#![no_core] +#![feature( + no_core, + abi_msp430_interrupt, + abi_avr_interrupt, + abi_x86_interrupt, + abi_riscv_interrupt +)] + +extern crate minicore; +use minicore::*; + +/* extern "interrupt" definition */ + +extern "msp430-interrupt" fn msp430() {} +//[x64,x64_win,i686,riscv32,riscv64,avr]~^ ERROR is not a supported ABI +extern "avr-interrupt" fn avr() {} +//[x64,x64_win,i686,riscv32,riscv64,msp430]~^ ERROR is not a supported ABI +extern "riscv-interrupt-m" fn riscv_m() {} +//[x64,x64_win,i686,avr,msp430]~^ ERROR is not a supported ABI +extern "riscv-interrupt-s" fn riscv_s() {} +//[x64,x64_win,i686,avr,msp430]~^ ERROR is not a supported ABI +extern "x86-interrupt" fn x86() {} +//[riscv32,riscv64,avr,msp430]~^ ERROR is not a supported ABI + +/* extern "interrupt" calls */ +fn call_the_interrupts() { + avr(); + //[avr]~^ ERROR functions with the "avr-interrupt" ABI cannot be called + msp430(); + //[msp430]~^ ERROR functions with the "msp430-interrupt" ABI cannot be called + riscv_m(); + //[riscv32,riscv64]~^ ERROR functions with the 
"riscv-interrupt-m" ABI cannot be called + riscv_s(); + //[riscv32,riscv64]~^ ERROR functions with the "riscv-interrupt-s" ABI cannot be called + x86(); + //[x64,x64_win,i686]~^ ERROR functions with the "x86-interrupt" ABI cannot be called +} + +/* extern "interrupt" fnptr calls */ + +fn avr_ptr(f: extern "avr-interrupt" fn()) { + //[x64,x64_win,i686,riscv32,riscv64,msp430]~^ ERROR is not a supported ABI + f() + //[avr]~^ ERROR functions with the "avr-interrupt" ABI cannot be called +} + +fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + //[x64,x64_win,i686,riscv32,riscv64,avr]~^ ERROR is not a supported ABI + f() + //[msp430]~^ ERROR functions with the "msp430-interrupt" ABI cannot be called +} + +fn riscv_m_ptr(f: extern "riscv-interrupt-m" fn()) { + //[x64,x64_win,i686,avr,msp430]~^ ERROR is not a supported ABI + f() + //[riscv32,riscv64]~^ ERROR functions with the "riscv-interrupt-m" ABI cannot be called +} + +fn riscv_s_ptr(f: extern "riscv-interrupt-s" fn()) { + //[x64,x64_win,i686,avr,msp430]~^ ERROR is not a supported ABI + f() + //[riscv32,riscv64]~^ ERROR functions with the "riscv-interrupt-s" ABI cannot be called +} + +fn x86_ptr(f: extern "x86-interrupt" fn()) { + //[riscv32,riscv64,avr,msp430]~^ ERROR is not a supported ABI + f() + //[x64,x64_win,i686]~^ ERROR functions with the "x86-interrupt" ABI cannot be called +} diff --git a/tests/ui/abi/cannot-be-called.x64.stderr b/tests/ui/abi/cannot-be-called.x64.stderr new file mode 100644 index 000000000000..024d5e2e93d6 --- /dev/null +++ b/tests/ui/abi/cannot-be-called.x64.stderr @@ -0,0 +1,75 @@ +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:37:8 + | +LL | extern "msp430-interrupt" fn msp430() {} + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:39:8 + | +LL | extern "avr-interrupt" fn avr() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not 
a supported ABI for the current target + --> $DIR/cannot-be-called.rs:41:8 + | +LL | extern "riscv-interrupt-m" fn riscv_m() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:43:8 + | +LL | extern "riscv-interrupt-s" fn riscv_s() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:64:22 + | +LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:70:25 + | +LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:76:26 + | +LL | fn riscv_m_ptr(f: extern "riscv-interrupt-m" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:82:26 + | +LL | fn riscv_s_ptr(f: extern "riscv-interrupt-s" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error: functions with the "x86-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:58:5 + | +LL | x86(); + | ^^^^^ + | +note: an `extern "x86-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:58:5 + | +LL | x86(); + | ^^^^^ + +error: functions with the "x86-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:90:5 + | +LL | f() + | ^^^ + | +note: an `extern "x86-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:90:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. 
diff --git a/tests/ui/abi/cannot-be-called.x64_win.stderr b/tests/ui/abi/cannot-be-called.x64_win.stderr new file mode 100644 index 000000000000..024d5e2e93d6 --- /dev/null +++ b/tests/ui/abi/cannot-be-called.x64_win.stderr @@ -0,0 +1,75 @@ +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:37:8 + | +LL | extern "msp430-interrupt" fn msp430() {} + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:39:8 + | +LL | extern "avr-interrupt" fn avr() {} + | ^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:41:8 + | +LL | extern "riscv-interrupt-m" fn riscv_m() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:43:8 + | +LL | extern "riscv-interrupt-s" fn riscv_s() {} + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:64:22 + | +LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { + | ^^^^^^^^^^^^^^^ + +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:70:25 + | +LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { + | ^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:76:26 + | +LL | fn riscv_m_ptr(f: extern "riscv-interrupt-m" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "riscv-interrupt-s" is not a supported ABI for the current target + --> $DIR/cannot-be-called.rs:82:26 + | +LL | fn riscv_s_ptr(f: extern "riscv-interrupt-s" fn()) { + | ^^^^^^^^^^^^^^^^^^^ + +error: functions with the "x86-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:58:5 + | +LL | x86(); + | ^^^^^ + | +note: an `extern "x86-interrupt"` function can only be 
called using inline assembly + --> $DIR/cannot-be-called.rs:58:5 + | +LL | x86(); + | ^^^^^ + +error: functions with the "x86-interrupt" ABI cannot be called + --> $DIR/cannot-be-called.rs:90:5 + | +LL | f() + | ^^^ + | +note: an `extern "x86-interrupt"` function can only be called using inline assembly + --> $DIR/cannot-be-called.rs:90:5 + | +LL | f() + | ^^^ + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/cannot-be-coroutine.avr.stderr b/tests/ui/abi/cannot-be-coroutine.avr.stderr new file mode 100644 index 000000000000..b06da0f3352e --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.avr.stderr @@ -0,0 +1,23 @@ +error: functions with the "avr-interrupt" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:36:1 + | +LL | async extern "avr-interrupt" fn avr() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "avr-interrupt" fn avr() { +LL + extern "avr-interrupt" fn avr() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + | +LL | async fn vanilla(){ + | ___________________^ +LL | | +LL | | } + | |_^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/abi/cannot-be-coroutine.i686.stderr b/tests/ui/abi/cannot-be-coroutine.i686.stderr new file mode 100644 index 000000000000..cbbddd087c8a --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.i686.stderr @@ -0,0 +1,23 @@ +error: functions with the "x86-interrupt" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:52:1 + | +LL | async extern "x86-interrupt" fn x86() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "x86-interrupt" fn x86() { +LL + extern "x86-interrupt" fn x86() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + | +LL | async fn vanilla(){ + | ___________________^ +LL 
| | +LL | | } + | |_^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/abi/cannot-be-coroutine.msp430.stderr b/tests/ui/abi/cannot-be-coroutine.msp430.stderr new file mode 100644 index 000000000000..951ce13b6055 --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.msp430.stderr @@ -0,0 +1,23 @@ +error: functions with the "msp430-interrupt" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:40:1 + | +LL | async extern "msp430-interrupt" fn msp430() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "msp430-interrupt" fn msp430() { +LL + extern "msp430-interrupt" fn msp430() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + | +LL | async fn vanilla(){ + | ___________________^ +LL | | +LL | | } + | |_^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/abi/cannot-be-coroutine.riscv32.stderr b/tests/ui/abi/cannot-be-coroutine.riscv32.stderr new file mode 100644 index 000000000000..8e3b3a2940a1 --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.riscv32.stderr @@ -0,0 +1,35 @@ +error: functions with the "riscv-interrupt-m" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:44:1 + | +LL | async extern "riscv-interrupt-m" fn riscv_m() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "riscv-interrupt-m" fn riscv_m() { +LL + extern "riscv-interrupt-m" fn riscv_m() { + | + +error: functions with the "riscv-interrupt-s" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:48:1 + | +LL | async extern "riscv-interrupt-s" fn riscv_s() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "riscv-interrupt-s" fn riscv_s() { +LL + extern "riscv-interrupt-s" fn riscv_s() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + 
| +LL | async fn vanilla(){ + | ___________________^ +LL | | +LL | | } + | |_^ + +error: aborting due to 3 previous errors + diff --git a/tests/ui/abi/cannot-be-coroutine.riscv64.stderr b/tests/ui/abi/cannot-be-coroutine.riscv64.stderr new file mode 100644 index 000000000000..8e3b3a2940a1 --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.riscv64.stderr @@ -0,0 +1,35 @@ +error: functions with the "riscv-interrupt-m" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:44:1 + | +LL | async extern "riscv-interrupt-m" fn riscv_m() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "riscv-interrupt-m" fn riscv_m() { +LL + extern "riscv-interrupt-m" fn riscv_m() { + | + +error: functions with the "riscv-interrupt-s" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:48:1 + | +LL | async extern "riscv-interrupt-s" fn riscv_s() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "riscv-interrupt-s" fn riscv_s() { +LL + extern "riscv-interrupt-s" fn riscv_s() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + | +LL | async fn vanilla(){ + | ___________________^ +LL | | +LL | | } + | |_^ + +error: aborting due to 3 previous errors + diff --git a/tests/ui/abi/cannot-be-coroutine.rs b/tests/ui/abi/cannot-be-coroutine.rs new file mode 100644 index 000000000000..7270a55f69ec --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.rs @@ -0,0 +1,54 @@ +//@ add-core-stubs +//@ edition: 2021 +//@ revisions: x64 x64_win i686 riscv32 riscv64 avr msp430 +// +//@ [x64] needs-llvm-components: x86 +//@ [x64] compile-flags: --target=x86_64-unknown-linux-gnu --crate-type=rlib +//@ [x64_win] needs-llvm-components: x86 +//@ [x64_win] compile-flags: --target=x86_64-pc-windows-msvc --crate-type=rlib +//@ [i686] needs-llvm-components: x86 +//@ [i686] compile-flags: 
--target=i686-unknown-linux-gnu --crate-type=rlib +//@ [riscv32] needs-llvm-components: riscv +//@ [riscv32] compile-flags: --target=riscv32i-unknown-none-elf --crate-type=rlib +//@ [riscv64] needs-llvm-components: riscv +//@ [riscv64] compile-flags: --target=riscv64gc-unknown-none-elf --crate-type=rlib +//@ [avr] needs-llvm-components: avr +//@ [avr] compile-flags: --target=avr-none -C target-cpu=atmega328p --crate-type=rlib +//@ [msp430] needs-llvm-components: msp430 +//@ [msp430] compile-flags: --target=msp430-none-elf --crate-type=rlib +#![no_core] +#![feature( + no_core, + abi_msp430_interrupt, + abi_avr_interrupt, + abi_x86_interrupt, + abi_riscv_interrupt +)] + +extern crate minicore; +use minicore::*; + +// We ignore this error; implementing all of the async-related lang items is not worth it. +async fn vanilla(){ + //~^ ERROR requires `ResumeTy` lang_item +} + +async extern "avr-interrupt" fn avr() { + //[avr]~^ ERROR functions with the "avr-interrupt" ABI cannot be `async` +} + +async extern "msp430-interrupt" fn msp430() { + //[msp430]~^ ERROR functions with the "msp430-interrupt" ABI cannot be `async` +} + +async extern "riscv-interrupt-m" fn riscv_m() { + //[riscv32,riscv64]~^ ERROR functions with the "riscv-interrupt-m" ABI cannot be `async` +} + +async extern "riscv-interrupt-s" fn riscv_s() { + //[riscv32,riscv64]~^ ERROR functions with the "riscv-interrupt-s" ABI cannot be `async` +} + +async extern "x86-interrupt" fn x86() { + //[x64,x64_win,i686]~^ ERROR functions with the "x86-interrupt" ABI cannot be `async` +} diff --git a/tests/ui/abi/cannot-be-coroutine.x64.stderr b/tests/ui/abi/cannot-be-coroutine.x64.stderr new file mode 100644 index 000000000000..cbbddd087c8a --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.x64.stderr @@ -0,0 +1,23 @@ +error: functions with the "x86-interrupt" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:52:1 + | +LL | async extern "x86-interrupt" fn x86() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | 
+help: remove the `async` keyword from this definiton + | +LL - async extern "x86-interrupt" fn x86() { +LL + extern "x86-interrupt" fn x86() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + | +LL | async fn vanilla(){ + | ___________________^ +LL | | +LL | | } + | |_^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/abi/cannot-be-coroutine.x64_win.stderr b/tests/ui/abi/cannot-be-coroutine.x64_win.stderr new file mode 100644 index 000000000000..cbbddd087c8a --- /dev/null +++ b/tests/ui/abi/cannot-be-coroutine.x64_win.stderr @@ -0,0 +1,23 @@ +error: functions with the "x86-interrupt" ABI cannot be `async` + --> $DIR/cannot-be-coroutine.rs:52:1 + | +LL | async extern "x86-interrupt" fn x86() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: remove the `async` keyword from this definiton + | +LL - async extern "x86-interrupt" fn x86() { +LL + extern "x86-interrupt" fn x86() { + | + +error: requires `ResumeTy` lang_item + --> $DIR/cannot-be-coroutine.rs:32:19 + | +LL | async fn vanilla(){ + | ___________________^ +LL | | +LL | | } + | |_^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/abi/invalid-call-abi-ctfe.rs b/tests/ui/abi/invalid-call-abi-ctfe.rs new file mode 100644 index 000000000000..343cc728fe3b --- /dev/null +++ b/tests/ui/abi/invalid-call-abi-ctfe.rs @@ -0,0 +1,14 @@ +// Fix for #142969 where an invalid ABI in a signature still had its call ABI computed +// because CTFE tried to evaluate it, despite previous errors during AST-to-HIR lowering. 
+ +#![feature(rustc_attrs)] + +const extern "rust-invalid" fn foo() { + //~^ ERROR "rust-invalid" is not a supported ABI for the current target + panic!() +} + +const _: () = foo(); + + +fn main() {} diff --git a/tests/ui/abi/invalid-call-abi-ctfe.stderr b/tests/ui/abi/invalid-call-abi-ctfe.stderr new file mode 100644 index 000000000000..402de4b69b97 --- /dev/null +++ b/tests/ui/abi/invalid-call-abi-ctfe.stderr @@ -0,0 +1,9 @@ +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/invalid-call-abi-ctfe.rs:6:14 + | +LL | const extern "rust-invalid" fn foo() { + | ^^^^^^^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/invalid-call-abi.rs b/tests/ui/abi/invalid-call-abi.rs new file mode 100644 index 000000000000..076ddd91ab0f --- /dev/null +++ b/tests/ui/abi/invalid-call-abi.rs @@ -0,0 +1,12 @@ +// Tests the `"rustc-invalid"` ABI, which is never canonizable. + +#![feature(rustc_attrs)] + +const extern "rust-invalid" fn foo() { + //~^ ERROR "rust-invalid" is not a supported ABI for the current target + panic!() +} + +fn main() { + foo(); +} diff --git a/tests/ui/abi/invalid-call-abi.stderr b/tests/ui/abi/invalid-call-abi.stderr new file mode 100644 index 000000000000..c4a90158dcfe --- /dev/null +++ b/tests/ui/abi/invalid-call-abi.stderr @@ -0,0 +1,9 @@ +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/invalid-call-abi.rs:5:14 + | +LL | const extern "rust-invalid" fn foo() { + | ^^^^^^^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/unsupported-abi-transmute.rs b/tests/ui/abi/unsupported-abi-transmute.rs new file mode 100644 index 000000000000..42aa180e1fd8 --- /dev/null +++ b/tests/ui/abi/unsupported-abi-transmute.rs @@ -0,0 +1,12 @@ +// Check we error before unsupported ABIs reach codegen stages. 
+ +//@ edition: 2018 +//@ compile-flags: --crate-type=lib +#![feature(rustc_attrs)] + +use core::mem; + +fn anything() { + let a = unsafe { mem::transmute::(4) }(2); + //~^ ERROR: is not a supported ABI for the current target [E0570] +} diff --git a/tests/ui/abi/unsupported-abi-transmute.stderr b/tests/ui/abi/unsupported-abi-transmute.stderr new file mode 100644 index 000000000000..f1d202b1a1c7 --- /dev/null +++ b/tests/ui/abi/unsupported-abi-transmute.stderr @@ -0,0 +1,9 @@ +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/unsupported-abi-transmute.rs:10:53 + | +LL | let a = unsafe { mem::transmute::(4) }(2); + | ^^^^^^^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/unsupported-in-impls.rs b/tests/ui/abi/unsupported-in-impls.rs new file mode 100644 index 000000000000..71797954865a --- /dev/null +++ b/tests/ui/abi/unsupported-in-impls.rs @@ -0,0 +1,36 @@ +// Test for https://github.com/rust-lang/rust/issues/86232 +// Due to AST-to-HIR lowering nuances, we used to allow unsupported ABIs to "leak" into the HIR +// without being checked, as we would check after generating the ExternAbi. +// Checking afterwards only works if we examine every HIR construct that contains an ExternAbi, +// and those may be very different in HIR, even if they read the same in source. +// This made it very easy to make mistakes. +// +// Here we test that an unsupported ABI in various impl-related positions will be rejected, +// both in the original declarations and the actual implementations. 
+ +#![feature(rustc_attrs)] +//@ compile-flags: --crate-type lib + +pub struct FnPtrBearer { + pub ptr: extern "rust-invalid" fn(), + //~^ ERROR: is not a supported ABI +} + +impl FnPtrBearer { + pub extern "rust-invalid" fn inherent_fn(self) { + //~^ ERROR: is not a supported ABI + (self.ptr)() + } +} + +pub trait Trait { + extern "rust-invalid" fn trait_fn(self); + //~^ ERROR: is not a supported ABI +} + +impl Trait for FnPtrBearer { + extern "rust-invalid" fn trait_fn(self) { + //~^ ERROR: is not a supported ABI + self.inherent_fn() + } +} diff --git a/tests/ui/abi/unsupported-in-impls.stderr b/tests/ui/abi/unsupported-in-impls.stderr new file mode 100644 index 000000000000..d7a188f8a040 --- /dev/null +++ b/tests/ui/abi/unsupported-in-impls.stderr @@ -0,0 +1,27 @@ +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/unsupported-in-impls.rs:15:21 + | +LL | pub ptr: extern "rust-invalid" fn(), + | ^^^^^^^^^^^^^^ + +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/unsupported-in-impls.rs:20:16 + | +LL | pub extern "rust-invalid" fn inherent_fn(self) { + | ^^^^^^^^^^^^^^ + +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/unsupported-in-impls.rs:27:12 + | +LL | extern "rust-invalid" fn trait_fn(self); + | ^^^^^^^^^^^^^^ + +error[E0570]: "rust-invalid" is not a supported ABI for the current target + --> $DIR/unsupported-in-impls.rs:32:12 + | +LL | extern "rust-invalid" fn trait_fn(self) { + | ^^^^^^^^^^^^^^ + +error: aborting due to 4 previous errors + +For more information about this error, try `rustc --explain E0570`. 
diff --git a/tests/ui/abi/unsupported-varargs-fnptr.rs b/tests/ui/abi/unsupported-varargs-fnptr.rs new file mode 100644 index 000000000000..1d23916d0390 --- /dev/null +++ b/tests/ui/abi/unsupported-varargs-fnptr.rs @@ -0,0 +1,19 @@ +// FIXME(workingjubilee): add revisions and generalize to other platform-specific varargs ABIs, +// preferably after the only-arch directive is enhanced with an "or pattern" syntax +// NOTE: This deliberately tests an ABI that supports varargs, so no `extern "rust-invalid"` +//@ only-x86_64 + +// We have to use this flag to force ABI computation of an invalid ABI +//@ compile-flags: -Clink-dead-code + +#![feature(extended_varargs_abi_support)] + +// sometimes fn ptrs with varargs make layout and ABI computation ICE +// as found in https://github.com/rust-lang/rust/issues/142107 + +fn aapcs(f: extern "aapcs" fn(usize, ...)) { +//~^ ERROR [E0570] +// Note we DO NOT have to actually make a call to trigger the ICE! +} + +fn main() {} diff --git a/tests/ui/abi/unsupported-varargs-fnptr.stderr b/tests/ui/abi/unsupported-varargs-fnptr.stderr new file mode 100644 index 000000000000..238f2b313304 --- /dev/null +++ b/tests/ui/abi/unsupported-varargs-fnptr.stderr @@ -0,0 +1,9 @@ +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported-varargs-fnptr.rs:14:20 + | +LL | fn aapcs(f: extern "aapcs" fn(usize, ...)) { + | ^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0570`. 
diff --git a/tests/ui/abi/unsupported.aarch64.stderr b/tests/ui/abi/unsupported.aarch64.stderr index 4721c26026d1..61d07f29fd74 100644 --- a/tests/ui/abi/unsupported.aarch64.stderr +++ b/tests/ui/abi/unsupported.aarch64.stderr @@ -1,136 +1,163 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 + | +LL | extern "ptx-kernel" fn ptx() {} + | ^^^^^^^^^^^^ + +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 | LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 + | +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:47:8 + | +LL | extern "aapcs" fn aapcs() {} + | ^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:49:24 | LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future 
release! - = note: for more information, see issue #130260 + | ^^^^^^^ -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:55:1 +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:53:8 | LL | extern "aapcs" {} - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^ -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^ -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:85:1 +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/unsupported.rs:62:8 | LL | extern "riscv-interrupt-m" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^^ -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:95:1 +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:65:8 | LL | extern "x86-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:68:8 + | +LL | extern "thiscall" fn thiscall() {} + | ^^^^^^^^^^ + +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:70:27 | LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^^^^ -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:105:1 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:74:8 | LL | extern "thiscall" {} - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^ -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:77:8 | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:119:1 - | -LL | extern "stdcall" {} - | ^^^^^^^^^^^^^^^^^^^ +LL | extern "stdcall" fn stdcall() {} + | ^^^^^^^^^ | = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -error[E0570]: `"stdcall-unwind"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:123:1 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:81:26 + | +LL | fn stdcall_ptr(f: extern "stdcall" fn()) { + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:87:8 + | +LL | extern "stdcall" {} + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:91:8 | LL | extern "stdcall-unwind" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^ | = help: if you need `extern "stdcall-unwind"` on win32 and `extern "C-unwind"` everywhere else, use `extern "system-unwind"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:131:17 +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:111:8 + | +LL | extern "vectorcall" fn vectorcall() {} + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:113:29 + | +LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:117:8 + | +LL | extern "vectorcall" {} + | ^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-call" is 
not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 + | +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:99:17 | LL | fn cdecl_ptr(f: extern "cdecl" fn()) { | ^^^^^^^^^^^^^^^^^^^ @@ -140,8 +167,8 @@ LL | fn cdecl_ptr(f: extern "cdecl" fn()) { = help: use `extern "C"` instead = note: `#[warn(unsupported_calling_conventions)]` on by default -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:136:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:104:1 | LL | extern "cdecl" {} | ^^^^^^^^^^^^^^^^^ @@ -150,8 +177,8 @@ LL | extern "cdecl" {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:139:1 +warning: "cdecl-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:107:1 | LL | extern "cdecl-unwind" {} | ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -160,103 +187,8 @@ LL | extern "cdecl-unwind" {} = note: for more information, see issue #137018 = help: use `extern "C-unwind"` instead -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn 
vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:150:1 - | -LL | extern "vectorcall" {} - | ^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 - | -LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:48:1 - | -LL | extern "aapcs" fn aapcs() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:78:1 - | -LL | extern "riscv-interrupt-m" fn riscv() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:88:1 - | -LL | extern "x86-interrupt" fn x86() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:98:1 - | -LL | extern "thiscall" fn thiscall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:108:1 - | -LL | extern "stdcall" fn stdcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = help: if you need 
`extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` - -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:128:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:96:1 | LL | extern "cdecl" fn cdecl() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -265,139 +197,6 @@ LL | extern "cdecl" fn cdecl() {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:143:1 - | -LL | extern "vectorcall" fn vectorcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 22 previous errors; 15 warnings emitted +error: aborting due to 25 previous errors; 4 warnings emitted For more information about this error, try `rustc --explain E0570`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 - | -LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 - | -LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 - | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/abi/unsupported.arm.stderr b/tests/ui/abi/unsupported.arm.stderr index ed9cd2ab2c5d..37b6e2316b0a 100644 --- a/tests/ui/abi/unsupported.arm.stderr +++ b/tests/ui/abi/unsupported.arm.stderr @@ -1,121 +1,145 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 + | +LL | extern "ptx-kernel" fn ptx() {} + | ^^^^^^^^^^^^ + +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 | LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^ -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:85:1 +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/unsupported.rs:62:8 | LL | extern "riscv-interrupt-m" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^^ -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:95:1 +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:65:8 | LL | extern "x86-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:68:8 + | +LL | extern "thiscall" fn thiscall() {} + | ^^^^^^^^^^ + +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:70:27 | LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^^^^ -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:105:1 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:74:8 | LL | extern "thiscall" {} - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^ -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:77:8 | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:119:1 - | -LL | extern "stdcall" {} - | ^^^^^^^^^^^^^^^^^^^ +LL | extern "stdcall" fn stdcall() {} + | ^^^^^^^^^ | = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -error[E0570]: `"stdcall-unwind"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:123:1 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:81:26 + | +LL | fn stdcall_ptr(f: extern "stdcall" fn()) { + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:87:8 + | +LL | extern "stdcall" {} + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:91:8 | LL | extern "stdcall-unwind" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^ | = help: if you need `extern "stdcall-unwind"` on win32 and `extern "C-unwind"` everywhere else, use `extern "system-unwind"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:131:17 +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:111:8 + | +LL | extern "vectorcall" fn vectorcall() {} + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:113:29 + | +LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:117:8 + | +LL | extern "vectorcall" {} + | ^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-call" is 
not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 + | +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:99:17 | LL | fn cdecl_ptr(f: extern "cdecl" fn()) { | ^^^^^^^^^^^^^^^^^^^ @@ -125,8 +149,8 @@ LL | fn cdecl_ptr(f: extern "cdecl" fn()) { = help: use `extern "C"` instead = note: `#[warn(unsupported_calling_conventions)]` on by default -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:136:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:104:1 | LL | extern "cdecl" {} | ^^^^^^^^^^^^^^^^^ @@ -135,8 +159,8 @@ LL | extern "cdecl" {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:139:1 +warning: "cdecl-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:107:1 | LL | extern "cdecl-unwind" {} | ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -145,97 +169,8 @@ LL | extern "cdecl-unwind" {} = note: for more information, see issue #137018 = help: use `extern "C-unwind"` instead -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn 
vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:150:1 - | -LL | extern "vectorcall" {} - | ^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 - | -LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:78:1 - | -LL | extern "riscv-interrupt-m" fn riscv() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:88:1 - | -LL | extern "x86-interrupt" fn x86() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:98:1 - | -LL | extern "thiscall" fn thiscall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:108:1 - | -LL | extern "stdcall" fn stdcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` - -warning: use of calling convention not supported on this target - --> 
$DIR/unsupported.rs:128:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:96:1 | LL | extern "cdecl" fn cdecl() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -244,128 +179,6 @@ LL | extern "cdecl" fn cdecl() {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:143:1 - | -LL | extern "vectorcall" fn vectorcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 20 previous errors; 14 warnings emitted +error: aborting due to 22 previous errors; 4 warnings emitted For more information about this error, try `rustc --explain E0570`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 - | -LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 - | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/abi/unsupported.i686.stderr b/tests/ui/abi/unsupported.i686.stderr index 4d903b435d87..8478c4819416 100644 --- a/tests/ui/abi/unsupported.i686.stderr +++ b/tests/ui/abi/unsupported.i686.stderr @@ -1,222 +1,87 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 - | -LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 - | -LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:55:1 - | -LL | extern "aapcs" {} - | ^^^^^^^^^^^^^^^^^ - -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 - | -LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 - | -LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:85:1 - | -LL | extern "riscv-interrupt-m" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 | LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:48:1 - | -LL | extern "aapcs" fn aapcs() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:78:1 - | -LL | extern "riscv-interrupt-m" fn riscv() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 13 previous errors; 7 warnings emitted - -For more information about this error, try `rustc --explain E0570`. 
-Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -Future breakage diagnostic: -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 + | +LL | extern "ptx-kernel" {} + | ^^^^^^^^^^^^ + +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 + | +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:47:8 + | +LL | extern "aapcs" fn aapcs() {} + | ^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:49:24 | LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^ -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:53:8 | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default +LL | extern "aapcs" {} + | ^^^^^^^ -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default +LL | extern "msp430-interrupt" {} + | ^^^^^^^^^^^^^^^^^^ -Future breakage diagnostic: -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default +LL | extern "avr-interrupt" {} + | ^^^^^^^^^^^^^^^ -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/unsupported.rs:62:8 | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default +LL | extern "riscv-interrupt-m" {} + | ^^^^^^^^^^^^^^^^^^^ -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 +error[E0570]: "cmse-nonsecure-call" is not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 14 previous errors + +For more information about this error, try `rustc --explain E0570`. diff --git a/tests/ui/abi/unsupported.riscv32.stderr b/tests/ui/abi/unsupported.riscv32.stderr index 9e75dfafca0f..d7eb222eb766 100644 --- a/tests/ui/abi/unsupported.riscv32.stderr +++ b/tests/ui/abi/unsupported.riscv32.stderr @@ -1,121 +1,157 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 + | +LL | extern "ptx-kernel" fn ptx() {} + | ^^^^^^^^^^^^ + +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 | LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 + | +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:47:8 + | +LL | extern "aapcs" fn aapcs() {} + | ^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:49:24 | LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^ -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:55:1 +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:53:8 | LL | extern "aapcs" {} - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^ -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^ -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:95:1 +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:65:8 | LL | extern "x86-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:68:8 + | +LL | extern "thiscall" fn thiscall() {} + | ^^^^^^^^^^ + +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:70:27 | LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^^^^ -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:105:1 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:74:8 | LL | extern "thiscall" {} - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^ -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:77:8 | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:119:1 - | -LL | extern "stdcall" {} - | ^^^^^^^^^^^^^^^^^^^ +LL | extern "stdcall" fn stdcall() {} + | ^^^^^^^^^ | = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -error[E0570]: `"stdcall-unwind"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:123:1 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:81:26 + | +LL | fn stdcall_ptr(f: extern "stdcall" fn()) { + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:87:8 + | +LL | extern "stdcall" {} + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:91:8 | LL | extern "stdcall-unwind" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^ | = help: if you need `extern "stdcall-unwind"` on win32 and `extern "C-unwind"` everywhere else, use `extern "system-unwind"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:131:17 +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:111:8 + | +LL | extern "vectorcall" fn vectorcall() {} + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:113:29 + | +LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:117:8 + | +LL | extern "vectorcall" {} + | ^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-call" is 
not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 + | +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:99:17 | LL | fn cdecl_ptr(f: extern "cdecl" fn()) { | ^^^^^^^^^^^^^^^^^^^ @@ -125,8 +161,8 @@ LL | fn cdecl_ptr(f: extern "cdecl" fn()) { = help: use `extern "C"` instead = note: `#[warn(unsupported_calling_conventions)]` on by default -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:136:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:104:1 | LL | extern "cdecl" {} | ^^^^^^^^^^^^^^^^^ @@ -135,8 +171,8 @@ LL | extern "cdecl" {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:139:1 +warning: "cdecl-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:107:1 | LL | extern "cdecl-unwind" {} | ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -145,97 +181,8 @@ LL | extern "cdecl-unwind" {} = note: for more information, see issue #137018 = help: use `extern "C-unwind"` instead -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn 
vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:150:1 - | -LL | extern "vectorcall" {} - | ^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 - | -LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:48:1 - | -LL | extern "aapcs" fn aapcs() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:88:1 - | -LL | extern "x86-interrupt" fn x86() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:98:1 - | -LL | extern "thiscall" fn thiscall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:108:1 - | -LL | extern "stdcall" fn stdcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` - -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:128:1 +warning: "cdecl" is not a 
supported ABI for the current target + --> $DIR/unsupported.rs:96:1 | LL | extern "cdecl" fn cdecl() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -244,128 +191,6 @@ LL | extern "cdecl" fn cdecl() {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:143:1 - | -LL | extern "vectorcall" fn vectorcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 20 previous errors; 14 warnings emitted +error: aborting due to 24 previous errors; 4 warnings emitted For more information about this error, try `rustc --explain E0570`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 - | -LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 - | -LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 - | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/abi/unsupported.riscv64.stderr b/tests/ui/abi/unsupported.riscv64.stderr index 9e75dfafca0f..d7eb222eb766 100644 --- a/tests/ui/abi/unsupported.riscv64.stderr +++ b/tests/ui/abi/unsupported.riscv64.stderr @@ -1,121 +1,157 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 + | +LL | extern "ptx-kernel" fn ptx() {} + | ^^^^^^^^^^^^ + +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 | LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 + | +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:47:8 + | +LL | extern "aapcs" fn aapcs() {} + | ^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:49:24 | LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^ -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:55:1 +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:53:8 | LL | extern "aapcs" {} - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^ -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^ -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:95:1 +error[E0570]: "x86-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:65:8 | LL | extern "x86-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:68:8 + | +LL | extern "thiscall" fn thiscall() {} + | ^^^^^^^^^^ + +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:70:27 | LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^^^^ -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:105:1 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:74:8 | LL | extern "thiscall" {} - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^ -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:77:8 | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:119:1 - | -LL | extern "stdcall" {} - | ^^^^^^^^^^^^^^^^^^^ +LL | extern "stdcall" fn stdcall() {} + | ^^^^^^^^^ | = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -error[E0570]: `"stdcall-unwind"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:123:1 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:81:26 + | +LL | fn stdcall_ptr(f: extern "stdcall" fn()) { + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:87:8 + | +LL | extern "stdcall" {} + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:91:8 | LL | extern "stdcall-unwind" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^ | = help: if you need `extern "stdcall-unwind"` on win32 and `extern "C-unwind"` everywhere else, use `extern "system-unwind"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:131:17 +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:111:8 + | +LL | extern "vectorcall" fn vectorcall() {} + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:113:29 + | +LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { + | ^^^^^^^^^^^^ + +error[E0570]: "vectorcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:117:8 + | +LL | extern "vectorcall" {} + | ^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-call" is 
not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 + | +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:99:17 | LL | fn cdecl_ptr(f: extern "cdecl" fn()) { | ^^^^^^^^^^^^^^^^^^^ @@ -125,8 +161,8 @@ LL | fn cdecl_ptr(f: extern "cdecl" fn()) { = help: use `extern "C"` instead = note: `#[warn(unsupported_calling_conventions)]` on by default -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:136:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:104:1 | LL | extern "cdecl" {} | ^^^^^^^^^^^^^^^^^ @@ -135,8 +171,8 @@ LL | extern "cdecl" {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:139:1 +warning: "cdecl-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:107:1 | LL | extern "cdecl-unwind" {} | ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -145,97 +181,8 @@ LL | extern "cdecl-unwind" {} = note: for more information, see issue #137018 = help: use `extern "C-unwind"` instead -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn 
vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:150:1 - | -LL | extern "vectorcall" {} - | ^^^^^^^^^^^^^^^^^^^^^^ - -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 - | -LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:48:1 - | -LL | extern "aapcs" fn aapcs() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"x86-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:88:1 - | -LL | extern "x86-interrupt" fn x86() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:98:1 - | -LL | extern "thiscall" fn thiscall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:108:1 - | -LL | extern "stdcall" fn stdcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` - -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:128:1 +warning: "cdecl" is not a 
supported ABI for the current target + --> $DIR/unsupported.rs:96:1 | LL | extern "cdecl" fn cdecl() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -244,128 +191,6 @@ LL | extern "cdecl" fn cdecl() {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -error[E0570]: `"vectorcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:143:1 - | -LL | extern "vectorcall" fn vectorcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 20 previous errors; 14 warnings emitted +error: aborting due to 24 previous errors; 4 warnings emitted For more information about this error, try `rustc --explain E0570`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 - | -LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "x86-interrupt" is not supported on this target - --> $DIR/unsupported.rs:90:15 - | -LL | fn x86_ptr(f: extern "x86-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 - | -LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 - | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "vectorcall" is not supported on this target - --> $DIR/unsupported.rs:145:22 - | -LL | fn vectorcall_ptr(f: extern "vectorcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/abi/unsupported.rs b/tests/ui/abi/unsupported.rs index 43bdfe3ea240..828fcc147a56 100644 --- a/tests/ui/abi/unsupported.rs +++ b/tests/ui/abi/unsupported.rs @@ -25,7 +25,7 @@ abi_gpu_kernel, abi_x86_interrupt, abi_riscv_interrupt, - abi_c_cmse_nonsecure_call, + abi_cmse_nonsecure_call, abi_vectorcall, cmse_nonsecure_entry )] @@ -36,8 +36,7 @@ use minicore::*; extern "ptx-kernel" fn ptx() {} //~^ ERROR is not a supported ABI fn ptx_ptr(f: extern "ptx-kernel" fn()) { - //~^ WARN unsupported_fn_ptr_calling_conventions - //~^^ WARN this was previously accepted +//~^ ERROR is not a supported ABI f() } extern "ptx-kernel" {} @@ -48,58 +47,28 @@ extern "gpu-kernel" fn gpu() {} extern "aapcs" fn aapcs() {} //[x64,x64_win,i686,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI fn aapcs_ptr(f: extern "aapcs" fn()) { - //[x64,x64_win,i686,aarch64,riscv32,riscv64]~^ WARN unsupported_fn_ptr_calling_conventions - //[x64,x64_win,i686,aarch64,riscv32,riscv64]~^^ WARN this was previously accepted + //[x64,x64_win,i686,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI f() } extern "aapcs" {} //[x64,x64_win,i686,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI -extern "msp430-interrupt" fn msp430() {} -//~^ ERROR is not a supported ABI -fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - //~^ WARN 
unsupported_fn_ptr_calling_conventions - //~^^ WARN this was previously accepted - f() -} extern "msp430-interrupt" {} //~^ ERROR is not a supported ABI -extern "avr-interrupt" fn avr() {} -//~^ ERROR is not a supported ABI -fn avr_ptr(f: extern "avr-interrupt" fn()) { - //~^ WARN unsupported_fn_ptr_calling_conventions - //~^^ WARN this was previously accepted - f() -} extern "avr-interrupt" {} //~^ ERROR is not a supported ABI -extern "riscv-interrupt-m" fn riscv() {} -//[x64,x64_win,i686,arm,aarch64]~^ ERROR is not a supported ABI -fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - //[x64,x64_win,i686,arm,aarch64]~^ WARN unsupported_fn_ptr_calling_conventions - //[x64,x64_win,i686,arm,aarch64]~^^ WARN this was previously accepted - f() -} extern "riscv-interrupt-m" {} //[x64,x64_win,i686,arm,aarch64]~^ ERROR is not a supported ABI -extern "x86-interrupt" fn x86() {} -//[aarch64,arm,riscv32,riscv64]~^ ERROR is not a supported ABI -fn x86_ptr(f: extern "x86-interrupt" fn()) { - //[aarch64,arm,riscv32,riscv64]~^ WARN unsupported_fn_ptr_calling_conventions - //[aarch64,arm,riscv32,riscv64]~^^ WARN this was previously accepted - f() -} extern "x86-interrupt" {} //[aarch64,arm,riscv32,riscv64]~^ ERROR is not a supported ABI extern "thiscall" fn thiscall() {} //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI fn thiscall_ptr(f: extern "thiscall" fn()) { - //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^ WARN unsupported_fn_ptr_calling_conventions - //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^^ WARN this was previously accepted + //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI f() } extern "thiscall" {} @@ -110,10 +79,9 @@ extern "stdcall" fn stdcall() {} //[x64_win]~^^ WARN unsupported_calling_conventions //[x64_win]~^^^ WARN this was previously accepted fn stdcall_ptr(f: extern "stdcall" fn()) { - //[x64_win]~^ WARN unsupported_calling_conventions - //[x64_win]~| WARN this was previously accepted - 
//[x64,arm,aarch64,riscv32,riscv64]~^^^ WARN unsupported_fn_ptr_calling_conventions - //[x64,arm,aarch64,riscv32,riscv64]~| WARN this was previously accepted + //[x64,arm,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI + //[x64_win]~^^ WARN unsupported_calling_conventions + //[x64_win]~| WARN this was previously accepted f() } extern "stdcall" {} @@ -130,7 +98,7 @@ extern "cdecl" fn cdecl() {} //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^^ WARN this was previously accepted fn cdecl_ptr(f: extern "cdecl" fn()) { //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^ WARN unsupported_calling_conventions - //[x64,x64_win,arm,aarch64,riscv32,riscv64]~^^ WARN this was previously accepted + //[x64,x64_win,arm,aarch64,riscv32,riscv64]~| WARN this was previously accepted f() } extern "cdecl" {} @@ -143,31 +111,28 @@ extern "cdecl-unwind" {} extern "vectorcall" fn vectorcall() {} //[arm,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI fn vectorcall_ptr(f: extern "vectorcall" fn()) { - //[arm,aarch64,riscv32,riscv64]~^ WARN unsupported_fn_ptr_calling_conventions - //[arm,aarch64,riscv32,riscv64]~^^ WARN this was previously accepted + //[arm,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI f() } extern "vectorcall" {} //[arm,aarch64,riscv32,riscv64]~^ ERROR is not a supported ABI -fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - //~^ WARN unsupported_fn_ptr_calling_conventions - //~^^ WARN this was previously accepted +fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { +//~^ ERROR is not a supported ABI f() } -extern "C-cmse-nonsecure-entry" fn cmse_entry() {} +extern "cmse-nonsecure-entry" fn cmse_entry() {} +//~^ ERROR is not a supported ABI +fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { //~^ ERROR is not a supported ABI -fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - //~^ WARN unsupported_fn_ptr_calling_conventions - //~^^ WARN this was previously accepted f() } -extern "C-cmse-nonsecure-entry" {} +extern 
"cmse-nonsecure-entry" {} //~^ ERROR is not a supported ABI #[cfg(windows)] #[link(name = "foo", kind = "raw-dylib")] extern "cdecl" {} -//[x64_win]~^ WARN use of calling convention not supported on this target +//[x64_win]~^ WARN unsupported_calling_conventions //[x64_win]~^^ WARN this was previously accepted diff --git a/tests/ui/abi/unsupported.x64.stderr b/tests/ui/abi/unsupported.x64.stderr index 5b55e5707fad..cf04680b5878 100644 --- a/tests/ui/abi/unsupported.x64.stderr +++ b/tests/ui/abi/unsupported.x64.stderr @@ -1,121 +1,139 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 + | +LL | extern "ptx-kernel" fn ptx() {} + | ^^^^^^^^^^^^ + +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 | LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 + | +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:47:8 + | +LL | extern "aapcs" fn aapcs() {} + | ^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:49:24 | LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^ -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:55:1 +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:53:8 | LL | extern "aapcs" {} - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^ -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^ -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:85:1 +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/unsupported.rs:62:8 | LL | extern "riscv-interrupt-m" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^^ -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:68:8 + | +LL | extern "thiscall" fn thiscall() {} + | ^^^^^^^^^^ + +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:70:27 | LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^^^^ -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:105:1 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:74:8 | LL | extern "thiscall" {} - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^ -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:77:8 | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:119:1 - | -LL | extern "stdcall" {} - | ^^^^^^^^^^^^^^^^^^^ +LL | extern "stdcall" fn stdcall() {} + | ^^^^^^^^^ | = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -error[E0570]: `"stdcall-unwind"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:123:1 +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:81:26 + | +LL | fn stdcall_ptr(f: extern "stdcall" fn()) { + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:87:8 + | +LL | extern "stdcall" {} + | ^^^^^^^^^ + | + = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` + +error[E0570]: "stdcall-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:91:8 | LL | extern "stdcall-unwind" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^ | = help: if you need `extern "stdcall-unwind"` on win32 and `extern "C-unwind"` everywhere else, use `extern "system-unwind"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:131:17 +error[E0570]: "cmse-nonsecure-call" is not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 + | +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn 
cmse_entry_ptr(f: extern "cmse-nonsecure-entry" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:99:17 | LL | fn cdecl_ptr(f: extern "cdecl" fn()) { | ^^^^^^^^^^^^^^^^^^^ @@ -125,8 +143,8 @@ LL | fn cdecl_ptr(f: extern "cdecl" fn()) { = help: use `extern "C"` instead = note: `#[warn(unsupported_calling_conventions)]` on by default -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:136:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:104:1 | LL | extern "cdecl" {} | ^^^^^^^^^^^^^^^^^ @@ -135,8 +153,8 @@ LL | extern "cdecl" {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:139:1 +warning: "cdecl-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:107:1 | LL | extern "cdecl-unwind" {} | ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -145,82 +163,8 @@ LL | extern "cdecl-unwind" {} = note: for more information, see issue #137018 = help: use `extern "C-unwind"` instead -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 - | -LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:48:1 - | -LL | extern "aapcs" fn aapcs() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:78:1 - | -LL | extern "riscv-interrupt-m" fn riscv() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:98:1 - | -LL | extern "thiscall" fn thiscall() {} - | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"stdcall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:108:1 - | -LL | extern "stdcall" fn stdcall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` - -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:128:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:96:1 | LL | extern "cdecl" fn cdecl() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -229,111 +173,6 @@ LL | extern "cdecl" fn cdecl() {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 18 previous errors; 13 warnings emitted +error: aborting due to 21 previous errors; 4 warnings emitted For more information about this error, try `rustc --explain E0570`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 - | -LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 - | -LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "stdcall" is not supported on this target - --> $DIR/unsupported.rs:112:19 - | -LL | fn stdcall_ptr(f: extern "stdcall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/abi/unsupported.x64_win.stderr b/tests/ui/abi/unsupported.x64_win.stderr index 93b5a272e926..d383a4df732f 100644 --- a/tests/ui/abi/unsupported.x64_win.stderr +++ b/tests/ui/abi/unsupported.x64_win.stderr @@ -1,96 +1,107 @@ -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:36:8 + | +LL | extern "ptx-kernel" fn ptx() {} + | ^^^^^^^^^^^^ + +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:38:22 | LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default + | ^^^^^^^^^^^^ -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:43:1 +error[E0570]: "ptx-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:42:8 | LL | extern "ptx-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^ -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/unsupported.rs:44:8 + | +LL | extern "gpu-kernel" fn gpu() {} + | ^^^^^^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:47:8 + | +LL | extern "aapcs" fn aapcs() {} + | ^^^^^^^ + +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:49:24 | LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^ -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:55:1 +error[E0570]: "aapcs" is not a supported ABI for the current target + --> $DIR/unsupported.rs:53:8 | LL | extern "aapcs" {} - | ^^^^^^^^^^^^^^^^^ + | ^^^^^^^ -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:65:1 +error[E0570]: "msp430-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:56:8 | LL | extern "msp430-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^ -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:75:1 +error[E0570]: "avr-interrupt" is not a supported ABI for the current target + --> $DIR/unsupported.rs:59:8 | LL | extern "avr-interrupt" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^ -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:85:1 +error[E0570]: "riscv-interrupt-m" is not a supported ABI for the current target + --> $DIR/unsupported.rs:62:8 | LL | extern "riscv-interrupt-m" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^^ -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:68:8 + | +LL | extern "thiscall" fn thiscall() {} + | ^^^^^^^^^^ + +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:70:27 | LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 + | ^^^^^^^^^^ -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:105:1 +error[E0570]: "thiscall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:74:8 | LL | extern "thiscall" {} - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^ -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:112:19 +error[E0570]: "cmse-nonsecure-call" is not a supported ABI for the current target + --> $DIR/unsupported.rs:120:28 + | +LL | fn cmse_call_ptr(f: extern "cmse-nonsecure-call" fn()) { + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:125:8 + | +LL | extern "cmse-nonsecure-entry" fn cmse_entry() {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:127:29 + | +LL | fn cmse_entry_ptr(f: extern "cmse-nonsecure-entry" 
fn()) { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/unsupported.rs:131:8 + | +LL | extern "cmse-nonsecure-entry" {} + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:81:19 | LL | fn stdcall_ptr(f: extern "stdcall" fn()) { | ^^^^^^^^^^^^^^^^^^^^^ @@ -100,8 +111,8 @@ LL | fn stdcall_ptr(f: extern "stdcall" fn()) { = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` = note: `#[warn(unsupported_calling_conventions)]` on by default -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:119:1 +warning: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:87:1 | LL | extern "stdcall" {} | ^^^^^^^^^^^^^^^^^^^ @@ -110,8 +121,8 @@ LL | extern "stdcall" {} = note: for more information, see issue #137018 = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:123:1 +warning: "stdcall-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:91:1 | LL | extern "stdcall-unwind" {} | ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -120,8 +131,8 @@ LL | extern "stdcall-unwind" {} = note: for more information, see issue #137018 = help: if you need `extern "stdcall-unwind"` on win32 and `extern "C-unwind"` everywhere else, use `extern "system-unwind"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:131:17 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:99:17 | LL | fn cdecl_ptr(f: extern "cdecl" fn()) { | ^^^^^^^^^^^^^^^^^^^ @@ -130,7 +141,27 @@ LL | fn cdecl_ptr(f: extern "cdecl" fn()) { = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling 
convention not supported on this target +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:104:1 + | +LL | extern "cdecl" {} + | ^^^^^^^^^^^^^^^^^ + | + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! + = note: for more information, see issue #137018 + = help: use `extern "C"` instead + +warning: "cdecl-unwind" is not a supported ABI for the current target + --> $DIR/unsupported.rs:107:1 + | +LL | extern "cdecl-unwind" {} + | ^^^^^^^^^^^^^^^^^^^^^^^^ + | + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! + = note: for more information, see issue #137018 + = help: use `extern "C-unwind"` instead + +warning: "cdecl" is not a supported ABI for the current target --> $DIR/unsupported.rs:136:1 | LL | extern "cdecl" {} @@ -140,94 +171,8 @@ LL | extern "cdecl" {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:139:1 - | -LL | extern "cdecl-unwind" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #137018 - = help: use `extern "C-unwind"` instead - -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:166:1 - | -LL | extern "C-cmse-nonsecure-entry" {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:171:1 - | -LL | extern "cdecl" {} - | ^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #137018 - = help: use `extern "C"` instead - -error[E0570]: `"ptx-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:36:1 - | -LL | extern "ptx-kernel" fn ptx() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:45:1 - | -LL | extern "gpu-kernel" fn gpu() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"aapcs"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:48:1 - | -LL | extern "aapcs" fn aapcs() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"msp430-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:58:1 - | -LL | extern "msp430-interrupt" fn msp430() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"avr-interrupt"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:68:1 - | -LL | extern "avr-interrupt" fn avr() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - 
-error[E0570]: `"riscv-interrupt-m"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:78:1 - | -LL | extern "riscv-interrupt-m" fn riscv() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"thiscall"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:98:1 - | -LL | extern "thiscall" fn thiscall() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:108:1 +warning: "stdcall" is not a supported ABI for the current target + --> $DIR/unsupported.rs:77:1 | LL | extern "stdcall" fn stdcall() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -236,8 +181,8 @@ LL | extern "stdcall" fn stdcall() {} = note: for more information, see issue #137018 = help: if you need `extern "stdcall"` on win32 and `extern "C"` everywhere else, use `extern "system"` -warning: use of calling convention not supported on this target - --> $DIR/unsupported.rs:128:1 +warning: "cdecl" is not a supported ABI for the current target + --> $DIR/unsupported.rs:96:1 | LL | extern "cdecl" fn cdecl() {} | ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -246,100 +191,6 @@ LL | extern "cdecl" fn cdecl() {} = note: for more information, see issue #137018 = help: use `extern "C"` instead -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/unsupported.rs:159:1 - | -LL | extern "C-cmse-nonsecure-entry" fn cmse_entry() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 15 previous errors; 17 warnings emitted +error: aborting due to 17 previous errors; 9 warnings emitted For more information about this error, try `rustc --explain E0570`. 
-Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "ptx-kernel" is not supported on this target - --> $DIR/unsupported.rs:38:15 - | -LL | fn ptx_ptr(f: extern "ptx-kernel" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "aapcs" is not supported on this target - --> $DIR/unsupported.rs:50:17 - | -LL | fn aapcs_ptr(f: extern "aapcs" fn()) { - | ^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "msp430-interrupt" is not supported on this target - --> $DIR/unsupported.rs:60:18 - | -LL | fn msp430_ptr(f: extern "msp430-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "avr-interrupt" is not supported on this target - --> $DIR/unsupported.rs:70:15 - | -LL | fn avr_ptr(f: extern "avr-interrupt" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "riscv-interrupt-m" is not supported on this target - --> $DIR/unsupported.rs:80:17 - | -LL | fn riscv_ptr(f: extern "riscv-interrupt-m" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "thiscall" is not supported on this target - --> $DIR/unsupported.rs:100:20 - | -LL | fn thiscall_ptr(f: extern "thiscall" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/unsupported.rs:153:21 - | -LL | fn cmse_call_ptr(f: extern "C-cmse-nonsecure-call" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-entry" is not supported on this target - --> $DIR/unsupported.rs:161:22 - | -LL | fn cmse_entry_ptr(f: extern "C-cmse-nonsecure-entry" fn()) { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/allocator/no_std-alloc-error-handler-custom.rs b/tests/ui/allocator/no_std-alloc-error-handler-custom.rs index 6bbfb72510d0..1b0f0608fc61 100644 --- a/tests/ui/allocator/no_std-alloc-error-handler-custom.rs +++ b/tests/ui/allocator/no_std-alloc-error-handler-custom.rs @@ -6,13 +6,14 @@ //@ compile-flags:-C panic=abort //@ aux-build:helper.rs -#![feature(rustc_private, lang_items)] +#![feature(rustc_private, lang_items, panic_unwind)] #![feature(alloc_error_handler)] #![no_std] #![no_main] extern crate alloc; extern crate libc; +extern crate unwind; // For _Unwind_Resume // ARM targets need these symbols #[no_mangle] @@ -70,7 +71,15 @@ fn panic(panic_info: &core::panic::PanicInfo) -> ! { // in these libraries will refer to `rust_eh_personality` if LLVM can not *prove* the contents won't // unwind. So, for this test case we will define the symbol. 
#[lang = "eh_personality"] -extern "C" fn rust_eh_personality() {} +extern "C" fn rust_eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { + loop {} +} #[derive(Default, Debug)] struct Page(#[allow(dead_code)] [[u64; 32]; 16]); diff --git a/tests/ui/allocator/no_std-alloc-error-handler-default.rs b/tests/ui/allocator/no_std-alloc-error-handler-default.rs index 8bcf054ac85f..51ecf1a6731a 100644 --- a/tests/ui/allocator/no_std-alloc-error-handler-default.rs +++ b/tests/ui/allocator/no_std-alloc-error-handler-default.rs @@ -6,12 +6,13 @@ //@ compile-flags:-C panic=abort //@ aux-build:helper.rs -#![feature(rustc_private, lang_items)] +#![feature(rustc_private, lang_items, panic_unwind)] #![no_std] #![no_main] extern crate alloc; extern crate libc; +extern crate unwind; // For _Unwind_Resume // ARM targets need these symbols #[no_mangle] @@ -57,7 +58,15 @@ fn panic(panic_info: &core::panic::PanicInfo) -> ! { // in these libraries will refer to `rust_eh_personality` if LLVM can not *prove* the contents won't // unwind. So, for this test case we will define the symbol. 
#[lang = "eh_personality"] -extern "C" fn rust_eh_personality() {} +extern "C" fn rust_eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { + loop {} +} #[derive(Default, Debug)] struct Page(#[allow(dead_code)] [[u64; 32]; 16]); diff --git a/tests/ui/argument-suggestions/issue-100154.stderr b/tests/ui/argument-suggestions/issue-100154.stderr index 7eaebcafb595..9732beac4492 100644 --- a/tests/ui/argument-suggestions/issue-100154.stderr +++ b/tests/ui/argument-suggestions/issue-100154.stderr @@ -17,10 +17,8 @@ error[E0277]: `()` doesn't implement `std::fmt::Display` --> $DIR/issue-100154.rs:4:11 | LL | foo::<()>(()); - | ^^ `()` cannot be formatted with the default formatter + | ^^ the trait `std::fmt::Display` is not implemented for `()` | - = help: the trait `std::fmt::Display` is not implemented for `()` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `foo` --> $DIR/issue-100154.rs:1:16 | diff --git a/tests/ui/argument-suggestions/issue-100478.rs b/tests/ui/argument-suggestions/issue-100478.rs index b0a9703112e3..219870f3b237 100644 --- a/tests/ui/argument-suggestions/issue-100478.rs +++ b/tests/ui/argument-suggestions/issue-100478.rs @@ -32,8 +32,8 @@ fn four_shuffle(_a: T1, _b: T2, _c: T3, _d: T4) {} fn main() { three_diff(T2::new(0)); //~ ERROR function takes - four_shuffle(T3::default(), T4::default(), T1::default(), T2::default()); //~ ERROR 35:5: 35:17: arguments to this function are incorrect [E0308] - four_shuffle(T3::default(), T2::default(), T1::default(), T3::default()); //~ ERROR 36:5: 36:17: arguments to this function are incorrect [E0308] + four_shuffle(T3::default(), T4::default(), T1::default(), T2::default()); //~ ERROR arguments to this function are incorrect [E0308] + four_shuffle(T3::default(), T2::default(), T1::default(), T3::default()); //~ ERROR arguments to this function are 
incorrect [E0308] let p1 = T1::new(0); let p2 = Arc::new(T2::new(0)); diff --git a/tests/ui/asm/naked-functions-inline.stderr b/tests/ui/asm/naked-functions-inline.stderr index 07d5f3bc49a9..91140a301edc 100644 --- a/tests/ui/asm/naked-functions-inline.stderr +++ b/tests/ui/asm/naked-functions-inline.stderr @@ -1,26 +1,26 @@ error[E0736]: attribute incompatible with `#[unsafe(naked)]` - --> $DIR/naked-functions-inline.rs:12:1 + --> $DIR/naked-functions-inline.rs:12:3 | LL | #[unsafe(naked)] | ---------------- function marked with `#[unsafe(naked)]` here LL | #[inline] - | ^^^^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` + | ^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` error[E0736]: attribute incompatible with `#[unsafe(naked)]` - --> $DIR/naked-functions-inline.rs:19:1 + --> $DIR/naked-functions-inline.rs:19:3 | LL | #[unsafe(naked)] | ---------------- function marked with `#[unsafe(naked)]` here LL | #[inline(always)] - | ^^^^^^^^^^^^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` + | ^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` error[E0736]: attribute incompatible with `#[unsafe(naked)]` - --> $DIR/naked-functions-inline.rs:26:1 + --> $DIR/naked-functions-inline.rs:26:3 | LL | #[unsafe(naked)] | ---------------- function marked with `#[unsafe(naked)]` here LL | #[inline(never)] - | ^^^^^^^^^^^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` + | ^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` error[E0736]: attribute incompatible with `#[unsafe(naked)]` --> $DIR/naked-functions-inline.rs:33:19 @@ -28,7 +28,7 @@ error[E0736]: attribute incompatible with `#[unsafe(naked)]` LL | #[unsafe(naked)] | ---------------- function marked with `#[unsafe(naked)]` here LL | #[cfg_attr(all(), inline(never))] - | ^^^^^^^^^^^^^ the `inline` attribute is incompatible with `#[unsafe(naked)]` + | ^^^^^^ the `inline` attribute is incompatible with 
`#[unsafe(naked)]` error: aborting due to 4 previous errors diff --git a/tests/ui/asm/naked-invalid-attr.stderr b/tests/ui/asm/naked-invalid-attr.stderr index ef389e7d921b..915b54b3fc23 100644 --- a/tests/ui/asm/naked-invalid-attr.stderr +++ b/tests/ui/asm/naked-invalid-attr.stderr @@ -4,6 +4,15 @@ error[E0433]: failed to resolve: use of unresolved module or unlinked crate `a` LL | #[::a] | ^ use of unresolved module or unlinked crate `a` +error[E0736]: attribute incompatible with `#[unsafe(naked)]` + --> $DIR/naked-invalid-attr.rs:56:3 + | +LL | #[::a] + | ^^^ the `{{root}}::a` attribute is incompatible with `#[unsafe(naked)]` +... +LL | #[unsafe(naked)] + | ---------------- function marked with `#[unsafe(naked)]` here + error: attribute should be applied to a function definition --> $DIR/naked-invalid-attr.rs:13:1 | @@ -33,15 +42,6 @@ LL | #[unsafe(naked)] LL | || {}; | ----- not a function definition -error[E0736]: attribute incompatible with `#[unsafe(naked)]` - --> $DIR/naked-invalid-attr.rs:56:1 - | -LL | #[::a] - | ^^^^^^ the `{{root}}::a` attribute is incompatible with `#[unsafe(naked)]` -... -LL | #[unsafe(naked)] - | ---------------- function marked with `#[unsafe(naked)]` here - error: attribute should be applied to a function definition --> $DIR/naked-invalid-attr.rs:22:5 | diff --git a/tests/ui/associated-consts/assoc-const-eq-const_evaluatable_unchecked.rs b/tests/ui/associated-consts/assoc-const-eq-const_evaluatable_unchecked.rs new file mode 100644 index 000000000000..4b6de6f56d55 --- /dev/null +++ b/tests/ui/associated-consts/assoc-const-eq-const_evaluatable_unchecked.rs @@ -0,0 +1,17 @@ +// The impl of lint `const_evaluatable_unchecked` used to wrongly assume and `assert!` that +// successfully evaluating a type-system constant that has non-region args had to be an anon const. +// In the case below however we have a type-system assoc const (here: `<() as TraitA>::K`). 
+// +// issue: +//@ check-pass +#![feature(associated_const_equality)] + +pub trait TraitA { const K: u8 = 0; } +pub trait TraitB {} + +impl TraitA for () {} +impl TraitB for () where (): TraitA {} + +fn check() where (): TraitB {} + +fn main() {} diff --git a/tests/ui/associated-inherent-types/issue-109299.stderr b/tests/ui/associated-inherent-types/issue-109299.stderr index 1e11c0e8c2af..f29d3cc7834e 100644 --- a/tests/ui/associated-inherent-types/issue-109299.stderr +++ b/tests/ui/associated-inherent-types/issue-109299.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'d` --> $DIR/issue-109299.rs:6:12 | LL | impl Lexer<'d> { - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'d` here: `<'d>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'d` here + | +LL | impl<'d> Lexer<'d> { + | ++++ error: aborting due to 1 previous error diff --git a/tests/ui/associated-types/defaults-unsound-62211-1.current.stderr b/tests/ui/associated-types/defaults-unsound-62211-1.current.stderr index 8b6f0a47aed9..b17e26b608d9 100644 --- a/tests/ui/associated-types/defaults-unsound-62211-1.current.stderr +++ b/tests/ui/associated-types/defaults-unsound-62211-1.current.stderr @@ -2,9 +2,8 @@ error[E0277]: `Self` doesn't implement `std::fmt::Display` --> $DIR/defaults-unsound-62211-1.rs:24:96 | LL | type Output: Copy + Deref + AddAssign<&'static str> + From + Display = Self; - | ^^^^ `Self` cannot be formatted with the default formatter + | ^^^^ the trait `std::fmt::Display` is not implemented for `Self` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `UncheckedCopy::Output` --> $DIR/defaults-unsound-62211-1.rs:24:86 | diff --git a/tests/ui/associated-types/defaults-unsound-62211-1.next.stderr b/tests/ui/associated-types/defaults-unsound-62211-1.next.stderr index 010f51df15ad..a858c9c1ba04 100644 --- 
a/tests/ui/associated-types/defaults-unsound-62211-1.next.stderr +++ b/tests/ui/associated-types/defaults-unsound-62211-1.next.stderr @@ -2,9 +2,8 @@ error[E0277]: `Self` doesn't implement `std::fmt::Display` --> $DIR/defaults-unsound-62211-1.rs:24:96 | LL | type Output: Copy + Deref + AddAssign<&'static str> + From + Display = Self; - | ^^^^ `Self` cannot be formatted with the default formatter + | ^^^^ the trait `std::fmt::Display` is not implemented for `Self` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `UncheckedCopy::Output` --> $DIR/defaults-unsound-62211-1.rs:24:86 | diff --git a/tests/ui/associated-types/defaults-unsound-62211-2.current.stderr b/tests/ui/associated-types/defaults-unsound-62211-2.current.stderr index 7552b0891333..facfec85afe3 100644 --- a/tests/ui/associated-types/defaults-unsound-62211-2.current.stderr +++ b/tests/ui/associated-types/defaults-unsound-62211-2.current.stderr @@ -2,9 +2,8 @@ error[E0277]: `Self` doesn't implement `std::fmt::Display` --> $DIR/defaults-unsound-62211-2.rs:24:96 | LL | type Output: Copy + Deref + AddAssign<&'static str> + From + Display = Self; - | ^^^^ `Self` cannot be formatted with the default formatter + | ^^^^ the trait `std::fmt::Display` is not implemented for `Self` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `UncheckedCopy::Output` --> $DIR/defaults-unsound-62211-2.rs:24:86 | diff --git a/tests/ui/associated-types/defaults-unsound-62211-2.next.stderr b/tests/ui/associated-types/defaults-unsound-62211-2.next.stderr index 934789465707..1360843172f9 100644 --- a/tests/ui/associated-types/defaults-unsound-62211-2.next.stderr +++ b/tests/ui/associated-types/defaults-unsound-62211-2.next.stderr @@ -2,9 +2,8 @@ error[E0277]: `Self` doesn't implement `std::fmt::Display` --> $DIR/defaults-unsound-62211-2.rs:24:96 | LL | type Output: Copy + Deref + 
AddAssign<&'static str> + From + Display = Self; - | ^^^^ `Self` cannot be formatted with the default formatter + | ^^^^ the trait `std::fmt::Display` is not implemented for `Self` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `UncheckedCopy::Output` --> $DIR/defaults-unsound-62211-2.rs:24:86 | diff --git a/tests/ui/async-await/incorrect-move-async-order-issue-79694.fixed b/tests/ui/async-await/incorrect-move-async-order-issue-79694.fixed index c74a32e442f3..9e5e889506c7 100644 --- a/tests/ui/async-await/incorrect-move-async-order-issue-79694.fixed +++ b/tests/ui/async-await/incorrect-move-async-order-issue-79694.fixed @@ -4,5 +4,5 @@ // Regression test for issue 79694 fn main() { - let _ = async move { }; //~ ERROR 7:13: 7:23: the order of `move` and `async` is incorrect + let _ = async move { }; //~ ERROR the order of `move` and `async` is incorrect } diff --git a/tests/ui/async-await/incorrect-move-async-order-issue-79694.rs b/tests/ui/async-await/incorrect-move-async-order-issue-79694.rs index 81ffbacc3273..9c36a6c96da6 100644 --- a/tests/ui/async-await/incorrect-move-async-order-issue-79694.rs +++ b/tests/ui/async-await/incorrect-move-async-order-issue-79694.rs @@ -4,5 +4,5 @@ // Regression test for issue 79694 fn main() { - let _ = move async { }; //~ ERROR 7:13: 7:23: the order of `move` and `async` is incorrect + let _ = move async { }; //~ ERROR the order of `move` and `async` is incorrect } diff --git a/tests/ui/async-await/issues/issue-95307.rs b/tests/ui/async-await/issues/issue-95307.rs index 83df65612b48..40905c239c34 100644 --- a/tests/ui/async-await/issues/issue-95307.rs +++ b/tests/ui/async-await/issues/issue-95307.rs @@ -5,7 +5,10 @@ pub trait C { async fn new() -> [u8; _]; - //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for functions + //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for opaque types + //~| 
ERROR: the placeholder `_` is not allowed within types on item signatures for opaque types + //~| ERROR: the placeholder `_` is not allowed within types on item signatures for opaque types + //~| ERROR: the placeholder `_` is not allowed within types on item signatures for opaque types } fn main() {} diff --git a/tests/ui/async-await/issues/issue-95307.stderr b/tests/ui/async-await/issues/issue-95307.stderr index c670686f7c9d..0aae7a215cda 100644 --- a/tests/ui/async-await/issues/issue-95307.stderr +++ b/tests/ui/async-await/issues/issue-95307.stderr @@ -1,9 +1,33 @@ -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types --> $DIR/issue-95307.rs:7:28 | LL | async fn new() -> [u8; _]; | ^ not allowed in type signatures -error: aborting due to 1 previous error +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/issue-95307.rs:7:28 + | +LL | async fn new() -> [u8; _]; + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/issue-95307.rs:7:28 + | +LL | async fn new() -> [u8; _]; + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/issue-95307.rs:7:28 + | +LL | async fn new() -> [u8; _]; + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error: aborting due to 4 previous errors For more information about this error, try `rustc --explain E0121`. 
diff --git a/tests/ui/attributes/expected-word.rs b/tests/ui/attributes/expected-word.rs new file mode 100644 index 000000000000..246aa78db828 --- /dev/null +++ b/tests/ui/attributes/expected-word.rs @@ -0,0 +1,3 @@ +#[cold = true] +//~^ ERROR malformed `cold` attribute input [E0565] +fn main() {} diff --git a/tests/ui/attributes/expected-word.stderr b/tests/ui/attributes/expected-word.stderr new file mode 100644 index 000000000000..dcb10e7aee89 --- /dev/null +++ b/tests/ui/attributes/expected-word.stderr @@ -0,0 +1,12 @@ +error[E0565]: malformed `cold` attribute input + --> $DIR/expected-word.rs:1:1 + | +LL | #[cold = true] + | ^^^^^^^------^ + | | | + | | didn't expect any arguments here + | help: must be of the form: `#[cold]` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0565`. diff --git a/tests/ui/attributes/lint_on_root.rs b/tests/ui/attributes/lint_on_root.rs new file mode 100644 index 000000000000..93d47bf0d714 --- /dev/null +++ b/tests/ui/attributes/lint_on_root.rs @@ -0,0 +1,7 @@ +// NOTE: this used to panic in debug builds (by a sanity assertion) +// and not emit any lint on release builds. See https://github.com/rust-lang/rust/issues/142891. +#![inline = ""] +//~^ ERROR valid forms for the attribute are `#[inline(always|never)]` and `#[inline]` +//~| WARN this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
+ +fn main() {} diff --git a/tests/ui/attributes/lint_on_root.stderr b/tests/ui/attributes/lint_on_root.stderr new file mode 100644 index 000000000000..aaa46e6f54ba --- /dev/null +++ b/tests/ui/attributes/lint_on_root.stderr @@ -0,0 +1,12 @@ +error: valid forms for the attribute are `#[inline(always|never)]` and `#[inline]` + --> $DIR/lint_on_root.rs:3:1 + | +LL | #![inline = ""] + | ^^^^^^^^^^^^^^^ + | + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! + = note: for more information, see issue #57571 + = note: `#[deny(ill_formed_attribute_input)]` on by default + +error: aborting due to 1 previous error + diff --git a/tests/ui/attributes/malformed-fn-align.rs b/tests/ui/attributes/malformed-fn-align.rs index 35ffd6d8acce..f5ab9555e561 100644 --- a/tests/ui/attributes/malformed-fn-align.rs +++ b/tests/ui/attributes/malformed-fn-align.rs @@ -3,7 +3,10 @@ trait MyTrait { #[align] //~ ERROR malformed `align` attribute input - fn myfun(); + fn myfun1(); + + #[align(1, 2)] //~ ERROR malformed `align` attribute input + fn myfun2(); } #[align = 16] //~ ERROR malformed `align` attribute input diff --git a/tests/ui/attributes/malformed-fn-align.stderr b/tests/ui/attributes/malformed-fn-align.stderr index 765255c2c3a9..b769d0b457dd 100644 --- a/tests/ui/attributes/malformed-fn-align.stderr +++ b/tests/ui/attributes/malformed-fn-align.stderr @@ -2,54 +2,55 @@ error[E0539]: malformed `align` attribute input --> $DIR/malformed-fn-align.rs:5:5 | LL | #[align] - | ^^^^^^^^ expected this to be a list + | ^^^^^^^^ + | | + | expected this to be a list + | help: must be of the form: `#[align()]` + +error[E0805]: malformed `align` attribute input + --> $DIR/malformed-fn-align.rs:8:5 | -help: try changing it to one of the following valid forms of the attribute - | -LL | #[align()] - | ++++++++++++++++++++++ +LL | #[align(1, 2)] + | ^^^^^^^------^ + | | | + | | expected a single argument here + | help: must 
be of the form: `#[align()]` error[E0539]: malformed `align` attribute input - --> $DIR/malformed-fn-align.rs:9:1 + --> $DIR/malformed-fn-align.rs:12:1 | LL | #[align = 16] - | ^^^^^^^^^^^^^ expected this to be a list - | -help: try changing it to one of the following valid forms of the attribute - | -LL - #[align = 16] -LL + #[align()] - | -LL - #[align = 16] -LL + #[align] - | + | ^^^^^^^^^^^^^ + | | + | expected this to be a list + | help: must be of the form: `#[align()]` error[E0589]: invalid alignment value: not an unsuffixed integer - --> $DIR/malformed-fn-align.rs:12:9 + --> $DIR/malformed-fn-align.rs:15:9 | LL | #[align("hello")] | ^^^^^^^ error[E0589]: invalid alignment value: not a power of two - --> $DIR/malformed-fn-align.rs:15:9 + --> $DIR/malformed-fn-align.rs:18:9 | LL | #[align(0)] | ^ error: `#[repr(align(...))]` is not supported on function items - --> $DIR/malformed-fn-align.rs:18:8 + --> $DIR/malformed-fn-align.rs:21:8 | LL | #[repr(align(16))] | ^^^^^^^^^ | help: use `#[align(...)]` instead - --> $DIR/malformed-fn-align.rs:18:8 + --> $DIR/malformed-fn-align.rs:21:8 | LL | #[repr(align(16))] | ^^^^^^^^^ error: `#[align(...)]` is not supported on struct items - --> $DIR/malformed-fn-align.rs:21:1 + --> $DIR/malformed-fn-align.rs:24:1 | LL | #[align(16)] | ^^^^^^^^^^^^ @@ -60,7 +61,7 @@ LL - #[align(16)] LL + #[repr(align(16))] | -error: aborting due to 6 previous errors +error: aborting due to 7 previous errors -Some errors have detailed explanations: E0539, E0589. +Some errors have detailed explanations: E0539, E0589, E0805. For more information about an error, try `rustc --explain E0539`. 
diff --git a/tests/ui/attributes/malformed-must_use.rs b/tests/ui/attributes/malformed-must_use.rs new file mode 100644 index 000000000000..4b98affa8abd --- /dev/null +++ b/tests/ui/attributes/malformed-must_use.rs @@ -0,0 +1,4 @@ +#[must_use()] //~ ERROR valid forms for the attribute are `#[must_use = "reason"]` and `#[must_use]` +struct Test; + +fn main() {} diff --git a/tests/ui/attributes/malformed-must_use.stderr b/tests/ui/attributes/malformed-must_use.stderr new file mode 100644 index 000000000000..c948ba677444 --- /dev/null +++ b/tests/ui/attributes/malformed-must_use.stderr @@ -0,0 +1,8 @@ +error: valid forms for the attribute are `#[must_use = "reason"]` and `#[must_use]` + --> $DIR/malformed-must_use.rs:1:1 + | +LL | #[must_use()] + | ^^^^^^^^^^^^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/attributes/mixed_export_name_and_no_mangle.fixed b/tests/ui/attributes/mixed_export_name_and_no_mangle.fixed index d8b5235c52fb..55c196f6deca 100644 --- a/tests/ui/attributes/mixed_export_name_and_no_mangle.fixed +++ b/tests/ui/attributes/mixed_export_name_and_no_mangle.fixed @@ -3,11 +3,11 @@ //@ check-pass #![warn(unused_attributes)] -//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[export_name]` [unused_attributes] +//~^ WARN `#[no_mangle]` attribute may not be used in combination with `#[export_name]` [unused_attributes] #[export_name = "foo"] pub fn bar() {} -//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[export_name]` [unused_attributes] +//~^ WARN `#[no_mangle]` attribute may not be used in combination with `#[export_name]` [unused_attributes] #[export_name = "baz"] pub fn bak() {} diff --git a/tests/ui/attributes/mixed_export_name_and_no_mangle.rs b/tests/ui/attributes/mixed_export_name_and_no_mangle.rs index 83a673a7d132..79f1e5c19c54 100644 --- a/tests/ui/attributes/mixed_export_name_and_no_mangle.rs +++ b/tests/ui/attributes/mixed_export_name_and_no_mangle.rs @@ 
-4,12 +4,12 @@ #![warn(unused_attributes)] #[no_mangle] -//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[export_name]` [unused_attributes] +//~^ WARN `#[no_mangle]` attribute may not be used in combination with `#[export_name]` [unused_attributes] #[export_name = "foo"] pub fn bar() {} #[unsafe(no_mangle)] -//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[export_name]` [unused_attributes] +//~^ WARN `#[no_mangle]` attribute may not be used in combination with `#[export_name]` [unused_attributes] #[export_name = "baz"] pub fn bak() {} diff --git a/tests/ui/attributes/mixed_export_name_and_no_mangle.stderr b/tests/ui/attributes/mixed_export_name_and_no_mangle.stderr index c760d27db251..1dcaa636800b 100644 --- a/tests/ui/attributes/mixed_export_name_and_no_mangle.stderr +++ b/tests/ui/attributes/mixed_export_name_and_no_mangle.stderr @@ -1,8 +1,8 @@ -warning: `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[export_name]` +warning: `#[no_mangle]` attribute may not be used in combination with `#[export_name]` --> $DIR/mixed_export_name_and_no_mangle.rs:6:1 | LL | #[no_mangle] - | ^^^^^^^^^^^^ `#[unsafe(no_mangle)]` is ignored + | ^^^^^^^^^^^^ `#[no_mangle]` is ignored | note: `#[export_name]` takes precedence --> $DIR/mixed_export_name_and_no_mangle.rs:8:1 @@ -14,23 +14,23 @@ note: the lint level is defined here | LL | #![warn(unused_attributes)] | ^^^^^^^^^^^^^^^^^ -help: remove the `#[unsafe(no_mangle)]` attribute +help: remove the `#[no_mangle]` attribute | LL - #[no_mangle] | -warning: `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[export_name]` +warning: `#[no_mangle]` attribute may not be used in combination with `#[export_name]` --> $DIR/mixed_export_name_and_no_mangle.rs:11:1 | LL | #[unsafe(no_mangle)] - | ^^^^^^^^^^^^^^^^^^^^ `#[unsafe(no_mangle)]` is ignored + | ^^^^^^^^^^^^^^^^^^^^ `#[no_mangle]` is ignored | note: `#[export_name]` takes 
precedence --> $DIR/mixed_export_name_and_no_mangle.rs:13:1 | LL | #[export_name = "baz"] | ^^^^^^^^^^^^^^^^^^^^^^ -help: remove the `#[unsafe(no_mangle)]` attribute +help: remove the `#[no_mangle]` attribute | LL - #[unsafe(no_mangle)] | diff --git a/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.fixed b/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.fixed new file mode 100644 index 000000000000..581cb200770a --- /dev/null +++ b/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.fixed @@ -0,0 +1,15 @@ +// issue: rust-lang/rust#47446 +//@ run-rustfix +//@ check-pass +//@ edition:2024 + +#![warn(unused_attributes)] +//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[unsafe(export_name)]` [unused_attributes] +#[unsafe(export_name = "foo")] +pub fn bar() {} + +//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[unsafe(export_name)]` [unused_attributes] +#[unsafe(export_name = "baz")] +pub fn bak() {} + +fn main() {} diff --git a/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.rs b/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.rs new file mode 100644 index 000000000000..1e4a06132f29 --- /dev/null +++ b/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.rs @@ -0,0 +1,17 @@ +// issue: rust-lang/rust#47446 +//@ run-rustfix +//@ check-pass +//@ edition:2024 + +#![warn(unused_attributes)] +#[unsafe(no_mangle)] +//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[unsafe(export_name)]` [unused_attributes] +#[unsafe(export_name = "foo")] +pub fn bar() {} + +#[unsafe(no_mangle)] +//~^ WARN `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[unsafe(export_name)]` [unused_attributes] +#[unsafe(export_name = "baz")] +pub fn bak() {} + +fn main() {} diff --git a/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.stderr b/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.stderr new file mode 100644 index 
000000000000..09804f9f92b6 --- /dev/null +++ b/tests/ui/attributes/mixed_export_name_and_no_mangle_2024.stderr @@ -0,0 +1,39 @@ +warning: `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[unsafe(export_name)]` + --> $DIR/mixed_export_name_and_no_mangle_2024.rs:7:1 + | +LL | #[unsafe(no_mangle)] + | ^^^^^^^^^^^^^^^^^^^^ `#[unsafe(no_mangle)]` is ignored + | +note: `#[unsafe(export_name)]` takes precedence + --> $DIR/mixed_export_name_and_no_mangle_2024.rs:9:1 + | +LL | #[unsafe(export_name = "foo")] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +note: the lint level is defined here + --> $DIR/mixed_export_name_and_no_mangle_2024.rs:6:9 + | +LL | #![warn(unused_attributes)] + | ^^^^^^^^^^^^^^^^^ +help: remove the `#[unsafe(no_mangle)]` attribute + | +LL - #[unsafe(no_mangle)] + | + +warning: `#[unsafe(no_mangle)]` attribute may not be used in combination with `#[unsafe(export_name)]` + --> $DIR/mixed_export_name_and_no_mangle_2024.rs:12:1 + | +LL | #[unsafe(no_mangle)] + | ^^^^^^^^^^^^^^^^^^^^ `#[unsafe(no_mangle)]` is ignored + | +note: `#[unsafe(export_name)]` takes precedence + --> $DIR/mixed_export_name_and_no_mangle_2024.rs:14:1 + | +LL | #[unsafe(export_name = "baz")] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +help: remove the `#[unsafe(no_mangle)]` attribute + | +LL - #[unsafe(no_mangle)] + | + +warning: 2 warnings emitted + diff --git a/tests/ui/attributes/rustc_skip_during_method_dispatch.rs b/tests/ui/attributes/rustc_skip_during_method_dispatch.rs new file mode 100644 index 000000000000..25b473d5a585 --- /dev/null +++ b/tests/ui/attributes/rustc_skip_during_method_dispatch.rs @@ -0,0 +1,38 @@ +#![feature(rustc_attrs)] + +#[rustc_skip_during_method_dispatch] +//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input [E0539] +trait NotAList {} + +#[rustc_skip_during_method_dispatch = "array"] +//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input [E0539] +trait AlsoNotAList {} + +#[rustc_skip_during_method_dispatch()] 
+//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input +trait Argless {} + +#[rustc_skip_during_method_dispatch(array, boxed_slice, array)] +//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input +trait Duplicate {} + +#[rustc_skip_during_method_dispatch(slice)] +//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input +trait Unexpected {} + +#[rustc_skip_during_method_dispatch(array = true)] +//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input +trait KeyValue {} + +#[rustc_skip_during_method_dispatch("array")] +//~^ ERROR: malformed `rustc_skip_during_method_dispatch` attribute input +trait String {} + +#[rustc_skip_during_method_dispatch(array, boxed_slice)] +trait OK {} + +#[rustc_skip_during_method_dispatch(array)] +//~^ ERROR: attribute should be applied to a trait +impl OK for () {} + +fn main() {} diff --git a/tests/ui/attributes/rustc_skip_during_method_dispatch.stderr b/tests/ui/attributes/rustc_skip_during_method_dispatch.stderr new file mode 100644 index 000000000000..2f5d79684899 --- /dev/null +++ b/tests/ui/attributes/rustc_skip_during_method_dispatch.stderr @@ -0,0 +1,76 @@ +error[E0539]: malformed `rustc_skip_during_method_dispatch` attribute input + --> $DIR/rustc_skip_during_method_dispatch.rs:3:1 + | +LL | #[rustc_skip_during_method_dispatch] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | | + | expected this to be a list + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error[E0539]: malformed `rustc_skip_during_method_dispatch` attribute input + --> $DIR/rustc_skip_during_method_dispatch.rs:7:1 + | +LL | #[rustc_skip_during_method_dispatch = "array"] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | | + | expected this to be a list + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error[E0539]: malformed `rustc_skip_during_method_dispatch` attribute input + --> 
$DIR/rustc_skip_during_method_dispatch.rs:11:1 + | +LL | #[rustc_skip_during_method_dispatch()] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--^ + | | | + | | expected at least 1 argument here + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error[E0538]: malformed `rustc_skip_during_method_dispatch` attribute input + --> $DIR/rustc_skip_during_method_dispatch.rs:15:1 + | +LL | #[rustc_skip_during_method_dispatch(array, boxed_slice, array)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----^^ + | | | + | | found `array` used as a key more than once + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error[E0539]: malformed `rustc_skip_during_method_dispatch` attribute input + --> $DIR/rustc_skip_during_method_dispatch.rs:19:1 + | +LL | #[rustc_skip_during_method_dispatch(slice)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----^^ + | | | + | | valid arguments are `array` or `boxed_slice` + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error[E0565]: malformed `rustc_skip_during_method_dispatch` attribute input + --> $DIR/rustc_skip_during_method_dispatch.rs:23:1 + | +LL | #[rustc_skip_during_method_dispatch(array = true)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^------^^ + | | | + | | didn't expect any arguments here + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error[E0565]: malformed `rustc_skip_during_method_dispatch` attribute input + --> $DIR/rustc_skip_during_method_dispatch.rs:27:1 + | +LL | #[rustc_skip_during_method_dispatch("array")] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------^^ + | | | + | | didn't expect a literal here + | help: must be of the form: `#[rustc_skip_during_method_dispatch(array, boxed_slice)]` + +error: attribute should be applied to a trait + --> $DIR/rustc_skip_during_method_dispatch.rs:34:1 + | +LL | #[rustc_skip_during_method_dispatch(array)] + | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | +LL | impl OK for () {} + | ----------------- not a trait + +error: aborting due to 8 previous errors + +Some errors have detailed explanations: E0538, E0539, E0565. +For more information about an error, try `rustc --explain E0538`. diff --git a/tests/ui/attributes/used_with_arg.rs b/tests/ui/attributes/used_with_arg.rs index ad80ff53f0ef..bc7a6f07442b 100644 --- a/tests/ui/attributes/used_with_arg.rs +++ b/tests/ui/attributes/used_with_arg.rs @@ -1,3 +1,4 @@ +#![deny(unused_attributes)] #![feature(used_with_arg)] #[used(linker)] @@ -6,14 +7,22 @@ static mut USED_LINKER: [usize; 1] = [0]; #[used(compiler)] static mut USED_COMPILER: [usize; 1] = [0]; -#[used(compiler)] //~ ERROR `used(compiler)` and `used(linker)` can't be used together +#[used(compiler)] #[used(linker)] static mut USED_COMPILER_LINKER2: [usize; 1] = [0]; -#[used(compiler)] //~ ERROR `used(compiler)` and `used(linker)` can't be used together -#[used(linker)] #[used(compiler)] #[used(linker)] +#[used(compiler)] //~ ERROR unused attribute +#[used(linker)] //~ ERROR unused attribute static mut USED_COMPILER_LINKER3: [usize; 1] = [0]; +#[used(compiler)] +#[used] +static mut USED_WITHOUT_ATTR1: [usize; 1] = [0]; + +#[used(linker)] +#[used] //~ ERROR unused attribute +static mut USED_WITHOUT_ATTR2: [usize; 1] = [0]; + fn main() {} diff --git a/tests/ui/attributes/used_with_arg.stderr b/tests/ui/attributes/used_with_arg.stderr index 440e5c4a5a02..9ff91a4e03b3 100644 --- a/tests/ui/attributes/used_with_arg.stderr +++ b/tests/ui/attributes/used_with_arg.stderr @@ -1,18 +1,43 @@ -error: `used(compiler)` and `used(linker)` can't be used together - --> $DIR/used_with_arg.rs:9:1 +error: unused attribute + --> $DIR/used_with_arg.rs:16:1 + | +LL | #[used(compiler)] + | ^^^^^^^^^^^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/used_with_arg.rs:14:1 | LL | #[used(compiler)] | ^^^^^^^^^^^^^^^^^ -LL | #[used(linker)] - | 
^^^^^^^^^^^^^^^ - -error: `used(compiler)` and `used(linker)` can't be used together - --> $DIR/used_with_arg.rs:13:1 +note: the lint level is defined here + --> $DIR/used_with_arg.rs:1:9 + | +LL | #![deny(unused_attributes)] + | ^^^^^^^^^^^^^^^^^ + +error: unused attribute + --> $DIR/used_with_arg.rs:17:1 + | +LL | #[used(linker)] + | ^^^^^^^^^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/used_with_arg.rs:15:1 | -LL | #[used(compiler)] - | ^^^^^^^^^^^^^^^^^ LL | #[used(linker)] | ^^^^^^^^^^^^^^^ -error: aborting due to 2 previous errors +error: unused attribute + --> $DIR/used_with_arg.rs:25:1 + | +LL | #[used] + | ^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/used_with_arg.rs:24:1 + | +LL | #[used(linker)] + | ^^^^^^^^^^^^^^^ + +error: aborting due to 3 previous errors diff --git a/tests/ui/attributes/used_with_multi_args.rs b/tests/ui/attributes/used_with_multi_args.rs index d3109cc64442..1c054f792eb9 100644 --- a/tests/ui/attributes/used_with_multi_args.rs +++ b/tests/ui/attributes/used_with_multi_args.rs @@ -1,6 +1,6 @@ #![feature(used_with_arg)] -#[used(compiler, linker)] //~ ERROR expected `used`, `used(compiler)` or `used(linker)` +#[used(compiler, linker)] //~ ERROR malformed `used` attribute input static mut USED_COMPILER_LINKER: [usize; 1] = [0]; fn main() {} diff --git a/tests/ui/attributes/used_with_multi_args.stderr b/tests/ui/attributes/used_with_multi_args.stderr index d4417a202d5f..e48209cf2042 100644 --- a/tests/ui/attributes/used_with_multi_args.stderr +++ b/tests/ui/attributes/used_with_multi_args.stderr @@ -1,8 +1,20 @@ -error: expected `used`, `used(compiler)` or `used(linker)` +error[E0805]: malformed `used` attribute input --> $DIR/used_with_multi_args.rs:3:1 | LL | #[used(compiler, linker)] - | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^------------------^ + | | + | expected a single argument here + | +help: try changing it to one of the following valid forms of the 
attribute + | +LL - #[used(compiler, linker)] +LL + #[used(compiler|linker)] + | +LL - #[used(compiler, linker)] +LL + #[used] + | error: aborting due to 1 previous error +For more information about this error, try `rustc --explain E0805`. diff --git a/tests/ui/binop/issue-77910-1.stderr b/tests/ui/binop/issue-77910-1.stderr index 74deac900d42..80c384f39bd1 100644 --- a/tests/ui/binop/issue-77910-1.stderr +++ b/tests/ui/binop/issue-77910-1.stderr @@ -16,9 +16,8 @@ LL | fn foo(s: &i32) -> &i32 { | --- consider calling this function ... LL | assert_eq!(foo, y); - | ^^^^^^^^^^^^^^^^^^ `for<'a> fn(&'a i32) -> &'a i32 {foo}` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for fn item `for<'a> fn(&'a i32) -> &'a i32 {foo}` | - = help: the trait `Debug` is not implemented for fn item `for<'a> fn(&'a i32) -> &'a i32 {foo}` = help: use parentheses to call this function: `foo(/* &i32 */)` = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/tests/ui/borrowck/borrowck-move-by-capture.stderr b/tests/ui/borrowck/borrowck-move-by-capture.stderr index 9915acfe0653..58d5e90e990a 100644 --- a/tests/ui/borrowck/borrowck-move-by-capture.stderr +++ b/tests/ui/borrowck/borrowck-move-by-capture.stderr @@ -12,7 +12,7 @@ LL | let _h = to_fn_once(move || -> isize { *bar }); | | move occurs because `bar` has type `Box`, which does not implement the `Copy` trait | `bar` is moved here | -help: clone the value before moving it into the closure +help: consider cloning the value before moving it into the closure | LL ~ let value = bar.clone(); LL ~ let _h = to_fn_once(move || -> isize { value }); diff --git a/tests/ui/borrowck/borrowck-move-moved-value-into-closure.stderr b/tests/ui/borrowck/borrowck-move-moved-value-into-closure.stderr index 6a77d86f250a..5ddc6a6d82d8 100644 --- 
a/tests/ui/borrowck/borrowck-move-moved-value-into-closure.stderr +++ b/tests/ui/borrowck/borrowck-move-moved-value-into-closure.stderr @@ -12,6 +12,12 @@ LL | call_f(move|| { *t + 1 }); | ^^^^^^ -- use occurs due to use in closure | | | value used here after move + | +help: consider cloning the value before moving it into the closure + | +LL ~ let value = t.clone(); +LL ~ call_f(move|| { value + 1 }); + | error: aborting due to 1 previous error diff --git a/tests/ui/borrowck/generic_const_early_param.stderr b/tests/ui/borrowck/generic_const_early_param.stderr index 3f56d6a33251..6447f92aba85 100644 --- a/tests/ui/borrowck/generic_const_early_param.stderr +++ b/tests/ui/borrowck/generic_const_early_param.stderr @@ -7,19 +7,24 @@ LL | struct DataWrapper<'static> { error[E0261]: use of undeclared lifetime name `'a` --> $DIR/generic_const_early_param.rs:6:12 | -LL | struct DataWrapper<'static> { - | - help: consider introducing lifetime `'a` here: `'a,` -LL | LL | data: &'a [u8; Self::SIZE], | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | struct DataWrapper<'a, 'static> { + | +++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/generic_const_early_param.rs:10:18 | LL | impl DataWrapper<'a> { - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | impl<'a> DataWrapper<'a> { + | ++++ warning: the feature `generic_const_exprs` is incomplete and may not be safe to use and/or cause compiler crashes --> $DIR/generic_const_early_param.rs:1:12 diff --git a/tests/ui/cast/ice-cast-type-with-error-124848.stderr b/tests/ui/cast/ice-cast-type-with-error-124848.stderr index 0b2ab1dfc4c1..316a484d9715 100644 --- a/tests/ui/cast/ice-cast-type-with-error-124848.stderr +++ b/tests/ui/cast/ice-cast-type-with-error-124848.stderr @@ -2,27 +2,34 @@ error[E0261]: use of undeclared lifetime name `'unpinned` --> 
$DIR/ice-cast-type-with-error-124848.rs:7:32 | LL | struct MyType<'a>(Cell>>, Pin); - | - ^^^^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'unpinned` here: `'unpinned,` + | ^^^^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'unpinned` here + | +LL | struct MyType<'unpinned, 'a>(Cell>>, Pin); + | ++++++++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/ice-cast-type-with-error-124848.rs:14:53 | -LL | fn main() { - | - help: consider introducing lifetime `'a` here: `<'a>` -... LL | let bad_addr = &unpinned as *const Cell>> as usize; | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn main<'a>() { + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/ice-cast-type-with-error-124848.rs:14:67 | -LL | fn main() { - | - help: consider introducing lifetime `'a` here: `<'a>` -... LL | let bad_addr = &unpinned as *const Cell>> as usize; | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn main<'a>() { + | ++++ error[E0412]: cannot find type `Pin` in this scope --> $DIR/ice-cast-type-with-error-124848.rs:7:60 diff --git a/tests/ui/check-cfg/target_feature.stderr b/tests/ui/check-cfg/target_feature.stderr index f29a41d6a8e2..f422919983b7 100644 --- a/tests/ui/check-cfg/target_feature.stderr +++ b/tests/ui/check-cfg/target_feature.stderr @@ -211,10 +211,6 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `reference-types` `relax` `relaxed-simd` -`reserve-x18` -`retpoline-external-thunk` -`retpoline-indirect-branches` -`retpoline-indirect-calls` `rtm` `sb` `scq` diff --git a/tests/ui/closures/issue-111932.stderr b/tests/ui/closures/issue-111932.stderr index 93488ad2011e..fc3b7b0c6e66 100644 --- a/tests/ui/closures/issue-111932.stderr +++ b/tests/ui/closures/issue-111932.stderr @@ -14,11 +14,9 @@ error[E0277]: the size for values of type `dyn Foo` cannot be known at compilati LL | println!("{:?}", foo); | ---- ^^^ doesn't have a 
size known at compile-time | | - | required by a bound introduced by this call + | required by this formatting parameter | = help: the trait `Sized` is not implemented for `dyn Foo` -note: required by an implicit `Sized` bound in `core::fmt::rt::Argument::<'_>::new_debug` - --> $SRC_DIR/core/src/fmt/rt.rs:LL:COL = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 2 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/callback-as-argument.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/callback-as-argument.rs index b25a81b858be..796c2634b623 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/callback-as-argument.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/callback-as-argument.rs @@ -2,15 +2,15 @@ //@ build-pass //@ compile-flags: --target thumbv8m.main-none-eabi --crate-type lib //@ needs-llvm-components: arm -#![feature(abi_c_cmse_nonsecure_call, cmse_nonsecure_entry, no_core, lang_items, intrinsics)] +#![feature(abi_cmse_nonsecure_call, cmse_nonsecure_entry, no_core, lang_items, intrinsics)] #![no_core] extern crate minicore; use minicore::*; #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn test( - f: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u32) -> u32, +pub extern "cmse-nonsecure-entry" fn test( + f: extern "cmse-nonsecure-call" fn(u32, u32, u32, u32) -> u32, a: u32, b: u32, c: u32, diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.rs index 2d0ed5d2a307..cb805309a02d 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.rs @@ -1,9 +1,9 @@ -// gate-test-abi_c_cmse_nonsecure_call -#[allow(unsupported_fn_ptr_calling_conventions)] +// gate-test-abi_cmse_nonsecure_call fn main() { let non_secure_function = unsafe { - 
core::mem::transmute:: i32>( - //~^ ERROR [E0658] + core::mem::transmute:: i32>( + //~^ ERROR: is not a supported ABI for the current target [E0570] + //~| ERROR: ABI is experimental and subject to change [E0658] 0x10000004, ) }; diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.stderr index beb0ab70cc7e..ecf70e890f4c 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/gate_test.stderr @@ -1,23 +1,20 @@ -error[E0658]: the extern "C-cmse-nonsecure-call" ABI is experimental and subject to change - --> $DIR/gate_test.rs:5:46 +error[E0570]: "cmse-nonsecure-call" is not a supported ABI for the current target + --> $DIR/gate_test.rs:4:46 | -LL | core::mem::transmute:: i32>( - | ^^^^^^^^^^^^^^^^^^^^^^^ +LL | core::mem::transmute:: i32>( + | ^^^^^^^^^^^^^^^^^^^^^ + +error[E0658]: the extern "cmse-nonsecure-call" ABI is experimental and subject to change + --> $DIR/gate_test.rs:4:46 + | +LL | core::mem::transmute:: i32>( + | ^^^^^^^^^^^^^^^^^^^^^ | = note: see issue #81391 for more information - = help: add `#![feature(abi_c_cmse_nonsecure_call)]` to the crate attributes to enable + = help: add `#![feature(abi_cmse_nonsecure_call)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error: aborting due to 1 previous error - -For more information about this error, try `rustc --explain E0658`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "C-cmse-nonsecure-call" is not supported on this target - --> $DIR/gate_test.rs:5:39 - | -LL | core::mem::transmute:: i32>( - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 +error: aborting due to 2 previous errors +Some errors have detailed explanations: E0570, E0658. +For more information about an error, try `rustc --explain E0570`. diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.rs index 84080890e080..4ce5890a2da3 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.rs @@ -1,7 +1,7 @@ //@ add-core-stubs //@ compile-flags: --target thumbv8m.main-none-eabi --crate-type lib //@ needs-llvm-components: arm -#![feature(abi_c_cmse_nonsecure_call, no_core, lang_items)] +#![feature(abi_cmse_nonsecure_call, no_core, lang_items)] #![no_core] extern crate minicore; @@ -11,31 +11,31 @@ use minicore::*; struct Wrapper(T); struct Test { - f1: extern "C-cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, + f1: extern "cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, //~^ ERROR cannot find type `U` in this scope //~| ERROR function pointer types may not have generic parameters - f2: extern "C-cmse-nonsecure-call" fn(impl Copy, u32, u32, u32) -> u64, + f2: extern "cmse-nonsecure-call" fn(impl Copy, u32, u32, u32) -> u64, //~^ ERROR `impl Trait` is not allowed in `fn` pointer parameters - f3: extern "C-cmse-nonsecure-call" fn(T, u32, u32, u32) -> u64, //~ ERROR [E0798] - f4: extern "C-cmse-nonsecure-call" fn(Wrapper, u32, u32, u32) -> u64, //~ ERROR [E0798] + f3: extern "cmse-nonsecure-call" fn(T, u32, u32, u32) -> u64, //~ ERROR [E0798] + f4: extern "cmse-nonsecure-call" fn(Wrapper, u32, u32, u32) -> u64, //~ ERROR [E0798] } -type WithReference = extern "C-cmse-nonsecure-call" fn(&usize); +type WithReference = extern "cmse-nonsecure-call" fn(&usize); trait Trait {} -type WithTraitObject = extern "C-cmse-nonsecure-call" fn(&dyn Trait) -> &dyn Trait; -//~^ ERROR return value of `"C-cmse-nonsecure-call"` function too large to pass via registers [E0798] 
+type WithTraitObject = extern "cmse-nonsecure-call" fn(&dyn Trait) -> &dyn Trait; +//~^ ERROR return value of `"cmse-nonsecure-call"` function too large to pass via registers [E0798] type WithStaticTraitObject = - extern "C-cmse-nonsecure-call" fn(&'static dyn Trait) -> &'static dyn Trait; -//~^ ERROR return value of `"C-cmse-nonsecure-call"` function too large to pass via registers [E0798] + extern "cmse-nonsecure-call" fn(&'static dyn Trait) -> &'static dyn Trait; +//~^ ERROR return value of `"cmse-nonsecure-call"` function too large to pass via registers [E0798] #[repr(transparent)] struct WrapperTransparent<'a>(&'a dyn Trait); type WithTransparentTraitObject = - extern "C-cmse-nonsecure-call" fn(WrapperTransparent) -> WrapperTransparent; -//~^ ERROR return value of `"C-cmse-nonsecure-call"` function too large to pass via registers [E0798] + extern "cmse-nonsecure-call" fn(WrapperTransparent) -> WrapperTransparent; +//~^ ERROR return value of `"cmse-nonsecure-call"` function too large to pass via registers [E0798] -type WithVarArgs = extern "C-cmse-nonsecure-call" fn(u32, ...); -//~^ ERROR C-variadic functions with the "C-cmse-nonsecure-call" calling convention are not supported +type WithVarArgs = extern "cmse-nonsecure-call" fn(u32, ...); +//~^ ERROR C-variadic functions with the "cmse-nonsecure-call" calling convention are not supported diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.stderr index 2b51f48915b3..156568535763 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/generics.stderr @@ -1,21 +1,21 @@ error: function pointer types may not have generic parameters - --> $DIR/generics.rs:14:42 + --> $DIR/generics.rs:14:40 | -LL | f1: extern "C-cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, - | ^^^^^^^^^ +LL | f1: extern "cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, + | ^^^^^^^^^ error[E0412]: cannot 
find type `U` in this scope - --> $DIR/generics.rs:14:52 + --> $DIR/generics.rs:14:50 | LL | struct Test { | - similarly named type parameter `T` defined here -LL | f1: extern "C-cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, - | ^ +LL | f1: extern "cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, + | ^ | help: a type parameter with a similar name exists | -LL - f1: extern "C-cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, -LL + f1: extern "C-cmse-nonsecure-call" fn(T, u32, u32, u32) -> u64, +LL - f1: extern "cmse-nonsecure-call" fn(U, u32, u32, u32) -> u64, +LL + f1: extern "cmse-nonsecure-call" fn(T, u32, u32, u32) -> u64, | help: you might be missing a type parameter | @@ -23,57 +23,57 @@ LL | struct Test { | +++ error[E0562]: `impl Trait` is not allowed in `fn` pointer parameters - --> $DIR/generics.rs:17:43 + --> $DIR/generics.rs:17:41 | -LL | f2: extern "C-cmse-nonsecure-call" fn(impl Copy, u32, u32, u32) -> u64, - | ^^^^^^^^^ +LL | f2: extern "cmse-nonsecure-call" fn(impl Copy, u32, u32, u32) -> u64, + | ^^^^^^^^^ | = note: `impl Trait` is only allowed in arguments and return types of functions and methods -error[E0798]: function pointers with the `"C-cmse-nonsecure-call"` ABI cannot contain generics in their type +error[E0798]: function pointers with the `"cmse-nonsecure-call"` ABI cannot contain generics in their type --> $DIR/generics.rs:19:9 | -LL | f3: extern "C-cmse-nonsecure-call" fn(T, u32, u32, u32) -> u64, - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | f3: extern "cmse-nonsecure-call" fn(T, u32, u32, u32) -> u64, + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -error[E0798]: function pointers with the `"C-cmse-nonsecure-call"` ABI cannot contain generics in their type +error[E0798]: function pointers with the `"cmse-nonsecure-call"` ABI cannot contain generics in their type --> $DIR/generics.rs:20:9 | -LL | f4: extern "C-cmse-nonsecure-call" fn(Wrapper, u32, u32, u32) -> u64, - | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | f4: extern "cmse-nonsecure-call" fn(Wrapper, u32, u32, u32) -> u64, + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/generics.rs:26:73 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/generics.rs:26:71 | -LL | type WithTraitObject = extern "C-cmse-nonsecure-call" fn(&dyn Trait) -> &dyn Trait; - | ^^^^^^^^^^ this type doesn't fit in the available registers +LL | type WithTraitObject = extern "cmse-nonsecure-call" fn(&dyn Trait) -> &dyn Trait; + | ^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/generics.rs:30:62 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/generics.rs:30:60 | -LL | extern "C-cmse-nonsecure-call" fn(&'static dyn Trait) -> &'static dyn Trait; - | ^^^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | extern "cmse-nonsecure-call" fn(&'static dyn Trait) -> &'static dyn Trait; + | ^^^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 
4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/generics.rs:37:62 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/generics.rs:37:60 | -LL | extern "C-cmse-nonsecure-call" fn(WrapperTransparent) -> WrapperTransparent; - | ^^^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | extern "cmse-nonsecure-call" fn(WrapperTransparent) -> WrapperTransparent; + | ^^^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0045]: C-variadic functions with the "C-cmse-nonsecure-call" calling convention are not supported +error[E0045]: C-variadic functions with the "cmse-nonsecure-call" calling convention are not supported --> $DIR/generics.rs:40:20 | -LL | type WithVarArgs = extern "C-cmse-nonsecure-call" fn(u32, ...); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ C-variadic function must have a compatible calling convention +LL | type WithVarArgs = extern "cmse-nonsecure-call" fn(u32, ...); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ C-variadic function must have a compatible calling convention error: aborting due to 9 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.rs index 8328f9b6dd55..7036cd367e40 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.rs @@ -1,7 +1,7 @@ //@ add-core-stubs //@ compile-flags: --target thumbv8m.main-none-eabi --crate-type lib //@ 
needs-llvm-components: arm -#![feature(abi_c_cmse_nonsecure_call, no_core, lang_items)] +#![feature(abi_cmse_nonsecure_call, no_core, lang_items)] #![no_core] extern crate minicore; @@ -13,10 +13,10 @@ pub struct AlignRelevant(u32); #[no_mangle] pub fn test( - f1: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u32, x: u32, y: u32), //~ ERROR [E0798] - f2: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u16, u16), //~ ERROR [E0798] - f3: extern "C-cmse-nonsecure-call" fn(u32, u64, u32), //~ ERROR [E0798] - f4: extern "C-cmse-nonsecure-call" fn(AlignRelevant, u32), //~ ERROR [E0798] - f5: extern "C-cmse-nonsecure-call" fn([u32; 5]), //~ ERROR [E0798] + f1: extern "cmse-nonsecure-call" fn(u32, u32, u32, u32, x: u32, y: u32), //~ ERROR [E0798] + f2: extern "cmse-nonsecure-call" fn(u32, u32, u32, u16, u16), //~ ERROR [E0798] + f3: extern "cmse-nonsecure-call" fn(u32, u64, u32), //~ ERROR [E0798] + f4: extern "cmse-nonsecure-call" fn(AlignRelevant, u32), //~ ERROR [E0798] + f5: extern "cmse-nonsecure-call" fn([u32; 5]), //~ ERROR [E0798] ) { } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.stderr index 10a5e8561075..5d59405fbd1b 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/params-via-stack.stderr @@ -1,42 +1,42 @@ -error[E0798]: arguments for `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/params-via-stack.rs:16:63 +error[E0798]: arguments for `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/params-via-stack.rs:16:61 | -LL | f1: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u32, x: u32, y: u32), - | ^^^^^^^^^^^^^^ these arguments don't fit in the available registers +LL | f1: extern "cmse-nonsecure-call" fn(u32, u32, u32, u32, x: u32, y: u32), + | ^^^^^^^^^^^^^^ these arguments don't fit in the available registers | - = note: 
functions with the `"C-cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/params-via-stack.rs:17:63 +error[E0798]: arguments for `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/params-via-stack.rs:17:61 | -LL | f2: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u16, u16), - | ^^^ this argument doesn't fit in the available registers +LL | f2: extern "cmse-nonsecure-call" fn(u32, u32, u32, u16, u16), + | ^^^ this argument doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/params-via-stack.rs:18:53 +error[E0798]: arguments for `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/params-via-stack.rs:18:51 | -LL | f3: extern "C-cmse-nonsecure-call" fn(u32, u64, u32), - | ^^^ this argument doesn't fit in the available registers +LL | f3: extern "cmse-nonsecure-call" fn(u32, u64, u32), + | ^^^ this argument doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/params-via-stack.rs:19:58 +error[E0798]: arguments for 
`"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/params-via-stack.rs:19:56 | -LL | f4: extern "C-cmse-nonsecure-call" fn(AlignRelevant, u32), - | ^^^ this argument doesn't fit in the available registers +LL | f4: extern "cmse-nonsecure-call" fn(AlignRelevant, u32), + | ^^^ this argument doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/params-via-stack.rs:20:43 +error[E0798]: arguments for `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/params-via-stack.rs:20:41 | -LL | f5: extern "C-cmse-nonsecure-call" fn([u32; 5]), - | ^^^^^^^^ this argument doesn't fit in the available registers +LL | f5: extern "cmse-nonsecure-call" fn([u32; 5]), + | ^^^^^^^^ this argument doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass all their arguments via the 4 32-bit available argument registers error: aborting due to 5 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.rs index 890ec4b00f6a..77347b04ede8 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.rs @@ -3,7 +3,7 @@ //@ needs-llvm-components: arm //@ add-core-stubs -#![feature(abi_c_cmse_nonsecure_call, no_core, lang_items)] +#![feature(abi_cmse_nonsecure_call, no_core, lang_items)] #![no_core] extern crate minicore; @@ -23,18 
+23,18 @@ pub struct ReprCAlign16(u16); #[no_mangle] pub fn test( - f1: extern "C-cmse-nonsecure-call" fn() -> ReprCU64, //~ ERROR [E0798] - f2: extern "C-cmse-nonsecure-call" fn() -> ReprCBytes, //~ ERROR [E0798] - f3: extern "C-cmse-nonsecure-call" fn() -> U64Compound, //~ ERROR [E0798] - f4: extern "C-cmse-nonsecure-call" fn() -> ReprCAlign16, //~ ERROR [E0798] - f5: extern "C-cmse-nonsecure-call" fn() -> [u8; 5], //~ ERROR [E0798] + f1: extern "cmse-nonsecure-call" fn() -> ReprCU64, //~ ERROR [E0798] + f2: extern "cmse-nonsecure-call" fn() -> ReprCBytes, //~ ERROR [E0798] + f3: extern "cmse-nonsecure-call" fn() -> U64Compound, //~ ERROR [E0798] + f4: extern "cmse-nonsecure-call" fn() -> ReprCAlign16, //~ ERROR [E0798] + f5: extern "cmse-nonsecure-call" fn() -> [u8; 5], //~ ERROR [E0798] ) { } #[allow(improper_ctypes_definitions)] struct Test { - u128: extern "C-cmse-nonsecure-call" fn() -> u128, //~ ERROR [E0798] - i128: extern "C-cmse-nonsecure-call" fn() -> i128, //~ ERROR [E0798] + u128: extern "cmse-nonsecure-call" fn() -> u128, //~ ERROR [E0798] + i128: extern "cmse-nonsecure-call" fn() -> i128, //~ ERROR [E0798] } #[repr(C)] @@ -49,7 +49,7 @@ pub union ReprRustUnionU64 { #[no_mangle] pub fn test_union( - f1: extern "C-cmse-nonsecure-call" fn() -> ReprRustUnionU64, //~ ERROR [E0798] - f2: extern "C-cmse-nonsecure-call" fn() -> ReprCUnionU64, //~ ERROR [E0798] + f1: extern "cmse-nonsecure-call" fn() -> ReprRustUnionU64, //~ ERROR [E0798] + f2: extern "cmse-nonsecure-call" fn() -> ReprCUnionU64, //~ ERROR [E0798] ) { } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.stderr index d2077352900a..ddf969c1bce1 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/return-via-stack.stderr @@ -1,82 +1,82 @@ -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via 
registers - --> $DIR/return-via-stack.rs:36:50 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:36:48 | -LL | u128: extern "C-cmse-nonsecure-call" fn() -> u128, - | ^^^^ this type doesn't fit in the available registers +LL | u128: extern "cmse-nonsecure-call" fn() -> u128, + | ^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:37:50 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:37:48 | -LL | i128: extern "C-cmse-nonsecure-call" fn() -> i128, - | ^^^^ this type doesn't fit in the available registers +LL | i128: extern "cmse-nonsecure-call" fn() -> i128, + | ^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:26:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:26:46 | -LL | f1: extern "C-cmse-nonsecure-call" fn() -> ReprCU64, - | ^^^^^^^^ this type doesn't fit in the available registers +LL | f1: extern 
"cmse-nonsecure-call" fn() -> ReprCU64, + | ^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:27:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:27:46 | -LL | f2: extern "C-cmse-nonsecure-call" fn() -> ReprCBytes, - | ^^^^^^^^^^ this type doesn't fit in the available registers +LL | f2: extern "cmse-nonsecure-call" fn() -> ReprCBytes, + | ^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:28:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:28:46 | -LL | f3: extern "C-cmse-nonsecure-call" fn() -> U64Compound, - | ^^^^^^^^^^^ this type doesn't fit in the available registers +LL | f3: extern "cmse-nonsecure-call" fn() -> U64Compound, + | ^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI 
must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:29:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:29:46 | -LL | f4: extern "C-cmse-nonsecure-call" fn() -> ReprCAlign16, - | ^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | f4: extern "cmse-nonsecure-call" fn() -> ReprCAlign16, + | ^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:30:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:30:46 | -LL | f5: extern "C-cmse-nonsecure-call" fn() -> [u8; 5], - | ^^^^^^^ this type doesn't fit in the available registers +LL | f5: extern "cmse-nonsecure-call" fn() -> [u8; 5], + | ^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> 
$DIR/return-via-stack.rs:52:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:52:46 | -LL | f1: extern "C-cmse-nonsecure-call" fn() -> ReprRustUnionU64, - | ^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | f1: extern "cmse-nonsecure-call" fn() -> ReprRustUnionU64, + | ^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-call"` function too large to pass via registers - --> $DIR/return-via-stack.rs:53:48 +error[E0798]: return value of `"cmse-nonsecure-call"` function too large to pass via registers + --> $DIR/return-via-stack.rs:53:46 | -LL | f2: extern "C-cmse-nonsecure-call" fn() -> ReprCUnionU64, - | ^^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | f2: extern "cmse-nonsecure-call" fn() -> ReprCUnionU64, + | ^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-call"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-call"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size error: aborting due to 9 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/via-registers.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/via-registers.rs index 7dfe6cf9672a..419d26875bcd 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/via-registers.rs +++ 
b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/via-registers.rs @@ -2,7 +2,7 @@ //@ build-pass //@ compile-flags: --target thumbv8m.main-none-eabi --crate-type lib //@ needs-llvm-components: arm -#![feature(abi_c_cmse_nonsecure_call, no_core, lang_items, intrinsics)] +#![feature(abi_cmse_nonsecure_call, no_core, lang_items, intrinsics)] #![no_core] extern crate minicore; @@ -27,26 +27,26 @@ pub struct U32Compound(u16, u16); #[no_mangle] #[allow(improper_ctypes_definitions)] pub fn params( - f1: extern "C-cmse-nonsecure-call" fn(), - f2: extern "C-cmse-nonsecure-call" fn(u32, u32, u32, u32), - f3: extern "C-cmse-nonsecure-call" fn(u64, u64), - f4: extern "C-cmse-nonsecure-call" fn(u128), - f5: extern "C-cmse-nonsecure-call" fn(f64, f32, f32), - f6: extern "C-cmse-nonsecure-call" fn(ReprTransparentStruct, U32Compound), - f7: extern "C-cmse-nonsecure-call" fn([u32; 4]), + f1: extern "cmse-nonsecure-call" fn(), + f2: extern "cmse-nonsecure-call" fn(u32, u32, u32, u32), + f3: extern "cmse-nonsecure-call" fn(u64, u64), + f4: extern "cmse-nonsecure-call" fn(u128), + f5: extern "cmse-nonsecure-call" fn(f64, f32, f32), + f6: extern "cmse-nonsecure-call" fn(ReprTransparentStruct, U32Compound), + f7: extern "cmse-nonsecure-call" fn([u32; 4]), ) { } #[no_mangle] pub fn returns( - f1: extern "C-cmse-nonsecure-call" fn() -> u32, - f2: extern "C-cmse-nonsecure-call" fn() -> u64, - f3: extern "C-cmse-nonsecure-call" fn() -> i64, - f4: extern "C-cmse-nonsecure-call" fn() -> f64, - f5: extern "C-cmse-nonsecure-call" fn() -> [u8; 4], - f6: extern "C-cmse-nonsecure-call" fn() -> ReprTransparentStruct, - f7: extern "C-cmse-nonsecure-call" fn() -> ReprTransparentStruct>, - f8: extern "C-cmse-nonsecure-call" fn() -> ReprTransparentEnumU64, - f9: extern "C-cmse-nonsecure-call" fn() -> U32Compound, + f1: extern "cmse-nonsecure-call" fn() -> u32, + f2: extern "cmse-nonsecure-call" fn() -> u64, + f3: extern "cmse-nonsecure-call" fn() -> i64, + f4: extern "cmse-nonsecure-call" fn() -> f64, + 
f5: extern "cmse-nonsecure-call" fn() -> [u8; 4], + f6: extern "cmse-nonsecure-call" fn() -> ReprTransparentStruct, + f7: extern "cmse-nonsecure-call" fn() -> ReprTransparentStruct>, + f8: extern "cmse-nonsecure-call" fn() -> ReprTransparentEnumU64, + f9: extern "cmse-nonsecure-call" fn() -> U32Compound, ) { } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.rs index 5a2d2db19c54..44a1e7d69a8c 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.rs @@ -1,10 +1,10 @@ //@ add-core-stubs //@ compile-flags: --target thumbv8m.main-none-eabi --crate-type lib //@ needs-llvm-components: arm -#![feature(abi_c_cmse_nonsecure_call, lang_items, no_core)] +#![feature(abi_cmse_nonsecure_call, lang_items, no_core)] #![no_core] extern crate minicore; use minicore::*; -pub extern "C-cmse-nonsecure-call" fn test() {} //~ ERROR [E0781] +pub extern "cmse-nonsecure-call" fn test() {} //~ ERROR [E0781] diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.stderr index f49fab043a47..b9cccecc64bf 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-1.stderr @@ -1,8 +1,8 @@ -error[E0781]: the `"C-cmse-nonsecure-call"` ABI is only allowed on function pointers +error[E0781]: the `"cmse-nonsecure-call"` ABI is only allowed on function pointers --> $DIR/wrong-abi-location-1.rs:10:1 | -LL | pub extern "C-cmse-nonsecure-call" fn test() {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | pub extern "cmse-nonsecure-call" fn test() {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: aborting due to 1 previous error diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.rs 
b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.rs index e93b153949a3..f23f45f786fb 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.rs @@ -1,12 +1,12 @@ //@ add-core-stubs //@ compile-flags: --target thumbv8m.main-none-eabi --crate-type lib //@ needs-llvm-components: arm -#![feature(abi_c_cmse_nonsecure_call, lang_items, no_core)] +#![feature(abi_cmse_nonsecure_call, lang_items, no_core)] #![no_core] extern crate minicore; use minicore::*; -extern "C-cmse-nonsecure-call" { //~ ERROR [E0781] +extern "cmse-nonsecure-call" { //~ ERROR [E0781] fn test(); } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.stderr index bae8d20d81c5..437d7b80b1fd 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-call/wrong-abi-location-2.stderr @@ -1,7 +1,7 @@ -error[E0781]: the `"C-cmse-nonsecure-call"` ABI is only allowed on function pointers +error[E0781]: the `"cmse-nonsecure-call"` ABI is only allowed on function pointers --> $DIR/wrong-abi-location-2.rs:10:1 | -LL | / extern "C-cmse-nonsecure-call" { +LL | / extern "cmse-nonsecure-call" { LL | | fn test(); LL | | } | |_^ diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.rs index 6061451b2e97..8ec22033a3df 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.rs @@ -1,9 +1,9 @@ // gate-test-cmse_nonsecure_entry #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { - //~^ ERROR [E0570] - //~| ERROR [E0658] +pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { + //~^ ERROR: is not a supported ABI for the current target [E0570] + 
//~| ERROR: ABI is experimental and subject to change [E0658] input + 6 } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.stderr index 0afbbe647af0..e40862e74eee 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/gate_test.stderr @@ -1,19 +1,19 @@ -error[E0658]: the extern "C-cmse-nonsecure-entry" ABI is experimental and subject to change +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target --> $DIR/gate_test.rs:4:12 | -LL | pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { + | ^^^^^^^^^^^^^^^^^^^^^^ + +error[E0658]: the extern "cmse-nonsecure-entry" ABI is experimental and subject to change + --> $DIR/gate_test.rs:4:12 + | +LL | pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { + | ^^^^^^^^^^^^^^^^^^^^^^ | = note: see issue #75835 for more information = help: add `#![feature(cmse_nonsecure_entry)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/gate_test.rs:4:1 - | -LL | pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - error: aborting due to 2 previous errors Some errors have detailed explanations: E0570, E0658. 
diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.rs index 19b6179dde75..800dd580af29 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.rs @@ -11,12 +11,12 @@ use minicore::*; struct Wrapper(T); impl Wrapper { - extern "C-cmse-nonsecure-entry" fn ambient_generic(_: T, _: u32, _: u32, _: u32) -> u64 { + extern "cmse-nonsecure-entry" fn ambient_generic(_: T, _: u32, _: u32, _: u32) -> u64 { //~^ ERROR [E0798] 0 } - extern "C-cmse-nonsecure-entry" fn ambient_generic_nested( + extern "cmse-nonsecure-entry" fn ambient_generic_nested( //~^ ERROR [E0798] _: Wrapper, _: u32, @@ -27,7 +27,7 @@ impl Wrapper { } } -extern "C-cmse-nonsecure-entry" fn introduced_generic( +extern "cmse-nonsecure-entry" fn introduced_generic( //~^ ERROR [E0798] _: U, _: u32, @@ -37,40 +37,40 @@ extern "C-cmse-nonsecure-entry" fn introduced_generic( 0 } -extern "C-cmse-nonsecure-entry" fn impl_trait(_: impl Copy, _: u32, _: u32, _: u32) -> u64 { +extern "cmse-nonsecure-entry" fn impl_trait(_: impl Copy, _: u32, _: u32, _: u32) -> u64 { //~^ ERROR [E0798] 0 } -extern "C-cmse-nonsecure-entry" fn reference(x: &usize) -> usize { +extern "cmse-nonsecure-entry" fn reference(x: &usize) -> usize { *x } trait Trait {} -extern "C-cmse-nonsecure-entry" fn trait_object(x: &dyn Trait) -> &dyn Trait { - //~^ ERROR return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers [E0798] +extern "cmse-nonsecure-entry" fn trait_object(x: &dyn Trait) -> &dyn Trait { + //~^ ERROR return value of `"cmse-nonsecure-entry"` function too large to pass via registers [E0798] x } -extern "C-cmse-nonsecure-entry" fn static_trait_object( +extern "cmse-nonsecure-entry" fn static_trait_object( x: &'static dyn Trait, ) -> &'static dyn Trait { - //~^ ERROR return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers [E0798] + //~^ 
ERROR return value of `"cmse-nonsecure-entry"` function too large to pass via registers [E0798] x } #[repr(transparent)] struct WrapperTransparent<'a>(&'a dyn Trait); -extern "C-cmse-nonsecure-entry" fn wrapped_trait_object( +extern "cmse-nonsecure-entry" fn wrapped_trait_object( x: WrapperTransparent, ) -> WrapperTransparent { - //~^ ERROR return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers [E0798] + //~^ ERROR return value of `"cmse-nonsecure-entry"` function too large to pass via registers [E0798] x } -extern "C-cmse-nonsecure-entry" fn c_variadic(_: u32, _: ...) { +extern "cmse-nonsecure-entry" fn c_variadic(_: u32, _: ...) { //~^ ERROR only foreign, `unsafe extern "C"`, or `unsafe extern "C-unwind"` functions may have a C-variadic arg //~| ERROR requires `va_list` lang_item } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.stderr index c314671dc297..f0190671b5a1 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/generics.stderr @@ -1,13 +1,13 @@ error: only foreign, `unsafe extern "C"`, or `unsafe extern "C-unwind"` functions may have a C-variadic arg - --> $DIR/generics.rs:73:55 + --> $DIR/generics.rs:73:53 | -LL | extern "C-cmse-nonsecure-entry" fn c_variadic(_: u32, _: ...) { - | ^^^^^^ +LL | extern "cmse-nonsecure-entry" fn c_variadic(_: u32, _: ...) 
{ + | ^^^^^^ -error[E0798]: functions with the `"C-cmse-nonsecure-entry"` ABI cannot contain generics in their type +error[E0798]: functions with the `"cmse-nonsecure-entry"` ABI cannot contain generics in their type --> $DIR/generics.rs:30:1 | -LL | / extern "C-cmse-nonsecure-entry" fn introduced_generic( +LL | / extern "cmse-nonsecure-entry" fn introduced_generic( LL | | LL | | _: U, LL | | _: u32, @@ -16,22 +16,22 @@ LL | | _: u32, LL | | ) -> u64 { | |________^ -error[E0798]: functions with the `"C-cmse-nonsecure-entry"` ABI cannot contain generics in their type +error[E0798]: functions with the `"cmse-nonsecure-entry"` ABI cannot contain generics in their type --> $DIR/generics.rs:40:1 | -LL | extern "C-cmse-nonsecure-entry" fn impl_trait(_: impl Copy, _: u32, _: u32, _: u32) -> u64 { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | extern "cmse-nonsecure-entry" fn impl_trait(_: impl Copy, _: u32, _: u32, _: u32) -> u64 { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -error[E0798]: functions with the `"C-cmse-nonsecure-entry"` ABI cannot contain generics in their type +error[E0798]: functions with the `"cmse-nonsecure-entry"` ABI cannot contain generics in their type --> $DIR/generics.rs:14:5 | -LL | extern "C-cmse-nonsecure-entry" fn ambient_generic(_: T, _: u32, _: u32, _: u32) -> u64 { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | extern "cmse-nonsecure-entry" fn ambient_generic(_: T, _: u32, _: u32, _: u32) -> u64 { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -error[E0798]: functions with the `"C-cmse-nonsecure-entry"` ABI cannot contain generics in their type +error[E0798]: functions with the `"cmse-nonsecure-entry"` ABI cannot contain generics in their type --> $DIR/generics.rs:19:5 | -LL | / extern "C-cmse-nonsecure-entry" fn ambient_generic_nested( +LL | / 
extern "cmse-nonsecure-entry" fn ambient_generic_nested( LL | | LL | | _: Wrapper, LL | | _: u32, @@ -40,38 +40,38 @@ LL | | _: u32, LL | | ) -> u64 { | |____________^ -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/generics.rs:51:67 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/generics.rs:51:65 | -LL | extern "C-cmse-nonsecure-entry" fn trait_object(x: &dyn Trait) -> &dyn Trait { - | ^^^^^^^^^^ this type doesn't fit in the available registers +LL | extern "cmse-nonsecure-entry" fn trait_object(x: &dyn Trait) -> &dyn Trait { + | ^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers --> $DIR/generics.rs:58:6 | LL | ) -> &'static dyn Trait { | ^^^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers --> $DIR/generics.rs:68:6 | LL | ) -> WrapperTransparent { | 
^^^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size error: requires `va_list` lang_item - --> $DIR/generics.rs:73:55 + --> $DIR/generics.rs:73:53 | -LL | extern "C-cmse-nonsecure-entry" fn c_variadic(_: u32, _: ...) { - | ^^^^^^ +LL | extern "cmse-nonsecure-entry" fn c_variadic(_: u32, _: ...) { + | ^^^^^^ error: aborting due to 9 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.rs index 4c53f9422dab..d4f722fa1938 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.rs @@ -12,14 +12,14 @@ use minicore::*; pub struct AlignRelevant(u32); #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f1(_: u32, _: u32, _: u32, _: u32, _: u32, _: u32) {} //~ ERROR [E0798] +pub extern "cmse-nonsecure-entry" fn f1(_: u32, _: u32, _: u32, _: u32, _: u32, _: u32) {} //~ ERROR [E0798] #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f2(_: u32, _: u32, _: u32, _: u16, _: u16) {} //~ ERROR [E0798] +pub extern "cmse-nonsecure-entry" fn f2(_: u32, _: u32, _: u32, _: u16, _: u16) {} //~ ERROR [E0798] #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f3(_: u32, _: u64, _: u32) {} //~ ERROR [E0798] +pub extern "cmse-nonsecure-entry" fn f3(_: u32, _: u64, _: u32) {} //~ ERROR [E0798] #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f4(_: AlignRelevant, _: u32) {} //~ ERROR [E0798] +pub extern "cmse-nonsecure-entry" fn f4(_: AlignRelevant, _: u32) {} //~ ERROR [E0798] #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern 
"C-cmse-nonsecure-entry" fn f5(_: [u32; 5]) {} //~ ERROR [E0798] +pub extern "cmse-nonsecure-entry" fn f5(_: [u32; 5]) {} //~ ERROR [E0798] diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.stderr index 24e9ddf32feb..f8b96bddc947 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/params-via-stack.stderr @@ -1,42 +1,42 @@ -error[E0798]: arguments for `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/params-via-stack.rs:15:78 +error[E0798]: arguments for `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/params-via-stack.rs:15:76 | -LL | pub extern "C-cmse-nonsecure-entry" fn f1(_: u32, _: u32, _: u32, _: u32, _: u32, _: u32) {} - | ^^^^^^^^^^^ these arguments don't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f1(_: u32, _: u32, _: u32, _: u32, _: u32, _: u32) {} + | ^^^^^^^^^^^ these arguments don't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/params-via-stack.rs:17:78 +error[E0798]: arguments for `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/params-via-stack.rs:17:76 | -LL | pub extern "C-cmse-nonsecure-entry" fn f2(_: u32, _: u32, _: u32, _: u16, _: u16) {} - | ^^^ this argument doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f2(_: u32, _: u32, _: u32, _: u16, _: u16) {} + | ^^^ this argument doesn't fit in the available registers | - = note: functions with the 
`"C-cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/params-via-stack.rs:19:62 +error[E0798]: arguments for `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/params-via-stack.rs:19:60 | -LL | pub extern "C-cmse-nonsecure-entry" fn f3(_: u32, _: u64, _: u32) {} +LL | pub extern "cmse-nonsecure-entry" fn f3(_: u32, _: u64, _: u32) {} + | ^^^ this argument doesn't fit in the available registers + | + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers + +error[E0798]: arguments for `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/params-via-stack.rs:21:62 + | +LL | pub extern "cmse-nonsecure-entry" fn f4(_: AlignRelevant, _: u32) {} | ^^^ this argument doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers -error[E0798]: arguments for `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/params-via-stack.rs:21:64 +error[E0798]: arguments for `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/params-via-stack.rs:25:44 | -LL | pub extern "C-cmse-nonsecure-entry" fn f4(_: AlignRelevant, _: u32) {} - | ^^^ this argument doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f5(_: [u32; 5]) {} + | ^^^^^^^^ this argument doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI 
must pass all their arguments via the 4 32-bit available argument registers - -error[E0798]: arguments for `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/params-via-stack.rs:25:46 - | -LL | pub extern "C-cmse-nonsecure-entry" fn f5(_: [u32; 5]) {} - | ^^^^^^^^ this argument doesn't fit in the available registers - | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass all their arguments via the 4 32-bit available argument registers error: aborting due to 5 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.rs index 735eab10fa15..0052a0977ed7 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.rs @@ -22,41 +22,41 @@ pub struct U64Compound(u32, u32); pub struct ReprCAlign16(u16); #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f1() -> ReprCU64 { +pub extern "cmse-nonsecure-entry" fn f1() -> ReprCU64 { //~^ ERROR [E0798] ReprCU64(0) } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f2() -> ReprCBytes { +pub extern "cmse-nonsecure-entry" fn f2() -> ReprCBytes { //~^ ERROR [E0798] ReprCBytes(0, 1, 2, 3, 4) } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f3() -> U64Compound { +pub extern "cmse-nonsecure-entry" fn f3() -> U64Compound { //~^ ERROR [E0798] U64Compound(2, 3) } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn f4() -> ReprCAlign16 { +pub extern "cmse-nonsecure-entry" fn f4() -> ReprCAlign16 { //~^ ERROR [E0798] ReprCAlign16(4) } #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn f5() -> [u8; 5] { +pub extern "cmse-nonsecure-entry" fn f5() -> [u8; 5] { //~^ ERROR [E0798] [0xAA; 5] } #[no_mangle] 
#[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn u128() -> u128 { +pub extern "cmse-nonsecure-entry" fn u128() -> u128 { //~^ ERROR [E0798] 123 } #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn i128() -> i128 { +pub extern "cmse-nonsecure-entry" fn i128() -> i128 { //~^ ERROR [E0798] 456 } @@ -73,12 +73,12 @@ pub union ReprCUnionU64 { #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn union_rust() -> ReprRustUnionU64 { +pub extern "cmse-nonsecure-entry" fn union_rust() -> ReprRustUnionU64 { //~^ ERROR [E0798] ReprRustUnionU64 { _unused: 1 } } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn union_c() -> ReprCUnionU64 { +pub extern "cmse-nonsecure-entry" fn union_c() -> ReprCUnionU64 { //~^ ERROR [E0798] ReprCUnionU64 { _unused: 2 } } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.stderr index 9c885d953181..c5effed92ae9 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/return-via-stack.stderr @@ -1,82 +1,82 @@ -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:25:48 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:25:46 | -LL | pub extern "C-cmse-nonsecure-entry" fn f1() -> ReprCU64 { - | ^^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f1() -> ReprCU64 { + | ^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result 
must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:30:48 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:30:46 | -LL | pub extern "C-cmse-nonsecure-entry" fn f2() -> ReprCBytes { - | ^^^^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f2() -> ReprCBytes { + | ^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:35:48 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:35:46 | -LL | pub extern "C-cmse-nonsecure-entry" fn f3() -> U64Compound { - | ^^^^^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f3() -> U64Compound { + | ^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:40:48 +error[E0798]: return 
value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:40:46 | -LL | pub extern "C-cmse-nonsecure-entry" fn f4() -> ReprCAlign16 { - | ^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f4() -> ReprCAlign16 { + | ^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:47:48 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:47:46 | -LL | pub extern "C-cmse-nonsecure-entry" fn f5() -> [u8; 5] { - | ^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn f5() -> [u8; 5] { + | ^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:53:50 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:53:48 | -LL | pub extern "C-cmse-nonsecure-entry" fn u128() -> u128 { - | ^^^^ this type doesn't fit in the available registers +LL | pub extern 
"cmse-nonsecure-entry" fn u128() -> u128 { + | ^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:59:50 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:59:48 | -LL | pub extern "C-cmse-nonsecure-entry" fn i128() -> i128 { - | ^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn i128() -> i128 { + | ^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:76:56 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:76:54 | -LL | pub extern "C-cmse-nonsecure-entry" fn union_rust() -> ReprRustUnionU64 { - | ^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn union_rust() -> ReprRustUnionU64 { + | ^^^^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: 
functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size -error[E0798]: return value of `"C-cmse-nonsecure-entry"` function too large to pass via registers - --> $DIR/return-via-stack.rs:81:53 +error[E0798]: return value of `"cmse-nonsecure-entry"` function too large to pass via registers + --> $DIR/return-via-stack.rs:81:51 | -LL | pub extern "C-cmse-nonsecure-entry" fn union_c() -> ReprCUnionU64 { - | ^^^^^^^^^^^^^ this type doesn't fit in the available registers +LL | pub extern "cmse-nonsecure-entry" fn union_c() -> ReprCUnionU64 { + | ^^^^^^^^^^^^^ this type doesn't fit in the available registers | - = note: functions with the `"C-cmse-nonsecure-entry"` ABI must pass their result via the available return registers + = note: functions with the `"cmse-nonsecure-entry"` ABI must pass their result via the available return registers = note: the result must either be a (transparently wrapped) i64, u64 or f64, or be at most 4 bytes in size error: aborting due to 9 previous errors diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.aarch64.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.aarch64.stderr index 6a90dc8d635b..3949eac15429 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.aarch64.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.aarch64.stderr @@ -1,8 +1,8 @@ -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/trustzone-only.rs:17:1 +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/trustzone-only.rs:17:12 | -LL | pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | pub extern "cmse-nonsecure-entry" fn entry_function(input: 
u32) -> u32 { + | ^^^^^^^^^^^^^^^^^^^^^^ error: aborting due to 1 previous error diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.rs index 6d84dab2166e..ff5d2ec0ab6c 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.rs @@ -14,7 +14,7 @@ extern crate minicore; use minicore::*; #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { +pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { //~^ ERROR [E0570] input } diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.thumb7.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.thumb7.stderr index 6a90dc8d635b..3949eac15429 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.thumb7.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.thumb7.stderr @@ -1,8 +1,8 @@ -error[E0570]: `"C-cmse-nonsecure-entry"` is not a supported ABI for the current target - --> $DIR/trustzone-only.rs:17:1 +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/trustzone-only.rs:17:12 | -LL | pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { + | ^^^^^^^^^^^^^^^^^^^^^^ error: aborting due to 1 previous error diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.x86.stderr b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.x86.stderr index 6a90dc8d635b..3949eac15429 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.x86.stderr +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/trustzone-only.x86.stderr @@ -1,8 +1,8 @@ -error[E0570]: `"C-cmse-nonsecure-entry"` is not a 
supported ABI for the current target - --> $DIR/trustzone-only.rs:17:1 +error[E0570]: "cmse-nonsecure-entry" is not a supported ABI for the current target + --> $DIR/trustzone-only.rs:17:12 | -LL | pub extern "C-cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | pub extern "cmse-nonsecure-entry" fn entry_function(input: u32) -> u32 { + | ^^^^^^^^^^^^^^^^^^^^^^ error: aborting due to 1 previous error diff --git a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/via-registers.rs b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/via-registers.rs index 912fc8b85ebd..343732881256 100644 --- a/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/via-registers.rs +++ b/tests/ui/cmse-nonsecure/cmse-nonsecure-entry/via-registers.rs @@ -26,49 +26,49 @@ pub enum ReprTransparentEnumU64 { pub struct U32Compound(u16, u16); #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn inputs1() {} +pub extern "cmse-nonsecure-entry" fn inputs1() {} #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn inputs2(_: u32, _: u32, _: u32, _: u32) {} +pub extern "cmse-nonsecure-entry" fn inputs2(_: u32, _: u32, _: u32, _: u32) {} #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn inputs3(_: u64, _: u64) {} +pub extern "cmse-nonsecure-entry" fn inputs3(_: u64, _: u64) {} #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn inputs4(_: u128) {} +pub extern "cmse-nonsecure-entry" fn inputs4(_: u128) {} #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn inputs5(_: f64, _: f32, _: f32) {} +pub extern "cmse-nonsecure-entry" fn inputs5(_: f64, _: f32, _: f32) {} #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn inputs6(_: ReprTransparentStruct, _: U32Compound) {} +pub extern "cmse-nonsecure-entry" fn inputs6(_: ReprTransparentStruct, _: U32Compound) {} #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn inputs7(_: [u32; 4]) {} +pub extern 
"cmse-nonsecure-entry" fn inputs7(_: [u32; 4]) {} #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs1() -> u32 { +pub extern "cmse-nonsecure-entry" fn outputs1() -> u32 { 0 } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs2() -> u64 { +pub extern "cmse-nonsecure-entry" fn outputs2() -> u64 { 0 } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs3() -> i64 { +pub extern "cmse-nonsecure-entry" fn outputs3() -> i64 { 0 } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs4() -> f64 { +pub extern "cmse-nonsecure-entry" fn outputs4() -> f64 { 0.0 } #[no_mangle] #[allow(improper_ctypes_definitions)] -pub extern "C-cmse-nonsecure-entry" fn outputs5() -> [u8; 4] { +pub extern "cmse-nonsecure-entry" fn outputs5() -> [u8; 4] { [0xAA; 4] } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs6() -> ReprTransparentStruct { +pub extern "cmse-nonsecure-entry" fn outputs6() -> ReprTransparentStruct { ReprTransparentStruct { _marker1: (), _marker2: (), field: 0xAA, _marker3: () } } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs7( +pub extern "cmse-nonsecure-entry" fn outputs7( ) -> ReprTransparentStruct> { ReprTransparentStruct { _marker1: (), @@ -78,10 +78,10 @@ pub extern "C-cmse-nonsecure-entry" fn outputs7( } } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs8() -> ReprTransparentEnumU64 { +pub extern "cmse-nonsecure-entry" fn outputs8() -> ReprTransparentEnumU64 { ReprTransparentEnumU64::A(0) } #[no_mangle] -pub extern "C-cmse-nonsecure-entry" fn outputs9() -> U32Compound { +pub extern "cmse-nonsecure-entry" fn outputs9() -> U32Compound { U32Compound(1, 2) } diff --git a/tests/ui/coercion/invalid-blanket-coerce-unsized-impl.rs b/tests/ui/coercion/invalid-blanket-coerce-unsized-impl.rs new file mode 100644 index 000000000000..a4fd77107188 --- /dev/null +++ b/tests/ui/coercion/invalid-blanket-coerce-unsized-impl.rs @@ -0,0 +1,13 @@ +// Regression test minimized from #126982. 
+// We used to apply a coerce_unsized coercion to literally every argument since +// the blanket applied in literally all cases, even though it was incoherent. + +#![feature(coerce_unsized)] + +impl std::ops::CoerceUnsized for A {} +//~^ ERROR type parameter `A` must be used as the type parameter for some local type +//~| ERROR the trait `CoerceUnsized` may only be implemented for a coercion between structures + +const C: usize = 1; + +fn main() {} diff --git a/tests/ui/coercion/invalid-blanket-coerce-unsized-impl.stderr b/tests/ui/coercion/invalid-blanket-coerce-unsized-impl.stderr new file mode 100644 index 000000000000..377906ee334a --- /dev/null +++ b/tests/ui/coercion/invalid-blanket-coerce-unsized-impl.stderr @@ -0,0 +1,19 @@ +error[E0210]: type parameter `A` must be used as the type parameter for some local type (e.g., `MyStruct`) + --> $DIR/invalid-blanket-coerce-unsized-impl.rs:7:6 + | +LL | impl std::ops::CoerceUnsized for A {} + | ^ type parameter `A` must be used as the type parameter for some local type + | + = note: implementing a foreign trait is only possible if at least one of the types for which it is implemented is local + = note: only traits defined in the current crate can be implemented for a type parameter + +error[E0377]: the trait `CoerceUnsized` may only be implemented for a coercion between structures + --> $DIR/invalid-blanket-coerce-unsized-impl.rs:7:1 + | +LL | impl std::ops::CoerceUnsized for A {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 2 previous errors + +Some errors have detailed explanations: E0210, E0377. +For more information about an error, try `rustc --explain E0210`. 
diff --git a/tests/ui/compiletest-self-test/line-annotation-mismatches.rs b/tests/ui/compiletest-self-test/line-annotation-mismatches.rs new file mode 100644 index 000000000000..d2a14374ed4c --- /dev/null +++ b/tests/ui/compiletest-self-test/line-annotation-mismatches.rs @@ -0,0 +1,42 @@ +//@ should-fail + +// The warning is reported with unknown line +//@ compile-flags: -D raw_pointer_derive +//~? WARN kind and unknown line match the reported warning, but we do not suggest it + +// The error is expected but not reported at all. +//~ ERROR this error does not exist + +// The error is reported but not expected at all. +// "`main` function not found in crate" (the main function is intentionally not added) + +// An "unimportant" diagnostic is expected on a wrong line. +//~ ERROR aborting due to + +// An "unimportant" diagnostic is expected with a wrong kind. +//~? ERROR For more information about an error + +fn wrong_line_or_kind() { + // A diagnostic expected on a wrong line. + unresolved1; + //~ ERROR cannot find value `unresolved1` in this scope + + // A diagnostic expected with a wrong kind. + unresolved2; //~ WARN cannot find value `unresolved2` in this scope + + // A diagnostic expected with a missing kind (treated as a wrong kind). + unresolved3; //~ cannot find value `unresolved3` in this scope + + // A diagnostic expected with a wrong line and kind. + unresolved4; + //~ WARN cannot find value `unresolved4` in this scope +} + +fn wrong_message() { + // A diagnostic expected with a wrong message, but the line is known and right. + unresolvedA; //~ ERROR stub message 1 + + // A diagnostic expected with a wrong message, but the line is known and right, + // even if the kind doesn't match. 
+ unresolvedB; //~ WARN stub message 2 +} diff --git a/tests/ui/compiletest-self-test/line-annotation-mismatches.stderr b/tests/ui/compiletest-self-test/line-annotation-mismatches.stderr new file mode 100644 index 000000000000..7ca3bfaf396c --- /dev/null +++ b/tests/ui/compiletest-self-test/line-annotation-mismatches.stderr @@ -0,0 +1,61 @@ +warning: lint `raw_pointer_derive` has been removed: using derive with raw pointers is ok + | + = note: requested on the command line with `-D raw_pointer_derive` + = note: `#[warn(renamed_and_removed_lints)]` on by default + +error[E0425]: cannot find value `unresolved1` in this scope + --> $DIR/line-annotation-mismatches.rs:21:5 + | +LL | unresolved1; + | ^^^^^^^^^^^ not found in this scope + +error[E0425]: cannot find value `unresolved2` in this scope + --> $DIR/line-annotation-mismatches.rs:25:5 + | +LL | unresolved2; + | ^^^^^^^^^^^ not found in this scope + +error[E0425]: cannot find value `unresolved3` in this scope + --> $DIR/line-annotation-mismatches.rs:28:5 + | +LL | unresolved3; + | ^^^^^^^^^^^ not found in this scope + +error[E0425]: cannot find value `unresolved4` in this scope + --> $DIR/line-annotation-mismatches.rs:31:5 + | +LL | unresolved4; + | ^^^^^^^^^^^ not found in this scope + +error[E0425]: cannot find value `unresolvedA` in this scope + --> $DIR/line-annotation-mismatches.rs:37:5 + | +LL | unresolvedA; + | ^^^^^^^^^^^ not found in this scope + +error[E0425]: cannot find value `unresolvedB` in this scope + --> $DIR/line-annotation-mismatches.rs:41:5 + | +LL | unresolvedB; + | ^^^^^^^^^^^ not found in this scope + +warning: lint `raw_pointer_derive` has been removed: using derive with raw pointers is ok + | + = note: requested on the command line with `-D raw_pointer_derive` + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error[E0601]: `main` function not found in crate `line_annotation_mismatches` + --> $DIR/line-annotation-mismatches.rs:42:2 + | +LL | } + | ^ consider 
adding a `main` function to `$DIR/line-annotation-mismatches.rs` + +warning: lint `raw_pointer_derive` has been removed: using derive with raw pointers is ok + | + = note: requested on the command line with `-D raw_pointer_derive` + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error: aborting due to 7 previous errors; 3 warnings emitted + +Some errors have detailed explanations: E0425, E0601. +For more information about an error, try `rustc --explain E0425`. diff --git a/tests/ui/const-generics/const_trait_fn-issue-88433.rs b/tests/ui/const-generics/const_trait_fn-issue-88433.rs index bc91fc1700ea..2f92a528bf72 100644 --- a/tests/ui/const-generics/const_trait_fn-issue-88433.rs +++ b/tests/ui/const-generics/const_trait_fn-issue-88433.rs @@ -10,7 +10,6 @@ trait Func { fn call_once(self, arg: T) -> Self::Output; } - struct Closure; impl const Func<&usize> for Closure { @@ -21,7 +20,7 @@ impl const Func<&usize> for Closure { } } -enum Bug { +enum Bug { V(T), } diff --git a/tests/ui/const-generics/generic_arg_infer/in-signature.rs b/tests/ui/const-generics/generic_arg_infer/in-signature.rs index cd0235bf45aa..1be8b564224e 100644 --- a/tests/ui/const-generics/generic_arg_infer/in-signature.rs +++ b/tests/ui/const-generics/generic_arg_infer/in-signature.rs @@ -41,6 +41,7 @@ trait TyAssocConst { trait TyAssocConstMixed { const ARR: Bar<_, _>; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for associated constants + //~| ERROR the placeholder `_` is not allowed within types on item signatures for associated constants } trait AssocTy { @@ -57,4 +58,5 @@ impl AssocTy for i16 { impl AssocTy for i32 { type Assoc = Bar<_, _>; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for associated types + //~| ERROR the placeholder `_` is not allowed within types on item signatures for associated types } diff --git a/tests/ui/const-generics/generic_arg_infer/in-signature.stderr 
b/tests/ui/const-generics/generic_arg_infer/in-signature.stderr index f964fc8d2f2c..b6f2662a9393 100644 --- a/tests/ui/const-generics/generic_arg_infer/in-signature.stderr +++ b/tests/ui/const-generics/generic_arg_infer/in-signature.stderr @@ -103,24 +103,28 @@ LL + static TY_STATIC_MIXED: Bar = Bar::(0); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/in-signature.rs:50:23 + --> $DIR/in-signature.rs:51:23 | LL | type Assoc = [u8; _]; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/in-signature.rs:54:27 + --> $DIR/in-signature.rs:55:27 | LL | type Assoc = Bar; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/in-signature.rs:58:22 + --> $DIR/in-signature.rs:59:22 | LL | type Assoc = Bar<_, _>; - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types + --> $DIR/in-signature.rs:59:25 + | +LL | type Assoc = Bar<_, _>; + | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants --> $DIR/in-signature.rs:34:21 @@ -138,10 +142,14 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures --> $DIR/in-signature.rs:42:20 | LL | const ARR: Bar<_, _>; - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures + | ^ not allowed in type signatures -error: aborting due to 15 previous errors +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants + --> $DIR/in-signature.rs:42:23 + | +LL | const ARR: Bar<_, _>; + | ^ not allowed in type signatures + +error: aborting due to 17 
previous errors For more information about this error, try `rustc --explain E0121`. diff --git a/tests/ui/const-generics/generic_const_exprs/non-local-const.rs b/tests/ui/const-generics/generic_const_exprs/non-local-const.rs new file mode 100644 index 000000000000..0a30cc385ac4 --- /dev/null +++ b/tests/ui/const-generics/generic_const_exprs/non-local-const.rs @@ -0,0 +1,10 @@ +// regression test for #133808. + +#![feature(generic_const_exprs)] +#![feature(min_generic_const_args)] +#![allow(incomplete_features)] +#![crate_type = "lib"] + +pub trait Foo {} +impl Foo for [u8; std::path::MAIN_SEPARATOR] {} +//~^ ERROR the constant `MAIN_SEPARATOR` is not of type `usize` diff --git a/tests/ui/const-generics/generic_const_exprs/non-local-const.stderr b/tests/ui/const-generics/generic_const_exprs/non-local-const.stderr new file mode 100644 index 000000000000..d8df3269a19e --- /dev/null +++ b/tests/ui/const-generics/generic_const_exprs/non-local-const.stderr @@ -0,0 +1,10 @@ +error: the constant `MAIN_SEPARATOR` is not of type `usize` + --> $DIR/non-local-const.rs:9:14 + | +LL | impl Foo for [u8; std::path::MAIN_SEPARATOR] {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ expected `usize`, found `char` + | + = note: the length of array `[u8; MAIN_SEPARATOR]` must be type `usize` + +error: aborting due to 1 previous error + diff --git a/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.rs b/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.rs index a55be99fc0be..02a95ed3e908 100644 --- a/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.rs +++ b/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.rs @@ -28,6 +28,10 @@ mod v20 { impl v17 { //~^ ERROR maximum number of nodes exceeded in constant v20::v17::::{constant#0} //~| ERROR maximum number of nodes exceeded in constant v20::v17::::{constant#0} + //~| ERROR maximum number of nodes exceeded in constant v20::v17::::{constant#0} + //~| ERROR 
maximum number of nodes exceeded in constant v20::v17::::{constant#0} + //~| ERROR maximum number of nodes exceeded in constant v20::v17::::{constant#0} + //~| ERROR maximum number of nodes exceeded in constant v20::v17::::{constant#0} pub const fn v21() -> v18 { //~^ ERROR cannot find type `v18` in this scope v18 { _p: () } diff --git a/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.stderr b/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.stderr index b73611c79b29..cf0bdd0e9a15 100644 --- a/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.stderr +++ b/tests/ui/const-generics/generic_const_exprs/unevaluated-const-ice-119731.stderr @@ -1,5 +1,5 @@ error[E0432]: unresolved import `v20::v13` - --> $DIR/unevaluated-const-ice-119731.rs:38:15 + --> $DIR/unevaluated-const-ice-119731.rs:42:15 | LL | pub use v20::{v13, v17}; | ^^^ @@ -23,7 +23,7 @@ LL | pub const fn v21() -> v18 {} | ^^^ help: a type alias with a similar name exists: `v11` error[E0412]: cannot find type `v18` in this scope - --> $DIR/unevaluated-const-ice-119731.rs:31:31 + --> $DIR/unevaluated-const-ice-119731.rs:35:31 | LL | pub type v11 = [[usize; v4]; v4]; | --------------------------------- similarly named type alias `v11` defined here @@ -32,7 +32,7 @@ LL | pub const fn v21() -> v18 { | ^^^ help: a type alias with a similar name exists: `v11` error[E0422]: cannot find struct, variant or union type `v18` in this scope - --> $DIR/unevaluated-const-ice-119731.rs:33:13 + --> $DIR/unevaluated-const-ice-119731.rs:37:13 | LL | pub type v11 = [[usize; v4]; v4]; | --------------------------------- similarly named type alias `v11` defined here @@ -86,6 +86,38 @@ LL | impl v17 { | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` +error: maximum number of nodes exceeded in constant v20::v17::::{constant#0} + --> $DIR/unevaluated-const-ice-119731.rs:28:37 + | +LL | impl v17 { + | ^^ + | + = note: duplicate 
diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error: maximum number of nodes exceeded in constant v20::v17::::{constant#0} + --> $DIR/unevaluated-const-ice-119731.rs:28:37 + | +LL | impl v17 { + | ^^ + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error: maximum number of nodes exceeded in constant v20::v17::::{constant#0} + --> $DIR/unevaluated-const-ice-119731.rs:28:37 + | +LL | impl v17 { + | ^^ + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error: maximum number of nodes exceeded in constant v20::v17::::{constant#0} + --> $DIR/unevaluated-const-ice-119731.rs:28:37 + | +LL | impl v17 { + | ^^ + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + error[E0592]: duplicate definitions with name `v21` --> $DIR/unevaluated-const-ice-119731.rs:23:9 | @@ -95,7 +127,7 @@ LL | pub const fn v21() -> v18 {} LL | pub const fn v21() -> v18 { | ------------------------- other definition for `v21` -error: aborting due to 10 previous errors; 2 warnings emitted +error: aborting due to 14 previous errors; 2 warnings emitted Some errors have detailed explanations: E0412, E0422, E0425, E0432, E0592. For more information about an error, try `rustc --explain E0412`. 
diff --git a/tests/ui/const-generics/generic_const_exprs/unresolved_lifetimes_error.stderr b/tests/ui/const-generics/generic_const_exprs/unresolved_lifetimes_error.stderr index 67eed46eaddc..ae074373da27 100644 --- a/tests/ui/const-generics/generic_const_exprs/unresolved_lifetimes_error.stderr +++ b/tests/ui/const-generics/generic_const_exprs/unresolved_lifetimes_error.stderr @@ -1,10 +1,13 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/unresolved_lifetimes_error.rs:5:13 | -LL | fn foo() -> [(); { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | let a: &'a (); | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn foo<'a>() -> [(); { + | ++++ error: aborting due to 1 previous error diff --git a/tests/ui/const-generics/ice-unexpected-inference-var-122549.stderr b/tests/ui/const-generics/ice-unexpected-inference-var-122549.stderr index afad3388145c..3b24808cd162 100644 --- a/tests/ui/const-generics/ice-unexpected-inference-var-122549.stderr +++ b/tests/ui/const-generics/ice-unexpected-inference-var-122549.stderr @@ -17,9 +17,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/ice-unexpected-inference-var-122549.rs:11:34 | LL | struct ConstChunksExact<'rem, T: 'a, const N: usize> {} - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `'a,` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | struct ConstChunksExact<'a, 'rem, T: 'a, const N: usize> {} + | +++ error[E0046]: not all trait items implemented, missing: `const_chunks_exact` --> $DIR/ice-unexpected-inference-var-122549.rs:9:1 diff --git a/tests/ui/const-generics/issues/issue-86535-2.rs b/tests/ui/const-generics/issues/issue-86535-2.rs index 8d064f3eeb1b..5c9132fe54d3 100644 --- a/tests/ui/const-generics/issues/issue-86535-2.rs +++ b/tests/ui/const-generics/issues/issue-86535-2.rs @@ -9,7 +9,7 @@ pub trait Foo { [(); Self::ASSOC_C]:; } -struct Bar; +struct Bar; 
//~ WARN struct `Bar` is never constructed impl Foo for Bar { const ASSOC_C: usize = 3; diff --git a/tests/ui/const-generics/issues/issue-86535-2.stderr b/tests/ui/const-generics/issues/issue-86535-2.stderr new file mode 100644 index 000000000000..0ba748365754 --- /dev/null +++ b/tests/ui/const-generics/issues/issue-86535-2.stderr @@ -0,0 +1,10 @@ +warning: struct `Bar` is never constructed + --> $DIR/issue-86535-2.rs:12:8 + | +LL | struct Bar; + | ^^^ + | + = note: `#[warn(dead_code)]` on by default + +warning: 1 warning emitted + diff --git a/tests/ui/const-generics/issues/issue-86535.rs b/tests/ui/const-generics/issues/issue-86535.rs index 62454f4a388a..2cdf801c1561 100644 --- a/tests/ui/const-generics/issues/issue-86535.rs +++ b/tests/ui/const-generics/issues/issue-86535.rs @@ -2,7 +2,7 @@ #![feature(adt_const_params, unsized_const_params, generic_const_exprs)] #![allow(incomplete_features, unused_variables)] -struct F; +struct F; //~ WARN struct `F` is never constructed impl X for F<{ S }> { const W: usize = 3; diff --git a/tests/ui/const-generics/issues/issue-86535.stderr b/tests/ui/const-generics/issues/issue-86535.stderr new file mode 100644 index 000000000000..84d6c1c11ff6 --- /dev/null +++ b/tests/ui/const-generics/issues/issue-86535.stderr @@ -0,0 +1,10 @@ +warning: struct `F` is never constructed + --> $DIR/issue-86535.rs:5:8 + | +LL | struct F; + | ^ + | + = note: `#[warn(dead_code)]` on by default + +warning: 1 warning emitted + diff --git a/tests/ui/const-generics/issues/issue-88119.stderr b/tests/ui/const-generics/issues/issue-88119.stderr index 94f06bbbbc45..0aabf48011dc 100644 --- a/tests/ui/const-generics/issues/issue-88119.stderr +++ b/tests/ui/const-generics/issues/issue-88119.stderr @@ -6,7 +6,7 @@ LL | #![feature(const_trait_impl, generic_const_exprs)] | = help: remove one of these features -error[E0275]: overflow evaluating the requirement `&T: ~const ConstName` +error[E0275]: overflow evaluating the requirement `&T: [const] ConstName` --> 
$DIR/issue-88119.rs:19:49 | LL | impl const ConstName for &T @@ -42,7 +42,7 @@ note: required by a bound in `<&T as ConstName>` LL | [(); name_len::()]:, | ^^^^^^^^^^^^^^^^^^^^^ required by this bound in `<&T as ConstName>` -error[E0275]: overflow evaluating the requirement `&mut T: ~const ConstName` +error[E0275]: overflow evaluating the requirement `&mut T: [const] ConstName` --> $DIR/issue-88119.rs:26:49 | LL | impl const ConstName for &mut T diff --git a/tests/ui/const_prop/ice-type-mismatch-when-copying-112824.stderr b/tests/ui/const_prop/ice-type-mismatch-when-copying-112824.stderr index d95a8861230e..586c96011e43 100644 --- a/tests/ui/const_prop/ice-type-mismatch-when-copying-112824.stderr +++ b/tests/ui/const_prop/ice-type-mismatch-when-copying-112824.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/ice-type-mismatch-when-copying-112824.rs:5:21 | LL | pub struct Opcode2(&'a S); - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | pub struct Opcode2<'a>(&'a S); + | ++++ error[E0412]: cannot find type `S` in this scope --> $DIR/ice-type-mismatch-when-copying-112824.rs:5:24 diff --git a/tests/ui/consts/const-block-const-bound.rs b/tests/ui/consts/const-block-const-bound.rs index b4b89a93e759..1847c880a391 100644 --- a/tests/ui/consts/const-block-const-bound.rs +++ b/tests/ui/consts/const-block-const-bound.rs @@ -3,7 +3,7 @@ use std::marker::Destruct; -const fn f(x: T) {} +const fn f(x: T) {} struct UnconstDrop; diff --git a/tests/ui/consts/const-block-const-bound.stderr b/tests/ui/consts/const-block-const-bound.stderr index 624772f5aedc..b6c8027918ff 100644 --- a/tests/ui/consts/const-block-const-bound.stderr +++ b/tests/ui/consts/const-block-const-bound.stderr @@ -9,8 +9,8 @@ LL | f(UnconstDrop); note: required by a bound in `f` --> $DIR/const-block-const-bound.rs:6:15 | -LL | const fn f(x: T) {} - 
| ^^^^^^^^^^^^^^^ required by this bound in `f` +LL | const fn f(x: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `f` error: aborting due to 1 previous error diff --git a/tests/ui/consts/const-eval/format.stderr b/tests/ui/consts/const-eval/format.stderr index 2f202705b7f9..bd50ac0bf411 100644 --- a/tests/ui/consts/const-eval/format.stderr +++ b/tests/ui/consts/const-eval/format.stderr @@ -13,7 +13,7 @@ LL | println!("{:?}", 0); | ^^^^^^^^^^^^^^^^^^^ | = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) + = note: this error originates in the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) error[E0015]: cannot call non-const function `_print` in constant functions --> $DIR/format.rs:7:5 diff --git a/tests/ui/consts/const-mut-refs/issue-76510.rs b/tests/ui/consts/const-mut-refs/issue-76510.rs index 6ebbd4e50f6d..a6f7540dd59d 100644 --- a/tests/ui/consts/const-mut-refs/issue-76510.rs +++ b/tests/ui/consts/const-mut-refs/issue-76510.rs @@ -1,7 +1,7 @@ use std::mem::{transmute, ManuallyDrop}; const S: &'static mut str = &mut " hello "; -//~^ ERROR: mutable references are not allowed in the final value of constants +//~^ ERROR: mutable borrows of temporaries const fn trigger() -> [(); unsafe { let s = transmute::<(*const u8, usize), &ManuallyDrop>((S.as_ptr(), 3)); diff --git a/tests/ui/consts/const-mut-refs/issue-76510.stderr b/tests/ui/consts/const-mut-refs/issue-76510.stderr index aff86e83578d..3a6c95141e52 100644 --- a/tests/ui/consts/const-mut-refs/issue-76510.stderr +++ b/tests/ui/consts/const-mut-refs/issue-76510.stderr @@ -1,8 +1,12 @@ -error[E0764]: mutable references are not allowed in the final value of constants +error[E0764]: mutable borrows of temporaries that have their lifetime 
extended until the end of the program are not allowed --> $DIR/issue-76510.rs:3:29 | LL | const S: &'static mut str = &mut " hello "; - | ^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 1 previous error diff --git a/tests/ui/consts/const-mut-refs/mut_ref_in_final.rs b/tests/ui/consts/const-mut-refs/mut_ref_in_final.rs index 28facc188860..9f9384adeb71 100644 --- a/tests/ui/consts/const-mut-refs/mut_ref_in_final.rs +++ b/tests/ui/consts/const-mut-refs/mut_ref_in_final.rs @@ -12,13 +12,13 @@ const A: *const i32 = &4; // It could be made sound to allow it to compile, // but we do not want to allow this to compile, // as that would be an enormous footgun in oli-obk's opinion. -const B: *mut i32 = &mut 4; //~ ERROR mutable references are not allowed +const B: *mut i32 = &mut 4; //~ ERROR mutable borrows of temporaries // Ok, no actual mutable allocation exists const B2: Option<&mut i32> = None; // Not ok, can't prove that no mutable allocation ends up in final value -const B3: Option<&mut i32> = Some(&mut 42); //~ ERROR mutable references are not allowed +const B3: Option<&mut i32> = Some(&mut 42); //~ ERROR mutable borrows of temporaries const fn helper(x: &mut i32) -> Option<&mut i32> { Some(x) } const B4: Option<&mut i32> = helper(&mut 42); //~ ERROR temporary value dropped while borrowed @@ -26,8 +26,10 @@ const B4: Option<&mut i32> = helper(&mut 42); //~ ERROR temporary value dropped // Not ok, since it points to read-only memory. 
const IMMUT_MUT_REF: &mut u16 = unsafe { mem::transmute(&13) }; //~^ ERROR pointing to read-only memory +static IMMUT_MUT_REF_STATIC: &mut u16 = unsafe { mem::transmute(&13) }; +//~^ ERROR pointing to read-only memory -// Ok, because no references to mutable data exist here, since the `{}` moves +// Ok, because no borrows of mutable data exist here, since the `{}` moves // its value and then takes a reference to that. const C: *const i32 = &{ let mut x = 42; @@ -67,13 +69,13 @@ unsafe impl Sync for SyncPtr {} // (This relies on `SyncPtr` being a curly brace struct.) // However, we intern the inner memory as read-only, so this must be rejected. static RAW_MUT_CAST_S: SyncPtr = SyncPtr { x : &mut 42 as *mut _ as *const _ }; -//~^ ERROR mutable references are not allowed +//~^ ERROR mutable borrows of temporaries static RAW_MUT_COERCE_S: SyncPtr = SyncPtr { x: &mut 0 }; -//~^ ERROR mutable references are not allowed +//~^ ERROR mutable borrows of temporaries const RAW_MUT_CAST_C: SyncPtr = SyncPtr { x : &mut 42 as *mut _ as *const _ }; -//~^ ERROR mutable references are not allowed +//~^ ERROR mutable borrows of temporaries const RAW_MUT_COERCE_C: SyncPtr = SyncPtr { x: &mut 0 }; -//~^ ERROR mutable references are not allowed +//~^ ERROR mutable borrows of temporaries fn main() { println!("{}", unsafe { *A }); diff --git a/tests/ui/consts/const-mut-refs/mut_ref_in_final.stderr b/tests/ui/consts/const-mut-refs/mut_ref_in_final.stderr index 122e5c1bdf0a..16dee44d8006 100644 --- a/tests/ui/consts/const-mut-refs/mut_ref_in_final.stderr +++ b/tests/ui/consts/const-mut-refs/mut_ref_in_final.stderr @@ -1,14 +1,22 @@ -error[E0764]: mutable references are not allowed in the final value of constants +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/mut_ref_in_final.rs:15:21 | LL | const B: *mut i32 = &mut 4; - | ^^^^^^ + | ^^^^^^ this mutable borrow refers to such a temporary + | + = note: 
Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0764]: mutable references are not allowed in the final value of constants +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/mut_ref_in_final.rs:21:35 | LL | const B3: Option<&mut i32> = Some(&mut 42); - | ^^^^^^^ + | ^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error[E0716]: temporary value dropped while borrowed --> $DIR/mut_ref_in_final.rs:24:42 @@ -31,8 +39,19 @@ LL | const IMMUT_MUT_REF: &mut u16 = unsafe { mem::transmute(&13) }; HEX_DUMP } +error[E0080]: constructing invalid value: encountered mutable reference or box pointing to read-only memory + --> $DIR/mut_ref_in_final.rs:29:1 + | +LL | static IMMUT_MUT_REF_STATIC: &mut u16 = unsafe { mem::transmute(&13) }; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value + | + = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. 
+ = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { + HEX_DUMP + } + error[E0716]: temporary value dropped while borrowed - --> $DIR/mut_ref_in_final.rs:50:65 + --> $DIR/mut_ref_in_final.rs:52:65 | LL | const FOO: NotAMutex<&mut i32> = NotAMutex(UnsafeCell::new(&mut 42)); | -------------------------------^^-- @@ -42,7 +61,7 @@ LL | const FOO: NotAMutex<&mut i32> = NotAMutex(UnsafeCell::new(&mut 42)); | using this value as a constant requires that borrow lasts for `'static` error[E0716]: temporary value dropped while borrowed - --> $DIR/mut_ref_in_final.rs:53:67 + --> $DIR/mut_ref_in_final.rs:55:67 | LL | static FOO2: NotAMutex<&mut i32> = NotAMutex(UnsafeCell::new(&mut 42)); | -------------------------------^^-- @@ -52,7 +71,7 @@ LL | static FOO2: NotAMutex<&mut i32> = NotAMutex(UnsafeCell::new(&mut 42)); | using this value as a static requires that borrow lasts for `'static` error[E0716]: temporary value dropped while borrowed - --> $DIR/mut_ref_in_final.rs:56:71 + --> $DIR/mut_ref_in_final.rs:58:71 | LL | static mut FOO3: NotAMutex<&mut i32> = NotAMutex(UnsafeCell::new(&mut 42)); | -------------------------------^^-- @@ -61,31 +80,47 @@ LL | static mut FOO3: NotAMutex<&mut i32> = NotAMutex(UnsafeCell::new(&mut 42)); | | creates a temporary value which is freed while still in use | using this value as a static requires that borrow lasts for `'static` -error[E0764]: mutable references are not allowed in the final value of statics - --> $DIR/mut_ref_in_final.rs:69:53 +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed + --> $DIR/mut_ref_in_final.rs:71:53 | LL | static RAW_MUT_CAST_S: SyncPtr = SyncPtr { x : &mut 42 as *mut _ as *const _ }; - | ^^^^^^^ + | ^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable 
state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0764]: mutable references are not allowed in the final value of statics - --> $DIR/mut_ref_in_final.rs:71:54 +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed + --> $DIR/mut_ref_in_final.rs:73:54 | LL | static RAW_MUT_COERCE_S: SyncPtr = SyncPtr { x: &mut 0 }; - | ^^^^^^ + | ^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0764]: mutable references are not allowed in the final value of constants - --> $DIR/mut_ref_in_final.rs:73:52 +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed + --> $DIR/mut_ref_in_final.rs:75:52 | LL | const RAW_MUT_CAST_C: SyncPtr = SyncPtr { x : &mut 42 as *mut _ as *const _ }; - | ^^^^^^^ + | ^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0764]: mutable references are not allowed in the final value of constants - --> $DIR/mut_ref_in_final.rs:75:53 +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed + --> $DIR/mut_ref_in_final.rs:77:53 | LL | const 
RAW_MUT_COERCE_C: SyncPtr = SyncPtr { x: &mut 0 }; - | ^^^^^^ + | ^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error: aborting due to 11 previous errors +error: aborting due to 12 previous errors Some errors have detailed explanations: E0080, E0716, E0764. For more information about an error, try `rustc --explain E0080`. diff --git a/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.rs b/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.rs index 2707e8a14ec3..1ae901f1653a 100644 --- a/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.rs +++ b/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.rs @@ -16,7 +16,7 @@ static mut BUFFER: i32 = 42; const fn helper() -> Option<&'static mut i32> { unsafe { Some(&mut *std::ptr::addr_of_mut!(BUFFER)) } } -const MUT: Option<&mut i32> = helper(); //~ ERROR encountered reference to mutable +const MUT: Option<&mut i32> = helper(); //~ ERROR encountered mutable reference const fn helper_int2ptr() -> Option<&'static mut i32> { unsafe { // Undefined behaviour (integer as pointer), who doesn't love tests like this. 
diff --git a/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.stderr b/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.stderr index 6456587b77a4..302e342bce61 100644 --- a/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.stderr +++ b/tests/ui/consts/const-mut-refs/mut_ref_in_final_dynamic_check.stderr @@ -1,4 +1,4 @@ -error[E0080]: constructing invalid value at ..0: encountered reference to mutable memory in `const` +error[E0080]: constructing invalid value at ..0: encountered mutable reference in `const` value --> $DIR/mut_ref_in_final_dynamic_check.rs:19:1 | LL | const MUT: Option<&mut i32> = helper(); diff --git a/tests/ui/consts/const-promoted-opaque.atomic.stderr b/tests/ui/consts/const-promoted-opaque.atomic.stderr index 9c0c969d586c..64cc7b3a3292 100644 --- a/tests/ui/consts/const-promoted-opaque.atomic.stderr +++ b/tests/ui/consts/const-promoted-opaque.atomic.stderr @@ -7,11 +7,15 @@ LL | LL | }; | - value is dropped here -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/const-promoted-opaque.rs:36:19 | LL | const BAZ: &Foo = &FOO; - | ^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error[E0716]: temporary value dropped while borrowed --> $DIR/const-promoted-opaque.rs:40:26 diff --git a/tests/ui/consts/const-promoted-opaque.rs b/tests/ui/consts/const-promoted-opaque.rs index 188dacd10034..270dddbb4a2f 100644 --- 
a/tests/ui/consts/const-promoted-opaque.rs +++ b/tests/ui/consts/const-promoted-opaque.rs @@ -34,7 +34,7 @@ const BAR: () = { }; const BAZ: &Foo = &FOO; -//[atomic]~^ ERROR: constants cannot refer to interior mutable data +//[atomic]~^ ERROR: interior mutable shared borrows of temporaries fn main() { let _: &'static _ = &FOO; diff --git a/tests/ui/consts/const-size_of-cycle.stderr b/tests/ui/consts/const-size_of-cycle.stderr index bf17d76a092b..b127f83d8853 100644 --- a/tests/ui/consts/const-size_of-cycle.stderr +++ b/tests/ui/consts/const-size_of-cycle.stderr @@ -11,13 +11,17 @@ LL | bytes: [u8; std::mem::size_of::()] | ^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: ...which requires computing layout of `Foo`... = note: ...which requires computing layout of `[u8; std::mem::size_of::()]`... - = note: ...which requires normalizing `[u8; std::mem::size_of::()]`... +note: ...which requires normalizing `[u8; std::mem::size_of::()]`... + --> $DIR/const-size_of-cycle.rs:2:17 + | +LL | bytes: [u8; std::mem::size_of::()] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: ...which again requires evaluating type-level constant, completing the cycle note: cycle used when checking that `Foo` is well-formed - --> $DIR/const-size_of-cycle.rs:1:1 + --> $DIR/const-size_of-cycle.rs:2:17 | -LL | struct Foo { - | ^^^^^^^^^^ +LL | bytes: [u8; std::mem::size_of::()] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: see https://rustc-dev-guide.rust-lang.org/overview.html#queries and https://rustc-dev-guide.rust-lang.org/query.html for more information error: aborting due to 1 previous error diff --git a/tests/ui/consts/const_cmp_type_id.rs b/tests/ui/consts/const_cmp_type_id.rs index e89b8d377878..dca0615083a7 100644 --- a/tests/ui/consts/const_cmp_type_id.rs +++ b/tests/ui/consts/const_cmp_type_id.rs @@ -6,11 +6,10 @@ use std::any::TypeId; fn main() { const { assert!(TypeId::of::() == TypeId::of::()); - //~^ ERROR cannot call non-const operator in constants + //~^ ERROR the trait bound `TypeId: const PartialEq` is not 
satisfied assert!(TypeId::of::<()>() != TypeId::of::()); - //~^ ERROR cannot call non-const operator in constants + //~^ ERROR the trait bound `TypeId: const PartialEq` is not satisfied let _a = TypeId::of::() < TypeId::of::(); - //~^ ERROR cannot call non-const operator in constants // can't assert `_a` because it is not deterministic // FIXME(const_trait_impl) make it pass } diff --git a/tests/ui/consts/const_cmp_type_id.stderr b/tests/ui/consts/const_cmp_type_id.stderr index 62f8d42c0e68..a8242a200eff 100644 --- a/tests/ui/consts/const_cmp_type_id.stderr +++ b/tests/ui/consts/const_cmp_type_id.stderr @@ -1,33 +1,15 @@ -error[E0015]: cannot call non-const operator in constants +error[E0277]: the trait bound `TypeId: const PartialEq` is not satisfied --> $DIR/const_cmp_type_id.rs:8:17 | LL | assert!(TypeId::of::() == TypeId::of::()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | -note: impl defined here, but it is not `const` - --> $SRC_DIR/core/src/any.rs:LL:COL - = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const operator in constants +error[E0277]: the trait bound `TypeId: const PartialEq` is not satisfied --> $DIR/const_cmp_type_id.rs:10:17 | LL | assert!(TypeId::of::<()>() != TypeId::of::()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | -note: impl defined here, but it is not `const` - --> $SRC_DIR/core/src/any.rs:LL:COL - = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const operator in constants - --> $DIR/const_cmp_type_id.rs:12:18 - | -LL | let _a = TypeId::of::() < TypeId::of::(); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | -note: impl defined here, but it is not `const` - --> $SRC_DIR/core/src/any.rs:LL:COL - = note: calls in constants are limited to constant functions, tuple structs and tuple variants +error: aborting due to 2 previous errors -error: aborting due to 3 previous errors - 
-For more information about this error, try `rustc --explain E0015`. +For more information about this error, try `rustc --explain E0277`. diff --git a/tests/ui/consts/const_refs_to_static.rs b/tests/ui/consts/const_refs_to_static.rs index 3c59697e8eda..187fab86a894 100644 --- a/tests/ui/consts/const_refs_to_static.rs +++ b/tests/ui/consts/const_refs_to_static.rs @@ -1,4 +1,5 @@ //@ run-pass +use std::sync::atomic::AtomicU32; static S: i32 = 0; static mut S_MUT: i32 = 0; @@ -10,9 +11,13 @@ const C1_READ: () = { }; const C2: *const i32 = std::ptr::addr_of!(S_MUT); +static FOO: AtomicU32 = AtomicU32::new(0); +const NOT_VALID_AS_PATTERN: &'static AtomicU32 = &FOO; + fn main() { assert_eq!(*C1, 0); assert_eq!(unsafe { *C2 }, 0); // Computing this pattern will read from an immutable static. That's fine. assert!(matches!(&0, C1)); + let _val = NOT_VALID_AS_PATTERN; } diff --git a/tests/ui/consts/const_refs_to_static_fail.rs b/tests/ui/consts/const_refs_to_static_fail.rs index b8bab91e005d..5bb9ca0a65e3 100644 --- a/tests/ui/consts/const_refs_to_static_fail.rs +++ b/tests/ui/consts/const_refs_to_static_fail.rs @@ -9,13 +9,23 @@ use std::cell::SyncUnsafeCell; static S: SyncUnsafeCell = SyncUnsafeCell::new(0); static mut S_MUT: i32 = 0; -const C1: &SyncUnsafeCell = &S; //~ERROR encountered reference to mutable memory +const C1: &SyncUnsafeCell = &S; const C1_READ: () = unsafe { - assert!(*C1.get() == 0); + assert!(*C1.get() == 0); //~ERROR constant accesses mutable global memory }; const C2: *const i32 = unsafe { std::ptr::addr_of!(S_MUT) }; const C2_READ: () = unsafe { assert!(*C2 == 0); //~ERROR constant accesses mutable global memory }; -fn main() {} +const BAD_PATTERN: &i32 = { + static mut S: i32 = 0; + unsafe { &mut S } +}; + +fn main() { + match &0 { + BAD_PATTERN => {}, //~ ERROR cannot be used as pattern + _ => {}, + } +} diff --git a/tests/ui/consts/const_refs_to_static_fail.stderr b/tests/ui/consts/const_refs_to_static_fail.stderr index 86d6c11dc0c3..c567b3e0ce1f 
100644 --- a/tests/ui/consts/const_refs_to_static_fail.stderr +++ b/tests/ui/consts/const_refs_to_static_fail.stderr @@ -1,19 +1,8 @@ -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/const_refs_to_static_fail.rs:12:1 - | -LL | const C1: &SyncUnsafeCell = &S; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value - | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. - = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } - -note: erroneous constant encountered - --> $DIR/const_refs_to_static_fail.rs:14:14 +error[E0080]: constant accesses mutable global memory + --> $DIR/const_refs_to_static_fail.rs:14:13 | LL | assert!(*C1.get() == 0); - | ^^ + | ^^^^^^^^^ evaluation of `C1_READ` failed here error[E0080]: constant accesses mutable global memory --> $DIR/const_refs_to_static_fail.rs:18:13 @@ -21,6 +10,14 @@ error[E0080]: constant accesses mutable global memory LL | assert!(*C2 == 0); | ^^^ evaluation of `C2_READ` failed here -error: aborting due to 2 previous errors +error: constant BAD_PATTERN cannot be used as pattern + --> $DIR/const_refs_to_static_fail.rs:28:9 + | +LL | BAD_PATTERN => {}, + | ^^^^^^^^^^^ + | + = note: constants that reference mutable or external memory cannot be used as pattern + +error: aborting due to 3 previous errors For more information about this error, try `rustc --explain E0080`. 
diff --git a/tests/ui/consts/const_refs_to_static_fail_invalid.rs b/tests/ui/consts/const_refs_to_static_fail_invalid.rs index 34ed8540f3fd..229b9fdcc602 100644 --- a/tests/ui/consts/const_refs_to_static_fail_invalid.rs +++ b/tests/ui/consts/const_refs_to_static_fail_invalid.rs @@ -23,11 +23,10 @@ fn extern_() { } const C: &i8 = unsafe { &S }; - //~^ERROR: `extern` static // This must be rejected here (or earlier), since the pattern cannot be read. match &0 { - C => {} // ok, `const` already emitted an error + C => {} //~ ERROR cannot be used as pattern _ => {} } } @@ -36,12 +35,11 @@ fn mutable() { static mut S_MUT: i32 = 0; const C: &i32 = unsafe { &S_MUT }; - //~^ERROR: encountered reference to mutable memory // This *must not build*, the constant we are matching against // could change its value! match &42 { - C => {} // ok, `const` already emitted an error + C => {} //~ ERROR cannot be used as pattern _ => {} } } diff --git a/tests/ui/consts/const_refs_to_static_fail_invalid.stderr b/tests/ui/consts/const_refs_to_static_fail_invalid.stderr index 8a034aa00bc5..8be8b4bc50f5 100644 --- a/tests/ui/consts/const_refs_to_static_fail_invalid.stderr +++ b/tests/ui/consts/const_refs_to_static_fail_invalid.stderr @@ -9,27 +9,21 @@ LL | const C: &bool = unsafe { std::mem::transmute(&S) }; HEX_DUMP } -error[E0080]: constructing invalid value: encountered reference to `extern` static in `const` - --> $DIR/const_refs_to_static_fail_invalid.rs:25:5 +error: constant extern_::C cannot be used as pattern + --> $DIR/const_refs_to_static_fail_invalid.rs:29:9 | -LL | const C: &i8 = unsafe { &S }; - | ^^^^^^^^^^^^ it is undefined behavior to use this value +LL | C => {} + | ^ | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. 
- = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } + = note: constants that reference mutable or external memory cannot be used as pattern -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/const_refs_to_static_fail_invalid.rs:38:5 +error: constant mutable::C cannot be used as pattern + --> $DIR/const_refs_to_static_fail_invalid.rs:42:9 | -LL | const C: &i32 = unsafe { &S_MUT }; - | ^^^^^^^^^^^^^ it is undefined behavior to use this value +LL | C => {} + | ^ | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. - = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } + = note: constants that reference mutable or external memory cannot be used as pattern error: aborting due to 3 previous errors diff --git a/tests/ui/consts/constifconst-call-in-const-position.rs b/tests/ui/consts/constifconst-call-in-const-position.rs index 80e47c2230f2..da29030dbc74 100644 --- a/tests/ui/consts/constifconst-call-in-const-position.rs +++ b/tests/ui/consts/constifconst-call-in-const-position.rs @@ -14,7 +14,7 @@ impl Tr for () { } } -const fn foo() -> [u8; T::a()] { +const fn foo() -> [u8; T::a()] { [0; T::a()] } diff --git a/tests/ui/consts/constifconst-call-in-const-position.stderr b/tests/ui/consts/constifconst-call-in-const-position.stderr index c778299560fa..e84e686251a5 100644 --- a/tests/ui/consts/constifconst-call-in-const-position.stderr +++ b/tests/ui/consts/constifconst-call-in-const-position.stderr @@ -1,8 +1,8 @@ error[E0277]: the trait bound `T: const Tr` is not satisfied - --> $DIR/constifconst-call-in-const-position.rs:17:38 + --> $DIR/constifconst-call-in-const-position.rs:17:39 | -LL | const fn foo() -> [u8; T::a()] { - | ^ +LL | const fn foo() -> [u8; T::a()] { + | ^ error[E0277]: the 
trait bound `T: const Tr` is not satisfied --> $DIR/constifconst-call-in-const-position.rs:18:9 diff --git a/tests/ui/consts/fn_trait_refs.rs b/tests/ui/consts/fn_trait_refs.rs index af233efd738a..e475c0a1b6fd 100644 --- a/tests/ui/consts/fn_trait_refs.rs +++ b/tests/ui/consts/fn_trait_refs.rs @@ -11,47 +11,47 @@ use std::marker::Destruct; const fn tester_fn(f: T) -> T::Output where - T: ~const Fn<()> + ~const Destruct, + T: [const] Fn<()> + [const] Destruct, { f() } const fn tester_fn_mut(mut f: T) -> T::Output where - T: ~const FnMut<()> + ~const Destruct, + T: [const] FnMut<()> + [const] Destruct, { f() } const fn tester_fn_once(f: T) -> T::Output where - T: ~const FnOnce<()>, + T: [const] FnOnce<()>, { f() } const fn test_fn(mut f: T) -> (T::Output, T::Output, T::Output) where - T: ~const Fn<()> + ~const Destruct, + T: [const] Fn<()> + [const] Destruct, { ( - // impl const Fn for &F + // impl const Fn for &F tester_fn(&f), - // impl const FnMut for &F + // impl const FnMut for &F tester_fn_mut(&f), - // impl const FnOnce for &F + // impl const FnOnce for &F tester_fn_once(&f), ) } const fn test_fn_mut(mut f: T) -> (T::Output, T::Output) where - T: ~const FnMut<()> + ~const Destruct, + T: [const] FnMut<()> + [const] Destruct, { ( - // impl const FnMut for &mut F + // impl const FnMut for &mut F tester_fn_mut(&mut f), - // impl const FnOnce for &mut F + // impl const FnOnce for &mut F tester_fn_once(&mut f), ) } diff --git a/tests/ui/consts/fn_trait_refs.stderr b/tests/ui/consts/fn_trait_refs.stderr index d688bfbde2bc..ee716c932e83 100644 --- a/tests/ui/consts/fn_trait_refs.stderr +++ b/tests/ui/consts/fn_trait_refs.stderr @@ -4,172 +4,162 @@ error[E0635]: unknown feature `const_fn_trait_ref_impls` LL | #![feature(const_fn_trait_ref_impls)] | ^^^^^^^^^^^^^^^^^^^^^^^^ -error[E0635]: unknown feature `const_cmp` - --> $DIR/fn_trait_refs.rs:7:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:14:6 | -LL | 
#![feature(const_cmp)] - | ^^^^^^^^^ - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:14:8 +LL | T: [const] Fn<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `Fn` | -LL | T: ~const Fn<()> + ~const Destruct, - | ^^^^^^ can't be applied to `Fn` - | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:14:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:14:6 | -LL | T: ~const Fn<()> + ~const Destruct, - | ^^^^^^ can't be applied to `Fn` +LL | T: [const] Fn<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:14:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:14:6 | -LL | T: ~const Fn<()> + ~const Destruct, - | ^^^^^^ can't be applied to `Fn` +LL | T: [const] Fn<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:21:8 +error: `[const]` can only be applied to 
`#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:21:6 | -LL | T: ~const FnMut<()> + ~const Destruct, - | ^^^^^^ can't be applied to `FnMut` +LL | T: [const] FnMut<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:21:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:21:6 | -LL | T: ~const FnMut<()> + ~const Destruct, - | ^^^^^^ can't be applied to `FnMut` +LL | T: [const] FnMut<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:21:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:21:6 | -LL | T: ~const FnMut<()> + ~const Destruct, - | ^^^^^^ can't be applied to `FnMut` +LL | T: [const] FnMut<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:28:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> 
$DIR/fn_trait_refs.rs:28:6 | -LL | T: ~const FnOnce<()>, - | ^^^^^^ can't be applied to `FnOnce` +LL | T: [const] FnOnce<()>, + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:28:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:28:6 | -LL | T: ~const FnOnce<()>, - | ^^^^^^ can't be applied to `FnOnce` +LL | T: [const] FnOnce<()>, + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:28:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:28:6 | -LL | T: ~const FnOnce<()>, - | ^^^^^^ can't be applied to `FnOnce` +LL | T: [const] FnOnce<()>, + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:35:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:35:6 | -LL | T: ~const Fn<()> + ~const Destruct, - | ^^^^^^ can't be applied to `Fn` +LL | T: [const] Fn<()> + 
[const] Destruct, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:35:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:35:6 | -LL | T: ~const Fn<()> + ~const Destruct, - | ^^^^^^ can't be applied to `Fn` +LL | T: [const] Fn<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:35:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:35:6 | -LL | T: ~const Fn<()> + ~const Destruct, - | ^^^^^^ can't be applied to `Fn` +LL | T: [const] Fn<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:49:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:49:6 | -LL | T: ~const FnMut<()> + ~const Destruct, - | ^^^^^^ can't be applied to `FnMut` +LL | T: [const] FnMut<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be 
used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:49:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:49:6 | -LL | T: ~const FnMut<()> + ~const Destruct, - | ^^^^^^ can't be applied to `FnMut` +LL | T: [const] FnMut<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/fn_trait_refs.rs:49:8 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/fn_trait_refs.rs:49:6 | -LL | T: ~const FnMut<()> + ~const Destruct, - | ^^^^^^ can't be applied to `FnMut` +LL | T: [const] FnMut<()> + [const] Destruct, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error[E0015]: cannot call non-const operator in constants +error[E0277]: the trait bound `(i32, i32, i32): const PartialEq` is not satisfied --> $DIR/fn_trait_refs.rs:71:17 | LL | assert!(test_one == (1, 1, 1)); | ^^^^^^^^^^^^^^^^^^^^^ - | - = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const operator in constants +error[E0277]: the trait bound `(i32, 
i32): const PartialEq` is not satisfied --> $DIR/fn_trait_refs.rs:74:17 | LL | assert!(test_two == (2, 2)); | ^^^^^^^^^^^^^^^^^^ - | - = note: calls in constants are limited to constant functions, tuple structs and tuple variants error[E0015]: cannot call non-const closure in constant functions --> $DIR/fn_trait_refs.rs:16:5 @@ -195,7 +185,7 @@ LL | f() | = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants -error: aborting due to 22 previous errors +error: aborting due to 21 previous errors -Some errors have detailed explanations: E0015, E0635. +Some errors have detailed explanations: E0015, E0277, E0635. For more information about an error, try `rustc --explain E0015`. diff --git a/tests/ui/consts/issue-17718-const-bad-values.rs b/tests/ui/consts/issue-17718-const-bad-values.rs index 40fc02cf6446..a447350e35bf 100644 --- a/tests/ui/consts/issue-17718-const-bad-values.rs +++ b/tests/ui/consts/issue-17718-const-bad-values.rs @@ -5,10 +5,10 @@ #![allow(static_mut_refs)] const C1: &'static mut [usize] = &mut []; -//~^ ERROR: mutable references are not allowed +//~^ ERROR: mutable borrows of temporaries static mut S: i32 = 3; const C2: &'static mut i32 = unsafe { &mut S }; -//~^ ERROR: reference to mutable memory +//~^ ERROR: encountered mutable reference fn main() {} diff --git a/tests/ui/consts/issue-17718-const-bad-values.stderr b/tests/ui/consts/issue-17718-const-bad-values.stderr index effb614b15bc..68d1a72b71e6 100644 --- a/tests/ui/consts/issue-17718-const-bad-values.stderr +++ b/tests/ui/consts/issue-17718-const-bad-values.stderr @@ -1,10 +1,14 @@ -error[E0764]: mutable references are not allowed in the final value of constants +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/issue-17718-const-bad-values.rs:7:34 | LL | const C1: &'static mut [usize] = &mut []; - | ^^^^^^^ + | ^^^^^^^ this mutable borrow refers to such a 
temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` +error[E0080]: constructing invalid value: encountered mutable reference in `const` value --> $DIR/issue-17718-const-bad-values.rs:11:1 | LL | const C2: &'static mut i32 = unsafe { &mut S }; diff --git a/tests/ui/consts/issue-17718-const-borrow.rs b/tests/ui/consts/issue-17718-const-borrow.rs index 89316dbd5c41..637333325914 100644 --- a/tests/ui/consts/issue-17718-const-borrow.rs +++ b/tests/ui/consts/issue-17718-const-borrow.rs @@ -2,13 +2,13 @@ use std::cell::UnsafeCell; const A: UnsafeCell = UnsafeCell::new(1); const B: &'static UnsafeCell = &A; -//~^ ERROR: cannot refer to interior mutable +//~^ ERROR: interior mutable shared borrows of temporaries struct C { a: UnsafeCell } const D: C = C { a: UnsafeCell::new(1) }; const E: &'static UnsafeCell = &D.a; -//~^ ERROR: cannot refer to interior mutable +//~^ ERROR: interior mutable shared borrows of temporaries const F: &'static C = &D; -//~^ ERROR: cannot refer to interior mutable +//~^ ERROR: interior mutable shared borrows of temporaries fn main() {} diff --git a/tests/ui/consts/issue-17718-const-borrow.stderr b/tests/ui/consts/issue-17718-const-borrow.stderr index e3ff6c923ad7..420a2c378a25 100644 --- a/tests/ui/consts/issue-17718-const-borrow.stderr +++ b/tests/ui/consts/issue-17718-const-borrow.stderr @@ -1,20 +1,32 @@ -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/issue-17718-const-borrow.rs:4:39 | LL | const 
B: &'static UnsafeCell = &A; - | ^^ this borrow of an interior mutable value may end up in the final value + | ^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/issue-17718-const-borrow.rs:9:39 | LL | const E: &'static UnsafeCell = &D.a; - | ^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/issue-17718-const-borrow.rs:11:23 | LL | const F: &'static C = &D; - | ^^ this borrow of an interior mutable value may end up in the final value + | ^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable 
state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 3 previous errors diff --git a/tests/ui/consts/issue-44415.stderr b/tests/ui/consts/issue-44415.stderr index 641945fce9fd..0e3f2e6199f7 100644 --- a/tests/ui/consts/issue-44415.stderr +++ b/tests/ui/consts/issue-44415.stderr @@ -11,13 +11,17 @@ LL | bytes: [u8; unsafe { intrinsics::size_of::() }], | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: ...which requires computing layout of `Foo`... = note: ...which requires computing layout of `[u8; unsafe { intrinsics::size_of::() }]`... - = note: ...which requires normalizing `[u8; unsafe { intrinsics::size_of::() }]`... +note: ...which requires normalizing `[u8; unsafe { intrinsics::size_of::() }]`... + --> $DIR/issue-44415.rs:6:17 + | +LL | bytes: [u8; unsafe { intrinsics::size_of::() }], + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: ...which again requires evaluating type-level constant, completing the cycle note: cycle used when checking that `Foo` is well-formed - --> $DIR/issue-44415.rs:5:1 + --> $DIR/issue-44415.rs:6:17 | -LL | struct Foo { - | ^^^^^^^^^^ +LL | bytes: [u8; unsafe { intrinsics::size_of::() }], + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: see https://rustc-dev-guide.rust-lang.org/overview.html#queries and https://rustc-dev-guide.rust-lang.org/query.html for more information error: aborting due to 1 previous error diff --git a/tests/ui/consts/issue-73976-monomorphic.stderr b/tests/ui/consts/issue-73976-monomorphic.stderr index ef754b23ff06..367d5be09da4 100644 --- a/tests/ui/consts/issue-73976-monomorphic.stderr +++ b/tests/ui/consts/issue-73976-monomorphic.stderr @@ -1,13 +1,9 @@ -error[E0015]: cannot call non-const operator in constant functions +error[E0277]: the trait bound `TypeId: [const] PartialEq` is not satisfied --> $DIR/issue-73976-monomorphic.rs:21:5 | LL | GetTypeId::::VALUE == GetTypeId::::VALUE | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | -note: impl 
defined here, but it is not `const` - --> $SRC_DIR/core/src/any.rs:LL:COL - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants error: aborting due to 1 previous error -For more information about this error, try `rustc --explain E0015`. +For more information about this error, try `rustc --explain E0277`. diff --git a/tests/ui/consts/issue-90870.rs b/tests/ui/consts/issue-90870.rs index b62769a33f8e..f807ae75ee5e 100644 --- a/tests/ui/consts/issue-90870.rs +++ b/tests/ui/consts/issue-90870.rs @@ -3,22 +3,31 @@ #![allow(dead_code)] const fn f(a: &u8, b: &u8) -> bool { + //~^ HELP: add `#![feature(const_trait_impl)]` to the crate attributes to enable + //~| HELP: add `#![feature(const_trait_impl)]` to the crate attributes to enable + //~| HELP: add `#![feature(const_trait_impl)]` to the crate attributes to enable a == b - //~^ ERROR: cannot call non-const operator in constant functions [E0015] + //~^ ERROR: cannot call conditionally-const operator in constant functions + //~| ERROR: `PartialEq` is not yet stable as a const trait //~| HELP: consider dereferencing here + //~| HELP: add `#![feature(const_trait_impl)]` to the crate attributes to enable } const fn g(a: &&&&i64, b: &&&&i64) -> bool { a == b - //~^ ERROR: cannot call non-const operator in constant functions [E0015] + //~^ ERROR: cannot call conditionally-const operator in constant functions + //~| ERROR: `PartialEq` is not yet stable as a const trait //~| HELP: consider dereferencing here + //~| HELP: add `#![feature(const_trait_impl)]` to the crate attributes to enable } const fn h(mut a: &[u8], mut b: &[u8]) -> bool { while let ([l, at @ ..], [r, bt @ ..]) = (a, b) { if l == r { - //~^ ERROR: cannot call non-const operator in constant functions [E0015] + //~^ ERROR: cannot call conditionally-const operator in constant functions + //~| ERROR: `PartialEq` is not yet stable as a const trait //~| HELP: consider dereferencing here + //~| HELP: add 
`#![feature(const_trait_impl)]` to the crate attributes to enable a = at; b = bt; } else { diff --git a/tests/ui/consts/issue-90870.stderr b/tests/ui/consts/issue-90870.stderr index ea987920d7d3..8d6f21fd82fb 100644 --- a/tests/ui/consts/issue-90870.stderr +++ b/tests/ui/consts/issue-90870.stderr @@ -1,39 +1,81 @@ -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/issue-90870.rs:6:5 +error[E0658]: cannot call conditionally-const operator in constant functions + --> $DIR/issue-90870.rs:9:5 | LL | a == b | ^^^^^^ | = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants + = note: see issue #67792 for more information + = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date help: consider dereferencing here | LL | *a == *b | + + -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/issue-90870.rs:12:5 +error: `PartialEq` is not yet stable as a const trait + --> $DIR/issue-90870.rs:9:5 + | +LL | a == b + | ^^^^^^ + | +help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + | +LL + #![feature(const_trait_impl)] + | + +error[E0658]: cannot call conditionally-const operator in constant functions + --> $DIR/issue-90870.rs:17:5 | LL | a == b | ^^^^^^ | = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants + = note: see issue #67792 for more information + = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date help: consider dereferencing here | LL | ****a == ****b | ++++ ++++ -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/issue-90870.rs:19:12 +error: `PartialEq` is not yet stable as a const trait + --> $DIR/issue-90870.rs:17:5 + | +LL | a == b + | 
^^^^^^ + | +help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + | +LL + #![feature(const_trait_impl)] + | + +error[E0658]: cannot call conditionally-const operator in constant functions + --> $DIR/issue-90870.rs:26:12 | LL | if l == r { | ^^^^^^ | = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants + = note: see issue #67792 for more information + = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date help: consider dereferencing here | LL | if *l == *r { | + + -error: aborting due to 3 previous errors +error: `PartialEq` is not yet stable as a const trait + --> $DIR/issue-90870.rs:26:12 + | +LL | if l == r { + | ^^^^^^ + | +help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + | +LL + #![feature(const_trait_impl)] + | -For more information about this error, try `rustc --explain E0015`. +error: aborting due to 6 previous errors + +For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/consts/miri_unleashed/const_refers_to_static.rs b/tests/ui/consts/miri_unleashed/const_refers_to_static.rs index 6cc670943463..eb78b5335cba 100644 --- a/tests/ui/consts/miri_unleashed/const_refers_to_static.rs +++ b/tests/ui/consts/miri_unleashed/const_refers_to_static.rs @@ -20,7 +20,7 @@ static mut MUTABLE: u32 = 0; const READ_MUT: u32 = unsafe { MUTABLE }; //~ERROR constant accesses mutable global memory // Evaluating this does not read anything mutable, but validation does, so this should error. 
-const REF_INTERIOR_MUT: &usize = { //~ ERROR encountered reference to mutable memory +const REF_INTERIOR_MUT: &usize = { static FOO: AtomicUsize = AtomicUsize::new(0); unsafe { &*(&FOO as *const _ as *const usize) } }; @@ -30,6 +30,13 @@ static MY_STATIC: u8 = 4; const REF_IMMUT: &u8 = &MY_STATIC; const READ_IMMUT: u8 = *REF_IMMUT; +fn foo() { + match &0 { + REF_INTERIOR_MUT => {}, //~ ERROR cannot be used as pattern + _ => {}, + } +} + fn main() {} //~? WARN skipping const checks diff --git a/tests/ui/consts/miri_unleashed/const_refers_to_static.stderr b/tests/ui/consts/miri_unleashed/const_refers_to_static.stderr index eed3b4d90659..6b70a211a72c 100644 --- a/tests/ui/consts/miri_unleashed/const_refers_to_static.stderr +++ b/tests/ui/consts/miri_unleashed/const_refers_to_static.stderr @@ -16,16 +16,13 @@ error[E0080]: constant accesses mutable global memory LL | const READ_MUT: u32 = unsafe { MUTABLE }; | ^^^^^^^ evaluation of `READ_MUT` failed here -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/const_refers_to_static.rs:23:1 +error: constant REF_INTERIOR_MUT cannot be used as pattern + --> $DIR/const_refers_to_static.rs:35:9 | -LL | const REF_INTERIOR_MUT: &usize = { - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value +LL | REF_INTERIOR_MUT => {}, + | ^^^^^^^^^^^^^^^^ | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. 
- = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } + = note: constants that reference mutable or external memory cannot be used as pattern warning: skipping const checks | diff --git a/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.rs b/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.rs index 6c7e78356616..cb093305429d 100644 --- a/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.rs +++ b/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.rs @@ -11,18 +11,15 @@ extern crate static_cross_crate; // Sneaky: reference to a mutable static. // Allowing this would be a disaster for pattern matching, we could violate exhaustiveness checking! const SLICE_MUT: &[u8; 1] = { - //~^ ERROR encountered reference to mutable memory unsafe { &static_cross_crate::ZERO } }; const U8_MUT: &u8 = { - //~^ ERROR encountered reference to mutable memory unsafe { &static_cross_crate::ZERO[0] } }; // Also test indirection that reads from other static. const U8_MUT2: &u8 = { - //~^ ERROR encountered reference to mutable memory unsafe { &(*static_cross_crate::ZERO_REF)[0] } }; const U8_MUT3: &u8 = { @@ -37,14 +34,14 @@ const U8_MUT3: &u8 = { pub fn test(x: &[u8; 1]) -> bool { match x { - SLICE_MUT => true, // ok, `const` error already emitted + SLICE_MUT => true, //~ ERROR cannot be used as pattern &[1..] => false, } } pub fn test2(x: &u8) -> bool { match x { - U8_MUT => true, // ok, `const` error already emitted + U8_MUT => true, //~ ERROR cannot be used as pattern &(1..) => false, } } @@ -53,7 +50,7 @@ pub fn test2(x: &u8) -> bool { // the errors above otherwise stop compilation too early? pub fn test3(x: &u8) -> bool { match x { - U8_MUT2 => true, // ok, `const` error already emitted + U8_MUT2 => true, //~ ERROR cannot be used as pattern &(1..) 
=> false, } } diff --git a/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.stderr b/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.stderr index 8af3a1948f0f..d753506cc94e 100644 --- a/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.stderr +++ b/tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.stderr @@ -1,42 +1,33 @@ -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/const_refers_to_static_cross_crate.rs:13:1 - | -LL | const SLICE_MUT: &[u8; 1] = { - | ^^^^^^^^^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value - | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. - = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } - -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/const_refers_to_static_cross_crate.rs:18:1 - | -LL | const U8_MUT: &u8 = { - | ^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value - | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. - = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } - -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/const_refers_to_static_cross_crate.rs:24:1 - | -LL | const U8_MUT2: &u8 = { - | ^^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value - | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. 
- = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } - error[E0080]: constant accesses mutable global memory - --> $DIR/const_refers_to_static_cross_crate.rs:30:15 + --> $DIR/const_refers_to_static_cross_crate.rs:27:15 | LL | match static_cross_crate::OPT_ZERO { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ evaluation of `U8_MUT3` failed here +error: constant SLICE_MUT cannot be used as pattern + --> $DIR/const_refers_to_static_cross_crate.rs:37:9 + | +LL | SLICE_MUT => true, + | ^^^^^^^^^ + | + = note: constants that reference mutable or external memory cannot be used as pattern + +error: constant U8_MUT cannot be used as pattern + --> $DIR/const_refers_to_static_cross_crate.rs:44:9 + | +LL | U8_MUT => true, + | ^^^^^^ + | + = note: constants that reference mutable or external memory cannot be used as pattern + +error: constant U8_MUT2 cannot be used as pattern + --> $DIR/const_refers_to_static_cross_crate.rs:53:9 + | +LL | U8_MUT2 => true, + | ^^^^^^^ + | + = note: constants that reference mutable or external memory cannot be used as pattern + error: aborting due to 4 previous errors For more information about this error, try `rustc --explain E0080`. 
diff --git a/tests/ui/consts/miri_unleashed/mutable_references.rs b/tests/ui/consts/miri_unleashed/mutable_references.rs index 63d243f892cd..2e95393ccbf5 100644 --- a/tests/ui/consts/miri_unleashed/mutable_references.rs +++ b/tests/ui/consts/miri_unleashed/mutable_references.rs @@ -26,7 +26,7 @@ const BLUNT: &mut i32 = &mut 42; //~^ ERROR: pointing to read-only memory const SUBTLE: &mut i32 = unsafe { - //~^ ERROR: constructing invalid value: encountered reference to mutable memory in `const` + //~^ ERROR: encountered mutable reference static mut STATIC: i32 = 0; &mut STATIC }; @@ -65,7 +65,10 @@ static mut MUT_TO_READONLY: &mut i32 = unsafe { &mut *(&READONLY as *const _ as // # Check for consts pointing to mutable memory static mut MUTABLE: i32 = 42; -const POINTS_TO_MUTABLE: &i32 = unsafe { &MUTABLE }; //~ ERROR encountered reference to mutable memory +const POINTS_TO_MUTABLE: &i32 = unsafe { &MUTABLE }; // OK, as long as it is not used as a pattern. + +// This fails since `&*MUTABLE_REF` is basically a copy of `MUTABLE_REF`, but we +// can't read from that static as it is mutable. 
static mut MUTABLE_REF: &mut i32 = &mut 42; const POINTS_TO_MUTABLE2: &i32 = unsafe { &*MUTABLE_REF }; //~^ ERROR accesses mutable global memory diff --git a/tests/ui/consts/miri_unleashed/mutable_references.stderr b/tests/ui/consts/miri_unleashed/mutable_references.stderr index 22860e4f6d9b..137efde44b31 100644 --- a/tests/ui/consts/miri_unleashed/mutable_references.stderr +++ b/tests/ui/consts/miri_unleashed/mutable_references.stderr @@ -43,7 +43,7 @@ LL | const BLUNT: &mut i32 = &mut 42; HEX_DUMP } -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` +error[E0080]: constructing invalid value: encountered mutable reference in `const` value --> $DIR/mutable_references.rs:28:1 | LL | const SUBTLE: &mut i32 = unsafe { @@ -98,49 +98,38 @@ LL | static mut MUT_TO_READONLY: &mut i32 = unsafe { &mut *(&READONLY as *const HEX_DUMP } -error[E0080]: constructing invalid value: encountered reference to mutable memory in `const` - --> $DIR/mutable_references.rs:68:1 - | -LL | const POINTS_TO_MUTABLE: &i32 = unsafe { &MUTABLE }; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ it is undefined behavior to use this value - | - = note: The rules on what exactly is undefined behavior aren't clear, so this check might be overzealous. Please open an issue on the rustc repository if you believe it should not be considered undefined behavior. 
- = note: the raw bytes of the constant (size: $SIZE, align: $ALIGN) { - HEX_DUMP - } - error[E0080]: constant accesses mutable global memory - --> $DIR/mutable_references.rs:70:43 + --> $DIR/mutable_references.rs:73:43 | LL | const POINTS_TO_MUTABLE2: &i32 = unsafe { &*MUTABLE_REF }; | ^^^^^^^^^^^^^ evaluation of `POINTS_TO_MUTABLE2` failed here error: encountered mutable pointer in final value of constant - --> $DIR/mutable_references.rs:73:1 + --> $DIR/mutable_references.rs:76:1 | LL | const POINTS_TO_MUTABLE_INNER: *const i32 = &mut 42 as *mut _ as *const _; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: encountered mutable pointer in final value of constant - --> $DIR/mutable_references.rs:76:1 + --> $DIR/mutable_references.rs:79:1 | LL | const POINTS_TO_MUTABLE_INNER2: *const i32 = &mut 42 as *const _; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: encountered mutable pointer in final value of constant - --> $DIR/mutable_references.rs:96:1 + --> $DIR/mutable_references.rs:99:1 | LL | const RAW_MUT_CAST: SyncPtr = SyncPtr { x: &mut 42 as *mut _ as *const _ }; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: encountered mutable pointer in final value of constant - --> $DIR/mutable_references.rs:99:1 + --> $DIR/mutable_references.rs:102:1 | LL | const RAW_MUT_COERCE: SyncPtr = SyncPtr { x: &mut 0 }; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error[E0594]: cannot assign to `*OH_YES`, as `OH_YES` is an immutable static item - --> $DIR/mutable_references.rs:106:5 + --> $DIR/mutable_references.rs:109:5 | LL | *OH_YES = 99; | ^^^^^^^^^^^^ cannot assign @@ -188,37 +177,37 @@ help: skipping check that does not even have a feature gate LL | const SNEAKY: &dyn Sync = &Synced { x: UnsafeCell::new(42) }; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: skipping check that does not even have a feature gate - --> $DIR/mutable_references.rs:73:45 + --> $DIR/mutable_references.rs:76:45 | LL | const POINTS_TO_MUTABLE_INNER: *const i32 = &mut 42 as *mut _ as *const _; | ^^^^^^^ help: 
skipping check that does not even have a feature gate - --> $DIR/mutable_references.rs:76:46 + --> $DIR/mutable_references.rs:79:46 | LL | const POINTS_TO_MUTABLE_INNER2: *const i32 = &mut 42 as *const _; | ^^^^^^^ help: skipping check that does not even have a feature gate - --> $DIR/mutable_references.rs:81:47 + --> $DIR/mutable_references.rs:84:47 | LL | const INTERIOR_MUTABLE_BEHIND_RAW: *mut i32 = &UnsafeCell::new(42) as *const _ as *mut _; | ^^^^^^^^^^^^^^^^^^^^ help: skipping check that does not even have a feature gate - --> $DIR/mutable_references.rs:93:51 + --> $DIR/mutable_references.rs:96:51 | LL | const RAW_SYNC: SyncPtr = SyncPtr { x: &AtomicI32::new(42) }; | ^^^^^^^^^^^^^^^^^^^ help: skipping check that does not even have a feature gate - --> $DIR/mutable_references.rs:96:49 + --> $DIR/mutable_references.rs:99:49 | LL | const RAW_MUT_CAST: SyncPtr = SyncPtr { x: &mut 42 as *mut _ as *const _ }; | ^^^^^^^ help: skipping check that does not even have a feature gate - --> $DIR/mutable_references.rs:99:51 + --> $DIR/mutable_references.rs:102:51 | LL | const RAW_MUT_COERCE: SyncPtr = SyncPtr { x: &mut 0 }; | ^^^^^^ -error: aborting due to 17 previous errors; 1 warning emitted +error: aborting due to 16 previous errors; 1 warning emitted Some errors have detailed explanations: E0080, E0594. For more information about an error, try `rustc --explain E0080`. diff --git a/tests/ui/consts/normalize-before-const-arg-has-type-goal.rs b/tests/ui/consts/normalize-before-const-arg-has-type-goal.rs new file mode 100644 index 000000000000..9caa3c9e2145 --- /dev/null +++ b/tests/ui/consts/normalize-before-const-arg-has-type-goal.rs @@ -0,0 +1,19 @@ +trait A {} + +// vv- Let's call this const "UNEVALUATED" for the comment below. +impl A<{}> for () {} +//~^ ERROR mismatched types + +// During overlap check, we end up trying to prove `(): A`. Inference guides +// `?0c = UNEVALUATED` (which is the `{}` const in the erroneous impl). 
We then +// fail to prove `ConstArgHasType` since `UNEVALUATED` has the +// type `bool` from the type_of query. We then deeply normalize the predicate for +// error reporting, which ends up normalizing `UNEVALUATED` to a ConstKind::Error. +// This ended up ICEing when trying to report an error for the `ConstArgHasType` +// predicate, since we don't expect `ConstArgHasType(ERROR, Ty)` to ever fail. + +trait C {} +impl C for () where (): A {} +impl C for () {} + +fn main() {} diff --git a/tests/ui/consts/normalize-before-const-arg-has-type-goal.stderr b/tests/ui/consts/normalize-before-const-arg-has-type-goal.stderr new file mode 100644 index 000000000000..a53231846b73 --- /dev/null +++ b/tests/ui/consts/normalize-before-const-arg-has-type-goal.stderr @@ -0,0 +1,9 @@ +error[E0308]: mismatched types + --> $DIR/normalize-before-const-arg-has-type-goal.rs:4:8 + | +LL | impl A<{}> for () {} + | ^^ expected `bool`, found `()` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0308`. 
diff --git a/tests/ui/consts/partial_qualif.rs b/tests/ui/consts/partial_qualif.rs index 7c28b8b8a628..18438cc576b1 100644 --- a/tests/ui/consts/partial_qualif.rs +++ b/tests/ui/consts/partial_qualif.rs @@ -3,7 +3,7 @@ use std::cell::Cell; const FOO: &(Cell, bool) = { let mut a = (Cell::new(0), false); a.1 = true; // sets `qualif(a)` to `qualif(a) | qualif(true)` - &{a} //~ ERROR cannot refer to interior mutable + &{a} //~ ERROR interior mutable shared borrows of temporaries }; fn main() {} diff --git a/tests/ui/consts/partial_qualif.stderr b/tests/ui/consts/partial_qualif.stderr index 05e0eeee1332..b7632eb868ac 100644 --- a/tests/ui/consts/partial_qualif.stderr +++ b/tests/ui/consts/partial_qualif.stderr @@ -1,8 +1,12 @@ -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/partial_qualif.rs:6:5 | LL | &{a} - | ^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 1 previous error diff --git a/tests/ui/consts/qualif_overwrite.rs b/tests/ui/consts/qualif_overwrite.rs index aae4e41ffd7e..93310b3f2a6a 100644 --- a/tests/ui/consts/qualif_overwrite.rs +++ b/tests/ui/consts/qualif_overwrite.rs @@ -7,7 +7,7 @@ use std::cell::Cell; const FOO: &Option> = { let mut a = Some(Cell::new(0)); a = None; // sets `qualif(a)` to `qualif(a) | qualif(None)` - &{a} //~ ERROR cannot refer to interior mutable + &{a} //~ ERROR interior mutable shared borrows of temporaries }; fn 
main() {} diff --git a/tests/ui/consts/qualif_overwrite.stderr b/tests/ui/consts/qualif_overwrite.stderr index 976cf7bd79eb..4aaaa4b2ca90 100644 --- a/tests/ui/consts/qualif_overwrite.stderr +++ b/tests/ui/consts/qualif_overwrite.stderr @@ -1,8 +1,12 @@ -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/qualif_overwrite.rs:10:5 | LL | &{a} - | ^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 1 previous error diff --git a/tests/ui/consts/qualif_overwrite_2.rs b/tests/ui/consts/qualif_overwrite_2.rs index 1819d9a6d203..e739790b5666 100644 --- a/tests/ui/consts/qualif_overwrite_2.rs +++ b/tests/ui/consts/qualif_overwrite_2.rs @@ -5,7 +5,7 @@ use std::cell::Cell; const FOO: &Option> = { let mut a = (Some(Cell::new(0)),); a.0 = None; // sets `qualif(a)` to `qualif(a) | qualif(None)` - &{a.0} //~ ERROR cannot refer to interior mutable + &{a.0} //~ ERROR interior mutable shared borrows of temporaries }; fn main() {} diff --git a/tests/ui/consts/qualif_overwrite_2.stderr b/tests/ui/consts/qualif_overwrite_2.stderr index a107c4a5c6db..bc1681418765 100644 --- a/tests/ui/consts/qualif_overwrite_2.stderr +++ b/tests/ui/consts/qualif_overwrite_2.stderr @@ -1,8 +1,12 @@ -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not 
allowed --> $DIR/qualif_overwrite_2.rs:8:5 | LL | &{a.0} - | ^^^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 1 previous error diff --git a/tests/ui/consts/recursive-static-write.rs b/tests/ui/consts/recursive-static-write.rs new file mode 100644 index 000000000000..dc5813d8c783 --- /dev/null +++ b/tests/ui/consts/recursive-static-write.rs @@ -0,0 +1,24 @@ +//! Ensure that writing to `S` while initializing `S` errors. +//! Regression test for . +#![allow(dead_code)] + +struct Foo { + x: i32, + y: (), +} + +static S: Foo = Foo { + x: 0, + y: unsafe { + (&raw const S.x).cast_mut().write(1); //~ERROR access itself during initialization + }, +}; + +static mut S2: Foo = Foo { + x: 0, + y: unsafe { + S2.x = 1; //~ERROR access itself during initialization + }, +}; + +fn main() {} diff --git a/tests/ui/consts/recursive-static-write.stderr b/tests/ui/consts/recursive-static-write.stderr new file mode 100644 index 000000000000..f5b5c49317cf --- /dev/null +++ b/tests/ui/consts/recursive-static-write.stderr @@ -0,0 +1,15 @@ +error[E0080]: encountered static that tried to access itself during initialization + --> $DIR/recursive-static-write.rs:13:9 + | +LL | (&raw const S.x).cast_mut().write(1); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ evaluation of `S` failed here + +error[E0080]: encountered static that tried to access itself during initialization + --> $DIR/recursive-static-write.rs:20:9 + | +LL | S2.x = 1; + | ^^^^^^^^ evaluation of `S2` failed here + +error: aborting due to 2 previous errors + +For more 
information about this error, try `rustc --explain E0080`. diff --git a/tests/ui/consts/recursive-zst-static.default.stderr b/tests/ui/consts/recursive-zst-static.default.stderr index fee33a892d06..c814576dfd5b 100644 --- a/tests/ui/consts/recursive-zst-static.default.stderr +++ b/tests/ui/consts/recursive-zst-static.default.stderr @@ -1,20 +1,20 @@ -error[E0080]: encountered static that tried to initialize itself with itself +error[E0080]: encountered static that tried to access itself during initialization --> $DIR/recursive-zst-static.rs:10:18 | LL | static FOO: () = FOO; | ^^^ evaluation of `FOO` failed here error[E0391]: cycle detected when evaluating initializer of static `A` - --> $DIR/recursive-zst-static.rs:13:16 + --> $DIR/recursive-zst-static.rs:13:1 | LL | static A: () = B; - | ^ + | ^^^^^^^^^^^^ | note: ...which requires evaluating initializer of static `B`... - --> $DIR/recursive-zst-static.rs:14:16 + --> $DIR/recursive-zst-static.rs:14:1 | LL | static B: () = A; - | ^ + | ^^^^^^^^^^^^ = note: ...which again requires evaluating initializer of static `A`, completing the cycle = note: cycle used when running analysis passes on this crate = note: see https://rustc-dev-guide.rust-lang.org/overview.html#queries and https://rustc-dev-guide.rust-lang.org/query.html for more information diff --git a/tests/ui/consts/recursive-zst-static.rs b/tests/ui/consts/recursive-zst-static.rs index 852caae94934..853af6d70eb0 100644 --- a/tests/ui/consts/recursive-zst-static.rs +++ b/tests/ui/consts/recursive-zst-static.rs @@ -8,7 +8,7 @@ // See https://github.com/rust-lang/rust/issues/71078 for more details. 
static FOO: () = FOO; -//~^ ERROR encountered static that tried to initialize itself with itself +//~^ ERROR encountered static that tried to access itself during initialization static A: () = B; //~ ERROR cycle detected when evaluating initializer of static `A` static B: () = A; diff --git a/tests/ui/consts/recursive-zst-static.unleash.stderr b/tests/ui/consts/recursive-zst-static.unleash.stderr index fee33a892d06..c814576dfd5b 100644 --- a/tests/ui/consts/recursive-zst-static.unleash.stderr +++ b/tests/ui/consts/recursive-zst-static.unleash.stderr @@ -1,20 +1,20 @@ -error[E0080]: encountered static that tried to initialize itself with itself +error[E0080]: encountered static that tried to access itself during initialization --> $DIR/recursive-zst-static.rs:10:18 | LL | static FOO: () = FOO; | ^^^ evaluation of `FOO` failed here error[E0391]: cycle detected when evaluating initializer of static `A` - --> $DIR/recursive-zst-static.rs:13:16 + --> $DIR/recursive-zst-static.rs:13:1 | LL | static A: () = B; - | ^ + | ^^^^^^^^^^^^ | note: ...which requires evaluating initializer of static `B`... - --> $DIR/recursive-zst-static.rs:14:16 + --> $DIR/recursive-zst-static.rs:14:1 | LL | static B: () = A; - | ^ + | ^^^^^^^^^^^^ = note: ...which again requires evaluating initializer of static `A`, completing the cycle = note: cycle used when running analysis passes on this crate = note: see https://rustc-dev-guide.rust-lang.org/overview.html#queries and https://rustc-dev-guide.rust-lang.org/query.html for more information diff --git a/tests/ui/consts/refs-to-cell-in-final.rs b/tests/ui/consts/refs-to-cell-in-final.rs index 844b140cff2b..2bd0623d94bc 100644 --- a/tests/ui/consts/refs-to-cell-in-final.rs +++ b/tests/ui/consts/refs-to-cell-in-final.rs @@ -11,9 +11,9 @@ unsafe impl Sync for SyncPtr {} // The resulting constant would pass all validation checks, so it is crucial that this gets rejected // by static const checks! 
static RAW_SYNC_S: SyncPtr> = SyncPtr { x: &Cell::new(42) }; -//~^ ERROR: cannot refer to interior mutable data +//~^ ERROR: interior mutable shared borrows of temporaries const RAW_SYNC_C: SyncPtr> = SyncPtr { x: &Cell::new(42) }; -//~^ ERROR: cannot refer to interior mutable data +//~^ ERROR: interior mutable shared borrows of temporaries // This one does not get promoted because of `Drop`, and then enters interesting codepaths because // as a value it has no interior mutability, but as a type it does. See @@ -39,7 +39,7 @@ const NONE_EXPLICIT_PROMOTED: &'static Option> = { // Not okay, since we are borrowing something with interior mutability. const INTERIOR_MUT_VARIANT: &Option> = &{ - //~^ERROR: cannot refer to interior mutable data + //~^ERROR: interior mutable shared borrows of temporaries let mut x = None; assert!(x.is_none()); x = Some(UnsafeCell::new(false)); diff --git a/tests/ui/consts/refs-to-cell-in-final.stderr b/tests/ui/consts/refs-to-cell-in-final.stderr index 8d82d94f4126..ac866dbe7210 100644 --- a/tests/ui/consts/refs-to-cell-in-final.stderr +++ b/tests/ui/consts/refs-to-cell-in-final.stderr @@ -1,18 +1,24 @@ -error[E0492]: statics cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/refs-to-cell-in-final.rs:13:54 | LL | static RAW_SYNC_S: SyncPtr> = SyncPtr { x: &Cell::new(42) }; - | ^^^^^^^^^^^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^^^^^^^^^^^ this borrow of an interior mutable value refers to such a temporary | - = help: to fix this, the value can be extracted to a separate `static` item and then referenced + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try 
replacing the temporary by an interior mutable `static` or a `static mut` -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/refs-to-cell-in-final.rs:15:53 | LL | const RAW_SYNC_C: SyncPtr> = SyncPtr { x: &Cell::new(42) }; - | ^^^^^^^^^^^^^^ this borrow of an interior mutable value may end up in the final value + | ^^^^^^^^^^^^^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/refs-to-cell-in-final.rs:41:57 | LL | const INTERIOR_MUT_VARIANT: &Option> = &{ @@ -23,7 +29,11 @@ LL | | assert!(x.is_none()); LL | | x = Some(UnsafeCell::new(false)); LL | | x LL | | }; - | |_^ this borrow of an interior mutable value may end up in the final value + | |_^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 3 previous errors diff --git a/tests/ui/consts/unsafe_cell_in_const.rs b/tests/ui/consts/unsafe_cell_in_const.rs new file mode 100644 index 000000000000..b867ae1ba9f1 --- 
/dev/null +++ b/tests/ui/consts/unsafe_cell_in_const.rs @@ -0,0 +1,15 @@ +//! Ensure we do not complain about zero-sized `UnsafeCell` in a const in any form. +//! See . + +//@ check-pass +use std::cell::UnsafeCell; + +const X1: &mut UnsafeCell<[i32; 0]> = UnsafeCell::from_mut(&mut []); + +const X2: &mut UnsafeCell<[i32]> = UnsafeCell::from_mut(&mut []); + +trait Trait {} +impl Trait for [i32; 0] {} +const X3: &mut UnsafeCell = UnsafeCell::from_mut(&mut []); + +fn main() {} diff --git a/tests/ui/consts/unstable-const-fn-in-libcore.rs b/tests/ui/consts/unstable-const-fn-in-libcore.rs index baeece40a52b..f4b4c687bd2e 100644 --- a/tests/ui/consts/unstable-const-fn-in-libcore.rs +++ b/tests/ui/consts/unstable-const-fn-in-libcore.rs @@ -16,7 +16,7 @@ enum Opt { impl Opt { #[rustc_const_unstable(feature = "foo", issue = "none")] #[stable(feature = "rust1", since = "1.0.0")] - const fn unwrap_or_else T>(self, f: F) -> T { + const fn unwrap_or_else T>(self, f: F) -> T { //FIXME ~^ ERROR destructor of //FIXME ~| ERROR destructor of match self { diff --git a/tests/ui/consts/unstable-const-fn-in-libcore.stderr b/tests/ui/consts/unstable-const-fn-in-libcore.stderr index 32693edbfcbd..b43fa1f7e6c3 100644 --- a/tests/ui/consts/unstable-const-fn-in-libcore.stderr +++ b/tests/ui/consts/unstable-const-fn-in-libcore.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/unstable-const-fn-in-libcore.rs:19:32 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/unstable-const-fn-in-libcore.rs:19:30 | -LL | const fn unwrap_or_else T>(self, f: F) -> T { - | ^^^^^^ can't be applied to `FnOnce` +LL | const fn unwrap_or_else T>(self, f: F) -> T { + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL 
-error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/unstable-const-fn-in-libcore.rs:19:32 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/unstable-const-fn-in-libcore.rs:19:30 | -LL | const fn unwrap_or_else T>(self, f: F) -> T { - | ^^^^^^ can't be applied to `FnOnce` +LL | const fn unwrap_or_else T>(self, f: F) -> T { + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` @@ -26,19 +26,19 @@ LL | Opt::None => f(), = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants error[E0493]: destructor of `F` cannot be evaluated at compile-time - --> $DIR/unstable-const-fn-in-libcore.rs:19:60 + --> $DIR/unstable-const-fn-in-libcore.rs:19:61 | -LL | const fn unwrap_or_else T>(self, f: F) -> T { - | ^ the destructor for this type cannot be evaluated in constant functions +LL | const fn unwrap_or_else T>(self, f: F) -> T { + | ^ the destructor for this type cannot be evaluated in constant functions ... LL | } | - value is dropped here error[E0493]: destructor of `Opt` cannot be evaluated at compile-time - --> $DIR/unstable-const-fn-in-libcore.rs:19:54 + --> $DIR/unstable-const-fn-in-libcore.rs:19:55 | -LL | const fn unwrap_or_else T>(self, f: F) -> T { - | ^^^^ the destructor for this type cannot be evaluated in constant functions +LL | const fn unwrap_or_else T>(self, f: F) -> T { + | ^^^^ the destructor for this type cannot be evaluated in constant functions ... 
LL | } | - value is dropped here diff --git a/tests/ui/consts/write-to-static-mut-in-static.rs b/tests/ui/consts/write-to-static-mut-in-static.rs index ce15d9e912b6..016bfb06ccfc 100644 --- a/tests/ui/consts/write-to-static-mut-in-static.rs +++ b/tests/ui/consts/write-to-static-mut-in-static.rs @@ -3,8 +3,9 @@ pub static mut B: () = unsafe { A = 1; }; //~^ ERROR modifying a static's initial value pub static mut C: u32 = unsafe { C = 1; 0 }; +//~^ ERROR static that tried to access itself during initialization pub static D: u32 = D; -//~^ ERROR static that tried to initialize itself with itself +//~^ ERROR static that tried to access itself during initialization fn main() {} diff --git a/tests/ui/consts/write-to-static-mut-in-static.stderr b/tests/ui/consts/write-to-static-mut-in-static.stderr index bb5e217afb97..4180bb49339e 100644 --- a/tests/ui/consts/write-to-static-mut-in-static.stderr +++ b/tests/ui/consts/write-to-static-mut-in-static.stderr @@ -4,12 +4,18 @@ error[E0080]: modifying a static's initial value from another static's initializ LL | pub static mut B: () = unsafe { A = 1; }; | ^^^^^ evaluation of `B` failed here -error[E0080]: encountered static that tried to initialize itself with itself - --> $DIR/write-to-static-mut-in-static.rs:7:21 +error[E0080]: encountered static that tried to access itself during initialization + --> $DIR/write-to-static-mut-in-static.rs:5:34 + | +LL | pub static mut C: u32 = unsafe { C = 1; 0 }; + | ^^^^^ evaluation of `C` failed here + +error[E0080]: encountered static that tried to access itself during initialization + --> $DIR/write-to-static-mut-in-static.rs:8:21 | LL | pub static D: u32 = D; | ^ evaluation of `D` failed here -error: aborting due to 2 previous errors +error: aborting due to 3 previous errors For more information about this error, try `rustc --explain E0080`. 
diff --git a/tests/ui/consts/write_to_static_via_mut_ref.rs b/tests/ui/consts/write_to_static_via_mut_ref.rs index 82ac85bd2509..dc8a7eed13d8 100644 --- a/tests/ui/consts/write_to_static_via_mut_ref.rs +++ b/tests/ui/consts/write_to_static_via_mut_ref.rs @@ -1,4 +1,4 @@ -static OH_NO: &mut i32 = &mut 42; //~ ERROR mutable references are not allowed +static OH_NO: &mut i32 = &mut 42; //~ ERROR mutable borrows of temporaries fn main() { assert_eq!(*OH_NO, 42); *OH_NO = 43; //~ ERROR cannot assign to `*OH_NO`, as `OH_NO` is an immutable static diff --git a/tests/ui/consts/write_to_static_via_mut_ref.stderr b/tests/ui/consts/write_to_static_via_mut_ref.stderr index 63ef788032f3..1bcd7b81fe05 100644 --- a/tests/ui/consts/write_to_static_via_mut_ref.stderr +++ b/tests/ui/consts/write_to_static_via_mut_ref.stderr @@ -1,8 +1,12 @@ -error[E0764]: mutable references are not allowed in the final value of statics +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/write_to_static_via_mut_ref.rs:1:26 | LL | static OH_NO: &mut i32 = &mut 42; - | ^^^^^^^ + | ^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error[E0594]: cannot assign to `*OH_NO`, as `OH_NO` is an immutable static item --> $DIR/write_to_static_via_mut_ref.rs:4:5 diff --git a/tests/ui/coroutine/auto-trait-regions.rs b/tests/ui/coroutine/auto-trait-regions.rs index f115896a473c..736555b31bbd 100644 --- a/tests/ui/coroutine/auto-trait-regions.rs +++ b/tests/ui/coroutine/auto-trait-regions.rs @@ -23,31 +23,31 @@ fn assert_foo(f: T) {} fn main() { // Make sure 'static is erased for coroutine 
interiors so we can't match it in trait selection let x: &'static _ = &OnlyFooIfStaticRef(No); - let gen = #[coroutine] move || { + let generator = #[coroutine] move || { let x = x; yield; assert_foo(x); }; - assert_foo(gen); + assert_foo(generator); //~^ ERROR implementation of `Foo` is not general enough // Allow impls which matches any lifetime let x = &OnlyFooIfRef(No); - let gen = #[coroutine] move || { + let generator = #[coroutine] move || { let x = x; yield; assert_foo(x); }; - assert_foo(gen); // ok + assert_foo(generator); // ok // Disallow impls which relates lifetimes in the coroutine interior - let gen = #[coroutine] move || { + let generator = #[coroutine] move || { let a = A(&mut true, &mut true, No); //~^ ERROR borrow may still be in use when coroutine yields //~| ERROR borrow may still be in use when coroutine yields yield; assert_foo(a); }; - assert_foo(gen); + assert_foo(generator); //~^ ERROR not general enough } diff --git a/tests/ui/coroutine/auto-trait-regions.stderr b/tests/ui/coroutine/auto-trait-regions.stderr index 77b5f3ce57c4..beb689d868d4 100644 --- a/tests/ui/coroutine/auto-trait-regions.stderr +++ b/tests/ui/coroutine/auto-trait-regions.stderr @@ -1,8 +1,8 @@ error[E0626]: borrow may still be in use when coroutine yields --> $DIR/auto-trait-regions.rs:45:19 | -LL | let gen = #[coroutine] move || { - | ------- within this coroutine +LL | let generator = #[coroutine] move || { + | ------- within this coroutine LL | let a = A(&mut true, &mut true, No); | ^^^^^^^^^ ... 
@@ -11,14 +11,14 @@ LL | yield; | help: add `static` to mark this coroutine as unmovable | -LL | let gen = #[coroutine] static move || { - | ++++++ +LL | let generator = #[coroutine] static move || { + | ++++++ error[E0626]: borrow may still be in use when coroutine yields --> $DIR/auto-trait-regions.rs:45:30 | -LL | let gen = #[coroutine] move || { - | ------- within this coroutine +LL | let generator = #[coroutine] move || { + | ------- within this coroutine LL | let a = A(&mut true, &mut true, No); | ^^^^^^^^^ ... @@ -27,14 +27,14 @@ LL | yield; | help: add `static` to mark this coroutine as unmovable | -LL | let gen = #[coroutine] static move || { - | ++++++ +LL | let generator = #[coroutine] static move || { + | ++++++ error: implementation of `Foo` is not general enough --> $DIR/auto-trait-regions.rs:31:5 | -LL | assert_foo(gen); - | ^^^^^^^^^^^^^^^ implementation of `Foo` is not general enough +LL | assert_foo(generator); + | ^^^^^^^^^^^^^^^^^^^^^ implementation of `Foo` is not general enough | = note: `&'0 OnlyFooIfStaticRef` must implement `Foo`, for any lifetime `'0`... = note: ...but `Foo` is actually implemented for the type `&'static OnlyFooIfStaticRef` @@ -42,8 +42,8 @@ LL | assert_foo(gen); error: implementation of `Foo` is not general enough --> $DIR/auto-trait-regions.rs:51:5 | -LL | assert_foo(gen); - | ^^^^^^^^^^^^^^^ implementation of `Foo` is not general enough +LL | assert_foo(generator); + | ^^^^^^^^^^^^^^^^^^^^^ implementation of `Foo` is not general enough | = note: `Foo` would have to be implemented for the type `A<'0, '1>`, for any two lifetimes `'0` and `'1`... 
= note: ...but `Foo` is actually implemented for the type `A<'_, '2>`, for some specific lifetime `'2` diff --git a/tests/ui/coroutine/clone-impl-static.rs b/tests/ui/coroutine/clone-impl-static.rs index f6fadff7faf1..2f941d655912 100644 --- a/tests/ui/coroutine/clone-impl-static.rs +++ b/tests/ui/coroutine/clone-impl-static.rs @@ -7,13 +7,13 @@ #![feature(coroutines, coroutine_clone, stmt_expr_attributes)] fn main() { - let gen = #[coroutine] + let generator = #[coroutine] static move || { yield; }; - check_copy(&gen); + check_copy(&generator); //~^ ERROR Copy` is not satisfied - check_clone(&gen); + check_clone(&generator); //~^ ERROR Clone` is not satisfied } diff --git a/tests/ui/coroutine/clone-impl-static.stderr b/tests/ui/coroutine/clone-impl-static.stderr index db1d2770346b..9fb71fd5fd01 100644 --- a/tests/ui/coroutine/clone-impl-static.stderr +++ b/tests/ui/coroutine/clone-impl-static.stderr @@ -1,8 +1,8 @@ error[E0277]: the trait bound `{static coroutine@$DIR/clone-impl-static.rs:11:5: 11:19}: Copy` is not satisfied --> $DIR/clone-impl-static.rs:14:16 | -LL | check_copy(&gen); - | ---------- ^^^^ the trait `Copy` is not implemented for `{static coroutine@$DIR/clone-impl-static.rs:11:5: 11:19}` +LL | check_copy(&generator); + | ---------- ^^^^^^^^^^ the trait `Copy` is not implemented for `{static coroutine@$DIR/clone-impl-static.rs:11:5: 11:19}` | | | required by a bound introduced by this call | @@ -15,8 +15,8 @@ LL | fn check_copy(_x: &T) {} error[E0277]: the trait bound `{static coroutine@$DIR/clone-impl-static.rs:11:5: 11:19}: Clone` is not satisfied --> $DIR/clone-impl-static.rs:16:17 | -LL | check_clone(&gen); - | ----------- ^^^^ the trait `Clone` is not implemented for `{static coroutine@$DIR/clone-impl-static.rs:11:5: 11:19}` +LL | check_clone(&generator); + | ----------- ^^^^^^^^^^ the trait `Clone` is not implemented for `{static coroutine@$DIR/clone-impl-static.rs:11:5: 11:19}` | | | required by a bound introduced by this call | diff --git 
a/tests/ui/defaults-well-formedness.rs b/tests/ui/defaults-well-formedness.rs deleted file mode 100644 index e5e48edad88f..000000000000 --- a/tests/ui/defaults-well-formedness.rs +++ /dev/null @@ -1,27 +0,0 @@ -//@ run-pass - -#![allow(dead_code)] -trait Trait {} -struct Foo(U, V) where U: Trait; - -trait Marker {} -struct TwoParams(T, U); -impl Marker for TwoParams {} - -// Clauses with more than 1 param are not checked. -struct IndividuallyBogus(TwoParams) where TwoParams: Marker; -struct BogusTogether(T, U) where TwoParams: Marker; -// Clauses with non-defaulted params are not checked. -struct NonDefaultedInClause(TwoParams) where TwoParams: Marker; -struct DefaultedLhs(U, V) where V: Trait; -// Dependent defaults are not checked. -struct Dependent(T, U) where U: Copy; -trait SelfBound {} -// Not even for well-formedness. -struct WellFormedProjection::Item>(A, T); - -// Issue #49344, predicates with lifetimes should not be checked. -trait Scope<'a> {} -struct Request<'a, S: Scope<'a> = i32>(S, &'a ()); - -fn main() {} diff --git a/tests/ui/deprecation/deprecated-expr-precedence.rs b/tests/ui/deprecation/deprecated-expr-precedence.rs new file mode 100644 index 000000000000..9636b46df201 --- /dev/null +++ b/tests/ui/deprecation/deprecated-expr-precedence.rs @@ -0,0 +1,8 @@ +//@ check-fail +//@ compile-flags: --crate-type=lib + +// Regression test for issue 142649 +pub fn public() { + #[deprecated] 0 + //~^ ERROR mismatched types +} diff --git a/tests/ui/deprecation/deprecated-expr-precedence.stderr b/tests/ui/deprecation/deprecated-expr-precedence.stderr new file mode 100644 index 000000000000..3275f2e790ae --- /dev/null +++ b/tests/ui/deprecation/deprecated-expr-precedence.stderr @@ -0,0 +1,11 @@ +error[E0308]: mismatched types + --> $DIR/deprecated-expr-precedence.rs:6:19 + | +LL | pub fn public() { + | - help: try adding a return type: `-> i32` +LL | #[deprecated] 0 + | ^ expected `()`, found integer + +error: aborting due to 1 previous error + +For more 
information about this error, try `rustc --explain E0308`. diff --git a/tests/ui/deprecation-in-force-unstable.rs b/tests/ui/deprecation/deprecated_main_function.rs similarity index 90% rename from tests/ui/deprecation-in-force-unstable.rs rename to tests/ui/deprecation/deprecated_main_function.rs index 6aaf29b069a6..398046637d80 100644 --- a/tests/ui/deprecation-in-force-unstable.rs +++ b/tests/ui/deprecation/deprecated_main_function.rs @@ -2,4 +2,4 @@ //@ compile-flags:-Zforce-unstable-if-unmarked #[deprecated] // should work even with -Zforce-unstable-if-unmarked -fn main() { } +fn main() {} diff --git a/tests/ui/deprecation/deprecated_no_stack_check.rs b/tests/ui/deprecation/deprecated_no_stack_check.rs index ef482098634a..8e1f5bbf045a 100644 --- a/tests/ui/deprecation/deprecated_no_stack_check.rs +++ b/tests/ui/deprecation/deprecated_no_stack_check.rs @@ -1,5 +1,3 @@ -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" -> "you are using $$RUSTC_VERSION" - #![deny(warnings)] #![feature(no_stack_check)] //~^ ERROR: feature has been removed [E0557] diff --git a/tests/ui/deprecation/deprecated_no_stack_check.stderr b/tests/ui/deprecation/deprecated_no_stack_check.stderr index 2d08b1b8db54..33788661d730 100644 --- a/tests/ui/deprecation/deprecated_no_stack_check.stderr +++ b/tests/ui/deprecation/deprecated_no_stack_check.stderr @@ -1,10 +1,10 @@ error[E0557]: feature has been removed - --> $DIR/deprecated_no_stack_check.rs:4:12 + --> $DIR/deprecated_no_stack_check.rs:2:12 | LL | #![feature(no_stack_check)] | ^^^^^^^^^^^^^^ feature has been removed | - = note: removed in 1.0.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.0.0; see for more information error: aborting due to 1 previous error diff --git a/tests/ui/deref-rc.rs b/tests/ui/deref-rc.rs deleted file mode 100644 index 92fdd9003592..000000000000 --- a/tests/ui/deref-rc.rs +++ /dev/null @@ -1,8 +0,0 @@ -//@ run-pass - -use std::rc::Rc; - 
-fn main() { - let x = Rc::new([1, 2, 3, 4]); - assert_eq!(*x, [1, 2, 3, 4]); -} diff --git a/tests/ui/deref.rs b/tests/ui/deref.rs deleted file mode 100644 index 0a6f3cc81f6c..000000000000 --- a/tests/ui/deref.rs +++ /dev/null @@ -1,6 +0,0 @@ -//@ run-pass - -pub fn main() { - let x: Box = Box::new(10); - let _y: isize = *x; -} diff --git a/tests/ui/derive-uninhabited-enum-38885.rs b/tests/ui/derive-uninhabited-enum-38885.rs deleted file mode 100644 index 2259a542706e..000000000000 --- a/tests/ui/derive-uninhabited-enum-38885.rs +++ /dev/null @@ -1,19 +0,0 @@ -//@ check-pass -//@ compile-flags: -Wunused - -// ensure there are no special warnings about uninhabited types -// when deriving Debug on an empty enum - -#[derive(Debug)] -enum Void {} - -#[derive(Debug)] -enum Foo { - Bar(#[allow(dead_code)] u8), - Void(Void), //~ WARN variant `Void` is never constructed -} - -fn main() { - let x = Foo::Bar(42); - println!("{:?}", x); -} diff --git a/tests/ui/derives/clone-debug-dead-code.stderr b/tests/ui/derives/clone-debug-dead-code.stderr index 34b7f929ec5e..38be486e3320 100644 --- a/tests/ui/derives/clone-debug-dead-code.stderr +++ b/tests/ui/derives/clone-debug-dead-code.stderr @@ -40,7 +40,7 @@ LL | struct D { f: () } | | | field in this struct | - = note: `D` has derived impls for the traits `Debug` and `Clone`, but these are intentionally ignored during dead code analysis + = note: `D` has derived impls for the traits `Clone` and `Debug`, but these are intentionally ignored during dead code analysis error: field `f` is never read --> $DIR/clone-debug-dead-code.rs:21:12 diff --git a/tests/ui/derives/derive-debug-uninhabited-enum.rs b/tests/ui/derives/derive-debug-uninhabited-enum.rs new file mode 100644 index 000000000000..be7b3ab348d6 --- /dev/null +++ b/tests/ui/derives/derive-debug-uninhabited-enum.rs @@ -0,0 +1,23 @@ +//! Regression test for `#[derive(Debug)]` on enums with uninhabited variants. +//! +//! 
Ensures there are no special warnings about uninhabited types when deriving +//! Debug on an enum with uninhabited variants, only standard unused warnings. +//! +//! Issue: https://github.com/rust-lang/rust/issues/38885 + +//@ check-pass +//@ compile-flags: -Wunused + +#[derive(Debug)] +enum Void {} + +#[derive(Debug)] +enum Foo { + Bar(#[allow(dead_code)] u8), + Void(Void), //~ WARN variant `Void` is never constructed +} + +fn main() { + let x = Foo::Bar(42); + println!("{:?}", x); +} diff --git a/tests/ui/derive-uninhabited-enum-38885.stderr b/tests/ui/derives/derive-debug-uninhabited-enum.stderr similarity index 89% rename from tests/ui/derive-uninhabited-enum-38885.stderr rename to tests/ui/derives/derive-debug-uninhabited-enum.stderr index bcd8f6b7b536..4911b6b6cded 100644 --- a/tests/ui/derive-uninhabited-enum-38885.stderr +++ b/tests/ui/derives/derive-debug-uninhabited-enum.stderr @@ -1,5 +1,5 @@ warning: variant `Void` is never constructed - --> $DIR/derive-uninhabited-enum-38885.rs:13:5 + --> $DIR/derive-debug-uninhabited-enum.rs:17:5 | LL | enum Foo { | --- variant in this enum diff --git a/tests/ui/derives/derives-span-Debug-enum-struct-variant.stderr b/tests/ui/derives/derives-span-Debug-enum-struct-variant.stderr index a7f6d094681a..147910b715f5 100644 --- a/tests/ui/derives/derives-span-Debug-enum-struct-variant.stderr +++ b/tests/ui/derives/derives-span-Debug-enum-struct-variant.stderr @@ -5,9 +5,8 @@ LL | #[derive(Debug)] | ----- in this derive macro expansion ... 
LL | x: Error - | ^^^^^^^^ `Error` cannot be formatted using `{:?}` + | ^^^^^^^^ the trait `Debug` is not implemented for `Error` | - = help: the trait `Debug` is not implemented for `Error` = note: add `#[derive(Debug)]` to `Error` or manually `impl Debug for Error` help: consider annotating `Error` with `#[derive(Debug)]` | diff --git a/tests/ui/derives/derives-span-Debug-enum.stderr b/tests/ui/derives/derives-span-Debug-enum.stderr index b3a584781598..6f97ceb02d3a 100644 --- a/tests/ui/derives/derives-span-Debug-enum.stderr +++ b/tests/ui/derives/derives-span-Debug-enum.stderr @@ -5,9 +5,8 @@ LL | #[derive(Debug)] | ----- in this derive macro expansion ... LL | Error - | ^^^^^ `Error` cannot be formatted using `{:?}` + | ^^^^^ the trait `Debug` is not implemented for `Error` | - = help: the trait `Debug` is not implemented for `Error` = note: add `#[derive(Debug)]` to `Error` or manually `impl Debug for Error` help: consider annotating `Error` with `#[derive(Debug)]` | diff --git a/tests/ui/derives/derives-span-Debug-struct.stderr b/tests/ui/derives/derives-span-Debug-struct.stderr index c8ad652716ca..46d69a892f25 100644 --- a/tests/ui/derives/derives-span-Debug-struct.stderr +++ b/tests/ui/derives/derives-span-Debug-struct.stderr @@ -5,9 +5,8 @@ LL | #[derive(Debug)] | ----- in this derive macro expansion LL | struct Struct { LL | x: Error - | ^^^^^^^^ `Error` cannot be formatted using `{:?}` + | ^^^^^^^^ the trait `Debug` is not implemented for `Error` | - = help: the trait `Debug` is not implemented for `Error` = note: add `#[derive(Debug)]` to `Error` or manually `impl Debug for Error` help: consider annotating `Error` with `#[derive(Debug)]` | diff --git a/tests/ui/derives/derives-span-Debug-tuple-struct.stderr b/tests/ui/derives/derives-span-Debug-tuple-struct.stderr index dbece4d2091b..a3feeff6df37 100644 --- a/tests/ui/derives/derives-span-Debug-tuple-struct.stderr +++ b/tests/ui/derives/derives-span-Debug-tuple-struct.stderr @@ -5,9 +5,8 @@ LL | 
#[derive(Debug)] | ----- in this derive macro expansion LL | struct Struct( LL | Error - | ^^^^^ `Error` cannot be formatted using `{:?}` + | ^^^^^ the trait `Debug` is not implemented for `Error` | - = help: the trait `Debug` is not implemented for `Error` = note: add `#[derive(Debug)]` to `Error` or manually `impl Debug for Error` help: consider annotating `Error` with `#[derive(Debug)]` | diff --git a/tests/ui/derives/nonsense-input-to-debug.rs b/tests/ui/derives/nonsense-input-to-debug.rs new file mode 100644 index 000000000000..7dfa3cd616a7 --- /dev/null +++ b/tests/ui/derives/nonsense-input-to-debug.rs @@ -0,0 +1,12 @@ +// Issue: #32950 +// Ensure that using macros rather than a type doesn't break `derive`. + +#[derive(Debug)] +struct Nonsense { + //~^ ERROR type parameter `T` is never used + should_be_vec_t: vec![T], + //~^ ERROR `derive` cannot be used on items with type macros + //~| ERROR expected type, found `expr` metavariable +} + +fn main() {} diff --git a/tests/ui/derives/nonsense-input-to-debug.stderr b/tests/ui/derives/nonsense-input-to-debug.stderr new file mode 100644 index 000000000000..7c97ca93cfc9 --- /dev/null +++ b/tests/ui/derives/nonsense-input-to-debug.stderr @@ -0,0 +1,30 @@ +error: `derive` cannot be used on items with type macros + --> $DIR/nonsense-input-to-debug.rs:7:22 + | +LL | should_be_vec_t: vec![T], + | ^^^^^^^ + +error: expected type, found `expr` metavariable + --> $DIR/nonsense-input-to-debug.rs:7:22 + | +LL | should_be_vec_t: vec![T], + | ^^^^^^^ + | | + | expected type + | in this macro invocation + | this macro call doesn't expand to a type + | + = note: this error originates in the macro `vec` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0392]: type parameter `T` is never used + --> $DIR/nonsense-input-to-debug.rs:5:17 + | +LL | struct Nonsense { + | ^ unused type parameter + | + = help: consider removing `T`, referring to it in a field, or using a marker such as `PhantomData` + = help: if you 
intended `T` to be a const parameter, use `const T: /* Type */` instead + +error: aborting due to 3 previous errors + +For more information about this error, try `rustc --explain E0392`. diff --git a/tests/ui/destructure-trait-ref.rs b/tests/ui/destructure-trait-ref.rs deleted file mode 100644 index daa0ca30d687..000000000000 --- a/tests/ui/destructure-trait-ref.rs +++ /dev/null @@ -1,46 +0,0 @@ -// The regression test for #15031 to make sure destructuring trait -// reference work properly. - -//@ dont-require-annotations: NOTE - -#![feature(box_patterns)] - -trait T { fn foo(&self) {} } -impl T for isize {} - - -fn main() { - // For an expression of the form: - // - // let &...&x = &..&SomeTrait; - // - // Say we have n `&` at the left hand and m `&` right hand, then: - // if n < m, we are golden; - // if n == m, it's a derefing non-derefable type error; - // if n > m, it's a type mismatch error. - - // n < m - let &x = &(&1isize as &dyn T); - let &x = &&(&1isize as &dyn T); - let &&x = &&(&1isize as &dyn T); - - // n == m - let &x = &1isize as &dyn T; //~ ERROR type `&dyn T` cannot be dereferenced - let &&x = &(&1isize as &dyn T); //~ ERROR type `&dyn T` cannot be dereferenced - let box x = Box::new(1isize) as Box; - //~^ ERROR type `Box` cannot be dereferenced - - // n > m - let &&x = &1isize as &dyn T; - //~^ ERROR mismatched types - //~| NOTE expected trait object `dyn T` - //~| NOTE found reference `&_` - let &&&x = &(&1isize as &dyn T); - //~^ ERROR mismatched types - //~| NOTE expected trait object `dyn T` - //~| NOTE found reference `&_` - let box box x = Box::new(1isize) as Box; - //~^ ERROR mismatched types - //~| NOTE expected trait object `dyn T` - //~| NOTE found struct `Box<_>` -} diff --git a/tests/ui/did_you_mean/bad-assoc-ty.stderr b/tests/ui/did_you_mean/bad-assoc-ty.edition2015.stderr similarity index 83% rename from tests/ui/did_you_mean/bad-assoc-ty.stderr rename to tests/ui/did_you_mean/bad-assoc-ty.edition2015.stderr index 
7e34f4d35b4e..fe1ce5ad18b1 100644 --- a/tests/ui/did_you_mean/bad-assoc-ty.stderr +++ b/tests/ui/did_you_mean/bad-assoc-ty.edition2015.stderr @@ -1,5 +1,5 @@ error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:1:10 + --> $DIR/bad-assoc-ty.rs:5:10 | LL | type A = [u8; 4]::AssocTy; | ^^^^^^^ @@ -10,7 +10,7 @@ LL | type A = <[u8; 4]>::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:5:10 + --> $DIR/bad-assoc-ty.rs:9:10 | LL | type B = [u8]::AssocTy; | ^^^^ @@ -21,7 +21,7 @@ LL | type B = <[u8]>::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:9:10 + --> $DIR/bad-assoc-ty.rs:13:10 | LL | type C = (u8)::AssocTy; | ^^^^ @@ -32,7 +32,7 @@ LL | type C = <(u8)>::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:13:10 + --> $DIR/bad-assoc-ty.rs:17:10 | LL | type D = (u8, u8)::AssocTy; | ^^^^^^^^ @@ -43,7 +43,7 @@ LL | type D = <(u8, u8)>::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:17:10 + --> $DIR/bad-assoc-ty.rs:21:10 | LL | type E = _::AssocTy; | ^ @@ -54,7 +54,7 @@ LL | type E = <_>::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:21:19 + --> $DIR/bad-assoc-ty.rs:25:19 | LL | type F = &'static (u8)::AssocTy; | ^^^^ @@ -65,7 +65,7 @@ LL | type F = &'static <(u8)>::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:27:10 + --> $DIR/bad-assoc-ty.rs:31:10 | LL | type G = dyn 'static + (Send)::AssocTy; | ^^^^^^^^^^^^^^^^^^^^ @@ -76,7 +76,7 @@ LL | type G = ::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:46:10 + --> $DIR/bad-assoc-ty.rs:51:10 | LL | type I = ty!()::AssocTy; | ^^^^^ @@ -87,7 +87,7 @@ LL | type I = ::AssocTy; | + + error: missing angle brackets in associated item path - --> $DIR/bad-assoc-ty.rs:39:19 + 
--> $DIR/bad-assoc-ty.rs:44:19 | LL | ($ty: ty) => ($ty::AssocTy); | ^^^ @@ -102,7 +102,7 @@ LL | ($ty: ty) => (<$ty>::AssocTy); | + + error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:1:10 + --> $DIR/bad-assoc-ty.rs:5:10 | LL | type A = [u8; 4]::AssocTy; | ^^^^^^^^^^^^^^^^ @@ -114,7 +114,7 @@ LL + type A = <[u8; 4] as Example>::AssocTy; | error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:5:10 + --> $DIR/bad-assoc-ty.rs:9:10 | LL | type B = [u8]::AssocTy; | ^^^^^^^^^^^^^ @@ -126,7 +126,7 @@ LL + type B = <[u8] as Example>::AssocTy; | error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:9:10 + --> $DIR/bad-assoc-ty.rs:13:10 | LL | type C = (u8)::AssocTy; | ^^^^^^^^^^^^^ @@ -138,7 +138,7 @@ LL + type C = ::AssocTy; | error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:13:10 + --> $DIR/bad-assoc-ty.rs:17:10 | LL | type D = (u8, u8)::AssocTy; | ^^^^^^^^^^^^^^^^^ @@ -150,13 +150,13 @@ LL + type D = <(u8, u8) as Example>::AssocTy; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for type aliases - --> $DIR/bad-assoc-ty.rs:17:10 + --> $DIR/bad-assoc-ty.rs:21:10 | LL | type E = _::AssocTy; | ^ not allowed in type signatures error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:21:19 + --> $DIR/bad-assoc-ty.rs:25:19 | LL | type F = &'static (u8)::AssocTy; | ^^^^^^^^^^^^^ @@ -168,7 +168,7 @@ LL + type F = &'static ::AssocTy; | error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:27:10 + --> $DIR/bad-assoc-ty.rs:31:10 | LL | type G = dyn 'static + (Send)::AssocTy; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -180,7 +180,7 @@ LL + type G = <(dyn Send + 'static) as Example>::AssocTy; | warning: trait objects without an explicit `dyn` are deprecated - --> $DIR/bad-assoc-ty.rs:33:10 + --> $DIR/bad-assoc-ty.rs:37:10 | LL | type H = Fn(u8) -> (u8)::Output; | ^^^^^^^^^^^^^^ @@ -194,7 +194,7 @@ LL | type H = (u8)>::Output; | ++++ + error[E0223]: ambiguous associated type 
- --> $DIR/bad-assoc-ty.rs:33:10 + --> $DIR/bad-assoc-ty.rs:37:10 | LL | type H = Fn(u8) -> (u8)::Output; | ^^^^^^^^^^^^^^^^^^^^^^ @@ -209,7 +209,7 @@ LL + type H = <(dyn Fn(u8) -> u8 + 'static) as IntoFuture>::Output; | error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:39:19 + --> $DIR/bad-assoc-ty.rs:44:19 | LL | ($ty: ty) => ($ty::AssocTy); | ^^^^^^^^^^^^ @@ -225,7 +225,7 @@ LL + ($ty: ty) => (::AssocTy); | error[E0223]: ambiguous associated type - --> $DIR/bad-assoc-ty.rs:46:10 + --> $DIR/bad-assoc-ty.rs:51:10 | LL | type I = ty!()::AssocTy; | ^^^^^^^^^^^^^^ @@ -237,99 +237,55 @@ LL + type I = ::AssocTy; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/bad-assoc-ty.rs:51:13 + --> $DIR/bad-assoc-ty.rs:56:13 | LL | fn foo>(x: X) {} - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures + | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/bad-assoc-ty.rs:54:34 + --> $DIR/bad-assoc-ty.rs:56:16 + | +LL | fn foo>(x: X) {} + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:60:34 | LL | fn bar(_: F) where F: Fn() -> _ {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn bar(_: F) where F: Fn() -> _ {} -LL + fn bar(_: F) where F: Fn() -> T {} - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/bad-assoc-ty.rs:57:19 + --> $DIR/bad-assoc-ty.rs:63:19 | LL | fn baz _>(_: F) {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn baz _>(_: F) {} -LL + fn baz T, T>(_: F) {} - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs - --> $DIR/bad-assoc-ty.rs:60:33 + --> $DIR/bad-assoc-ty.rs:66:33 | LL | struct L(F) 
where F: Fn() -> _; | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - struct L(F) where F: Fn() -> _; -LL + struct L(F) where F: Fn() -> T; - | - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/bad-assoc-ty.rs:82:38 - | -LL | fn foo(_: F) where F: Fn() -> _ {} - | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn foo(_: F) where F: Fn() -> _ {} -LL + fn foo(_: F) where F: Fn() -> T {} - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs - --> $DIR/bad-assoc-ty.rs:62:30 + --> $DIR/bad-assoc-ty.rs:68:30 | LL | struct M where F: Fn() -> _ { | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - struct M where F: Fn() -> _ { -LL + struct M where F: Fn() -> T { - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for enums - --> $DIR/bad-assoc-ty.rs:66:28 + --> $DIR/bad-assoc-ty.rs:72:28 | LL | enum N where F: Fn() -> _ { | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - enum N where F: Fn() -> _ { -LL + enum N where F: Fn() -> T { - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for unions - --> $DIR/bad-assoc-ty.rs:71:29 + --> $DIR/bad-assoc-ty.rs:77:29 | LL | union O where F: Fn() -> _ { | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - union O where F: Fn() -> _ { -LL + union O where F: Fn() -> T { - | error[E0740]: field must implement `Copy` or be wrapped in `ManuallyDrop<...>` to be used in a union - --> $DIR/bad-assoc-ty.rs:73:5 + --> $DIR/bad-assoc-ty.rs:79:5 | LL | foo: F, | ^^^^^^ @@ -341,18 +297,18 @@ LL | foo: std::mem::ManuallyDrop, | +++++++++++++++++++++++ + error[E0121]: the placeholder `_` is not allowed within types on item signatures for traits - --> $DIR/bad-assoc-ty.rs:77:29 + --> $DIR/bad-assoc-ty.rs:83:29 | LL | 
trait P where F: Fn() -> _ { | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - trait P where F: Fn() -> _ { -LL + trait P where F: Fn() -> T { - | -error: aborting due to 29 previous errors; 1 warning emitted +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:88:38 + | +LL | fn foo(_: F) where F: Fn() -> _ {} + | ^ not allowed in type signatures + +error: aborting due to 30 previous errors; 1 warning emitted Some errors have detailed explanations: E0121, E0223, E0740. For more information about an error, try `rustc --explain E0121`. diff --git a/tests/ui/did_you_mean/bad-assoc-ty.edition2021.stderr b/tests/ui/did_you_mean/bad-assoc-ty.edition2021.stderr new file mode 100644 index 000000000000..2cf7a150aa27 --- /dev/null +++ b/tests/ui/did_you_mean/bad-assoc-ty.edition2021.stderr @@ -0,0 +1,296 @@ +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:5:10 + | +LL | type A = [u8; 4]::AssocTy; + | ^^^^^^^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type A = <[u8; 4]>::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:9:10 + | +LL | type B = [u8]::AssocTy; + | ^^^^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type B = <[u8]>::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:13:10 + | +LL | type C = (u8)::AssocTy; + | ^^^^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type C = <(u8)>::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:17:10 + | +LL | type D = (u8, u8)::AssocTy; + | ^^^^^^^^ + | +help: types that don't start with an identifier need to be 
surrounded with angle brackets in qualified paths + | +LL | type D = <(u8, u8)>::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:21:10 + | +LL | type E = _::AssocTy; + | ^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type E = <_>::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:25:19 + | +LL | type F = &'static (u8)::AssocTy; + | ^^^^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type F = &'static <(u8)>::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:31:10 + | +LL | type G = dyn 'static + (Send)::AssocTy; + | ^^^^^^^^^^^^^^^^^^^^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type G = ::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:51:10 + | +LL | type I = ty!()::AssocTy; + | ^^^^^ + | +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | type I = ::AssocTy; + | + + + +error: missing angle brackets in associated item path + --> $DIR/bad-assoc-ty.rs:44:19 + | +LL | ($ty: ty) => ($ty::AssocTy); + | ^^^ +... 
+LL | type J = ty!(u8); + | ------- in this macro invocation + | + = note: this error originates in the macro `ty` (in Nightly builds, run with -Z macro-backtrace for more info) +help: types that don't start with an identifier need to be surrounded with angle brackets in qualified paths + | +LL | ($ty: ty) => (<$ty>::AssocTy); + | + + + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:5:10 + | +LL | type A = [u8; 4]::AssocTy; + | ^^^^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `[u8; 4]`, you could use the fully-qualified path + | +LL - type A = [u8; 4]::AssocTy; +LL + type A = <[u8; 4] as Example>::AssocTy; + | + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:9:10 + | +LL | type B = [u8]::AssocTy; + | ^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `[u8]`, you could use the fully-qualified path + | +LL - type B = [u8]::AssocTy; +LL + type B = <[u8] as Example>::AssocTy; + | + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:13:10 + | +LL | type C = (u8)::AssocTy; + | ^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `u8`, you could use the fully-qualified path + | +LL - type C = (u8)::AssocTy; +LL + type C = ::AssocTy; + | + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:17:10 + | +LL | type D = (u8, u8)::AssocTy; + | ^^^^^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `(u8, u8)`, you could use the fully-qualified path + | +LL - type D = (u8, u8)::AssocTy; +LL + type D = <(u8, u8) as Example>::AssocTy; + | + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for type aliases + --> $DIR/bad-assoc-ty.rs:21:10 + | +LL | type E = _::AssocTy; + | ^ not allowed in type signatures + +error[E0223]: ambiguous associated 
type + --> $DIR/bad-assoc-ty.rs:25:19 + | +LL | type F = &'static (u8)::AssocTy; + | ^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `u8`, you could use the fully-qualified path + | +LL - type F = &'static (u8)::AssocTy; +LL + type F = &'static ::AssocTy; + | + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:31:10 + | +LL | type G = dyn 'static + (Send)::AssocTy; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `(dyn Send + 'static)`, you could use the fully-qualified path + | +LL - type G = dyn 'static + (Send)::AssocTy; +LL + type G = <(dyn Send + 'static) as Example>::AssocTy; + | + +error[E0782]: expected a type, found a trait + --> $DIR/bad-assoc-ty.rs:37:10 + | +LL | type H = Fn(u8) -> (u8)::Output; + | ^^^^^^^^^^^^^^ + | +help: you can add the `dyn` keyword if you want a trait object + | +LL | type H = (u8)>::Output; + | ++++ + + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:44:19 + | +LL | ($ty: ty) => ($ty::AssocTy); + | ^^^^^^^^^^^^ +... 
+LL | type J = ty!(u8); + | ------- in this macro invocation + | + = note: this error originates in the macro `ty` (in Nightly builds, run with -Z macro-backtrace for more info) +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `u8`, you could use the fully-qualified path + | +LL - ($ty: ty) => ($ty::AssocTy); +LL + ($ty: ty) => (::AssocTy); + | + +error[E0223]: ambiguous associated type + --> $DIR/bad-assoc-ty.rs:51:10 + | +LL | type I = ty!()::AssocTy; + | ^^^^^^^^^^^^^^ + | +help: if there were a trait named `Example` with associated type `AssocTy` implemented for `u8`, you could use the fully-qualified path + | +LL - type I = ty!()::AssocTy; +LL + type I = ::AssocTy; + | + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:56:13 + | +LL | fn foo>(x: X) {} + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:56:16 + | +LL | fn foo>(x: X) {} + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:60:34 + | +LL | fn bar(_: F) where F: Fn() -> _ {} + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:63:19 + | +LL | fn baz _>(_: F) {} + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/bad-assoc-ty.rs:66:33 + | +LL | struct L(F) where F: Fn() -> _; + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/bad-assoc-ty.rs:68:30 + | +LL | struct M where F: Fn() -> _ { + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within 
types on item signatures for enums + --> $DIR/bad-assoc-ty.rs:72:28 + | +LL | enum N where F: Fn() -> _ { + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for unions + --> $DIR/bad-assoc-ty.rs:77:29 + | +LL | union O where F: Fn() -> _ { + | ^ not allowed in type signatures + +error[E0740]: field must implement `Copy` or be wrapped in `ManuallyDrop<...>` to be used in a union + --> $DIR/bad-assoc-ty.rs:79:5 + | +LL | foo: F, + | ^^^^^^ + | + = note: union fields must not have drop side-effects, which is currently enforced via either `Copy` or `ManuallyDrop<...>` +help: wrap the field type in `ManuallyDrop<...>` + | +LL | foo: std::mem::ManuallyDrop, + | +++++++++++++++++++++++ + + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for traits + --> $DIR/bad-assoc-ty.rs:83:29 + | +LL | trait P where F: Fn() -> _ { + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/bad-assoc-ty.rs:88:38 + | +LL | fn foo(_: F) where F: Fn() -> _ {} + | ^ not allowed in type signatures + +error: aborting due to 30 previous errors + +Some errors have detailed explanations: E0121, E0223, E0740, E0782. +For more information about an error, try `rustc --explain E0121`. diff --git a/tests/ui/did_you_mean/bad-assoc-ty.rs b/tests/ui/did_you_mean/bad-assoc-ty.rs index 5a559b01ea28..9abda4fd962b 100644 --- a/tests/ui/did_you_mean/bad-assoc-ty.rs +++ b/tests/ui/did_you_mean/bad-assoc-ty.rs @@ -1,3 +1,7 @@ +//@revisions: edition2015 edition2021 +//@[edition2015] edition:2015 +//@[edition2021] edition:2021 + type A = [u8; 4]::AssocTy; //~^ ERROR missing angle brackets in associated item path //~| ERROR ambiguous associated type @@ -31,9 +35,10 @@ type G = dyn 'static + (Send)::AssocTy; // This is actually a legal path with fn-like generic arguments in the middle! 
// Recovery should not apply in this context. type H = Fn(u8) -> (u8)::Output; -//~^ ERROR ambiguous associated type -//~| WARN trait objects without an explicit `dyn` are deprecated -//~| WARN this is accepted in the current edition +//[edition2015]~^ ERROR ambiguous associated type +//[edition2015]~| WARN trait objects without an explicit `dyn` are deprecated +//[edition2015]~| WARN this is accepted in the current edition +//[edition2021]~^^^^ ERROR expected a type, found a trait macro_rules! ty { ($ty: ty) => ($ty::AssocTy); @@ -50,6 +55,7 @@ type I = ty!()::AssocTy; trait K {} fn foo>(x: X) {} //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions +//~| ERROR the placeholder `_` is not allowed within types on item signatures for functions fn bar(_: F) where F: Fn() -> _ {} //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions diff --git a/tests/ui/diverging-fallback-method-chain.rs b/tests/ui/diverging-fallback-method-chain.rs deleted file mode 100644 index aa8eba1191b9..000000000000 --- a/tests/ui/diverging-fallback-method-chain.rs +++ /dev/null @@ -1,20 +0,0 @@ -//@ run-pass - -#![allow(unused_imports)] -// Test a regression found when building compiler. The `produce()` -// error type `T` winds up getting unified with result of `x.parse()`; -// the type of the closure given to `unwrap_or_else` needs to be -// inferred to `usize`. - -use std::num::ParseIntError; - -fn produce() -> Result<&'static str, T> { - Ok("22") -} - -fn main() { - let x: usize = produce() - .and_then(|x| x.parse()) - .unwrap_or_else(|_| panic!()); - println!("{}", x); -} diff --git a/tests/ui/diverging-fallback-option.rs b/tests/ui/diverging-fallback-option.rs deleted file mode 100644 index aa793ebd0178..000000000000 --- a/tests/ui/diverging-fallback-option.rs +++ /dev/null @@ -1,14 +0,0 @@ -//@ run-pass - -#![allow(warnings)] - -// Here the type of `c` is `Option`, where `?T` is unconstrained. 
-// Because there is data-flow from the `{ return; }` block, which -// diverges and hence has type `!`, into `c`, we will default `?T` to -// `!`, and hence this code compiles rather than failing and requiring -// a type annotation. - -fn main() { - let c = Some({ return; }); - c.unwrap(); -} diff --git a/tests/ui/dyn-compatibility/avoid-ice-on-warning-3.old.stderr b/tests/ui/dyn-compatibility/avoid-ice-on-warning-3.old.stderr index d8935be56094..8b4f3f52ee93 100644 --- a/tests/ui/dyn-compatibility/avoid-ice-on-warning-3.old.stderr +++ b/tests/ui/dyn-compatibility/avoid-ice-on-warning-3.old.stderr @@ -87,6 +87,11 @@ help: alternatively, consider constraining `g` so it does not apply to trait obj | LL | trait A { fn g(b: B) -> B where Self: Sized; } | +++++++++++++++++ +help: you might have meant to use `Self` to refer to the implementing type + | +LL - trait B { fn f(a: A) -> A; } +LL + trait B { fn f(a: Self) -> A; } + | warning: trait objects without an explicit `dyn` are deprecated --> $DIR/avoid-ice-on-warning-3.rs:14:19 @@ -124,6 +129,11 @@ help: alternatively, consider constraining `f` so it does not apply to trait obj | LL | trait B { fn f(a: A) -> A where Self: Sized; } | +++++++++++++++++ +help: you might have meant to use `Self` to refer to the implementing type + | +LL - trait A { fn g(b: B) -> B; } +LL + trait A { fn g(b: Self) -> B; } + | error: aborting due to 2 previous errors; 6 warnings emitted diff --git a/tests/ui/dyn-compatibility/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.stderr b/tests/ui/dyn-compatibility/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.stderr index 2cf244185e69..83a0a77d8424 100644 --- a/tests/ui/dyn-compatibility/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.stderr +++ b/tests/ui/dyn-compatibility/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.stderr @@ -139,9 +139,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> 
$DIR/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.rs:96:12 | LL | fn bar(_: &'a Trait) {} - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn bar<'a>(_: &'a Trait) {} + | ++++ error[E0106]: missing lifetime specifier --> $DIR/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.rs:110:13 @@ -171,9 +174,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.rs:122:17 | LL | fn kitten() -> &'a Trait { - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn kitten<'a>() -> &'a Trait { + | ++++ error[E0106]: missing lifetime specifier --> $DIR/reference-to-bare-trait-in-fn-inputs-and-outputs-issue-125139.rs:133:16 diff --git a/tests/ui/dyn-compatibility/supertrait-mentions-GAT.rs b/tests/ui/dyn-compatibility/supertrait-mentions-GAT.rs index 9e5c1bfe4160..b866dab9dba2 100644 --- a/tests/ui/dyn-compatibility/supertrait-mentions-GAT.rs +++ b/tests/ui/dyn-compatibility/supertrait-mentions-GAT.rs @@ -8,8 +8,7 @@ trait GatTrait { trait SuperTrait: for<'a> GatTrait = T> { fn c(&self) -> dyn SuperTrait; - //~^ ERROR associated item referring to unboxed trait object for its own trait - //~| ERROR the trait `SuperTrait` is not dyn compatible + //~^ ERROR the trait `SuperTrait` is not dyn compatible } fn main() {} diff --git a/tests/ui/dyn-compatibility/supertrait-mentions-GAT.stderr b/tests/ui/dyn-compatibility/supertrait-mentions-GAT.stderr index 582cf1af0546..ba4ce4753995 100644 --- a/tests/ui/dyn-compatibility/supertrait-mentions-GAT.stderr +++ b/tests/ui/dyn-compatibility/supertrait-mentions-GAT.stderr @@ -7,20 +7,6 @@ LL | Self: 'a; | ^^ = help: consider adding an explicit lifetime bound `Self: 'a`... 
-error: associated item referring to unboxed trait object for its own trait - --> $DIR/supertrait-mentions-GAT.rs:10:20 - | -LL | trait SuperTrait: for<'a> GatTrait = T> { - | ---------- in this trait -LL | fn c(&self) -> dyn SuperTrait; - | ^^^^^^^^^^^^^^^^^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn c(&self) -> dyn SuperTrait; -LL + fn c(&self) -> Self; - | - error[E0038]: the trait `SuperTrait` is not dyn compatible --> $DIR/supertrait-mentions-GAT.rs:10:20 | @@ -37,8 +23,13 @@ LL | type Gat<'a> LL | trait SuperTrait: for<'a> GatTrait = T> { | ---------- this trait is not dyn compatible... = help: consider moving `Gat` to another trait +help: you might have meant to use `Self` to refer to the implementing type + | +LL - fn c(&self) -> dyn SuperTrait; +LL + fn c(&self) -> Self; + | -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors Some errors have detailed explanations: E0038, E0311. For more information about an error, try `rustc --explain E0038`. 
diff --git a/tests/ui/error-codes/E0017.rs b/tests/ui/error-codes/E0017.rs index 8c685aad0308..0f00ddac579d 100644 --- a/tests/ui/error-codes/E0017.rs +++ b/tests/ui/error-codes/E0017.rs @@ -5,12 +5,12 @@ static X: i32 = 1; const C: i32 = 2; static mut M: i32 = 3; -const CR: &'static mut i32 = &mut C; //~ ERROR mutable references are not allowed +const CR: &'static mut i32 = &mut C; //~ ERROR mutable borrows of temporaries //~| WARN taking a mutable static STATIC_REF: &'static mut i32 = &mut X; //~ ERROR cannot borrow immutable static item `X` as mutable -static CONST_REF: &'static mut i32 = &mut C; //~ ERROR mutable references are not allowed +static CONST_REF: &'static mut i32 = &mut C; //~ ERROR mutable borrows of temporaries //~| WARN taking a mutable fn main() {} diff --git a/tests/ui/error-codes/E0017.stderr b/tests/ui/error-codes/E0017.stderr index 285d363592f8..2039e5564701 100644 --- a/tests/ui/error-codes/E0017.stderr +++ b/tests/ui/error-codes/E0017.stderr @@ -13,11 +13,15 @@ LL | const C: i32 = 2; | ^^^^^^^^^^^^ = note: `#[warn(const_item_mutation)]` on by default -error[E0764]: mutable references are not allowed in the final value of constants +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/E0017.rs:8:30 | LL | const CR: &'static mut i32 = &mut C; - | ^^^^^^ + | ^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error[E0596]: cannot borrow immutable static item `X` as mutable --> $DIR/E0017.rs:11:39 @@ -39,11 +43,15 @@ note: `const` item defined here LL | const C: i32 = 2; | ^^^^^^^^^^^^ -error[E0764]: mutable references are not 
allowed in the final value of statics +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/E0017.rs:13:38 | LL | static CONST_REF: &'static mut i32 = &mut C; - | ^^^^^^ + | ^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 3 previous errors; 2 warnings emitted diff --git a/tests/ui/error-codes/E0261.stderr b/tests/ui/error-codes/E0261.stderr index 0eab2dc0ee05..9ca26dc8459d 100644 --- a/tests/ui/error-codes/E0261.stderr +++ b/tests/ui/error-codes/E0261.stderr @@ -2,17 +2,23 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/E0261.rs:1:12 | LL | fn foo(x: &'a str) { } - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn foo<'a>(x: &'a str) { } + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/E0261.rs:5:9 | -LL | struct Foo { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | x: &'a str, | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | struct Foo<'a> { + | ++++ error: aborting due to 2 previous errors diff --git a/tests/ui/error-codes/E0492.stderr b/tests/ui/error-codes/E0492.stderr index 557c977e87d9..43a3a872e4e7 100644 --- a/tests/ui/error-codes/E0492.stderr +++ b/tests/ui/error-codes/E0492.stderr @@ -1,16 +1,22 @@ -error[E0492]: constants cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed 
--> $DIR/E0492.rs:4:33 | LL | const B: &'static AtomicUsize = &A; - | ^^ this borrow of an interior mutable value may end up in the final value + | ^^ this borrow of an interior mutable value refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` -error[E0492]: statics cannot refer to interior mutable data +error[E0492]: interior mutable shared borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/E0492.rs:5:34 | LL | static C: &'static AtomicUsize = &A; - | ^^ this borrow of an interior mutable value may end up in the final value + | ^^ this borrow of an interior mutable value refers to such a temporary | - = help: to fix this, the value can be extracted to a separate `static` item and then referenced + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 2 previous errors diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.not-diag-in-deps.stderr b/tests/ui/errors/remap-path-prefix-diagnostics.not-diag-in-deps.stderr index 3ddff11798de..229bfbe59e50 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.not-diag-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.not-diag-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> remapped/errors/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be 
formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Trait` --> $DIR/auxiliary/trait.rs:LL:COL | diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.only-debuginfo-in-deps.stderr b/tests/ui/errors/remap-path-prefix-diagnostics.only-debuginfo-in-deps.stderr index 85c781425b16..a59af3b6a826 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.only-debuginfo-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.only-debuginfo-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> $DIR/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Trait` --> $DIR/auxiliary/trait-debuginfo.rs:LL:COL | diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.only-diag-in-deps.stderr b/tests/ui/errors/remap-path-prefix-diagnostics.only-diag-in-deps.stderr index 792ea7925ad5..18fb9afcf390 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.only-diag-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.only-diag-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> $DIR/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} 
for pretty-print) instead note: required by a bound in `Trait` --> $DIR/auxiliary/trait-diag.rs:LL:COL | diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.only-macro-in-deps.stderr b/tests/ui/errors/remap-path-prefix-diagnostics.only-macro-in-deps.stderr index d13333d2e482..9e770f07fba2 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.only-macro-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.only-macro-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> $DIR/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Trait` --> $DIR/auxiliary/trait-macro.rs:LL:COL | diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.with-debuginfo-in-deps.stderr b/tests/ui/errors/remap-path-prefix-diagnostics.with-debuginfo-in-deps.stderr index 85c781425b16..a59af3b6a826 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.with-debuginfo-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.with-debuginfo-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> $DIR/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Trait` --> $DIR/auxiliary/trait-debuginfo.rs:LL:COL | diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.with-diag-in-deps.stderr 
b/tests/ui/errors/remap-path-prefix-diagnostics.with-diag-in-deps.stderr index 08f7fb2c7364..ca6f2b1697a8 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.with-diag-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.with-diag-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> remapped/errors/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Trait` --> remapped/errors/auxiliary/trait-diag.rs:LL:COL | diff --git a/tests/ui/errors/remap-path-prefix-diagnostics.with-macro-in-deps.stderr b/tests/ui/errors/remap-path-prefix-diagnostics.with-macro-in-deps.stderr index d13333d2e482..9e770f07fba2 100644 --- a/tests/ui/errors/remap-path-prefix-diagnostics.with-macro-in-deps.stderr +++ b/tests/ui/errors/remap-path-prefix-diagnostics.with-macro-in-deps.stderr @@ -2,10 +2,8 @@ error[E0277]: `A` doesn't implement `std::fmt::Display` --> $DIR/remap-path-prefix-diagnostics.rs:LL:COL | LL | impl r#trait::Trait for A {} - | ^ `A` cannot be formatted with the default formatter + | ^ the trait `std::fmt::Display` is not implemented for `A` | - = help: the trait `std::fmt::Display` is not implemented for `A` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Trait` --> $DIR/auxiliary/trait-macro.rs:LL:COL | diff --git a/tests/ui/expr/if/if-else-chain-missing-else.stderr b/tests/ui/expr/if/if-else-chain-missing-else.stderr index 374c4927e300..6c437120d391 100644 --- a/tests/ui/expr/if/if-else-chain-missing-else.stderr +++ b/tests/ui/expr/if/if-else-chain-missing-else.stderr @@ -1,18 +1,15 @@ error[E0308]: `if` and 
`else` have incompatible types --> $DIR/if-else-chain-missing-else.rs:12:12 | -LL | let x = if let Ok(x) = res { - | ______________- -LL | | x - | | - expected because of this -LL | | } else if let Err(e) = res { - | | ____________^ -LL | || return Err(e); -LL | || }; - | || ^ - | ||_____| - | |_____`if` and `else` have incompatible types - | expected `i32`, found `()` +LL | let x = if let Ok(x) = res { + | ------------------ `if` and `else` have incompatible types +LL | x + | - expected because of this +LL | } else if let Err(e) = res { + | ____________^ +LL | | return Err(e); +LL | | }; + | |_____^ expected `i32`, found `()` | = note: `if` expressions without `else` evaluate to `()` = note: consider adding an `else` block that evaluates to the expected type diff --git a/tests/ui/expr/if/if-else-type-mismatch.stderr b/tests/ui/expr/if/if-else-type-mismatch.stderr index 1cf94c98800b..56181267a318 100644 --- a/tests/ui/expr/if/if-else-type-mismatch.stderr +++ b/tests/ui/expr/if/if-else-type-mismatch.stderr @@ -92,13 +92,16 @@ LL | | }; error[E0308]: `if` and `else` have incompatible types --> $DIR/if-else-type-mismatch.rs:37:9 | -LL | let _ = if true { - | _____________________- -LL | | -LL | | } else { - | |_____- expected because of this -LL | 11u32 - | ^^^^^ expected `()`, found `u32` +LL | let _ = if true { + | ______________- - + | | _____________________| +LL | || +LL | || } else { + | ||_____- expected because of this +LL | | 11u32 + | | ^^^^^ expected `()`, found `u32` +LL | | }; + | |______- `if` and `else` have incompatible types error[E0308]: `if` and `else` have incompatible types --> $DIR/if-else-type-mismatch.rs:42:12 diff --git a/tests/ui/extern-flag/auxiliary/panic_handler.rs b/tests/ui/extern-flag/auxiliary/panic_handler.rs index 9140ceed2291..9607f0ed0139 100644 --- a/tests/ui/extern-flag/auxiliary/panic_handler.rs +++ b/tests/ui/extern-flag/auxiliary/panic_handler.rs @@ -12,4 +12,12 @@ pub fn begin_panic_handler(_info: &core::panic::PanicInfo<'_>) 
-> ! { } #[lang = "eh_personality"] -extern "C" fn eh_personality() {} +extern "C" fn eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { + loop {} +} diff --git a/tests/ui/feature-gates/feature-gate-abi-custom.rs b/tests/ui/feature-gates/feature-gate-abi-custom.rs index 3ddce974dd7d..312b6230b743 100644 --- a/tests/ui/feature-gates/feature-gate-abi-custom.rs +++ b/tests/ui/feature-gates/feature-gate-abi-custom.rs @@ -15,11 +15,11 @@ unsafe extern "custom" fn f7() { trait Tr { extern "custom" fn m7(); //~^ ERROR "custom" ABI is experimental - //~| ERROR functions with the `"custom"` ABI must be unsafe + //~| ERROR functions with the "custom" ABI must be unsafe #[unsafe(naked)] extern "custom" fn dm7() { //~^ ERROR "custom" ABI is experimental - //~| ERROR functions with the `"custom"` ABI must be unsafe + //~| ERROR functions with the "custom" ABI must be unsafe naked_asm!("") } } @@ -31,7 +31,7 @@ impl Tr for S { #[unsafe(naked)] extern "custom" fn m7() { //~^ ERROR "custom" ABI is experimental - //~| ERROR functions with the `"custom"` ABI must be unsafe + //~| ERROR functions with the "custom" ABI must be unsafe naked_asm!("") } } @@ -41,7 +41,7 @@ impl S { #[unsafe(naked)] extern "custom" fn im7() { //~^ ERROR "custom" ABI is experimental - //~| ERROR functions with the `"custom"` ABI must be unsafe + //~| ERROR functions with the "custom" ABI must be unsafe naked_asm!("") } } diff --git a/tests/ui/feature-gates/feature-gate-abi-custom.stderr b/tests/ui/feature-gates/feature-gate-abi-custom.stderr index e6dce0126d64..e359dbb5ebe1 100644 --- a/tests/ui/feature-gates/feature-gate-abi-custom.stderr +++ b/tests/ui/feature-gates/feature-gate-abi-custom.stderr @@ -1,4 +1,4 @@ -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/feature-gate-abi-custom.rs:16:5 | LL | extern "custom" fn m7(); @@ -9,7 +9,7 @@ help: add 
the `unsafe` keyword to this definition LL | unsafe extern "custom" fn m7(); | ++++++ -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/feature-gate-abi-custom.rs:20:5 | LL | extern "custom" fn dm7() { @@ -20,7 +20,7 @@ help: add the `unsafe` keyword to this definition LL | unsafe extern "custom" fn dm7() { | ++++++ -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/feature-gate-abi-custom.rs:32:5 | LL | extern "custom" fn m7() { @@ -31,7 +31,7 @@ help: add the `unsafe` keyword to this definition LL | unsafe extern "custom" fn m7() { | ++++++ -error: functions with the `"custom"` ABI must be unsafe +error: functions with the "custom" ABI must be unsafe --> $DIR/feature-gate-abi-custom.rs:42:5 | LL | extern "custom" fn im7() { diff --git a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.AMDGPU.stderr b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.AMDGPU.stderr index fca32c5c1e6f..4fa3fee942ea 100644 --- a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.AMDGPU.stderr +++ b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.AMDGPU.stderr @@ -19,7 +19,7 @@ LL | extern "gpu-kernel" fn m1(_: ()); = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:23:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:24:12 | LL | extern "gpu-kernel" fn dm1(_: ()) {} | ^^^^^^^^^^^^ @@ -29,7 +29,7 @@ LL | extern "gpu-kernel" fn dm1(_: ()) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:31:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:32:12 | LL | extern "gpu-kernel" fn m1(_: ()) {} | ^^^^^^^^^^^^ @@ -39,7 +39,7 @@ LL | extern 
"gpu-kernel" fn m1(_: ()) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:37:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:38:12 | LL | extern "gpu-kernel" fn im1(_: ()) {} | ^^^^^^^^^^^^ @@ -49,7 +49,7 @@ LL | extern "gpu-kernel" fn im1(_: ()) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:42:18 + --> $DIR/feature-gate-abi_gpu_kernel.rs:43:18 | LL | type A1 = extern "gpu-kernel" fn(_: ()); | ^^^^^^^^^^^^ diff --git a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.HOST.stderr b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.HOST.stderr index cc81289f6b78..88734bc9d225 100644 --- a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.HOST.stderr +++ b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.HOST.stderr @@ -1,3 +1,9 @@ +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:16:8 + | +LL | extern "gpu-kernel" fn f1(_: ()) {} + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change --> $DIR/feature-gate-abi_gpu_kernel.rs:16:8 | @@ -8,6 +14,12 @@ LL | extern "gpu-kernel" fn f1(_: ()) {} = help: add `#![feature(abi_gpu_kernel)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:21:12 + | +LL | extern "gpu-kernel" fn m1(_: ()); + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change --> $DIR/feature-gate-abi_gpu_kernel.rs:21:12 | @@ -18,8 +30,14 @@ LL | extern "gpu-kernel" fn m1(_: ()); = help: add 
`#![feature(abi_gpu_kernel)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:24:12 + | +LL | extern "gpu-kernel" fn dm1(_: ()) {} + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:23:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:24:12 | LL | extern "gpu-kernel" fn dm1(_: ()) {} | ^^^^^^^^^^^^ @@ -28,8 +46,14 @@ LL | extern "gpu-kernel" fn dm1(_: ()) {} = help: add `#![feature(abi_gpu_kernel)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:32:12 + | +LL | extern "gpu-kernel" fn m1(_: ()) {} + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:31:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:32:12 | LL | extern "gpu-kernel" fn m1(_: ()) {} | ^^^^^^^^^^^^ @@ -38,8 +62,14 @@ LL | extern "gpu-kernel" fn m1(_: ()) {} = help: add `#![feature(abi_gpu_kernel)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:38:12 + | +LL | extern "gpu-kernel" fn im1(_: ()) {} + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:37:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:38:12 | LL | extern "gpu-kernel" fn im1(_: ()) {} | ^^^^^^^^^^^^ @@ -48,8 +78,14 @@ LL | extern "gpu-kernel" fn im1(_: ()) {} = help: add `#![feature(abi_gpu_kernel)]` to the crate 
attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:43:18 + | +LL | type A1 = extern "gpu-kernel" fn(_: ()); + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:42:18 + --> $DIR/feature-gate-abi_gpu_kernel.rs:43:18 | LL | type A1 = extern "gpu-kernel" fn(_: ()); | ^^^^^^^^^^^^ @@ -58,6 +94,12 @@ LL | type A1 = extern "gpu-kernel" fn(_: ()); = help: add `#![feature(abi_gpu_kernel)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0570]: "gpu-kernel" is not a supported ABI for the current target + --> $DIR/feature-gate-abi_gpu_kernel.rs:47:8 + | +LL | extern "gpu-kernel" {} + | ^^^^^^^^^^^^ + error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change --> $DIR/feature-gate-abi_gpu_kernel.rs:47:8 | @@ -68,58 +110,7 @@ LL | extern "gpu-kernel" {} = help: add `#![feature(abi_gpu_kernel)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -warning: the calling convention "gpu-kernel" is not supported on this target - --> $DIR/feature-gate-abi_gpu_kernel.rs:42:11 - | -LL | type A1 = extern "gpu-kernel" fn(_: ()); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/feature-gate-abi_gpu_kernel.rs:47:1 - | -LL | extern "gpu-kernel" {} - | ^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/feature-gate-abi_gpu_kernel.rs:16:1 - | -LL | extern "gpu-kernel" fn f1(_: ()) {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/feature-gate-abi_gpu_kernel.rs:23:5 - | -LL | extern "gpu-kernel" fn dm1(_: ()) {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/feature-gate-abi_gpu_kernel.rs:31:5 - | -LL | extern "gpu-kernel" fn m1(_: ()) {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0570]: `"gpu-kernel"` is not a supported ABI for the current target - --> $DIR/feature-gate-abi_gpu_kernel.rs:37:5 - | -LL | extern "gpu-kernel" fn im1(_: ()) {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 12 previous errors; 1 warning emitted +error: aborting due to 14 previous errors Some errors have detailed explanations: E0570, E0658. For more information about an error, try `rustc --explain E0570`. -Future incompatibility report: Future breakage diagnostic: -warning: the calling convention "gpu-kernel" is not supported on this target - --> $DIR/feature-gate-abi_gpu_kernel.rs:42:11 - | -LL | type A1 = extern "gpu-kernel" fn(_: ()); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- = note: for more information, see issue #130260 - = note: `#[warn(unsupported_fn_ptr_calling_conventions)]` on by default - diff --git a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.NVPTX.stderr b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.NVPTX.stderr index fca32c5c1e6f..4fa3fee942ea 100644 --- a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.NVPTX.stderr +++ b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.NVPTX.stderr @@ -19,7 +19,7 @@ LL | extern "gpu-kernel" fn m1(_: ()); = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:23:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:24:12 | LL | extern "gpu-kernel" fn dm1(_: ()) {} | ^^^^^^^^^^^^ @@ -29,7 +29,7 @@ LL | extern "gpu-kernel" fn dm1(_: ()) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:31:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:32:12 | LL | extern "gpu-kernel" fn m1(_: ()) {} | ^^^^^^^^^^^^ @@ -39,7 +39,7 @@ LL | extern "gpu-kernel" fn m1(_: ()) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:37:12 + --> $DIR/feature-gate-abi_gpu_kernel.rs:38:12 | LL | extern "gpu-kernel" fn im1(_: ()) {} | ^^^^^^^^^^^^ @@ -49,7 +49,7 @@ LL | extern "gpu-kernel" fn im1(_: ()) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: the extern "gpu-kernel" ABI is experimental and subject to change - --> $DIR/feature-gate-abi_gpu_kernel.rs:42:18 + --> $DIR/feature-gate-abi_gpu_kernel.rs:43:18 | LL | type A1 = extern "gpu-kernel" fn(_: ()); | ^^^^^^^^^^^^ diff --git 
a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.rs b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.rs index 7b1ee681dd7e..988fbd83afcc 100644 --- a/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.rs +++ b/tests/ui/feature-gates/feature-gate-abi_gpu_kernel.rs @@ -19,6 +19,7 @@ extern "gpu-kernel" fn f1(_: ()) {} //~ ERROR "gpu-kernel" ABI is experimental a // Methods in trait definition trait Tr { extern "gpu-kernel" fn m1(_: ()); //~ ERROR "gpu-kernel" ABI is experimental and subject to change + //[HOST]~^ ERROR is not a supported ABI extern "gpu-kernel" fn dm1(_: ()) {} //~ ERROR "gpu-kernel" ABI is experimental and subject to change //[HOST]~^ ERROR is not a supported ABI @@ -40,8 +41,7 @@ impl S { // Function pointer types type A1 = extern "gpu-kernel" fn(_: ()); //~ ERROR "gpu-kernel" ABI is experimental and subject to change -//[HOST]~^ WARNING the calling convention "gpu-kernel" is not supported on this target [unsupported_fn_ptr_calling_conventions] -//[HOST]~| WARNING this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
+//[HOST]~^ ERROR is not a supported ABI // Foreign modules extern "gpu-kernel" {} //~ ERROR "gpu-kernel" ABI is experimental and subject to change diff --git a/tests/ui/feature-gates/feature-gate-cfi_encoding.rs b/tests/ui/feature-gates/feature-gate-cfi_encoding.rs index 3cef8156014b..b6312dd7817f 100644 --- a/tests/ui/feature-gates/feature-gate-cfi_encoding.rs +++ b/tests/ui/feature-gates/feature-gate-cfi_encoding.rs @@ -1,4 +1,4 @@ #![crate_type = "lib"] -#[cfi_encoding = "3Bar"] //~ERROR 3:1: 3:25: the `#[cfi_encoding]` attribute is an experimental feature [E0658] +#[cfi_encoding = "3Bar"] //~ ERROR the `#[cfi_encoding]` attribute is an experimental feature [E0658] pub struct Foo(i32); diff --git a/tests/ui/feature-gates/feature-gate-concat_idents.rs b/tests/ui/feature-gates/feature-gate-concat_idents.rs deleted file mode 100644 index 4fc3b6915973..000000000000 --- a/tests/ui/feature-gates/feature-gate-concat_idents.rs +++ /dev/null @@ -1,11 +0,0 @@ -#![expect(deprecated)] // concat_idents is deprecated - -const XY_1: i32 = 10; - -fn main() { - const XY_2: i32 = 20; - let a = concat_idents!(X, Y_1); //~ ERROR `concat_idents` is not stable - let b = concat_idents!(X, Y_2); //~ ERROR `concat_idents` is not stable - assert_eq!(a, 10); - assert_eq!(b, 20); -} diff --git a/tests/ui/feature-gates/feature-gate-concat_idents.stderr b/tests/ui/feature-gates/feature-gate-concat_idents.stderr deleted file mode 100644 index 6399424eecd8..000000000000 --- a/tests/ui/feature-gates/feature-gate-concat_idents.stderr +++ /dev/null @@ -1,23 +0,0 @@ -error[E0658]: use of unstable library feature `concat_idents`: `concat_idents` is not stable enough for use and is subject to change - --> $DIR/feature-gate-concat_idents.rs:7:13 - | -LL | let a = concat_idents!(X, Y_1); - | ^^^^^^^^^^^^^ - | - = note: see issue #29599 for more information - = help: add `#![feature(concat_idents)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading 
it if it is out of date - -error[E0658]: use of unstable library feature `concat_idents`: `concat_idents` is not stable enough for use and is subject to change - --> $DIR/feature-gate-concat_idents.rs:8:13 - | -LL | let b = concat_idents!(X, Y_2); - | ^^^^^^^^^^^^^ - | - = note: see issue #29599 for more information - = help: add `#![feature(concat_idents)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 2 previous errors - -For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/feature-gates/feature-gate-concat_idents2.rs b/tests/ui/feature-gates/feature-gate-concat_idents2.rs deleted file mode 100644 index bc2b4f7cddf9..000000000000 --- a/tests/ui/feature-gates/feature-gate-concat_idents2.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![expect(deprecated)] // concat_idents is deprecated - -fn main() { - concat_idents!(a, b); //~ ERROR `concat_idents` is not stable enough - //~| ERROR cannot find value `ab` in this scope -} diff --git a/tests/ui/feature-gates/feature-gate-concat_idents2.stderr b/tests/ui/feature-gates/feature-gate-concat_idents2.stderr deleted file mode 100644 index a770c1a348b5..000000000000 --- a/tests/ui/feature-gates/feature-gate-concat_idents2.stderr +++ /dev/null @@ -1,20 +0,0 @@ -error[E0658]: use of unstable library feature `concat_idents`: `concat_idents` is not stable enough for use and is subject to change - --> $DIR/feature-gate-concat_idents2.rs:4:5 - | -LL | concat_idents!(a, b); - | ^^^^^^^^^^^^^ - | - = note: see issue #29599 for more information - = help: add `#![feature(concat_idents)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0425]: cannot find value `ab` in this scope - --> $DIR/feature-gate-concat_idents2.rs:4:5 - | -LL | concat_idents!(a, b); - | ^^^^^^^^^^^^^^^^^^^^ not found in this scope - -error: 
aborting due to 2 previous errors - -Some errors have detailed explanations: E0425, E0658. -For more information about an error, try `rustc --explain E0425`. diff --git a/tests/ui/feature-gates/feature-gate-concat_idents3.rs b/tests/ui/feature-gates/feature-gate-concat_idents3.rs deleted file mode 100644 index d4a0d2e6bb0e..000000000000 --- a/tests/ui/feature-gates/feature-gate-concat_idents3.rs +++ /dev/null @@ -1,9 +0,0 @@ -#![expect(deprecated)] // concat_idents is deprecated - -const XY_1: i32 = 10; - -fn main() { - const XY_2: i32 = 20; - assert_eq!(10, concat_idents!(X, Y_1)); //~ ERROR `concat_idents` is not stable - assert_eq!(20, concat_idents!(X, Y_2)); //~ ERROR `concat_idents` is not stable -} diff --git a/tests/ui/feature-gates/feature-gate-concat_idents3.stderr b/tests/ui/feature-gates/feature-gate-concat_idents3.stderr deleted file mode 100644 index 7d929322bc06..000000000000 --- a/tests/ui/feature-gates/feature-gate-concat_idents3.stderr +++ /dev/null @@ -1,23 +0,0 @@ -error[E0658]: use of unstable library feature `concat_idents`: `concat_idents` is not stable enough for use and is subject to change - --> $DIR/feature-gate-concat_idents3.rs:7:20 - | -LL | assert_eq!(10, concat_idents!(X, Y_1)); - | ^^^^^^^^^^^^^ - | - = note: see issue #29599 for more information - = help: add `#![feature(concat_idents)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: use of unstable library feature `concat_idents`: `concat_idents` is not stable enough for use and is subject to change - --> $DIR/feature-gate-concat_idents3.rs:8:20 - | -LL | assert_eq!(20, concat_idents!(X, Y_2)); - | ^^^^^^^^^^^^^ - | - = note: see issue #29599 for more information - = help: add `#![feature(concat_idents)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 2 previous errors - -For more 
information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/feature-gates/feature-gate-coverage-attribute.rs b/tests/ui/feature-gates/feature-gate-coverage-attribute.rs index 2cf4b76180e1..0a463755f137 100644 --- a/tests/ui/feature-gates/feature-gate-coverage-attribute.rs +++ b/tests/ui/feature-gates/feature-gate-coverage-attribute.rs @@ -1,5 +1,3 @@ -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" -> "you are using $$RUSTC_VERSION" - #![crate_type = "lib"] #![feature(no_coverage)] //~ ERROR feature has been removed [E0557] diff --git a/tests/ui/feature-gates/feature-gate-coverage-attribute.stderr b/tests/ui/feature-gates/feature-gate-coverage-attribute.stderr index 8c23544698d0..68d0d9bc3c31 100644 --- a/tests/ui/feature-gates/feature-gate-coverage-attribute.stderr +++ b/tests/ui/feature-gates/feature-gate-coverage-attribute.stderr @@ -1,14 +1,14 @@ error[E0557]: feature has been removed - --> $DIR/feature-gate-coverage-attribute.rs:4:12 + --> $DIR/feature-gate-coverage-attribute.rs:2:12 | LL | #![feature(no_coverage)] | ^^^^^^^^^^^ feature has been removed | - = note: removed in 1.74.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.74.0; see for more information = note: renamed to `coverage_attribute` error[E0658]: the `#[coverage]` attribute is an experimental feature - --> $DIR/feature-gate-coverage-attribute.rs:12:1 + --> $DIR/feature-gate-coverage-attribute.rs:10:1 | LL | #[coverage(off)] | ^^^^^^^^^^^^^^^^ diff --git a/tests/ui/feature-gates/feature-gate-fn_align.stderr b/tests/ui/feature-gates/feature-gate-fn_align.stderr index 93ef136dc73c..921cf08435c2 100644 --- a/tests/ui/feature-gates/feature-gate-fn_align.stderr +++ b/tests/ui/feature-gates/feature-gate-fn_align.stderr @@ -22,12 +22,10 @@ error[E0539]: malformed `align` attribute input --> $DIR/feature-gate-fn_align.rs:8:5 | LL | #[align] - | ^^^^^^^^ expected this to be a list - | -help: try changing it to 
one of the following valid forms of the attribute - | -LL | #[align()] - | ++++++++++++++++++++++ + | ^^^^^^^^ + | | + | expected this to be a list + | help: must be of the form: `#[align()]` error: aborting due to 3 previous errors diff --git a/tests/ui/feature-gates/feature-gate-loop-match.rs b/tests/ui/feature-gates/feature-gate-loop-match.rs new file mode 100644 index 000000000000..399b20234f32 --- /dev/null +++ b/tests/ui/feature-gates/feature-gate-loop-match.rs @@ -0,0 +1,30 @@ +// Test that `#[loop_match]` and `#[const_continue]` cannot be used without +// `#![feature(loop_match)]`. + +enum State { + A, + B, + C, +} + +fn main() { + let mut state = State::A; + #[loop_match] //~ ERROR the `#[loop_match]` attribute is an experimental feature + 'a: loop { + state = 'blk: { + match state { + State::A => { + #[const_continue] + //~^ ERROR the `#[const_continue]` attribute is an experimental feature + break 'blk State::B; + } + State::B => { + #[const_continue] + //~^ ERROR the `#[const_continue]` attribute is an experimental feature + break 'blk State::C; + } + State::C => break 'a, + } + }; + } +} diff --git a/tests/ui/feature-gates/feature-gate-loop-match.stderr b/tests/ui/feature-gates/feature-gate-loop-match.stderr new file mode 100644 index 000000000000..9b12047cf4dd --- /dev/null +++ b/tests/ui/feature-gates/feature-gate-loop-match.stderr @@ -0,0 +1,33 @@ +error[E0658]: the `#[loop_match]` attribute is an experimental feature + --> $DIR/feature-gate-loop-match.rs:12:5 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ + | + = note: see issue #132306 for more information + = help: add `#![feature(loop_match)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: the `#[const_continue]` attribute is an experimental feature + --> $DIR/feature-gate-loop-match.rs:17:21 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ + | + = note: see issue #132306 for more information + = help: 
add `#![feature(loop_match)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: the `#[const_continue]` attribute is an experimental feature + --> $DIR/feature-gate-loop-match.rs:22:21 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ + | + = note: see issue #132306 for more information + = help: add `#![feature(loop_match)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error: aborting due to 3 previous errors + +For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/feature-gates/feature-gate-naked_functions_target_feature.stderr b/tests/ui/feature-gates/feature-gate-naked_functions_target_feature.stderr index 8e601a14753b..e57ec9cc59b2 100644 --- a/tests/ui/feature-gates/feature-gate-naked_functions_target_feature.stderr +++ b/tests/ui/feature-gates/feature-gate-naked_functions_target_feature.stderr @@ -1,8 +1,8 @@ error[E0658]: `#[target_feature(/* ... 
*/)]` is currently unstable on `#[naked]` functions - --> $DIR/feature-gate-naked_functions_target_feature.rs:7:1 + --> $DIR/feature-gate-naked_functions_target_feature.rs:7:3 | LL | #[target_feature(enable = "avx2")] - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^ | = note: see issue #138568 for more information = help: add `#![feature(naked_functions_target_feature)]` to the crate attributes to enable diff --git a/tests/ui/feature-gates/feature-gate-optimize_attribute.rs b/tests/ui/feature-gates/feature-gate-optimize_attribute.rs index 77cc307c9f45..ed5a11270f83 100644 --- a/tests/ui/feature-gates/feature-gate-optimize_attribute.rs +++ b/tests/ui/feature-gates/feature-gate-optimize_attribute.rs @@ -11,5 +11,5 @@ fn none() {} #[optimize(banana)] //~^ ERROR the `#[optimize]` attribute is an experimental feature -//~| ERROR E0722 +//~| ERROR malformed `optimize` attribute input [E0539] fn not_known() {} diff --git a/tests/ui/feature-gates/feature-gate-optimize_attribute.stderr b/tests/ui/feature-gates/feature-gate-optimize_attribute.stderr index 4e6e4ac2703a..e7e62b4f9899 100644 --- a/tests/ui/feature-gates/feature-gate-optimize_attribute.stderr +++ b/tests/ui/feature-gates/feature-gate-optimize_attribute.stderr @@ -38,13 +38,16 @@ LL | #[optimize(banana)] = help: add `#![feature(optimize_attribute)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error[E0722]: invalid argument - --> $DIR/feature-gate-optimize_attribute.rs:12:12 +error[E0539]: malformed `optimize` attribute input + --> $DIR/feature-gate-optimize_attribute.rs:12:1 | LL | #[optimize(banana)] - | ^^^^^^ + | ^^^^^^^^^^^------^^ + | | | + | | valid arguments are `size`, `speed` or `none` + | help: must be of the form: `#[optimize(size|speed|none)]` error: aborting due to 5 previous errors -Some errors have detailed explanations: E0658, E0722. -For more information about an error, try `rustc --explain E0658`. 
+Some errors have detailed explanations: E0539, E0658. +For more information about an error, try `rustc --explain E0539`. diff --git a/tests/ui/feature-gates/feature-gate-pin_ergonomics.rs b/tests/ui/feature-gates/feature-gate-pin_ergonomics.rs index 663a83a665c4..7746654555dd 100644 --- a/tests/ui/feature-gates/feature-gate-pin_ergonomics.rs +++ b/tests/ui/feature-gates/feature-gate-pin_ergonomics.rs @@ -17,6 +17,10 @@ fn foo(mut x: Pin<&mut Foo>) { let _y: &pin mut Foo = x; //~ ERROR pinned reference syntax is experimental } +fn foo_const(x: Pin<&Foo>) { + let _y: &pin const Foo = x; //~ ERROR pinned reference syntax is experimental +} + fn foo_sugar(_: &pin mut Foo) {} //~ ERROR pinned reference syntax is experimental fn bar(x: Pin<&mut Foo>) { @@ -31,6 +35,18 @@ fn baz(mut x: Pin<&mut Foo>) { fn baz_sugar(_: &pin const Foo) {} //~ ERROR pinned reference syntax is experimental +fn borrows() { + let mut x: Pin<&mut _> = &pin mut Foo; //~ ERROR pinned reference syntax is experimental + foo(x.as_mut()); + foo(x.as_mut()); + foo_const(x.as_ref()); + + let x: Pin<&_> = &pin const Foo; //~ ERROR pinned reference syntax is experimental + + foo_const(x); + foo_const(x); +} + #[cfg(any())] mod not_compiled { use std::pin::Pin; @@ -63,6 +79,18 @@ mod not_compiled { } fn baz_sugar(_: &pin const Foo) {} //~ ERROR pinned reference syntax is experimental + + fn borrows() { + let mut x: Pin<&mut _> = &pin mut Foo; //~ ERROR pinned reference syntax is experimental + foo(x.as_mut()); + foo(x.as_mut()); + foo_const(x.as_ref()); + + let x: Pin<&_> = &pin const Foo; //~ ERROR pinned reference syntax is experimental + + foo_const(x); + foo_const(x); + } } fn main() {} diff --git a/tests/ui/feature-gates/feature-gate-pin_ergonomics.stderr b/tests/ui/feature-gates/feature-gate-pin_ergonomics.stderr index 8ed7543d86e3..a8890254face 100644 --- a/tests/ui/feature-gates/feature-gate-pin_ergonomics.stderr +++ b/tests/ui/feature-gates/feature-gate-pin_ergonomics.stderr @@ -29,7 +29,17 @@ LL 
| let _y: &pin mut Foo = x; = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:20:18 + --> $DIR/feature-gate-pin_ergonomics.rs:21:14 + | +LL | let _y: &pin const Foo = x; + | ^^^ + | + = note: see issue #130494 for more information + = help: add `#![feature(pin_ergonomics)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: pinned reference syntax is experimental + --> $DIR/feature-gate-pin_ergonomics.rs:24:18 | LL | fn foo_sugar(_: &pin mut Foo) {} | ^^^ @@ -39,7 +49,7 @@ LL | fn foo_sugar(_: &pin mut Foo) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:32:18 + --> $DIR/feature-gate-pin_ergonomics.rs:36:18 | LL | fn baz_sugar(_: &pin const Foo) {} | ^^^ @@ -49,7 +59,27 @@ LL | fn baz_sugar(_: &pin const Foo) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:43:23 + --> $DIR/feature-gate-pin_ergonomics.rs:39:31 + | +LL | let mut x: Pin<&mut _> = &pin mut Foo; + | ^^^ + | + = note: see issue #130494 for more information + = help: add `#![feature(pin_ergonomics)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: pinned reference syntax is experimental + --> $DIR/feature-gate-pin_ergonomics.rs:44:23 + | +LL | let x: Pin<&_> = &pin const Foo; + | ^^^ + | + = note: see issue #130494 for more information + = help: add `#![feature(pin_ergonomics)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + 
+error[E0658]: pinned reference syntax is experimental + --> $DIR/feature-gate-pin_ergonomics.rs:59:23 | LL | fn foo_sugar(&pin mut self) {} | ^^^ @@ -59,7 +89,7 @@ LL | fn foo_sugar(&pin mut self) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:44:29 + --> $DIR/feature-gate-pin_ergonomics.rs:60:29 | LL | fn foo_sugar_const(&pin const self) {} | ^^^ @@ -69,7 +99,7 @@ LL | fn foo_sugar_const(&pin const self) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:50:18 + --> $DIR/feature-gate-pin_ergonomics.rs:66:18 | LL | let _y: &pin mut Foo = x; | ^^^ @@ -79,7 +109,7 @@ LL | let _y: &pin mut Foo = x; = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:53:22 + --> $DIR/feature-gate-pin_ergonomics.rs:69:22 | LL | fn foo_sugar(_: &pin mut Foo) {} | ^^^ @@ -89,7 +119,7 @@ LL | fn foo_sugar(_: &pin mut Foo) {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: pinned reference syntax is experimental - --> $DIR/feature-gate-pin_ergonomics.rs:65:22 + --> $DIR/feature-gate-pin_ergonomics.rs:81:22 | LL | fn baz_sugar(_: &pin const Foo) {} | ^^^ @@ -98,8 +128,28 @@ LL | fn baz_sugar(_: &pin const Foo) {} = help: add `#![feature(pin_ergonomics)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date +error[E0658]: pinned reference syntax is experimental + --> $DIR/feature-gate-pin_ergonomics.rs:84:35 + | +LL | let mut x: Pin<&mut _> = &pin mut Foo; + | ^^^ + | + = note: see issue #130494 for more information + = help: add `#![feature(pin_ergonomics)]` 
to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: pinned reference syntax is experimental + --> $DIR/feature-gate-pin_ergonomics.rs:89:27 + | +LL | let x: Pin<&_> = &pin const Foo; + | ^^^ + | + = note: see issue #130494 for more information + = help: add `#![feature(pin_ergonomics)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + error[E0382]: use of moved value: `x` - --> $DIR/feature-gate-pin_ergonomics.rs:24:9 + --> $DIR/feature-gate-pin_ergonomics.rs:28:9 | LL | fn bar(x: Pin<&mut Foo>) { | - move occurs because `x` has type `Pin<&mut Foo>`, which does not implement the `Copy` trait @@ -117,7 +167,7 @@ LL | fn foo(mut x: Pin<&mut Foo>) { | in this function error[E0382]: use of moved value: `x` - --> $DIR/feature-gate-pin_ergonomics.rs:29:5 + --> $DIR/feature-gate-pin_ergonomics.rs:33:5 | LL | fn baz(mut x: Pin<&mut Foo>) { | ----- move occurs because `x` has type `Pin<&mut Foo>`, which does not implement the `Copy` trait @@ -136,7 +186,7 @@ help: consider reborrowing the `Pin` instead of moving it LL | x.as_mut().foo(); | +++++++++ -error: aborting due to 12 previous errors +error: aborting due to 17 previous errors Some errors have detailed explanations: E0382, E0658. For more information about an error, try `rustc --explain E0382`. 
diff --git a/tests/ui/feature-gates/feature-gate-unsized_fn_params.stderr b/tests/ui/feature-gates/feature-gate-unsized_fn_params.edition2015.stderr similarity index 91% rename from tests/ui/feature-gates/feature-gate-unsized_fn_params.stderr rename to tests/ui/feature-gates/feature-gate-unsized_fn_params.edition2015.stderr index 30f958517680..ac2d1ffa8684 100644 --- a/tests/ui/feature-gates/feature-gate-unsized_fn_params.stderr +++ b/tests/ui/feature-gates/feature-gate-unsized_fn_params.edition2015.stderr @@ -1,5 +1,5 @@ error[E0277]: the size for values of type `(dyn Foo + 'static)` cannot be known at compilation time - --> $DIR/feature-gate-unsized_fn_params.rs:17:11 + --> $DIR/feature-gate-unsized_fn_params.rs:20:11 | LL | fn foo(x: dyn Foo) { | ^^^^^^^ doesn't have a size known at compile-time @@ -17,7 +17,7 @@ LL | fn foo(x: &dyn Foo) { | + error[E0277]: the size for values of type `(dyn Foo + 'static)` cannot be known at compilation time - --> $DIR/feature-gate-unsized_fn_params.rs:21:11 + --> $DIR/feature-gate-unsized_fn_params.rs:24:11 | LL | fn bar(x: Foo) { | ^^^ doesn't have a size known at compile-time @@ -34,7 +34,7 @@ LL | fn bar(x: &dyn Foo) { | ++++ error[E0277]: the size for values of type `[()]` cannot be known at compilation time - --> $DIR/feature-gate-unsized_fn_params.rs:25:11 + --> $DIR/feature-gate-unsized_fn_params.rs:30:11 | LL | fn qux(_: [()]) {} | ^^^^ doesn't have a size known at compile-time @@ -47,7 +47,7 @@ LL | fn qux(_: &[()]) {} | + error[E0277]: the size for values of type `(dyn Foo + 'static)` cannot be known at compilation time - --> $DIR/feature-gate-unsized_fn_params.rs:29:9 + --> $DIR/feature-gate-unsized_fn_params.rs:34:9 | LL | foo(*x); | ^^ doesn't have a size known at compile-time diff --git a/tests/ui/feature-gates/feature-gate-unsized_fn_params.edition2021.stderr b/tests/ui/feature-gates/feature-gate-unsized_fn_params.edition2021.stderr new file mode 100644 index 000000000000..12411f695f42 --- /dev/null +++ 
b/tests/ui/feature-gates/feature-gate-unsized_fn_params.edition2021.stderr @@ -0,0 +1,65 @@ +error[E0782]: expected a type, found a trait + --> $DIR/feature-gate-unsized_fn_params.rs:24:11 + | +LL | fn bar(x: Foo) { + | ^^^ + | +help: use a new generic type parameter, constrained by `Foo` + | +LL - fn bar(x: Foo) { +LL + fn bar(x: T) { + | +help: you can also use an opaque type, but users won't be able to specify the type parameter when calling the `fn`, having to rely exclusively on type inference + | +LL | fn bar(x: impl Foo) { + | ++++ +help: alternatively, use a trait object to accept any type that implements `Foo`, accessing its methods at runtime using dynamic dispatch + | +LL | fn bar(x: &dyn Foo) { + | ++++ + +error[E0277]: the size for values of type `(dyn Foo + 'static)` cannot be known at compilation time + --> $DIR/feature-gate-unsized_fn_params.rs:20:11 + | +LL | fn foo(x: dyn Foo) { + | ^^^^^^^ doesn't have a size known at compile-time + | + = help: the trait `Sized` is not implemented for `(dyn Foo + 'static)` + = help: unsized fn params are gated as an unstable feature +help: you can use `impl Trait` as the argument type + | +LL - fn foo(x: dyn Foo) { +LL + fn foo(x: impl Foo) { + | +help: function arguments must have a statically known size, borrowed types always have a known size + | +LL | fn foo(x: &dyn Foo) { + | + + +error[E0277]: the size for values of type `[()]` cannot be known at compilation time + --> $DIR/feature-gate-unsized_fn_params.rs:30:11 + | +LL | fn qux(_: [()]) {} + | ^^^^ doesn't have a size known at compile-time + | + = help: the trait `Sized` is not implemented for `[()]` + = help: unsized fn params are gated as an unstable feature +help: function arguments must have a statically known size, borrowed slices always have a known size + | +LL | fn qux(_: &[()]) {} + | + + +error[E0277]: the size for values of type `(dyn Foo + 'static)` cannot be known at compilation time + --> $DIR/feature-gate-unsized_fn_params.rs:34:9 + | +LL | 
foo(*x); + | ^^ doesn't have a size known at compile-time + | + = help: the trait `Sized` is not implemented for `(dyn Foo + 'static)` + = note: all function arguments must have a statically known size + = help: unsized fn params are gated as an unstable feature + +error: aborting due to 4 previous errors + +Some errors have detailed explanations: E0277, E0782. +For more information about an error, try `rustc --explain E0277`. diff --git a/tests/ui/feature-gates/feature-gate-unsized_fn_params.rs b/tests/ui/feature-gates/feature-gate-unsized_fn_params.rs index c04e57843d4b..3c5f932e8918 100644 --- a/tests/ui/feature-gates/feature-gate-unsized_fn_params.rs +++ b/tests/ui/feature-gates/feature-gate-unsized_fn_params.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2021 +//@[edition2015] edition:2015 +//@[edition2021] edition:2021 #![allow(unused, bare_trait_objects)] #[repr(align(256))] struct A { @@ -18,7 +21,9 @@ fn foo(x: dyn Foo) { //~ ERROR [E0277] x.foo() } -fn bar(x: Foo) { //~ ERROR [E0277] +fn bar(x: Foo) { +//[edition2015]~^ ERROR [E0277] +//[edition2021]~^^ ERROR expected a type, found a trait x.foo() } diff --git a/tests/ui/feature-gates/feature-gate-unsized_tuple_coercion.rs b/tests/ui/feature-gates/feature-gate-unsized_tuple_coercion.rs index b5fbcc9ccf8c..c14698637927 100644 --- a/tests/ui/feature-gates/feature-gate-unsized_tuple_coercion.rs +++ b/tests/ui/feature-gates/feature-gate-unsized_tuple_coercion.rs @@ -1,4 +1,4 @@ fn main() { let _ : &(dyn Send,) = &((),); - //~^ ERROR 2:28: 2:34: mismatched types [E0308] + //~^ ERROR mismatched types [E0308] } diff --git a/tests/ui/feature-gates/gated-bad-feature.rs b/tests/ui/feature-gates/gated-bad-feature.rs index 3114f661dc5b..51f2db5556e2 100644 --- a/tests/ui/feature-gates/gated-bad-feature.rs +++ b/tests/ui/feature-gates/gated-bad-feature.rs @@ -1,4 +1,3 @@ -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" 
-> "you are using $$RUSTC_VERSION" #![feature(foo_bar_baz, foo(bar), foo = "baz", foo)] //~^ ERROR malformed `feature` //~| ERROR malformed `feature` diff --git a/tests/ui/feature-gates/gated-bad-feature.stderr b/tests/ui/feature-gates/gated-bad-feature.stderr index 0e75dff14f8a..e0e84d842352 100644 --- a/tests/ui/feature-gates/gated-bad-feature.stderr +++ b/tests/ui/feature-gates/gated-bad-feature.stderr @@ -1,43 +1,43 @@ error[E0556]: malformed `feature` attribute input - --> $DIR/gated-bad-feature.rs:2:25 + --> $DIR/gated-bad-feature.rs:1:25 | LL | #![feature(foo_bar_baz, foo(bar), foo = "baz", foo)] | ^^^^^^^^ help: expected just one word: `foo` error[E0556]: malformed `feature` attribute input - --> $DIR/gated-bad-feature.rs:2:35 + --> $DIR/gated-bad-feature.rs:1:35 | LL | #![feature(foo_bar_baz, foo(bar), foo = "baz", foo)] | ^^^^^^^^^^^ help: expected just one word: `foo` error[E0557]: feature has been removed - --> $DIR/gated-bad-feature.rs:9:12 + --> $DIR/gated-bad-feature.rs:8:12 | LL | #![feature(test_removed_feature)] | ^^^^^^^^^^^^^^^^^^^^ feature has been removed | - = note: removed in 1.0.0 (you are using $RUSTC_VERSION) + = note: removed in 1.0.0 error: malformed `feature` attribute input - --> $DIR/gated-bad-feature.rs:7:1 + --> $DIR/gated-bad-feature.rs:6:1 | LL | #![feature] | ^^^^^^^^^^^ help: must be of the form: `#![feature(name1, name2, ...)]` error: malformed `feature` attribute input - --> $DIR/gated-bad-feature.rs:8:1 + --> $DIR/gated-bad-feature.rs:7:1 | LL | #![feature = "foo"] | ^^^^^^^^^^^^^^^^^^^ help: must be of the form: `#![feature(name1, name2, ...)]` error[E0635]: unknown feature `foo_bar_baz` - --> $DIR/gated-bad-feature.rs:2:12 + --> $DIR/gated-bad-feature.rs:1:12 | LL | #![feature(foo_bar_baz, foo(bar), foo = "baz", foo)] | ^^^^^^^^^^^ error[E0635]: unknown feature `foo` - --> $DIR/gated-bad-feature.rs:2:48 + --> $DIR/gated-bad-feature.rs:1:48 | LL | #![feature(foo_bar_baz, foo(bar), foo = "baz", foo)] | ^^^ diff --git 
a/tests/ui/feature-gates/issue-43106-gating-of-builtin-attrs.stderr b/tests/ui/feature-gates/issue-43106-gating-of-builtin-attrs.stderr index 1c6868dc95d9..d2b1d71ab87c 100644 --- a/tests/ui/feature-gates/issue-43106-gating-of-builtin-attrs.stderr +++ b/tests/ui/feature-gates/issue-43106-gating-of-builtin-attrs.stderr @@ -379,14 +379,6 @@ warning: `#[proc_macro_derive]` only has an effect on functions LL | #![proc_macro_derive()] | ^^^^^^^^^^^^^^^^^^^^^^^ -warning: attribute should be applied to a function definition - --> $DIR/issue-43106-gating-of-builtin-attrs.rs:62:1 - | -LL | #![cold] - | ^^^^^^^^ cannot be applied to crates - | - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - warning: attribute should be applied to an `extern` block with non-Rust ABI --> $DIR/issue-43106-gating-of-builtin-attrs.rs:64:1 | @@ -411,6 +403,14 @@ LL | #![link_section = "1800"] | = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! +warning: attribute should be applied to a function definition + --> $DIR/issue-43106-gating-of-builtin-attrs.rs:62:1 + | +LL | #![cold] + | ^^^^^^^^ cannot be applied to crates + | + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
+ warning: `#[must_use]` has no effect when applied to a module --> $DIR/issue-43106-gating-of-builtin-attrs.rs:72:1 | diff --git a/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.rs b/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.rs index ec6adb471ba5..d8c5f48f9fd9 100644 --- a/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.rs +++ b/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.rs @@ -1,5 +1,3 @@ -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" -> "you are using $$RUSTC_VERSION" - #![feature(external_doc)] //~ ERROR feature has been removed #![doc(include("README.md"))] //~ ERROR unknown `doc` attribute `include` diff --git a/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.stderr b/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.stderr index 43205c7360b2..bd8c56c61c3c 100644 --- a/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.stderr +++ b/tests/ui/feature-gates/removed-features-note-version-and-pr-issue-141619.stderr @@ -1,14 +1,14 @@ error[E0557]: feature has been removed - --> $DIR/removed-features-note-version-and-pr-issue-141619.rs:3:12 + --> $DIR/removed-features-note-version-and-pr-issue-141619.rs:1:12 | LL | #![feature(external_doc)] | ^^^^^^^^^^^^ feature has been removed | - = note: removed in 1.54.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.54.0; see for more information = note: use #[doc = include_str!("filename")] instead, which handles macro invocations error: unknown `doc` attribute `include` - --> $DIR/removed-features-note-version-and-pr-issue-141619.rs:4:8 + --> $DIR/removed-features-note-version-and-pr-issue-141619.rs:2:8 | LL | #![doc(include("README.md"))] | ^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/ui/fmt/format-args-argument-span.stderr b/tests/ui/fmt/format-args-argument-span.stderr 
index 4e2702383d6c..d46cfb438cf6 100644 --- a/tests/ui/fmt/format-args-argument-span.stderr +++ b/tests/ui/fmt/format-args-argument-span.stderr @@ -12,7 +12,9 @@ error[E0277]: `Option<{integer}>` doesn't implement `std::fmt::Display` --> $DIR/format-args-argument-span.rs:15:37 | LL | println!("{x:?} {x} {x:?}", x = Some(1)); - | ^^^^^^^ `Option<{integer}>` cannot be formatted with the default formatter + | --- ^^^^^^^ `Option<{integer}>` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `Option<{integer}>` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead @@ -22,7 +24,7 @@ error[E0277]: `DisplayOnly` doesn't implement `Debug` --> $DIR/format-args-argument-span.rs:18:19 | LL | println!("{x} {x:?} {x}"); - | ^^^^^ `DisplayOnly` cannot be formatted using `{:?}` + | ^^^^^ `DisplayOnly` cannot be formatted using `{:?}` because it doesn't implement `Debug` | = help: the trait `Debug` is not implemented for `DisplayOnly` = note: add `#[derive(Debug)]` to `DisplayOnly` or manually `impl Debug for DisplayOnly` @@ -37,7 +39,9 @@ error[E0277]: `DisplayOnly` doesn't implement `Debug` --> $DIR/format-args-argument-span.rs:20:35 | LL | println!("{x} {x:?} {x}", x = DisplayOnly); - | ^^^^^^^^^^^ `DisplayOnly` cannot be formatted using `{:?}` + | ----- ^^^^^^^^^^^ `DisplayOnly` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = help: the trait `Debug` is not implemented for `DisplayOnly` = note: add `#[derive(Debug)]` to `DisplayOnly` or manually `impl Debug for DisplayOnly` diff --git a/tests/ui/fmt/ifmt-unimpl.stderr b/tests/ui/fmt/ifmt-unimpl.stderr index b8d4425a4a71..5e80f892dcb5 100644 --- a/tests/ui/fmt/ifmt-unimpl.stderr +++ b/tests/ui/fmt/ifmt-unimpl.stderr @@ -4,7 +4,7 @@ error[E0277]: the trait bound `str: UpperHex` is not satisfied LL | format!("{:X}", 
"3"); | ---- ^^^ the trait `UpperHex` is not implemented for `str` | | - | required by a bound introduced by this call + | required by this formatting parameter | = help: the following other types implement trait `UpperHex`: &T @@ -17,8 +17,6 @@ LL | format!("{:X}", "3"); i32 and 9 others = note: required for `&str` to implement `UpperHex` -note: required by a bound in `core::fmt::rt::Argument::<'_>::new_upper_hex` - --> $SRC_DIR/core/src/fmt/rt.rs:LL:COL = note: this error originates in the macro `$crate::__export::format_args` which comes from the expansion of the macro `format` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 1 previous error diff --git a/tests/ui/fmt/non-source-literals.rs b/tests/ui/fmt/non-source-literals.rs new file mode 100644 index 000000000000..e3ffdb40a6b7 --- /dev/null +++ b/tests/ui/fmt/non-source-literals.rs @@ -0,0 +1,13 @@ +/// Do not point at the format string if it wasn't written in the source. +//@ forbid-output: required by this formatting parameter + +#[derive(Debug)] +pub struct NonDisplay; +pub struct NonDebug; + +fn main() { + let _ = format!(concat!("{", "}"), NonDisplay); //~ ERROR + let _ = format!(concat!("{", "0", "}"), NonDisplay); //~ ERROR + let _ = format!(concat!("{:", "?}"), NonDebug); //~ ERROR + let _ = format!(concat!("{", "0", ":?}"), NonDebug); //~ ERROR +} diff --git a/tests/ui/fmt/non-source-literals.stderr b/tests/ui/fmt/non-source-literals.stderr new file mode 100644 index 000000000000..5f8a6200dab7 --- /dev/null +++ b/tests/ui/fmt/non-source-literals.stderr @@ -0,0 +1,53 @@ +error[E0277]: `NonDisplay` doesn't implement `std::fmt::Display` + --> $DIR/non-source-literals.rs:9:40 + | +LL | let _ = format!(concat!("{", "}"), NonDisplay); + | ^^^^^^^^^^ `NonDisplay` cannot be formatted with the default formatter + | + = help: the trait `std::fmt::Display` is not implemented for `NonDisplay` + = note: in format strings you may be able to use `{:?}` (or {:#?} for 
pretty-print) instead + = note: this error originates in the macro `$crate::__export::format_args` which comes from the expansion of the macro `format` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0277]: `NonDisplay` doesn't implement `std::fmt::Display` + --> $DIR/non-source-literals.rs:10:45 + | +LL | let _ = format!(concat!("{", "0", "}"), NonDisplay); + | ^^^^^^^^^^ `NonDisplay` cannot be formatted with the default formatter + | + = help: the trait `std::fmt::Display` is not implemented for `NonDisplay` + = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead + = note: this error originates in the macro `$crate::__export::format_args` which comes from the expansion of the macro `format` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0277]: `NonDebug` doesn't implement `Debug` + --> $DIR/non-source-literals.rs:11:42 + | +LL | let _ = format!(concat!("{:", "?}"), NonDebug); + | ^^^^^^^^ `NonDebug` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | + = help: the trait `Debug` is not implemented for `NonDebug` + = note: add `#[derive(Debug)]` to `NonDebug` or manually `impl Debug for NonDebug` + = note: this error originates in the macro `$crate::__export::format_args` which comes from the expansion of the macro `format` (in Nightly builds, run with -Z macro-backtrace for more info) +help: consider annotating `NonDebug` with `#[derive(Debug)]` + | +LL + #[derive(Debug)] +LL | pub struct NonDebug; + | + +error[E0277]: `NonDebug` doesn't implement `Debug` + --> $DIR/non-source-literals.rs:12:47 + | +LL | let _ = format!(concat!("{", "0", ":?}"), NonDebug); + | ^^^^^^^^ `NonDebug` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | + = help: the trait `Debug` is not implemented for `NonDebug` + = note: add `#[derive(Debug)]` to `NonDebug` or manually `impl Debug for NonDebug` + = note: this error originates in the macro 
`$crate::__export::format_args` which comes from the expansion of the macro `format` (in Nightly builds, run with -Z macro-backtrace for more info) +help: consider annotating `NonDebug` with `#[derive(Debug)]` + | +LL + #[derive(Debug)] +LL | pub struct NonDebug; + | + +error: aborting due to 4 previous errors + +For more information about this error, try `rustc --explain E0277`. diff --git a/tests/ui/fn/error-recovery-mismatch.stderr b/tests/ui/fn/error-recovery-mismatch.stderr index f281e77f13b9..c046302cb91c 100644 --- a/tests/ui/fn/error-recovery-mismatch.stderr +++ b/tests/ui/fn/error-recovery-mismatch.stderr @@ -34,12 +34,6 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures | LL | fn fold(&self, _: T, &self._) {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn fold(&self, _: T, &self._) {} -LL + fn fold(&self, _: T, &self.U) {} - | error: aborting due to 4 previous errors; 1 warning emitted diff --git a/tests/ui/generic-associated-types/generic-associated-types-where.stderr b/tests/ui/generic-associated-types/generic-associated-types-where.stderr index 7dce34650d78..637f86f7bec2 100644 --- a/tests/ui/generic-associated-types/generic-associated-types-where.stderr +++ b/tests/ui/generic-associated-types/generic-associated-types-where.stderr @@ -2,9 +2,8 @@ error[E0277]: `T` doesn't implement `std::fmt::Display` --> $DIR/generic-associated-types-where.rs:18:22 | LL | type Assoc2 = Vec; - | ^^^^^^ `T` cannot be formatted with the default formatter + | ^^^^^^ the trait `std::fmt::Display` is not implemented for `T` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead help: consider restricting type parameter `T` with trait `Display` | LL | type Assoc2 = Vec; diff --git a/tests/ui/generic-const-items/assoc-const-AnonConst-ice-108220.rs b/tests/ui/generic-const-items/assoc-const-AnonConst-ice-108220.rs deleted file mode 100644 index 
f5babb67b563..000000000000 --- a/tests/ui/generic-const-items/assoc-const-AnonConst-ice-108220.rs +++ /dev/null @@ -1,35 +0,0 @@ -// ICE assertion failed: matches!(self.def_kind(ct.def.did), DefKind :: AnonConst) -// issue: rust-lang/rust#108220 -//@ check-pass - -#![feature(associated_const_equality)] -#![allow(unused)] - -use std::marker::PhantomData; - -pub struct NoPin; - -pub trait SetAlternate {} - -impl SetAlternate<0> for NoPin {} - -pub trait PinA { - const A: u8; -} - -impl PinA for NoPin { - const A: u8 = 0; -} - -pub trait Pins {} - -impl Pins for T where - T: PinA + SetAlternate -{ -} - -struct Serial(PhantomData); - -impl Serial where NoPin: Pins {} - -fn main() {} diff --git a/tests/ui/generics/default-type-params-well-formedness.rs b/tests/ui/generics/default-type-params-well-formedness.rs new file mode 100644 index 000000000000..22b8f5011f7e --- /dev/null +++ b/tests/ui/generics/default-type-params-well-formedness.rs @@ -0,0 +1,50 @@ +//! Test for well-formedness checking of default type parameters. +//! +//! Regression Test for: https://github.com/rust-lang/rust/issues/49344 + +//@ run-pass + +#![allow(dead_code)] + +trait Trait {} +struct Foo(U, V) +where + U: Trait; + +trait Marker {} +struct TwoParams(T, U); +impl Marker for TwoParams {} + +// Clauses with more than 1 param are not checked. +struct IndividuallyBogus(TwoParams) +where + TwoParams: Marker; + +struct BogusTogether(T, U) +where + TwoParams: Marker; + +// Clauses with non-defaulted params are not checked. +struct NonDefaultedInClause(TwoParams) +where + TwoParams: Marker; + +struct DefaultedLhs(U, V) +where + V: Trait; + +// Dependent defaults are not checked. +struct Dependent(T, U) +where + U: Copy; + +trait SelfBound {} + +// Not even for well-formedness. +struct WellFormedProjection::Item>(A, T); + +// Issue #49344, predicates with lifetimes should not be checked. 
+trait Scope<'a> {} +struct Request<'a, S: Scope<'a> = i32>(S, &'a ()); + +fn main() {} diff --git a/tests/ui/generics/generic-extern-lifetime.stderr b/tests/ui/generics/generic-extern-lifetime.stderr index 33332e760f58..6f9b496f1cd7 100644 --- a/tests/ui/generics/generic-extern-lifetime.stderr +++ b/tests/ui/generics/generic-extern-lifetime.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/generic-extern-lifetime.rs:6:26 | LL | pub fn life2<'b>(x: &'a i32, y: &'b i32); - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `'a,` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | pub fn life2<'a, 'b>(x: &'a i32, y: &'b i32); + | +++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/generic-extern-lifetime.rs:8:37 diff --git a/tests/ui/generics/impl-block-params-declared-in-wrong-spot-issue-113073.stderr b/tests/ui/generics/impl-block-params-declared-in-wrong-spot-issue-113073.stderr index c60c4c72a213..33d0c9c97079 100644 --- a/tests/ui/generics/impl-block-params-declared-in-wrong-spot-issue-113073.stderr +++ b/tests/ui/generics/impl-block-params-declared-in-wrong-spot-issue-113073.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/impl-block-params-declared-in-wrong-spot-issue-113073.rs:7:13 | LL | impl Foo for u8 {} - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | impl<'a> Foo for u8 {} + | ++++ error[E0229]: associated item constraints are not allowed here --> $DIR/impl-block-params-declared-in-wrong-spot-issue-113073.rs:3:10 diff --git a/tests/ui/generics/overlapping-errors-span-issue-123861.stderr b/tests/ui/generics/overlapping-errors-span-issue-123861.stderr index 9622dffda9f8..7d08d8fed9f9 100644 --- a/tests/ui/generics/overlapping-errors-span-issue-123861.stderr +++ 
b/tests/ui/generics/overlapping-errors-span-issue-123861.stderr @@ -30,12 +30,6 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures | LL | fn mainIterator<_ = _> {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn mainIterator<_ = _> {} -LL + fn mainIterator {} - | error: aborting due to 4 previous errors diff --git a/tests/ui/hygiene/no_implicit_prelude.stderr b/tests/ui/hygiene/no_implicit_prelude.stderr index 5de6e3db327b..42049da23eb5 100644 --- a/tests/ui/hygiene/no_implicit_prelude.stderr +++ b/tests/ui/hygiene/no_implicit_prelude.stderr @@ -23,8 +23,6 @@ LL | ().clone() | ^^^^^ | = help: items from traits can only be used if the trait is in scope -help: there is a method `clone_from` with a similar name, but with different arguments - --> $SRC_DIR/core/src/clone.rs:LL:COL = note: this error originates in the macro `::bar::m` (in Nightly builds, run with -Z macro-backtrace for more info) help: trait `Clone` which provides `clone` is implemented but not in scope; perhaps you want to import it | diff --git a/tests/ui/impl-header-lifetime-elision/assoc-type.rs b/tests/ui/impl-header-lifetime-elision/assoc-type.rs index db3c416540fc..14b2ea647f19 100644 --- a/tests/ui/impl-header-lifetime-elision/assoc-type.rs +++ b/tests/ui/impl-header-lifetime-elision/assoc-type.rs @@ -9,7 +9,7 @@ trait MyTrait { impl MyTrait for &i32 { type Output = &i32; - //~^ ERROR 11:19: 11:20: in the trait associated type is declared without lifetime parameters, so using a borrowed type for them requires that lifetime to come from the implemented type + //~^ ERROR in the trait associated type is declared without lifetime parameters, so using a borrowed type for them requires that lifetime to come from the implemented type } impl MyTrait for &u32 { diff --git a/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.stderr b/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.edition2015.stderr 
similarity index 93% rename from tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.stderr rename to tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.edition2015.stderr index a2d00edbb6d9..68b4e2ed39fb 100644 --- a/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.stderr +++ b/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.edition2015.stderr @@ -1,5 +1,5 @@ error[E0700]: hidden type for `impl Sized + 'a` captures lifetime that does not appear in bounds - --> $DIR/rpit-hidden-erased-unsoundness.rs:16:5 + --> $DIR/rpit-hidden-erased-unsoundness.rs:19:5 | LL | fn step2<'a, 'b: 'a>() -> impl Sized + 'a { | -- --------------- opaque type defined here diff --git a/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.edition2024.stderr b/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.edition2024.stderr new file mode 100644 index 000000000000..6c0c17895827 --- /dev/null +++ b/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.edition2024.stderr @@ -0,0 +1,10 @@ +error: lifetime may not live long enough + --> $DIR/rpit-hidden-erased-unsoundness.rs:24:5 + | +LL | fn step3<'a, 'b: 'a>() -> impl Send + 'a { + | -- lifetime `'b` defined here +LL | step2::<'a, 'b>() + | ^^^^^^^^^^^^^^^^^ returning this value requires that `'b` must outlive `'static` + +error: aborting due to 1 previous error + diff --git a/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.rs b/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.rs index 6863a3c73bad..3338063d8c68 100644 --- a/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.rs +++ b/tests/ui/impl-trait/alias-liveness/rpit-hidden-erased-unsoundness.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 // This test should never pass! 
#![feature(type_alias_impl_trait)] @@ -14,11 +17,12 @@ fn step1<'a, 'b: 'a>() -> impl Sized + Captures<'b> + 'a { fn step2<'a, 'b: 'a>() -> impl Sized + 'a { step1::<'a, 'b>() - //~^ ERROR hidden type for `impl Sized + 'a` captures lifetime that does not appear in bounds + //[edition2015]~^ ERROR hidden type for `impl Sized + 'a` captures lifetime that does not appear in bounds } fn step3<'a, 'b: 'a>() -> impl Send + 'a { step2::<'a, 'b>() + //[edition2024]~^ ERROR lifetime may not live long enough // This should not be Send unless `'b: 'static` } diff --git a/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.stderr b/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.edition2015.stderr similarity index 94% rename from tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.stderr rename to tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.edition2015.stderr index a1e92e533846..769a878a45c7 100644 --- a/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.stderr +++ b/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.edition2015.stderr @@ -1,5 +1,5 @@ error[E0700]: hidden type for `impl Swap + 'a` captures lifetime that does not appear in bounds - --> $DIR/rpit-hide-lifetime-for-swap.rs:17:5 + --> $DIR/rpit-hide-lifetime-for-swap.rs:20:5 | LL | fn hide<'a, 'b: 'a, T: 'static>(x: Rc>) -> impl Swap + 'a { | -- -------------- opaque type defined here diff --git a/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.edition2024.stderr b/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.edition2024.stderr new file mode 100644 index 000000000000..6109184250bb --- /dev/null +++ b/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.edition2024.stderr @@ -0,0 +1,17 @@ +error[E0597]: `x` does not live long enough + --> $DIR/rpit-hide-lifetime-for-swap.rs:27:38 + | +LL | let x = [1, 2, 3]; + | - binding `x` declared here +LL | let short = Rc::new(RefCell::new(&x)); + | ^^ 
borrowed value does not live long enough +... +LL | let res: &'static [i32; 3] = *long.borrow(); + | ----------------- type annotation requires that `x` is borrowed for `'static` +LL | res +LL | } + | - `x` dropped here while still borrowed + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0597`. diff --git a/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.rs b/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.rs index 4de2ffbb8087..c4eaec478b84 100644 --- a/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.rs +++ b/tests/ui/impl-trait/alias-liveness/rpit-hide-lifetime-for-swap.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 // This test should never pass! use std::cell::RefCell; @@ -15,13 +18,14 @@ impl Swap for Rc> { fn hide<'a, 'b: 'a, T: 'static>(x: Rc>) -> impl Swap + 'a { x - //~^ ERROR hidden type for `impl Swap + 'a` captures lifetime that does not appear in bounds + //[edition2015]~^ ERROR hidden type for `impl Swap + 'a` captures lifetime that does not appear in bounds } fn dangle() -> &'static [i32; 3] { let long = Rc::new(RefCell::new(&[4, 5, 6])); let x = [1, 2, 3]; let short = Rc::new(RefCell::new(&x)); + //[edition2024]~^ ERROR `x` does not live long enough hide(long.clone()).swap(hide(short)); let res: &'static [i32; 3] = *long.borrow(); res diff --git a/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.stderr b/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.edition2015.stderr similarity index 92% rename from tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.stderr rename to tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.edition2015.stderr index 304d7d43b78b..64f0b201ca28 100644 --- a/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.stderr +++ b/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.edition2015.stderr @@ -1,5 +1,5 @@ 
error[E0277]: the size for values of type `(dyn Trait + 'static)` cannot be known at compilation time - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:7:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:10:13 | LL | fn fuz() -> (usize, Trait) { (42, Struct) } | ^^^^^^^^^^^^^^ doesn't have a size known at compile-time @@ -9,7 +9,7 @@ LL | fn fuz() -> (usize, Trait) { (42, Struct) } = note: the return type of a function must have a statically known size error[E0277]: the size for values of type `(dyn Trait + 'static)` cannot be known at compilation time - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:11:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:13 | LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } | ^^^^^^^^^^^^^^^^^^ doesn't have a size known at compile-time @@ -19,7 +19,7 @@ LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } = note: the return type of a function must have a statically known size error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:19:13 | LL | fn bap() -> Trait { Struct } | ^^^^^ doesn't have a size known at compile-time @@ -34,7 +34,7 @@ LL | fn bap() -> Box { Box::new(Struct) } | +++++++ + +++++++++ + error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:17:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:22:13 | LL | fn ban() -> dyn Trait { Struct } | ^^^^^^^^^ doesn't have a size known at compile-time @@ -50,7 +50,7 @@ LL | fn ban() -> Box { Box::new(Struct) } | ++++ + +++++++++ + error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:19:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:24:13 | LL | fn bak() -> dyn Trait { unimplemented!() } | ^^^^^^^^^ doesn't have a size known at compile-time @@ -66,7 +66,7 @@ LL | 
fn bak() -> Box { Box::new(unimplemented!()) } | ++++ + +++++++++ + error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:21:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:26:13 | LL | fn bal() -> dyn Trait { | ^^^^^^^^^ doesn't have a size known at compile-time @@ -86,7 +86,7 @@ LL ~ Box::new(42) | error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:27:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:32:13 | LL | fn bax() -> dyn Trait { | ^^^^^^^^^ doesn't have a size known at compile-time @@ -106,7 +106,7 @@ LL ~ Box::new(42) | error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:62:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:67:13 | LL | fn bat() -> dyn Trait { | ^^^^^^^^^ doesn't have a size known at compile-time @@ -126,7 +126,7 @@ LL ~ Box::new(42) | error[E0746]: return type cannot be a trait object without pointer indirection - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:68:13 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:73:13 | LL | fn bay() -> dyn Trait { | ^^^^^^^^^ doesn't have a size known at compile-time @@ -146,7 +146,7 @@ LL ~ Box::new(42) | error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:7:35 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:10:35 | LL | fn fuz() -> (usize, Trait) { (42, Struct) } | ^^^^^^ expected `dyn Trait`, found `Struct` @@ -156,7 +156,7 @@ LL | fn fuz() -> (usize, Trait) { (42, Struct) } = help: `Struct` implements `Trait` so you could box the found value and coerce it to the trait object `Box`, you will have to change the expected type as well error[E0277]: the size for values of type `(dyn Trait + 'static)` cannot be known at compilation time - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:7:30 + --> 
$DIR/dyn-trait-return-should-be-impl-trait.rs:10:30 | LL | fn fuz() -> (usize, Trait) { (42, Struct) } | ^^^^^^^^^^^^ doesn't have a size known at compile-time @@ -166,7 +166,7 @@ LL | fn fuz() -> (usize, Trait) { (42, Struct) } = note: tuples must have a statically known size to be initialized error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:11:39 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:39 | LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } | ^^^^^^ expected `dyn Trait`, found `Struct` @@ -176,7 +176,7 @@ LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } = help: `Struct` implements `Trait` so you could box the found value and coerce it to the trait object `Box`, you will have to change the expected type as well error[E0277]: the size for values of type `(dyn Trait + 'static)` cannot be known at compilation time - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:11:34 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:34 | LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } | ^^^^^^^^^^^^ doesn't have a size known at compile-time @@ -186,7 +186,7 @@ LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } = note: tuples must have a statically known size to be initialized error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:36:16 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:41:16 | LL | fn bam() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -203,7 +203,7 @@ LL | return Box::new(Struct); | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:38:5 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:43:5 | LL | fn bam() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -220,7 +220,7 @@ LL | Box::new(42) | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:42:16 + --> 
$DIR/dyn-trait-return-should-be-impl-trait.rs:47:16 | LL | fn baq() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -237,7 +237,7 @@ LL | return Box::new(0); | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:44:5 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:49:5 | LL | fn baq() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -254,7 +254,7 @@ LL | Box::new(42) | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:48:9 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:53:9 | LL | fn baz() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -271,7 +271,7 @@ LL | Box::new(Struct) | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:50:9 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:55:9 | LL | fn baz() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -288,7 +288,7 @@ LL | Box::new(42) | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:55:9 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:60:9 | LL | fn baw() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type @@ -305,7 +305,7 @@ LL | Box::new(0) | +++++++++ + error[E0308]: mismatched types - --> $DIR/dyn-trait-return-should-be-impl-trait.rs:57:9 + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:62:9 | LL | fn baw() -> Box { | -------------- expected `Box<(dyn Trait + 'static)>` because of return type diff --git a/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.edition2021.stderr b/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.edition2021.stderr new file mode 100644 index 000000000000..5811431b4940 --- /dev/null +++ b/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.edition2021.stderr @@ -0,0 +1,308 
@@ +error[E0782]: expected a type, found a trait + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:10:21 + | +LL | fn fuz() -> (usize, Trait) { (42, Struct) } + | ^^^^^ + | +help: you can add the `dyn` keyword if you want a trait object + | +LL | fn fuz() -> (usize, dyn Trait) { (42, Struct) } + | +++ + +error[E0277]: the size for values of type `(dyn Trait + 'static)` cannot be known at compilation time + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:13 + | +LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } + | ^^^^^^^^^^^^^^^^^^ doesn't have a size known at compile-time + | + = help: within `(usize, (dyn Trait + 'static))`, the trait `Sized` is not implemented for `(dyn Trait + 'static)` + = note: required because it appears within the type `(usize, (dyn Trait + 'static))` + = note: the return type of a function must have a statically known size + +error[E0782]: expected a type, found a trait + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:19:13 + | +LL | fn bap() -> Trait { Struct } + | ^^^^^ + | +help: use `impl Trait` to return an opaque type, as long as you return a single underlying type + | +LL | fn bap() -> impl Trait { Struct } + | ++++ +help: alternatively, you can return an owned trait object + | +LL | fn bap() -> Box { Struct } + | +++++++ + + +error[E0746]: return type cannot be a trait object without pointer indirection + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:22:13 + | +LL | fn ban() -> dyn Trait { Struct } + | ^^^^^^^^^ doesn't have a size known at compile-time + | +help: consider returning an `impl Trait` instead of a `dyn Trait` + | +LL - fn ban() -> dyn Trait { Struct } +LL + fn ban() -> impl Trait { Struct } + | +help: alternatively, box the return type, and wrap all of the returned values in `Box::new` + | +LL | fn ban() -> Box { Box::new(Struct) } + | ++++ + +++++++++ + + +error[E0746]: return type cannot be a trait object without pointer indirection + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:24:13 + | +LL 
| fn bak() -> dyn Trait { unimplemented!() } + | ^^^^^^^^^ doesn't have a size known at compile-time + | +help: consider returning an `impl Trait` instead of a `dyn Trait` + | +LL - fn bak() -> dyn Trait { unimplemented!() } +LL + fn bak() -> impl Trait { unimplemented!() } + | +help: alternatively, box the return type, and wrap all of the returned values in `Box::new` + | +LL | fn bak() -> Box { Box::new(unimplemented!()) } + | ++++ + +++++++++ + + +error[E0746]: return type cannot be a trait object without pointer indirection + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:26:13 + | +LL | fn bal() -> dyn Trait { + | ^^^^^^^^^ doesn't have a size known at compile-time + | +help: consider returning an `impl Trait` instead of a `dyn Trait` + | +LL - fn bal() -> dyn Trait { +LL + fn bal() -> impl Trait { + | +help: alternatively, box the return type, and wrap all of the returned values in `Box::new` + | +LL ~ fn bal() -> Box { +LL | if true { +LL ~ return Box::new(Struct); +LL | } +LL ~ Box::new(42) + | + +error[E0746]: return type cannot be a trait object without pointer indirection + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:32:13 + | +LL | fn bax() -> dyn Trait { + | ^^^^^^^^^ doesn't have a size known at compile-time + | +help: consider returning an `impl Trait` instead of a `dyn Trait` + | +LL - fn bax() -> dyn Trait { +LL + fn bax() -> impl Trait { + | +help: alternatively, box the return type, and wrap all of the returned values in `Box::new` + | +LL ~ fn bax() -> Box { +LL | if true { +LL ~ Box::new(Struct) +LL | } else { +LL ~ Box::new(42) + | + +error[E0746]: return type cannot be a trait object without pointer indirection + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:67:13 + | +LL | fn bat() -> dyn Trait { + | ^^^^^^^^^ doesn't have a size known at compile-time + | +help: consider returning an `impl Trait` instead of a `dyn Trait` + | +LL - fn bat() -> dyn Trait { +LL + fn bat() -> impl Trait { + | +help: alternatively, box the return 
type, and wrap all of the returned values in `Box::new` + | +LL ~ fn bat() -> Box { +LL | if true { +LL ~ return Box::new(0); +LL | } +LL ~ Box::new(42) + | + +error[E0746]: return type cannot be a trait object without pointer indirection + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:73:13 + | +LL | fn bay() -> dyn Trait { + | ^^^^^^^^^ doesn't have a size known at compile-time + | +help: consider returning an `impl Trait` instead of a `dyn Trait` + | +LL - fn bay() -> dyn Trait { +LL + fn bay() -> impl Trait { + | +help: alternatively, box the return type, and wrap all of the returned values in `Box::new` + | +LL ~ fn bay() -> Box { +LL | if true { +LL ~ Box::new(0) +LL | } else { +LL ~ Box::new(42) + | + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:39 + | +LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } + | ^^^^^^ expected `dyn Trait`, found `Struct` + | + = note: expected trait object `(dyn Trait + 'static)` + found struct `Struct` + = help: `Struct` implements `Trait` so you could box the found value and coerce it to the trait object `Box`, you will have to change the expected type as well + +error[E0277]: the size for values of type `(dyn Trait + 'static)` cannot be known at compilation time + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:15:34 + | +LL | fn bar() -> (usize, dyn Trait) { (42, Struct) } + | ^^^^^^^^^^^^ doesn't have a size known at compile-time + | + = help: within `(usize, (dyn Trait + 'static))`, the trait `Sized` is not implemented for `(dyn Trait + 'static)` + = note: required because it appears within the type `(usize, (dyn Trait + 'static))` + = note: tuples must have a statically known size to be initialized + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:41:16 + | +LL | fn bam() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +LL | if true { +LL | return Struct; + | ^^^^^^ expected `Box`, found `Struct` 
+ | + = note: expected struct `Box<(dyn Trait + 'static)>` + found struct `Struct` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | return Box::new(Struct); + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:43:5 + | +LL | fn bam() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +... +LL | 42 + | ^^ expected `Box`, found integer + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found type `{integer}` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | Box::new(42) + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:47:16 + | +LL | fn baq() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +LL | if true { +LL | return 0; + | ^ expected `Box`, found integer + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found type `{integer}` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | return Box::new(0); + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:49:5 + | +LL | fn baq() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +... 
+LL | 42 + | ^^ expected `Box`, found integer + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found type `{integer}` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | Box::new(42) + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:53:9 + | +LL | fn baz() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +LL | if true { +LL | Struct + | ^^^^^^ expected `Box`, found `Struct` + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found struct `Struct` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | Box::new(Struct) + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:55:9 + | +LL | fn baz() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +... 
+LL | 42 + | ^^ expected `Box`, found integer + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found type `{integer}` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | Box::new(42) + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:60:9 + | +LL | fn baw() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +LL | if true { +LL | 0 + | ^ expected `Box`, found integer + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found type `{integer}` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | Box::new(0) + | +++++++++ + + +error[E0308]: mismatched types + --> $DIR/dyn-trait-return-should-be-impl-trait.rs:62:9 + | +LL | fn baw() -> Box { + | -------------- expected `Box<(dyn Trait + 'static)>` because of return type +... +LL | 42 + | ^^ expected `Box`, found integer + | + = note: expected struct `Box<(dyn Trait + 'static)>` + found type `{integer}` + = note: for more on the distinction between the stack and the heap, read https://doc.rust-lang.org/book/ch15-01-box.html, https://doc.rust-lang.org/rust-by-example/std/box.html, and https://doc.rust-lang.org/std/boxed/index.html +help: store this in the heap by calling `Box::new` + | +LL | Box::new(42) + | +++++++++ + + +error: aborting due to 19 previous errors + +Some errors have detailed explanations: E0277, E0308, E0746, E0782. +For more information about an error, try `rustc --explain E0277`. 
diff --git a/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.rs b/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.rs index ccf0a1ad3d44..aa1f871d8eaa 100644 --- a/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.rs +++ b/tests/ui/impl-trait/dyn-trait-return-should-be-impl-trait.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2021 +//@[edition2015] edition:2015 +//@[edition2021] edition:2021 #![allow(bare_trait_objects)] struct Struct; trait Trait {} @@ -5,15 +8,17 @@ impl Trait for Struct {} impl Trait for u32 {} fn fuz() -> (usize, Trait) { (42, Struct) } -//~^ ERROR E0277 -//~| ERROR E0277 -//~| ERROR E0308 +//[edition2015]~^ ERROR E0277 +//[edition2015]~| ERROR E0277 +//[edition2015]~| ERROR E0308 +//[edition2021]~^^^^ ERROR expected a type, found a trait fn bar() -> (usize, dyn Trait) { (42, Struct) } //~^ ERROR E0277 //~| ERROR E0277 //~| ERROR E0308 fn bap() -> Trait { Struct } -//~^ ERROR E0746 +//[edition2015]~^ ERROR E0746 +//[edition2021]~^^ ERROR expected a type, found a trait fn ban() -> dyn Trait { Struct } //~^ ERROR E0746 fn bak() -> dyn Trait { unimplemented!() } //~ ERROR E0746 diff --git a/tests/ui/impl-trait/extra-impl-in-trait-impl.fixed b/tests/ui/impl-trait/extra-impl-in-trait-impl.fixed index 886fc1d00580..d8eceeff6788 100644 --- a/tests/ui/impl-trait/extra-impl-in-trait-impl.fixed +++ b/tests/ui/impl-trait/extra-impl-in-trait-impl.fixed @@ -1,5 +1,6 @@ //@ run-rustfix +#![allow(dead_code)] struct S(T); struct S2; diff --git a/tests/ui/impl-trait/extra-impl-in-trait-impl.rs b/tests/ui/impl-trait/extra-impl-in-trait-impl.rs index f3271993867c..c2e511c0d055 100644 --- a/tests/ui/impl-trait/extra-impl-in-trait-impl.rs +++ b/tests/ui/impl-trait/extra-impl-in-trait-impl.rs @@ -1,5 +1,6 @@ //@ run-rustfix +#![allow(dead_code)] struct S(T); struct S2; diff --git a/tests/ui/impl-trait/extra-impl-in-trait-impl.stderr b/tests/ui/impl-trait/extra-impl-in-trait-impl.stderr index 5aafc8b64d4f..22e68463a8cf 100644 --- 
a/tests/ui/impl-trait/extra-impl-in-trait-impl.stderr +++ b/tests/ui/impl-trait/extra-impl-in-trait-impl.stderr @@ -1,23 +1,23 @@ error: unexpected `impl` keyword - --> $DIR/extra-impl-in-trait-impl.rs:6:18 + --> $DIR/extra-impl-in-trait-impl.rs:7:18 | LL | impl impl Default for S { | ^^^^^ help: remove the extra `impl` | note: this is parsed as an `impl Trait` type, but a trait is expected at this position - --> $DIR/extra-impl-in-trait-impl.rs:6:18 + --> $DIR/extra-impl-in-trait-impl.rs:7:18 | LL | impl impl Default for S { | ^^^^^^^^^^^^ error: unexpected `impl` keyword - --> $DIR/extra-impl-in-trait-impl.rs:12:6 + --> $DIR/extra-impl-in-trait-impl.rs:13:6 | LL | impl impl Default for S2 { | ^^^^^ help: remove the extra `impl` | note: this is parsed as an `impl Trait` type, but a trait is expected at this position - --> $DIR/extra-impl-in-trait-impl.rs:12:6 + --> $DIR/extra-impl-in-trait-impl.rs:13:6 | LL | impl impl Default for S2 { | ^^^^^^^^^^^^ diff --git a/tests/ui/impl-trait/hidden-lifetimes.stderr b/tests/ui/impl-trait/hidden-lifetimes.edition2015.stderr similarity index 95% rename from tests/ui/impl-trait/hidden-lifetimes.stderr rename to tests/ui/impl-trait/hidden-lifetimes.edition2015.stderr index 70d8c816ecb4..b63115f76588 100644 --- a/tests/ui/impl-trait/hidden-lifetimes.stderr +++ b/tests/ui/impl-trait/hidden-lifetimes.edition2015.stderr @@ -1,5 +1,5 @@ error[E0700]: hidden type for `impl Swap + 'a` captures lifetime that does not appear in bounds - --> $DIR/hidden-lifetimes.rs:29:5 + --> $DIR/hidden-lifetimes.rs:33:5 | LL | fn hide_ref<'a, 'b, T: 'static>(x: &'a mut &'b T) -> impl Swap + 'a { | -- -------------- opaque type defined here @@ -14,7 +14,7 @@ LL | fn hide_ref<'a, 'b, T: 'static>(x: &'a mut &'b T) -> impl Swap + 'a + use<' | ++++++++++++++++ error[E0700]: hidden type for `impl Swap + 'a` captures lifetime that does not appear in bounds - --> $DIR/hidden-lifetimes.rs:46:5 + --> $DIR/hidden-lifetimes.rs:50:5 | LL | fn hide_rc_refcell<'a, 
'b: 'a, T: 'static>(x: Rc>) -> impl Swap + 'a { | -- -------------- opaque type defined here diff --git a/tests/ui/impl-trait/hidden-lifetimes.edition2024.stderr b/tests/ui/impl-trait/hidden-lifetimes.edition2024.stderr new file mode 100644 index 000000000000..d585bb50b13f --- /dev/null +++ b/tests/ui/impl-trait/hidden-lifetimes.edition2024.stderr @@ -0,0 +1,26 @@ +error[E0515]: cannot return value referencing local variable `x` + --> $DIR/hidden-lifetimes.rs:41:5 + | +LL | hide_ref(&mut res).swap(hide_ref(&mut &x)); + | -- `x` is borrowed here +LL | res + | ^^^ returns a value referencing data owned by the current function + +error[E0597]: `x` does not live long enough + --> $DIR/hidden-lifetimes.rs:57:38 + | +LL | let x = [1, 2, 3]; + | - binding `x` declared here +LL | let short = Rc::new(RefCell::new(&x)); + | ^^ borrowed value does not live long enough +LL | hide_rc_refcell(long.clone()).swap(hide_rc_refcell(short)); +LL | let res: &'static [i32; 3] = *long.borrow(); + | ----------------- type annotation requires that `x` is borrowed for `'static` +LL | res +LL | } + | - `x` dropped here while still borrowed + +error: aborting due to 2 previous errors + +Some errors have detailed explanations: E0515, E0597. +For more information about an error, try `rustc --explain E0515`. diff --git a/tests/ui/impl-trait/hidden-lifetimes.rs b/tests/ui/impl-trait/hidden-lifetimes.rs index ae07c8927686..b50c43bd3fa0 100644 --- a/tests/ui/impl-trait/hidden-lifetimes.rs +++ b/tests/ui/impl-trait/hidden-lifetimes.rs @@ -1,3 +1,7 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 + // Test to show what happens if we were not careful and allowed invariant // lifetimes to escape though an impl trait. // @@ -27,14 +31,14 @@ impl Swap for Rc> { // `&'a mut &'l T` are the same type. 
fn hide_ref<'a, 'b, T: 'static>(x: &'a mut &'b T) -> impl Swap + 'a { x - //~^ ERROR hidden type + //[edition2015]~^ ERROR hidden type } fn dangle_ref() -> &'static [i32; 3] { let mut res = &[4, 5, 6]; let x = [1, 2, 3]; hide_ref(&mut res).swap(hide_ref(&mut &x)); - res + res //[edition2024]~ ERROR cannot return value referencing local variable `x` } // Here we are hiding `'b` making the caller believe that `Rc>` @@ -44,13 +48,13 @@ fn dangle_ref() -> &'static [i32; 3] { // only has a single lifetime. fn hide_rc_refcell<'a, 'b: 'a, T: 'static>(x: Rc>) -> impl Swap + 'a { x - //~^ ERROR hidden type + //[edition2015]~^ ERROR hidden type } fn dangle_rc_refcell() -> &'static [i32; 3] { let long = Rc::new(RefCell::new(&[4, 5, 6])); let x = [1, 2, 3]; - let short = Rc::new(RefCell::new(&x)); + let short = Rc::new(RefCell::new(&x)); //[edition2024]~ ERROR `x` does not live long enough hide_rc_refcell(long.clone()).swap(hide_rc_refcell(short)); let res: &'static [i32; 3] = *long.borrow(); res diff --git a/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.stderr b/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.edition2015.stderr similarity index 91% rename from tests/ui/impl-trait/impl-fn-hrtb-bounds-2.stderr rename to tests/ui/impl-trait/impl-fn-hrtb-bounds-2.edition2015.stderr index 4e453c108d4b..4ba59826231c 100644 --- a/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.stderr +++ b/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.edition2015.stderr @@ -1,5 +1,5 @@ error[E0700]: hidden type for `impl Debug` captures lifetime that does not appear in bounds - --> $DIR/impl-fn-hrtb-bounds-2.rs:5:9 + --> $DIR/impl-fn-hrtb-bounds-2.rs:8:9 | LL | fn a() -> impl Fn(&u8) -> impl Debug { | ---------- opaque type defined here diff --git a/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.edition2024.stderr b/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.edition2024.stderr new file mode 100644 index 000000000000..c7aedfe96bb9 --- /dev/null +++ b/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.edition2024.stderr @@ -0,0 +1,15 
@@ +error[E0657]: `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` + --> $DIR/impl-fn-hrtb-bounds-2.rs:7:27 + | +LL | fn a() -> impl Fn(&u8) -> impl Debug { + | ^^^^^^^^^^ `impl Trait` implicitly captures all lifetimes in scope + | +note: lifetime declared here + --> $DIR/impl-fn-hrtb-bounds-2.rs:7:19 + | +LL | fn a() -> impl Fn(&u8) -> impl Debug { + | ^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0657`. diff --git a/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.rs b/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.rs index b0aeded0ef75..f4bfbdeb9f37 100644 --- a/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.rs +++ b/tests/ui/impl-trait/impl-fn-hrtb-bounds-2.rs @@ -1,8 +1,11 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 #![feature(impl_trait_in_fn_trait_return)] use std::fmt::Debug; -fn a() -> impl Fn(&u8) -> impl Debug { - |x| x //~ ERROR hidden type for `impl Debug` captures lifetime that does not appear in bounds +fn a() -> impl Fn(&u8) -> impl Debug { //[edition2024]~ ERROR `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` + |x| x //[edition2015]~ ERROR hidden type for `impl Debug` captures lifetime that does not appear in bounds } fn main() {} diff --git a/tests/ui/impl-trait/impl-fn-predefined-lifetimes.stderr b/tests/ui/impl-trait/impl-fn-predefined-lifetimes.edition2015.stderr similarity index 89% rename from tests/ui/impl-trait/impl-fn-predefined-lifetimes.stderr rename to tests/ui/impl-trait/impl-fn-predefined-lifetimes.edition2015.stderr index 6064b09ef092..94476bcfbe88 100644 --- a/tests/ui/impl-trait/impl-fn-predefined-lifetimes.stderr +++ b/tests/ui/impl-trait/impl-fn-predefined-lifetimes.edition2015.stderr @@ -1,5 +1,5 @@ error[E0792]: expected generic lifetime parameter, found `'_` - --> $DIR/impl-fn-predefined-lifetimes.rs:5:9 + --> $DIR/impl-fn-predefined-lifetimes.rs:8:9 | LL | fn a<'a>() -> 
impl Fn(&'a u8) -> (impl Debug + '_) { | -- this generic parameter must be used with a generic lifetime parameter diff --git a/tests/ui/impl-trait/impl-fn-predefined-lifetimes.edition2024.stderr b/tests/ui/impl-trait/impl-fn-predefined-lifetimes.edition2024.stderr new file mode 100644 index 000000000000..2f1eacb0c34f --- /dev/null +++ b/tests/ui/impl-trait/impl-fn-predefined-lifetimes.edition2024.stderr @@ -0,0 +1,11 @@ +error[E0792]: expected generic lifetime parameter, found `'_` + --> $DIR/impl-fn-predefined-lifetimes.rs:8:9 + | +LL | fn a<'a>() -> impl Fn(&'a u8) -> (impl Debug + '_) { + | -- this generic parameter must be used with a generic lifetime parameter +LL | |x| x + | ^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0792`. diff --git a/tests/ui/impl-trait/impl-fn-predefined-lifetimes.rs b/tests/ui/impl-trait/impl-fn-predefined-lifetimes.rs index 199cbbf4fcc9..b2963cc10fa8 100644 --- a/tests/ui/impl-trait/impl-fn-predefined-lifetimes.rs +++ b/tests/ui/impl-trait/impl-fn-predefined-lifetimes.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 #![feature(impl_trait_in_fn_trait_return)] use std::fmt::Debug; diff --git a/tests/ui/impl-trait/in-bindings/lifetime-equality.rs b/tests/ui/impl-trait/in-bindings/lifetime-equality.rs new file mode 100644 index 000000000000..6cf48dccc7d9 --- /dev/null +++ b/tests/ui/impl-trait/in-bindings/lifetime-equality.rs @@ -0,0 +1,19 @@ +//@ check-pass + +#![feature(impl_trait_in_bindings)] + +// A test for #61773 which would have been difficult to support if we +// were to represent `impl_trait_in_bindings` using opaque types. 
+ +trait Trait<'a, 'b> { } +impl Trait<'_, '_> for T { } + + +fn bar<'a, 'b>(data0: &'a u32, data1: &'b u32) { + let x: impl Trait<'_, '_> = (data0, data1); + force_equal(x); +} + +fn force_equal<'a>(t: impl Trait<'a, 'a>) { } + +fn main() { } diff --git a/tests/ui/impl-trait/in-bindings/region-lifetimes.rs b/tests/ui/impl-trait/in-bindings/region-lifetimes.rs new file mode 100644 index 000000000000..189ab85a2769 --- /dev/null +++ b/tests/ui/impl-trait/in-bindings/region-lifetimes.rs @@ -0,0 +1,17 @@ +//@ check-pass + +#![feature(impl_trait_in_bindings)] + +// A test for #61773 which would have been difficult to support if we +// were to represent `impl_trait_in_bindings` using opaque types. + +trait Foo<'a> { } +impl Foo<'_> for &u32 { } + +fn bar<'a>(data: &'a u32) { + let x: impl Foo<'_> = data; +} + +fn main() { + let _: impl Foo<'_> = &44; +} diff --git a/tests/ui/impl-trait/in-trait/doesnt-satisfy.stderr b/tests/ui/impl-trait/in-trait/doesnt-satisfy.stderr index 119195f17ffc..df89ed9f3b56 100644 --- a/tests/ui/impl-trait/in-trait/doesnt-satisfy.stderr +++ b/tests/ui/impl-trait/in-trait/doesnt-satisfy.stderr @@ -2,10 +2,8 @@ error[E0277]: `()` doesn't implement `std::fmt::Display` --> $DIR/doesnt-satisfy.rs:6:17 | LL | fn bar() -> () {} - | ^^ `()` cannot be formatted with the default formatter + | ^^ the trait `std::fmt::Display` is not implemented for `()` | - = help: the trait `std::fmt::Display` is not implemented for `()` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Foo::bar::{anon_assoc#0}` --> $DIR/doesnt-satisfy.rs:2:22 | diff --git a/tests/ui/impl-trait/in-trait/not-inferred-generic.stderr b/tests/ui/impl-trait/in-trait/not-inferred-generic.stderr index 07f029d3bb7d..c08fc511500c 100644 --- a/tests/ui/impl-trait/in-trait/not-inferred-generic.stderr +++ b/tests/ui/impl-trait/in-trait/not-inferred-generic.stderr @@ -5,7 +5,7 @@ LL | ().publish_typed(); | ^^^^^^^^^^^^^ cannot 
infer type of the type parameter `F` declared on the method `publish_typed` | = note: cannot satisfy `_: Clone` - = note: associated types cannot be accessed directly on a `trait`, they can only be accessed through a specific `impl` + = note: opaque types cannot be accessed directly on a `trait`, they can only be accessed through a specific `impl` note: required by a bound in `TypedClient::publish_typed::{anon_assoc#0}` --> $DIR/not-inferred-generic.rs:4:12 | diff --git a/tests/ui/impl-trait/in-trait/wf-bounds.stderr b/tests/ui/impl-trait/in-trait/wf-bounds.stderr index 634557094ced..40a029cdc920 100644 --- a/tests/ui/impl-trait/in-trait/wf-bounds.stderr +++ b/tests/ui/impl-trait/in-trait/wf-bounds.stderr @@ -39,9 +39,8 @@ error[E0277]: `T` doesn't implement `std::fmt::Display` --> $DIR/wf-bounds.rs:21:26 | LL | fn nya4() -> impl Wf>; - | ^^^^^^^^^^^^^^^^^^^ `T` cannot be formatted with the default formatter + | ^^^^^^^^^^^^^^^^^^^ the trait `std::fmt::Display` is not implemented for `T` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `NeedsDisplay` --> $DIR/wf-bounds.rs:9:24 | diff --git a/tests/ui/impl-trait/issues/issue-54895.stderr b/tests/ui/impl-trait/issues/issue-54895.edition2015.stderr similarity index 87% rename from tests/ui/impl-trait/issues/issue-54895.stderr rename to tests/ui/impl-trait/issues/issue-54895.edition2015.stderr index 64b425328e3a..27a3c6c8b7ce 100644 --- a/tests/ui/impl-trait/issues/issue-54895.stderr +++ b/tests/ui/impl-trait/issues/issue-54895.edition2015.stderr @@ -1,11 +1,11 @@ error[E0657]: `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` - --> $DIR/issue-54895.rs:15:53 + --> $DIR/issue-54895.rs:18:53 | LL | fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { | ^^ | note: lifetime declared here - --> $DIR/issue-54895.rs:15:20 + --> $DIR/issue-54895.rs:18:20 | LL | fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { | ^^ 
diff --git a/tests/ui/impl-trait/issues/issue-54895.edition2024.stderr b/tests/ui/impl-trait/issues/issue-54895.edition2024.stderr new file mode 100644 index 000000000000..54aa29e62d88 --- /dev/null +++ b/tests/ui/impl-trait/issues/issue-54895.edition2024.stderr @@ -0,0 +1,27 @@ +error[E0657]: `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` + --> $DIR/issue-54895.rs:18:40 + | +LL | fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { + | ^^^^^^^^^^^^^^^ `impl Trait` implicitly captures all lifetimes in scope + | +note: lifetime declared here + --> $DIR/issue-54895.rs:18:20 + | +LL | fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { + | ^^ + +error[E0657]: `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` + --> $DIR/issue-54895.rs:18:53 + | +LL | fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { + | ^^ + | +note: lifetime declared here + --> $DIR/issue-54895.rs:18:20 + | +LL | fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { + | ^^ + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0657`. 
diff --git a/tests/ui/impl-trait/issues/issue-54895.rs b/tests/ui/impl-trait/issues/issue-54895.rs index 13c0038ce434..bc1841209e17 100644 --- a/tests/ui/impl-trait/issues/issue-54895.rs +++ b/tests/ui/impl-trait/issues/issue-54895.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 trait Trait<'a> { type Out; fn call(&'a self) -> Self::Out; @@ -14,6 +17,7 @@ impl<'a> Trait<'a> for X { fn f() -> impl for<'a> Trait<'a, Out = impl Sized + 'a> { //~^ ERROR `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` + //[edition2024]~^^ ERROR `impl Trait` cannot capture higher-ranked lifetime from outer `impl Trait` X(()) } diff --git a/tests/ui/impl-trait/issues/issue-79099.stderr b/tests/ui/impl-trait/issues/issue-79099.edition2015.stderr similarity index 95% rename from tests/ui/impl-trait/issues/issue-79099.stderr rename to tests/ui/impl-trait/issues/issue-79099.edition2015.stderr index d7c0c494454c..ee1a479310d6 100644 --- a/tests/ui/impl-trait/issues/issue-79099.stderr +++ b/tests/ui/impl-trait/issues/issue-79099.edition2015.stderr @@ -1,5 +1,5 @@ error: expected identifier, found `1` - --> $DIR/issue-79099.rs:3:65 + --> $DIR/issue-79099.rs:6:65 | LL | let f: impl core::future::Future = async { 1 }; | ----- ^ expected identifier @@ -10,7 +10,7 @@ LL | let f: impl core::future::Future = async { 1 }; = note: for more on editions, read https://doc.rust-lang.org/edition-guide error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/issue-79099.rs:3:16 + --> $DIR/issue-79099.rs:6:16 | LL | let f: impl core::future::Future = async { 1 }; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/ui/impl-trait/issues/issue-79099.edition2024.stderr b/tests/ui/impl-trait/issues/issue-79099.edition2024.stderr new file mode 100644 index 000000000000..3e422e251361 --- /dev/null +++ b/tests/ui/impl-trait/issues/issue-79099.edition2024.stderr @@ -0,0 +1,14 @@ 
+error[E0562]: `impl Trait` is not allowed in the type of variable bindings + --> $DIR/issue-79099.rs:6:16 + | +LL | let f: impl core::future::Future = async { 1 }; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: `impl Trait` is only allowed in arguments and return types of functions and methods + = note: see issue #63065 for more information + = help: add `#![feature(impl_trait_in_bindings)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0562`. diff --git a/tests/ui/impl-trait/issues/issue-79099.rs b/tests/ui/impl-trait/issues/issue-79099.rs index c2bad59045b2..8426298620ff 100644 --- a/tests/ui/impl-trait/issues/issue-79099.rs +++ b/tests/ui/impl-trait/issues/issue-79099.rs @@ -1,8 +1,11 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 struct Bug { V1: [(); { let f: impl core::future::Future = async { 1 }; //~^ ERROR `impl Trait` is not allowed in the type of variable bindings - //~| ERROR expected identifier + //[edition2015]~| ERROR expected identifier 1 }], } diff --git a/tests/ui/impl-trait/name-mentioning-macro.rs b/tests/ui/impl-trait/name-mentioning-macro.rs new file mode 100644 index 000000000000..8a81911c0bbd --- /dev/null +++ b/tests/ui/impl-trait/name-mentioning-macro.rs @@ -0,0 +1,12 @@ +trait Foo {} + +macro_rules! 
bar { + () => { () } +} + +fn foo(x: impl Foo) { + let () = x; + //~^ ERROR mismatched types +} + +fn main() {} diff --git a/tests/ui/impl-trait/name-mentioning-macro.stderr b/tests/ui/impl-trait/name-mentioning-macro.stderr new file mode 100644 index 000000000000..adb4c64f812f --- /dev/null +++ b/tests/ui/impl-trait/name-mentioning-macro.stderr @@ -0,0 +1,16 @@ +error[E0308]: mismatched types + --> $DIR/name-mentioning-macro.rs:8:9 + | +LL | fn foo(x: impl Foo) { + | ---------------- expected this type parameter +LL | let () = x; + | ^^ - this expression has type `impl Foo` + | | + | expected type parameter `impl Foo`, found `()` + | + = note: expected type parameter `impl Foo` + found unit type `()` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0308`. diff --git a/tests/ui/impl-trait/normalize-tait-in-const.rs b/tests/ui/impl-trait/normalize-tait-in-const.rs index a735ef766737..0c7969c0e9ed 100644 --- a/tests/ui/impl-trait/normalize-tait-in-const.rs +++ b/tests/ui/impl-trait/normalize-tait-in-const.rs @@ -24,7 +24,7 @@ mod foo { } use foo::*; -const fn with_positive ~const Fn(&'a Alias<'a>) + ~const Destruct>(fun: F) { +const fn with_positive [const] Fn(&'a Alias<'a>) + [const] Destruct>(fun: F) { fun(filter_positive()); } diff --git a/tests/ui/impl-trait/normalize-tait-in-const.stderr b/tests/ui/impl-trait/normalize-tait-in-const.stderr index 2b6825b1ac67..01427c78dd98 100644 --- a/tests/ui/impl-trait/normalize-tait-in-const.stderr +++ b/tests/ui/impl-trait/normalize-tait-in-const.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/normalize-tait-in-const.rs:27:35 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/normalize-tait-in-const.rs:27:33 | -LL | const fn with_positive ~const Fn(&'a Alias<'a>) + ~const Destruct>(fun: F) { - | ^^^^^^ can't be applied to `Fn` +LL | const fn with_positive [const] Fn(&'a Alias<'a>) + [const] 
Destruct>(fun: F) { + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/normalize-tait-in-const.rs:27:35 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/normalize-tait-in-const.rs:27:33 | -LL | const fn with_positive ~const Fn(&'a Alias<'a>) + ~const Destruct>(fun: F) { - | ^^^^^^ can't be applied to `Fn` +LL | const fn with_positive [const] Fn(&'a Alias<'a>) + [const] Destruct>(fun: F) { + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/impl-trait/precise-capturing/bad-lifetimes.stderr b/tests/ui/impl-trait/precise-capturing/bad-lifetimes.stderr index 98f629f52cf3..ddb09690faf4 100644 --- a/tests/ui/impl-trait/precise-capturing/bad-lifetimes.stderr +++ b/tests/ui/impl-trait/precise-capturing/bad-lifetimes.stderr @@ -15,9 +15,12 @@ error[E0261]: use of undeclared lifetime name `'missing` --> $DIR/bad-lifetimes.rs:7:37 | LL | fn missing_lt() -> impl Sized + use<'missing> {} - | - ^^^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'missing` here: `<'missing>` + | ^^^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'missing` here + | +LL | fn missing_lt<'missing>() -> impl Sized + use<'missing> {} + | ++++++++++ error: expected lifetime parameter in `use<...>` precise captures list, found `'static` --> $DIR/bad-lifetimes.rs:4:36 diff --git a/tests/ui/impl-trait/precise-capturing/dyn-use.stderr 
b/tests/ui/impl-trait/precise-capturing/dyn-use.edition2015.stderr similarity index 90% rename from tests/ui/impl-trait/precise-capturing/dyn-use.stderr rename to tests/ui/impl-trait/precise-capturing/dyn-use.edition2015.stderr index d8903fc41291..9951e9e09657 100644 --- a/tests/ui/impl-trait/precise-capturing/dyn-use.stderr +++ b/tests/ui/impl-trait/precise-capturing/dyn-use.edition2015.stderr @@ -1,5 +1,5 @@ error: expected one of `!`, `(`, `::`, `<`, `where`, or `{`, found keyword `use` - --> $DIR/dyn-use.rs:1:26 + --> $DIR/dyn-use.rs:4:26 | LL | fn dyn() -> &'static dyn use<> { &() } | ^^^ expected one of `!`, `(`, `::`, `<`, `where`, or `{` diff --git a/tests/ui/impl-trait/precise-capturing/dyn-use.edition2024.stderr b/tests/ui/impl-trait/precise-capturing/dyn-use.edition2024.stderr new file mode 100644 index 000000000000..cb3fe4cb5836 --- /dev/null +++ b/tests/ui/impl-trait/precise-capturing/dyn-use.edition2024.stderr @@ -0,0 +1,26 @@ +error: expected identifier, found keyword `dyn` + --> $DIR/dyn-use.rs:4:4 + | +LL | fn dyn() -> &'static dyn use<> { &() } + | ^^^ expected identifier, found keyword + | +help: escape `dyn` to use it as an identifier + | +LL | fn r#dyn() -> &'static dyn use<> { &() } + | ++ + +error: `use<...>` precise capturing syntax not allowed in `dyn` trait object bounds + --> $DIR/dyn-use.rs:4:26 + | +LL | fn dyn() -> &'static dyn use<> { &() } + | ^^^^^ + +error[E0224]: at least one trait is required for an object type + --> $DIR/dyn-use.rs:4:22 + | +LL | fn dyn() -> &'static dyn use<> { &() } + | ^^^^^^^^^ + +error: aborting due to 3 previous errors + +For more information about this error, try `rustc --explain E0224`. 
diff --git a/tests/ui/impl-trait/precise-capturing/dyn-use.rs b/tests/ui/impl-trait/precise-capturing/dyn-use.rs index fb2f83e2d21c..0b6a9467ff7c 100644 --- a/tests/ui/impl-trait/precise-capturing/dyn-use.rs +++ b/tests/ui/impl-trait/precise-capturing/dyn-use.rs @@ -1,2 +1,10 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 fn dyn() -> &'static dyn use<> { &() } -//~^ ERROR expected one of `!`, `(`, `::`, `<`, `where`, or `{`, found keyword `use` +//[edition2015]~^ ERROR expected one of `!`, `(`, `::`, `<`, `where`, or `{`, found keyword `use` +//[edition2024]~^^ ERROR expected identifier, found keyword `dyn` +//[edition2024]~| ERROR `use<...>` precise capturing syntax not allowed in `dyn` trait object bounds +//[edition2024]~| ERROR at least one trait is required for an object type + +fn main() {} diff --git a/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.stderr b/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.edition2015.stderr similarity index 93% rename from tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.stderr rename to tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.edition2015.stderr index 0d8fa650df47..c16722bb80f5 100644 --- a/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.stderr +++ b/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.edition2015.stderr @@ -1,5 +1,5 @@ error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds - --> $DIR/hidden-type-suggestion.rs:3:5 + --> $DIR/hidden-type-suggestion.rs:6:5 | LL | fn lifetime<'a, 'b>(x: &'a ()) -> impl Sized + use<'b> { | -- -------------------- opaque type defined here @@ -15,7 +15,7 @@ LL | fn lifetime<'a, 'b>(x: &'a ()) -> impl Sized + use<'b, 'a> { | ++++ error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds - --> $DIR/hidden-type-suggestion.rs:9:5 + --> $DIR/hidden-type-suggestion.rs:12:5 | LL | fn 
param<'a, T>(x: &'a ()) -> impl Sized + use { | -- ------------------- opaque type defined here @@ -31,7 +31,7 @@ LL | fn param<'a, T>(x: &'a ()) -> impl Sized + use<'a, T> { | +++ error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds - --> $DIR/hidden-type-suggestion.rs:15:5 + --> $DIR/hidden-type-suggestion.rs:18:5 | LL | fn empty<'a>(x: &'a ()) -> impl Sized + use<> { | -- ------------------ opaque type defined here @@ -47,7 +47,7 @@ LL | fn empty<'a>(x: &'a ()) -> impl Sized + use<'a> { | ++ error[E0700]: hidden type for `impl Captures<'captured>` captures lifetime that does not appear in bounds - --> $DIR/hidden-type-suggestion.rs:24:5 + --> $DIR/hidden-type-suggestion.rs:27:5 | LL | fn missing<'a, 'captured, 'not_captured, Captured>(x: &'a ()) -> impl Captures<'captured> { | -- ------------------------ opaque type defined here @@ -63,7 +63,7 @@ LL | fn missing<'a, 'captured, 'not_captured, Captured>(x: &'a ()) -> impl Captu | ++++++++++++++++++++++++++++++ error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds - --> $DIR/hidden-type-suggestion.rs:30:5 + --> $DIR/hidden-type-suggestion.rs:33:5 | LL | fn no_params_yet(_: impl Sized, y: &()) -> impl Sized { | --- ---------- opaque type defined here @@ -74,7 +74,7 @@ LL | y | ^ | note: you could use a `use<...>` bound to explicitly capture `'_`, but argument-position `impl Trait`s are not nameable - --> $DIR/hidden-type-suggestion.rs:28:21 + --> $DIR/hidden-type-suggestion.rs:31:21 | LL | fn no_params_yet(_: impl Sized, y: &()) -> impl Sized { | ^^^^^^^^^^ @@ -85,7 +85,7 @@ LL + fn no_params_yet(_: T, y: &()) -> impl Sized + use<'_, T> { | error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds - --> $DIR/hidden-type-suggestion.rs:36:5 + --> $DIR/hidden-type-suggestion.rs:39:5 | LL | fn yes_params_yet<'a, T>(_: impl Sized, y: &'a ()) -> impl Sized { | -- ---------- opaque type defined here @@ -96,7 +96,7 
@@ LL | y | ^ | note: you could use a `use<...>` bound to explicitly capture `'a`, but argument-position `impl Trait`s are not nameable - --> $DIR/hidden-type-suggestion.rs:34:29 + --> $DIR/hidden-type-suggestion.rs:37:29 | LL | fn yes_params_yet<'a, T>(_: impl Sized, y: &'a ()) -> impl Sized { | ^^^^^^^^^^ diff --git a/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.edition2024.stderr b/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.edition2024.stderr new file mode 100644 index 000000000000..308dc9b00fcd --- /dev/null +++ b/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.edition2024.stderr @@ -0,0 +1,51 @@ +error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds + --> $DIR/hidden-type-suggestion.rs:6:5 + | +LL | fn lifetime<'a, 'b>(x: &'a ()) -> impl Sized + use<'b> { + | -- -------------------- opaque type defined here + | | + | hidden type `&'a ()` captures the lifetime `'a` as defined here +LL | +LL | x + | ^ + | +help: add `'a` to the `use<...>` bound to explicitly capture it + | +LL | fn lifetime<'a, 'b>(x: &'a ()) -> impl Sized + use<'b, 'a> { + | ++++ + +error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds + --> $DIR/hidden-type-suggestion.rs:12:5 + | +LL | fn param<'a, T>(x: &'a ()) -> impl Sized + use { + | -- ------------------- opaque type defined here + | | + | hidden type `&'a ()` captures the lifetime `'a` as defined here +LL | +LL | x + | ^ + | +help: add `'a` to the `use<...>` bound to explicitly capture it + | +LL | fn param<'a, T>(x: &'a ()) -> impl Sized + use<'a, T> { + | +++ + +error[E0700]: hidden type for `impl Sized` captures lifetime that does not appear in bounds + --> $DIR/hidden-type-suggestion.rs:18:5 + | +LL | fn empty<'a>(x: &'a ()) -> impl Sized + use<> { + | -- ------------------ opaque type defined here + | | + | hidden type `&'a ()` captures the lifetime `'a` as defined here +LL | +LL | x + | ^ + | +help: add `'a` to 
the `use<...>` bound to explicitly capture it + | +LL | fn empty<'a>(x: &'a ()) -> impl Sized + use<'a> { + | ++ + +error: aborting due to 3 previous errors + +For more information about this error, try `rustc --explain E0700`. diff --git a/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.rs b/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.rs index d34c61355961..9712eac859ab 100644 --- a/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.rs +++ b/tests/ui/impl-trait/precise-capturing/hidden-type-suggestion.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 fn lifetime<'a, 'b>(x: &'a ()) -> impl Sized + use<'b> { //~^ HELP add `'a` to the `use<...>` bound x @@ -20,21 +23,21 @@ trait Captures<'a> {} impl Captures<'_> for T {} fn missing<'a, 'captured, 'not_captured, Captured>(x: &'a ()) -> impl Captures<'captured> { -//~^ HELP add a `use<...>` bound +//[edition2015]~^ HELP add a `use<...>` bound x -//~^ ERROR hidden type for +//[edition2015]~^ ERROR hidden type for } fn no_params_yet(_: impl Sized, y: &()) -> impl Sized { -//~^ HELP add a `use<...>` bound +//[edition2015]~^ HELP add a `use<...>` bound y -//~^ ERROR hidden type for +//[edition2015]~^ ERROR hidden type for } fn yes_params_yet<'a, T>(_: impl Sized, y: &'a ()) -> impl Sized { -//~^ HELP add a `use<...>` bound +//[edition2015]~^ HELP add a `use<...>` bound y -//~^ ERROR hidden type for +//[edition2015]~^ ERROR hidden type for } fn main() {} diff --git a/tests/ui/impl-trait/struct-field-fragment-in-name.rs b/tests/ui/impl-trait/struct-field-fragment-in-name.rs new file mode 100644 index 000000000000..b98cd864ccb4 --- /dev/null +++ b/tests/ui/impl-trait/struct-field-fragment-in-name.rs @@ -0,0 +1,16 @@ +//@ check-pass + +trait Trait {} + +fn a(_: impl Trait< + [(); { + struct D { + #[rustfmt::skip] + bar: (), + } + 0 + }], +>) { +} + +fn main() {} diff --git 
a/tests/ui/imports/import-from-missing-star-2.stderr b/tests/ui/imports/import-from-missing-star-2.edition2015.stderr similarity index 89% rename from tests/ui/imports/import-from-missing-star-2.stderr rename to tests/ui/imports/import-from-missing-star-2.edition2015.stderr index 9fe2bdbcfa27..cd1a9581d306 100644 --- a/tests/ui/imports/import-from-missing-star-2.stderr +++ b/tests/ui/imports/import-from-missing-star-2.edition2015.stderr @@ -1,5 +1,5 @@ error[E0432]: unresolved import `spam` - --> $DIR/import-from-missing-star-2.rs:2:9 + --> $DIR/import-from-missing-star-2.rs:6:9 | LL | use spam::*; | ^^^^ use of unresolved module or unlinked crate `spam` diff --git a/tests/ui/imports/import-from-missing-star-2.edition2024.stderr b/tests/ui/imports/import-from-missing-star-2.edition2024.stderr new file mode 100644 index 000000000000..086b7a576b22 --- /dev/null +++ b/tests/ui/imports/import-from-missing-star-2.edition2024.stderr @@ -0,0 +1,11 @@ +error[E0432]: unresolved import `spam` + --> $DIR/import-from-missing-star-2.rs:6:9 + | +LL | use spam::*; + | ^^^^ use of unresolved module or unlinked crate `spam` + | + = help: you might be missing a crate named `spam` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0432`. 
diff --git a/tests/ui/imports/import-from-missing-star-2.rs b/tests/ui/imports/import-from-missing-star-2.rs index cb341b0b0ca4..9dad2d4886b8 100644 --- a/tests/ui/imports/import-from-missing-star-2.rs +++ b/tests/ui/imports/import-from-missing-star-2.rs @@ -1,5 +1,10 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 mod foo { +//[edition2015]~^ HELP you might be missing a crate named `spam`, add it to your project and import it in your code use spam::*; //~ ERROR unresolved import `spam` [E0432] + //[edition2024]~^ HELP you might be missing a crate named `spam` } fn main() { diff --git a/tests/ui/imports/issue-28134.rs b/tests/ui/imports/issue-28134.rs index 70d3a327c1af..aef2fe8facdc 100644 --- a/tests/ui/imports/issue-28134.rs +++ b/tests/ui/imports/issue-28134.rs @@ -2,4 +2,4 @@ #![allow(soft_unstable)] #![test] -//~^ ERROR 4:1: 4:9: `test` attribute cannot be used at crate level +//~^ ERROR `test` attribute cannot be used at crate level diff --git a/tests/ui/imports/multiple-extern-by-macro-for-underscore.stderr b/tests/ui/imports/multiple-extern-by-macro-for-underscore.ed2015.stderr similarity index 75% rename from tests/ui/imports/multiple-extern-by-macro-for-underscore.stderr rename to tests/ui/imports/multiple-extern-by-macro-for-underscore.ed2015.stderr index 1da5aa870704..985cd654c394 100644 --- a/tests/ui/imports/multiple-extern-by-macro-for-underscore.stderr +++ b/tests/ui/imports/multiple-extern-by-macro-for-underscore.ed2015.stderr @@ -1,5 +1,5 @@ error: expected identifier, found reserved identifier `_` - --> $DIR/multiple-extern-by-macro-for-underscore.rs:16:11 + --> $DIR/multiple-extern-by-macro-for-underscore.rs:18:11 | LL | use ::_; | ^ expected identifier, found reserved identifier diff --git a/tests/ui/imports/multiple-extern-by-macro-for-underscore.ed2021.stderr b/tests/ui/imports/multiple-extern-by-macro-for-underscore.ed2021.stderr new file mode 100644 index 000000000000..985cd654c394 
--- /dev/null +++ b/tests/ui/imports/multiple-extern-by-macro-for-underscore.ed2021.stderr @@ -0,0 +1,8 @@ +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-extern-by-macro-for-underscore.rs:18:11 + | +LL | use ::_; + | ^ expected identifier, found reserved identifier + +error: aborting due to 1 previous error + diff --git a/tests/ui/imports/multiple-extern-by-macro-for-underscore.rs b/tests/ui/imports/multiple-extern-by-macro-for-underscore.rs index ddf735d89475..ab877e062463 100644 --- a/tests/ui/imports/multiple-extern-by-macro-for-underscore.rs +++ b/tests/ui/imports/multiple-extern-by-macro-for-underscore.rs @@ -1,4 +1,6 @@ -//@ edition: 2021 +//@ revisions: ed2015 ed2021 +//@[ed2015] edition: 2015 +//@[ed2021] edition: 2021 // issue#128813 diff --git a/tests/ui/inference/deref-suggestion.stderr b/tests/ui/inference/deref-suggestion.stderr index 096989db0b4e..8ccd28198afc 100644 --- a/tests/ui/inference/deref-suggestion.stderr +++ b/tests/ui/inference/deref-suggestion.stderr @@ -164,21 +164,18 @@ LL | *b error[E0308]: `if` and `else` have incompatible types --> $DIR/deref-suggestion.rs:69:12 | -LL | let val = if true { - | ________________- -LL | | *a - | | -- expected because of this -LL | | } else if true { - | | ____________^ -LL | || -LL | || b -LL | || } else { -LL | || &0 -LL | || }; - | || ^ - | ||_____| - | |_____`if` and `else` have incompatible types - | expected `i32`, found `&{integer}` +LL | let val = if true { + | ------- `if` and `else` have incompatible types +LL | *a + | -- expected because of this +LL | } else if true { + | ____________^ +LL | | +LL | | b +LL | | } else { +LL | | &0 +LL | | }; + | |_____^ expected `i32`, found `&{integer}` error[E0308]: mismatched types --> $DIR/deref-suggestion.rs:81:15 diff --git a/tests/ui/inference/hint-closure-signature-119266.rs b/tests/ui/inference/hint-closure-signature-119266.rs index 35be600fd6ab..6e136c57ccad 100644 --- 
a/tests/ui/inference/hint-closure-signature-119266.rs +++ b/tests/ui/inference/hint-closure-signature-119266.rs @@ -3,7 +3,7 @@ fn main() { //~^ NOTE: the found closure let x: fn(i32) = x; - //~^ ERROR: 5:22: 5:23: mismatched types [E0308] + //~^ ERROR: mismatched types [E0308] //~| NOTE: incorrect number of function parameters //~| NOTE: expected due to this //~| NOTE: expected fn pointer `fn(i32)` diff --git a/tests/ui/inference/ice-ifer-var-leaked-out-of-rollback-122098.stderr b/tests/ui/inference/ice-ifer-var-leaked-out-of-rollback-122098.stderr index ce01e24770d9..c2ebaee24415 100644 --- a/tests/ui/inference/ice-ifer-var-leaked-out-of-rollback-122098.stderr +++ b/tests/ui/inference/ice-ifer-var-leaked-out-of-rollback-122098.stderr @@ -23,9 +23,12 @@ error[E0261]: use of undeclared lifetime name `'q` --> $DIR/ice-ifer-var-leaked-out-of-rollback-122098.rs:14:21 | LL | impl<'static> Query<'q> { - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'q` here: `'q,` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'q` here + | +LL | impl<'q, 'static> Query<'q> { + | +++ error[E0392]: lifetime parameter `'q` is never used --> $DIR/ice-ifer-var-leaked-out-of-rollback-122098.rs:11:14 diff --git a/tests/ui/inference/issue-107090.stderr b/tests/ui/inference/issue-107090.stderr index e509e262fb1b..0deafdfb931b 100644 --- a/tests/ui/inference/issue-107090.stderr +++ b/tests/ui/inference/issue-107090.stderr @@ -33,15 +33,23 @@ error[E0261]: use of undeclared lifetime name `'b` --> $DIR/issue-107090.rs:11:47 | LL | impl<'long: 'short, 'short, T> Convert<'long, 'b> for Foo<'short, 'out, T> { - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'b` here: `'b,` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'b` here + | +LL | impl<'b, 'long: 'short, 'short, T> Convert<'long, 'b> for Foo<'short, 'out, T> { + | +++ error[E0261]: use of undeclared lifetime name `'out` --> 
$DIR/issue-107090.rs:11:67 | LL | impl<'long: 'short, 'short, T> Convert<'long, 'b> for Foo<'short, 'out, T> { - | - help: consider introducing lifetime `'out` here: `'out,` ^^^^ undeclared lifetime + | ^^^^ undeclared lifetime + | +help: consider introducing lifetime `'out` here + | +LL | impl<'out, 'long: 'short, 'short, T> Convert<'long, 'b> for Foo<'short, 'out, T> { + | +++++ error[E0261]: use of undeclared lifetime name `'out` --> $DIR/issue-107090.rs:14:49 @@ -62,9 +70,12 @@ error[E0261]: use of undeclared lifetime name `'short` --> $DIR/issue-107090.rs:20:68 | LL | fn badboi<'in_, 'out, T>(x: Foo<'in_, 'out, T>, sadness: &'in_ Foo<'short, 'out, T>) -> &'out T { - | - ^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'short` here: `'short,` + | ^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'short` here + | +LL | fn badboi<'short, 'in_, 'out, T>(x: Foo<'in_, 'out, T>, sadness: &'in_ Foo<'short, 'out, T>) -> &'out T { + | +++++++ error: aborting due to 6 previous errors diff --git a/tests/ui/integral-indexing.rs b/tests/ui/integral-indexing.rs index f076dfcb0a42..e20553af8a26 100644 --- a/tests/ui/integral-indexing.rs +++ b/tests/ui/integral-indexing.rs @@ -3,14 +3,14 @@ pub fn main() { let s: String = "abcdef".to_string(); v[3_usize]; v[3]; - v[3u8]; //~ERROR : the type `[isize]` cannot be indexed by `u8` - v[3i8]; //~ERROR : the type `[isize]` cannot be indexed by `i8` - v[3u32]; //~ERROR : the type `[isize]` cannot be indexed by `u32` - v[3i32]; //~ERROR : the type `[isize]` cannot be indexed by `i32` + v[3u8]; //~ ERROR the type `[isize]` cannot be indexed by `u8` + v[3i8]; //~ ERROR the type `[isize]` cannot be indexed by `i8` + v[3u32]; //~ ERROR the type `[isize]` cannot be indexed by `u32` + v[3i32]; //~ ERROR the type `[isize]` cannot be indexed by `i32` s.as_bytes()[3_usize]; s.as_bytes()[3]; - s.as_bytes()[3u8]; //~ERROR : the type `[u8]` cannot be indexed by `u8` - s.as_bytes()[3i8]; //~ERROR : the 
type `[u8]` cannot be indexed by `i8` - s.as_bytes()[3u32]; //~ERROR : the type `[u8]` cannot be indexed by `u32` - s.as_bytes()[3i32]; //~ERROR : the type `[u8]` cannot be indexed by `i32` + s.as_bytes()[3u8]; //~ ERROR the type `[u8]` cannot be indexed by `u8` + s.as_bytes()[3i8]; //~ ERROR the type `[u8]` cannot be indexed by `i8` + s.as_bytes()[3u32]; //~ ERROR the type `[u8]` cannot be indexed by `u32` + s.as_bytes()[3i32]; //~ ERROR the type `[u8]` cannot be indexed by `i32` } diff --git a/tests/ui/issues/issue-32950.rs b/tests/ui/issues/issue-32950.rs deleted file mode 100644 index b51ac2967768..000000000000 --- a/tests/ui/issues/issue-32950.rs +++ /dev/null @@ -1,10 +0,0 @@ -#![feature(concat_idents)] -#![expect(deprecated)] // concat_idents is deprecated - -#[derive(Debug)] -struct Baz( - concat_idents!(Foo, Bar) //~ ERROR `derive` cannot be used on items with type macros - //~^ ERROR cannot find type `FooBar` in this scope -); - -fn main() {} diff --git a/tests/ui/issues/issue-32950.stderr b/tests/ui/issues/issue-32950.stderr deleted file mode 100644 index 38a82542f896..000000000000 --- a/tests/ui/issues/issue-32950.stderr +++ /dev/null @@ -1,15 +0,0 @@ -error: `derive` cannot be used on items with type macros - --> $DIR/issue-32950.rs:6:5 - | -LL | concat_idents!(Foo, Bar) - | ^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0412]: cannot find type `FooBar` in this scope - --> $DIR/issue-32950.rs:6:5 - | -LL | concat_idents!(Foo, Bar) - | ^^^^^^^^^^^^^^^^^^^^^^^^ not found in this scope - -error: aborting due to 2 previous errors - -For more information about this error, try `rustc --explain E0412`. 
diff --git a/tests/ui/issues/issue-43988.stderr b/tests/ui/issues/issue-43988.stderr index bd4eb8bbed37..fe61e136a516 100644 --- a/tests/ui/issues/issue-43988.stderr +++ b/tests/ui/issues/issue-43988.stderr @@ -38,7 +38,7 @@ LL | #[repr] | ^^^^^^^ | | | expected this to be a list - | help: must be of the form: `#[repr(C)]` + | help: must be of the form: `#[repr(C | Rust | align(...) | packed(...) | | transparent)]` error[E0539]: malformed `inline` attribute input --> $DIR/issue-43988.rs:30:5 @@ -64,7 +64,7 @@ LL | let _z = #[repr] 1; | ^^^^^^^ | | | expected this to be a list - | help: must be of the form: `#[repr(C)]` + | help: must be of the form: `#[repr(C | Rust | align(...) | packed(...) | | transparent)]` error[E0518]: attribute should be applied to function or closure --> $DIR/issue-43988.rs:5:5 diff --git a/tests/ui/issues/issue-46604.rs b/tests/ui/issues/issue-46604.rs index 6ec6e7bdcb81..e15f0b52da2f 100644 --- a/tests/ui/issues/issue-46604.rs +++ b/tests/ui/issues/issue-46604.rs @@ -1,4 +1,4 @@ -static buf: &mut [u8] = &mut [1u8,2,3,4,5,7]; //~ ERROR mutable references are not allowed +static buf: &mut [u8] = &mut [1u8,2,3,4,5,7]; //~ ERROR mutable borrows of temporaries fn write>(buffer: T) { } fn main() { diff --git a/tests/ui/issues/issue-46604.stderr b/tests/ui/issues/issue-46604.stderr index 7faa2d79ba48..d983674995ed 100644 --- a/tests/ui/issues/issue-46604.stderr +++ b/tests/ui/issues/issue-46604.stderr @@ -1,8 +1,12 @@ -error[E0764]: mutable references are not allowed in the final value of statics +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/issue-46604.rs:1:25 | LL | static buf: &mut [u8] = &mut [1u8,2,3,4,5,7]; - | ^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating 
global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error[E0594]: cannot assign to `buf[_]`, as `buf` is an immutable static item --> $DIR/issue-46604.rs:6:5 diff --git a/tests/ui/issues/issue-50403.rs b/tests/ui/issues/issue-50403.rs deleted file mode 100644 index f14958afc34d..000000000000 --- a/tests/ui/issues/issue-50403.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![feature(concat_idents)] -#![expect(deprecated)] // concat_idents is deprecated - -fn main() { - let x = concat_idents!(); //~ ERROR `concat_idents!()` takes 1 or more arguments -} diff --git a/tests/ui/issues/issue-50403.stderr b/tests/ui/issues/issue-50403.stderr deleted file mode 100644 index e7dd05bb0183..000000000000 --- a/tests/ui/issues/issue-50403.stderr +++ /dev/null @@ -1,8 +0,0 @@ -error: `concat_idents!()` takes 1 or more arguments - --> $DIR/issue-50403.rs:5:13 - | -LL | let x = concat_idents!(); - | ^^^^^^^^^^^^^^^^ - -error: aborting due to 1 previous error - diff --git a/tests/ui/issues/issue-59488.stderr b/tests/ui/issues/issue-59488.stderr index ac8862716c07..b6611ad63a81 100644 --- a/tests/ui/issues/issue-59488.stderr +++ b/tests/ui/issues/issue-59488.stderr @@ -87,18 +87,16 @@ error[E0277]: `fn(usize) -> Foo {Foo::Bar}` doesn't implement `Debug` --> $DIR/issue-59488.rs:30:5 | LL | assert_eq!(Foo::Bar, i); - | ^^^^^^^^^^^^^^^^^^^^^^^ `fn(usize) -> Foo {Foo::Bar}` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for fn item `fn(usize) -> Foo {Foo::Bar}` | - = help: the trait `Debug` is not implemented for fn item `fn(usize) -> Foo {Foo::Bar}` = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) error[E0277]: `fn(usize) -> Foo {Foo::Bar}` doesn't implement `Debug` --> $DIR/issue-59488.rs:30:5 | LL | 
assert_eq!(Foo::Bar, i); - | ^^^^^^^^^^^^^^^^^^^^^^^ `fn(usize) -> Foo {Foo::Bar}` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for fn item `fn(usize) -> Foo {Foo::Bar}` | - = help: the trait `Debug` is not implemented for fn item `fn(usize) -> Foo {Foo::Bar}` = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 10 previous errors diff --git a/tests/ui/issues/issue-70724-add_type_neq_err_label-unwrap.stderr b/tests/ui/issues/issue-70724-add_type_neq_err_label-unwrap.stderr index b30bcfb776c8..736002c9335a 100644 --- a/tests/ui/issues/issue-70724-add_type_neq_err_label-unwrap.stderr +++ b/tests/ui/issues/issue-70724-add_type_neq_err_label-unwrap.stderr @@ -26,9 +26,8 @@ LL | fn a() -> i32 { | - consider calling this function ... LL | assert_eq!(a, 0); - | ^^^^^^^^^^^^^^^^ `fn() -> i32 {a}` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for fn item `fn() -> i32 {a}` | - = help: the trait `Debug` is not implemented for fn item `fn() -> i32 {a}` = help: use parentheses to call this function: `a()` = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/tests/ui/issues/issue-92741.rs b/tests/ui/issues/issue-92741.rs index f2e5fdafd9cb..1c5d5810a57e 100644 --- a/tests/ui/issues/issue-92741.rs +++ b/tests/ui/issues/issue-92741.rs @@ -1,17 +1,17 @@ //@ run-rustfix fn main() {} fn _foo() -> bool { - & //~ ERROR 4:5: 6:36: mismatched types [E0308] + & //~ ERROR mismatched types [E0308] mut if true { true } else { false } } fn _bar() -> bool { - & //~ ERROR 10:5: 11:40: mismatched types [E0308] + & //~ ERROR mismatched types [E0308] mut if true { true } else { false } } fn _baz() -> bool { - & mut //~ ERROR 15:5: 16:36: mismatched types [E0308] + & 
mut //~ ERROR mismatched types [E0308] if true { true } else { false } } diff --git a/tests/ui/lifetimes/issue-107988.stderr b/tests/ui/lifetimes/issue-107988.stderr index c2d8c7050e97..7d93c1d20246 100644 --- a/tests/ui/lifetimes/issue-107988.stderr +++ b/tests/ui/lifetimes/issue-107988.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'tcx` --> $DIR/issue-107988.rs:7:52 | LL | impl> TraitEngineExt<'tcx> for T { - | - ^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'tcx` here: `'tcx,` + | ^^^^ undeclared lifetime + | +help: consider introducing lifetime `'tcx` here + | +LL | impl<'tcx, T: ?Sized + TraitEngine<'tcx>> TraitEngineExt<'tcx> for T { + | +++++ error[E0261]: use of undeclared lifetime name `'tcx` --> $DIR/issue-107988.rs:7:30 diff --git a/tests/ui/lifetimes/no_lending_iterators.rs b/tests/ui/lifetimes/no_lending_iterators.rs index b3e8ad08ba18..88b8cda0898b 100644 --- a/tests/ui/lifetimes/no_lending_iterators.rs +++ b/tests/ui/lifetimes/no_lending_iterators.rs @@ -2,7 +2,7 @@ struct Data(String); impl Iterator for Data { type Item = &str; - //~^ ERROR 4:17: 4:18: associated type `Iterator::Item` is declared without lifetime parameters, so using a borrowed type for them requires that lifetime to come from the implemented type + //~^ ERROR associated type `Iterator::Item` is declared without lifetime parameters, so using a borrowed type for them requires that lifetime to come from the implemented type fn next(&mut self) -> Option { Some(&self.0) @@ -16,7 +16,7 @@ trait Bar { impl Bar for usize { type Item = &usize; - //~^ ERROR 18:17: 18:18: in the trait associated type is declared without lifetime parameters, so using a borrowed type for them requires that lifetime to come from the implemented type + //~^ ERROR in the trait associated type is declared without lifetime parameters, so using a borrowed type for them requires that lifetime to come from the implemented type fn poke(&mut self, item: Self::Item) { 
self += *item; @@ -25,7 +25,7 @@ impl Bar for usize { impl Bar for isize { type Item<'a> = &'a isize; - //~^ ERROR 27:14: 27:18: lifetime parameters or bounds on associated type `Item` do not match the trait declaration [E0195] + //~^ ERROR lifetime parameters or bounds on associated type `Item` do not match the trait declaration [E0195] fn poke(&mut self, item: Self::Item) { self += *item; diff --git a/tests/ui/lifetimes/undeclared-lifetime-used-in-debug-macro-issue-70152.stderr b/tests/ui/lifetimes/undeclared-lifetime-used-in-debug-macro-issue-70152.stderr index 0d6ade41511f..f90133e9fb1a 100644 --- a/tests/ui/lifetimes/undeclared-lifetime-used-in-debug-macro-issue-70152.stderr +++ b/tests/ui/lifetimes/undeclared-lifetime-used-in-debug-macro-issue-70152.stderr @@ -1,10 +1,13 @@ error[E0261]: use of undeclared lifetime name `'b` --> $DIR/undeclared-lifetime-used-in-debug-macro-issue-70152.rs:3:9 | -LL | struct Test { - | - help: consider introducing lifetime `'b` here: `<'b>` LL | a: &'b str, | ^^ undeclared lifetime + | +help: consider introducing lifetime `'b` here + | +LL | struct Test<'b> { + | ++++ error[E0261]: use of undeclared lifetime name `'b` --> $DIR/undeclared-lifetime-used-in-debug-macro-issue-70152.rs:3:9 @@ -12,9 +15,13 @@ error[E0261]: use of undeclared lifetime name `'b` LL | #[derive(Eq, PartialEq)] | -- lifetime `'b` is missing in item created through this procedural macro LL | struct Test { - | - help: consider introducing lifetime `'b` here: `<'b>` LL | a: &'b str, | ^^ undeclared lifetime + | +help: consider introducing lifetime `'b` here + | +LL | struct Test<'b> { + | ++++ error[E0261]: use of undeclared lifetime name `'b` --> $DIR/undeclared-lifetime-used-in-debug-macro-issue-70152.rs:13:13 diff --git a/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.rs b/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.rs index 9babc20d1a15..9ccc9ce4fdb8 100644 --- a/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.rs +++ 
b/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.rs @@ -11,7 +11,7 @@ extern crate minicore; #[link(name = "foo", kind = "raw-dylib")] extern "stdcall" { -//~^ WARN: calling convention not supported on this target +//~^ WARN: unsupported_calling_conventions //~| WARN: previously accepted fn f(x: i32); //~^ ERROR ABI not supported by `#[link(kind = "raw-dylib")]` on this architecture diff --git a/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.stderr b/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.stderr index 95ea90804867..91e42f2909e0 100644 --- a/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.stderr +++ b/tests/ui/linkage-attr/raw-dylib/windows/unsupported-abi.stderr @@ -1,4 +1,4 @@ -warning: use of calling convention not supported on this target +warning: "stdcall" is not a supported ABI for the current target --> $DIR/unsupported-abi.rs:13:1 | LL | / extern "stdcall" { diff --git a/tests/ui/lint/dead-code/issue-41883.stderr b/tests/ui/lint/dead-code/issue-41883.stderr index cf079e4dda33..47ccef9a5306 100644 --- a/tests/ui/lint/dead-code/issue-41883.stderr +++ b/tests/ui/lint/dead-code/issue-41883.stderr @@ -29,8 +29,6 @@ error: struct `UnusedStruct` is never constructed | LL | struct UnusedStruct; | ^^^^^^^^^^^^ - | - = note: `UnusedStruct` has a derived impl for the trait `Debug`, but this is intentionally ignored during dead code analysis error: aborting due to 4 previous errors diff --git a/tests/ui/lint/dead-code/issue-59003.rs b/tests/ui/lint/dead-code/issue-59003.rs index e3dcaca57788..319cf2db1495 100644 --- a/tests/ui/lint/dead-code/issue-59003.rs +++ b/tests/ui/lint/dead-code/issue-59003.rs @@ -4,8 +4,8 @@ #![deny(dead_code)] +#[allow(dead_code)] struct Foo { - #[allow(dead_code)] inner: u32, } diff --git a/tests/ui/lint/dead-code/lint-unused-adt-appeared-in-pattern.rs b/tests/ui/lint/dead-code/lint-unused-adt-appeared-in-pattern.rs new file mode 100644 index 000000000000..25777438456b --- /dev/null +++ 
b/tests/ui/lint/dead-code/lint-unused-adt-appeared-in-pattern.rs @@ -0,0 +1,37 @@ +#![deny(dead_code)] + +struct Foo(u8); //~ ERROR struct `Foo` is never constructed + +enum Bar { //~ ERROR enum `Bar` is never used + Var1(u8), + Var2(u8), +} + +pub trait Tr1 { + fn f1() -> Self; +} + +impl Tr1 for Foo { + fn f1() -> Foo { + let f = Foo(0); + let Foo(tag) = f; + Foo(tag) + } +} + +impl Tr1 for Bar { + fn f1() -> Bar { + let b = Bar::Var1(0); + let b = if let Bar::Var1(_) = b { + Bar::Var1(0) + } else { + Bar::Var2(0) + }; + match b { + Bar::Var1(_) => Bar::Var2(0), + Bar::Var2(_) => Bar::Var1(0), + } + } +} + +fn main() {} diff --git a/tests/ui/lint/dead-code/lint-unused-adt-appeared-in-pattern.stderr b/tests/ui/lint/dead-code/lint-unused-adt-appeared-in-pattern.stderr new file mode 100644 index 000000000000..7c1a4b459775 --- /dev/null +++ b/tests/ui/lint/dead-code/lint-unused-adt-appeared-in-pattern.stderr @@ -0,0 +1,20 @@ +error: struct `Foo` is never constructed + --> $DIR/lint-unused-adt-appeared-in-pattern.rs:3:8 + | +LL | struct Foo(u8); + | ^^^ + | +note: the lint level is defined here + --> $DIR/lint-unused-adt-appeared-in-pattern.rs:1:9 + | +LL | #![deny(dead_code)] + | ^^^^^^^^^ + +error: enum `Bar` is never used + --> $DIR/lint-unused-adt-appeared-in-pattern.rs:5:6 + | +LL | enum Bar { + | ^^^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/lint/dead-code/multiple-dead-codes-in-the-same-struct.stderr b/tests/ui/lint/dead-code/multiple-dead-codes-in-the-same-struct.stderr index b992005318f2..25a7d96cb897 100644 --- a/tests/ui/lint/dead-code/multiple-dead-codes-in-the-same-struct.stderr +++ b/tests/ui/lint/dead-code/multiple-dead-codes-in-the-same-struct.stderr @@ -56,8 +56,6 @@ warning: struct `Foo` is never constructed | LL | struct Foo(usize, #[allow(unused)] usize); | ^^^ - | - = note: `Foo` has a derived impl for the trait `Debug`, but this is intentionally ignored during dead code analysis error: aborting due to 2 previous errors; 
2 warnings emitted diff --git a/tests/ui/lint/dead-code/not-lint-adt-appeared-in-pattern-issue-120770.rs b/tests/ui/lint/dead-code/not-lint-adt-appeared-in-pattern-issue-120770.rs new file mode 100644 index 000000000000..43a2e4319043 --- /dev/null +++ b/tests/ui/lint/dead-code/not-lint-adt-appeared-in-pattern-issue-120770.rs @@ -0,0 +1,32 @@ +//@ check-pass + +#![deny(dead_code)] + +#[repr(u8)] +#[derive(Copy, Clone, Debug)] +pub enum RecordField { + Target = 1, + Level, + Module, + File, + Line, + NumArgs, +} + +unsafe trait Pod {} + +#[repr(transparent)] +struct RecordFieldWrapper(RecordField); + +unsafe impl Pod for RecordFieldWrapper {} + +fn try_read(buf: &[u8]) -> T { + unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const T) } +} + +pub fn foo(buf: &[u8]) -> RecordField { + let RecordFieldWrapper(tag) = try_read(buf); + tag +} + +fn main() {} diff --git a/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.rs b/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.rs index 5b755d62a059..415eb4138def 100644 --- a/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.rs +++ b/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.rs @@ -2,7 +2,7 @@ struct T1; //~ ERROR struct `T1` is never constructed pub struct T2(i32); //~ ERROR field `0` is never read -struct T3; +struct T3; //~ ERROR struct `T3` is never constructed trait Trait1 { //~ ERROR trait `Trait1` is never used const UNUSED: i32; diff --git a/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.stderr b/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.stderr index 2441a3f868dc..778dadee153f 100644 --- a/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.stderr +++ b/tests/ui/lint/dead-code/unused-adt-impl-pub-trait-with-assoc-const.stderr @@ -20,11 +20,17 @@ LL | pub struct T2(i32); | = help: consider removing this field +error: struct `T3` is never constructed + --> 
$DIR/unused-adt-impl-pub-trait-with-assoc-const.rs:5:8 + | +LL | struct T3; + | ^^ + error: trait `Trait1` is never used --> $DIR/unused-adt-impl-pub-trait-with-assoc-const.rs:7:7 | LL | trait Trait1 { | ^^^^^^ -error: aborting due to 3 previous errors +error: aborting due to 4 previous errors diff --git a/tests/ui/lint/dead-code/unused-struct-derive-default.rs b/tests/ui/lint/dead-code/unused-struct-derive-default.rs index 330ad32dd570..f20b7cb66ee5 100644 --- a/tests/ui/lint/dead-code/unused-struct-derive-default.rs +++ b/tests/ui/lint/dead-code/unused-struct-derive-default.rs @@ -22,4 +22,5 @@ pub struct T2 { fn main() { let _x: Used = Default::default(); + let _e: E = Default::default(); } diff --git a/tests/ui/lint/dead-code/unused-struct-derive-default.stderr b/tests/ui/lint/dead-code/unused-struct-derive-default.stderr index bbb0bd7be706..7422f9a39f31 100644 --- a/tests/ui/lint/dead-code/unused-struct-derive-default.stderr +++ b/tests/ui/lint/dead-code/unused-struct-derive-default.stderr @@ -4,7 +4,6 @@ error: struct `T` is never constructed LL | struct T; | ^ | - = note: `T` has a derived impl for the trait `Default`, but this is intentionally ignored during dead code analysis note: the lint level is defined here --> $DIR/unused-struct-derive-default.rs:1:9 | diff --git a/tests/ui/lint/lint-non-uppercase-usages.fixed b/tests/ui/lint/lint-non-uppercase-usages.fixed new file mode 100644 index 000000000000..231991dcae08 --- /dev/null +++ b/tests/ui/lint/lint-non-uppercase-usages.fixed @@ -0,0 +1,44 @@ +// Checks that the `non_upper_case_globals` emits suggestions for usages as well +// + +//@ check-pass +//@ run-rustfix + +#![allow(dead_code)] + +use std::cell::Cell; + +const MY_STATIC: u32 = 0; +//~^ WARN constant `my_static` should have an upper case name +//~| SUGGESTION MY_STATIC + +const LOL: u32 = MY_STATIC + 0; +//~^ SUGGESTION MY_STATIC + +mod my_mod { + const INSIDE_MOD: u32 = super::MY_STATIC + 0; + //~^ SUGGESTION MY_STATIC +} + +thread_local! 
{ + static FOO_FOO: Cell = unreachable!(); + //~^ WARN constant `fooFOO` should have an upper case name + //~| SUGGESTION FOO_FOO +} + +fn foo() { + //~^ WARN const parameter `foo` should have an upper case name + //~| SUGGESTION FOO + let _a = FOO + 1; + //~^ SUGGESTION FOO +} + +fn main() { + let _a = crate::MY_STATIC; + //~^ SUGGESTION MY_STATIC + + FOO_FOO.set(9); + //~^ SUGGESTION FOO_FOO + println!("{}", FOO_FOO.get()); + //~^ SUGGESTION FOO_FOO +} diff --git a/tests/ui/lint/lint-non-uppercase-usages.rs b/tests/ui/lint/lint-non-uppercase-usages.rs new file mode 100644 index 000000000000..9cdf5e47003d --- /dev/null +++ b/tests/ui/lint/lint-non-uppercase-usages.rs @@ -0,0 +1,44 @@ +// Checks that the `non_upper_case_globals` emits suggestions for usages as well +// + +//@ check-pass +//@ run-rustfix + +#![allow(dead_code)] + +use std::cell::Cell; + +const my_static: u32 = 0; +//~^ WARN constant `my_static` should have an upper case name +//~| SUGGESTION MY_STATIC + +const LOL: u32 = my_static + 0; +//~^ SUGGESTION MY_STATIC + +mod my_mod { + const INSIDE_MOD: u32 = super::my_static + 0; + //~^ SUGGESTION MY_STATIC +} + +thread_local! 
{ + static fooFOO: Cell = unreachable!(); + //~^ WARN constant `fooFOO` should have an upper case name + //~| SUGGESTION FOO_FOO +} + +fn foo() { + //~^ WARN const parameter `foo` should have an upper case name + //~| SUGGESTION FOO + let _a = foo + 1; + //~^ SUGGESTION FOO +} + +fn main() { + let _a = crate::my_static; + //~^ SUGGESTION MY_STATIC + + fooFOO.set(9); + //~^ SUGGESTION FOO_FOO + println!("{}", fooFOO.get()); + //~^ SUGGESTION FOO_FOO +} diff --git a/tests/ui/lint/lint-non-uppercase-usages.stderr b/tests/ui/lint/lint-non-uppercase-usages.stderr new file mode 100644 index 000000000000..7c7e573a88ed --- /dev/null +++ b/tests/ui/lint/lint-non-uppercase-usages.stderr @@ -0,0 +1,39 @@ +warning: constant `my_static` should have an upper case name + --> $DIR/lint-non-uppercase-usages.rs:11:7 + | +LL | const my_static: u32 = 0; + | ^^^^^^^^^ + | + = note: `#[warn(non_upper_case_globals)]` on by default +help: convert the identifier to upper case + | +LL - const my_static: u32 = 0; +LL + const MY_STATIC: u32 = 0; + | + +warning: constant `fooFOO` should have an upper case name + --> $DIR/lint-non-uppercase-usages.rs:24:12 + | +LL | static fooFOO: Cell = unreachable!(); + | ^^^^^^ + | +help: convert the identifier to upper case + | +LL - static fooFOO: Cell = unreachable!(); +LL + static FOO_FOO: Cell = unreachable!(); + | + +warning: const parameter `foo` should have an upper case name + --> $DIR/lint-non-uppercase-usages.rs:29:14 + | +LL | fn foo() { + | ^^^ + | +help: convert the identifier to upper case (notice the capitalization difference) + | +LL - fn foo() { +LL + fn foo() { + | + +warning: 3 warnings emitted + diff --git a/tests/ui/lint/unused/unused-attr-duplicate.stderr b/tests/ui/lint/unused/unused-attr-duplicate.stderr index e1c45e832af3..a18581192ea6 100644 --- a/tests/ui/lint/unused/unused-attr-duplicate.stderr +++ b/tests/ui/lint/unused/unused-attr-duplicate.stderr @@ -65,19 +65,6 @@ LL | #[should_panic] | ^^^^^^^^^^^^^^^ = warning: this was 
previously accepted by the compiler but is being phased out; it will become a hard error in a future release! -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:60:1 - | -LL | #[must_use = "some message"] - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:59:1 - | -LL | #[must_use] - | ^^^^^^^^^^^ - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! - error: unused attribute --> $DIR/unused-attr-duplicate.rs:66:1 | @@ -102,67 +89,6 @@ note: attribute also specified here LL | #[automatically_derived] | ^^^^^^^^^^^^^^^^^^^^^^^^ -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:77:1 - | -LL | #[cold] - | ^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:76:1 - | -LL | #[cold] - | ^^^^^^^ - -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:79:1 - | -LL | #[track_caller] - | ^^^^^^^^^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:78:1 - | -LL | #[track_caller] - | ^^^^^^^^^^^^^^^ - -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:92:1 - | -LL | #[export_name = "exported_symbol_name"] - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:94:1 - | -LL | #[export_name = "exported_symbol_name2"] - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
- -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:98:1 - | -LL | #[no_mangle] - | ^^^^^^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:97:1 - | -LL | #[no_mangle] - | ^^^^^^^^^^^^ - -error: unused attribute - --> $DIR/unused-attr-duplicate.rs:102:1 - | -LL | #[used] - | ^^^^^^^ help: remove this attribute - | -note: attribute also specified here - --> $DIR/unused-attr-duplicate.rs:101:1 - | -LL | #[used] - | ^^^^^^^ - error: unused attribute --> $DIR/unused-attr-duplicate.rs:86:5 | @@ -276,6 +202,19 @@ note: attribute also specified here LL | #[macro_export] | ^^^^^^^^^^^^^^^ +error: unused attribute + --> $DIR/unused-attr-duplicate.rs:60:1 + | +LL | #[must_use = "some message"] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/unused-attr-duplicate.rs:59:1 + | +LL | #[must_use] + | ^^^^^^^^^^^ + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! + error: unused attribute --> $DIR/unused-attr-duplicate.rs:74:1 | @@ -289,5 +228,66 @@ LL | #[inline(always)] | ^^^^^^^^^^^^^^^^^ = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
+error: unused attribute + --> $DIR/unused-attr-duplicate.rs:77:1 + | +LL | #[cold] + | ^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/unused-attr-duplicate.rs:76:1 + | +LL | #[cold] + | ^^^^^^^ + +error: unused attribute + --> $DIR/unused-attr-duplicate.rs:79:1 + | +LL | #[track_caller] + | ^^^^^^^^^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/unused-attr-duplicate.rs:78:1 + | +LL | #[track_caller] + | ^^^^^^^^^^^^^^^ + +error: unused attribute + --> $DIR/unused-attr-duplicate.rs:92:1 + | +LL | #[export_name = "exported_symbol_name"] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/unused-attr-duplicate.rs:94:1 + | +LL | #[export_name = "exported_symbol_name2"] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! + +error: unused attribute + --> $DIR/unused-attr-duplicate.rs:98:1 + | +LL | #[no_mangle] + | ^^^^^^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/unused-attr-duplicate.rs:97:1 + | +LL | #[no_mangle] + | ^^^^^^^^^^^^ + +error: unused attribute + --> $DIR/unused-attr-duplicate.rs:102:1 + | +LL | #[used] + | ^^^^^^^ help: remove this attribute + | +note: attribute also specified here + --> $DIR/unused-attr-duplicate.rs:101:1 + | +LL | #[used] + | ^^^^^^^ + error: aborting due to 23 previous errors diff --git a/tests/ui/loop-match/break-to-block.rs b/tests/ui/loop-match/break-to-block.rs new file mode 100644 index 000000000000..e7451a944c39 --- /dev/null +++ b/tests/ui/loop-match/break-to-block.rs @@ -0,0 +1,23 @@ +// Test that a `break` without `#[const_continue]` still works as expected. 
+ +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +fn main() { + assert_eq!(helper(), 1); +} + +fn helper() -> u8 { + let mut state = 0u8; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + 0 => break 'blk 1, + _ => break 'a state, + } + } + } +} diff --git a/tests/ui/loop-match/const-continue-to-block.rs b/tests/ui/loop-match/const-continue-to-block.rs new file mode 100644 index 000000000000..fd7ebeefeb64 --- /dev/null +++ b/tests/ui/loop-match/const-continue-to-block.rs @@ -0,0 +1,26 @@ +// Test that a `#[const_continue]` that breaks to a normal labeled block (that +// is not part of a `#[loop_match]`) produces an error. + +#![allow(incomplete_features)] +#![feature(loop_match)] +#![crate_type = "lib"] + +fn const_continue_to_block() -> u8 { + let state = 0; + #[loop_match] + loop { + state = 'blk: { + match state { + 0 => { + #[const_continue] + break 'blk 1; + } + _ => 'b: { + #[const_continue] + break 'b 2; + //~^ ERROR `#[const_continue]` must break to a labeled block that participates in a `#[loop_match]` + } + } + } + } +} diff --git a/tests/ui/loop-match/const-continue-to-block.stderr b/tests/ui/loop-match/const-continue-to-block.stderr new file mode 100644 index 000000000000..3a5339a03949 --- /dev/null +++ b/tests/ui/loop-match/const-continue-to-block.stderr @@ -0,0 +1,8 @@ +error: `#[const_continue]` must break to a labeled block that participates in a `#[loop_match]` + --> $DIR/const-continue-to-block.rs:20:27 + | +LL | break 'b 2; + | ^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/loop-match/const-continue-to-loop.rs b/tests/ui/loop-match/const-continue-to-loop.rs new file mode 100644 index 000000000000..c363e617cfb7 --- /dev/null +++ b/tests/ui/loop-match/const-continue-to-loop.rs @@ -0,0 +1,27 @@ +// Test that a `#[const_continue]` that breaks to the label of the loop itself +// rather than to the label of the block within the `#[loop_match]` produces an +// error. 
+ +#![allow(incomplete_features)] +#![feature(loop_match)] +#![crate_type = "lib"] + +fn const_continue_to_loop() -> u8 { + let mut state = 0; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + 0 => { + #[const_continue] + break 'blk 1; + } + _ => { + #[const_continue] + break 'a 2; + //~^ ERROR `#[const_continue]` must break to a labeled block that participates in a `#[loop_match]` + } + } + } + } +} diff --git a/tests/ui/loop-match/const-continue-to-loop.stderr b/tests/ui/loop-match/const-continue-to-loop.stderr new file mode 100644 index 000000000000..a217b3ac72c7 --- /dev/null +++ b/tests/ui/loop-match/const-continue-to-loop.stderr @@ -0,0 +1,8 @@ +error: `#[const_continue]` must break to a labeled block that participates in a `#[loop_match]` + --> $DIR/const-continue-to-loop.rs:21:27 + | +LL | break 'a 2; + | ^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/loop-match/const-continue-to-polymorphic-const.rs b/tests/ui/loop-match/const-continue-to-polymorphic-const.rs new file mode 100644 index 000000000000..9a91c9779111 --- /dev/null +++ b/tests/ui/loop-match/const-continue-to-polymorphic-const.rs @@ -0,0 +1,29 @@ +// Test that a `#[const_continue]` that breaks on a polymorphic constant produces an error. +// A polymorphic constant does not have a concrete value at MIR building time, and therefore the +// `#[loop_match]~ desugaring can't handle such values. 
+#![allow(incomplete_features)] +#![feature(loop_match)] +#![crate_type = "lib"] + +trait Foo { + const TARGET: u8; + + fn test_u8(mut state: u8) -> &'static str { + #[loop_match] + loop { + state = 'blk: { + match state { + 0 => { + #[const_continue] + break 'blk Self::TARGET; + //~^ ERROR could not determine the target branch for this `#[const_continue]` + } + + 1 => return "bar", + 2 => return "baz", + _ => unreachable!(), + } + } + } + } +} diff --git a/tests/ui/loop-match/const-continue-to-polymorphic-const.stderr b/tests/ui/loop-match/const-continue-to-polymorphic-const.stderr new file mode 100644 index 000000000000..4d183a2fbebc --- /dev/null +++ b/tests/ui/loop-match/const-continue-to-polymorphic-const.stderr @@ -0,0 +1,8 @@ +error: could not determine the target branch for this `#[const_continue]` + --> $DIR/const-continue-to-polymorphic-const.rs:18:36 + | +LL | break 'blk Self::TARGET; + | ^^^^^^^^^^^^ this value is too generic + +error: aborting due to 1 previous error + diff --git a/tests/ui/loop-match/drop-in-match-arm.rs b/tests/ui/loop-match/drop-in-match-arm.rs new file mode 100644 index 000000000000..731af6590129 --- /dev/null +++ b/tests/ui/loop-match/drop-in-match-arm.rs @@ -0,0 +1,47 @@ +// Test that dropping values works in match arms, which is nontrivial +// because each match arm needs its own scope. 
+ +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +use std::sync::atomic::{AtomicBool, Ordering}; + +fn main() { + assert_eq!(helper(), 1); + assert!(DROPPED.load(Ordering::Relaxed)); +} + +static DROPPED: AtomicBool = AtomicBool::new(false); + +struct X; + +impl Drop for X { + fn drop(&mut self) { + DROPPED.store(true, Ordering::Relaxed); + } +} + +#[no_mangle] +#[inline(never)] +fn helper() -> i32 { + let mut state = 0; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + 0 => match X { + _ => { + assert!(!DROPPED.load(Ordering::Relaxed)); + break 'blk 1; + } + }, + _ => { + assert!(DROPPED.load(Ordering::Relaxed)); + break 'a state; + } + } + }; + } +} diff --git a/tests/ui/loop-match/invalid-attribute.rs b/tests/ui/loop-match/invalid-attribute.rs new file mode 100644 index 000000000000..d8d2f605eb49 --- /dev/null +++ b/tests/ui/loop-match/invalid-attribute.rs @@ -0,0 +1,43 @@ +// Test that the `#[loop_match]` and `#[const_continue]` attributes can only be +// placed on expressions. 
+ +#![allow(incomplete_features)] +#![feature(loop_match)] +#![loop_match] //~ ERROR should be applied to a loop +#![const_continue] //~ ERROR should be applied to a break expression + +extern "C" { + #[loop_match] //~ ERROR should be applied to a loop + #[const_continue] //~ ERROR should be applied to a break expression + fn f(); +} + +#[loop_match] //~ ERROR should be applied to a loop +#[const_continue] //~ ERROR should be applied to a break expression +#[repr(C)] +struct S { + a: u32, + b: u32, +} + +trait Invoke { + #[loop_match] //~ ERROR should be applied to a loop + #[const_continue] //~ ERROR should be applied to a break expression + extern "C" fn invoke(&self); +} + +#[loop_match] //~ ERROR should be applied to a loop +#[const_continue] //~ ERROR should be applied to a break expression +extern "C" fn ok() {} + +fn main() { + #[loop_match] //~ ERROR should be applied to a loop + #[const_continue] //~ ERROR should be applied to a break expression + || {}; + + { + #[loop_match] //~ ERROR should be applied to a loop + #[const_continue] //~ ERROR should be applied to a break expression + 5 + }; +} diff --git a/tests/ui/loop-match/invalid-attribute.stderr b/tests/ui/loop-match/invalid-attribute.stderr new file mode 100644 index 000000000000..07015311f9c3 --- /dev/null +++ b/tests/ui/loop-match/invalid-attribute.stderr @@ -0,0 +1,131 @@ +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:16:1 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ +LL | #[repr(C)] +LL | struct S { + | -------- not a break expression + +error: `#[loop_match]` should be applied to a loop + --> $DIR/invalid-attribute.rs:15:1 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ +... 
+LL | struct S { + | -------- not a loop + +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:30:1 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ +LL | extern "C" fn ok() {} + | ------------------ not a break expression + +error: `#[loop_match]` should be applied to a loop + --> $DIR/invalid-attribute.rs:29:1 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ +LL | #[const_continue] +LL | extern "C" fn ok() {} + | ------------------ not a loop + +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:35:5 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ +LL | || {}; + | -- not a break expression + +error: `#[loop_match]` should be applied to a loop + --> $DIR/invalid-attribute.rs:34:5 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ +LL | #[const_continue] +LL | || {}; + | -- not a loop + +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:40:9 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ +LL | 5 + | - not a break expression + +error: `#[loop_match]` should be applied to a loop + --> $DIR/invalid-attribute.rs:39:9 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ +LL | #[const_continue] +LL | 5 + | - not a loop + +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:25:5 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ +LL | extern "C" fn invoke(&self); + | ---------------------------- not a break expression + +error: `#[loop_match]` should be applied to a loop + --> $DIR/invalid-attribute.rs:24:5 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ +LL | #[const_continue] +LL | extern "C" fn invoke(&self); + | ---------------------------- not a loop + +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:11:5 + | +LL | #[const_continue] + | ^^^^^^^^^^^^^^^^^ +LL | fn f(); + | ------- not a break expression + +error: `#[loop_match]` should be applied to a loop 
+ --> $DIR/invalid-attribute.rs:10:5 + | +LL | #[loop_match] + | ^^^^^^^^^^^^^ +LL | #[const_continue] +LL | fn f(); + | ------- not a loop + +error: `#[const_continue]` should be applied to a break expression + --> $DIR/invalid-attribute.rs:7:1 + | +LL | / #![allow(incomplete_features)] +LL | | #![feature(loop_match)] +LL | | #![loop_match] +LL | | #![const_continue] + | | ^^^^^^^^^^^^^^^^^^ +... | +LL | | }; +LL | | } + | |_- not a break expression + +error: `#[loop_match]` should be applied to a loop + --> $DIR/invalid-attribute.rs:6:1 + | +LL | / #![allow(incomplete_features)] +LL | | #![feature(loop_match)] +LL | | #![loop_match] + | | ^^^^^^^^^^^^^^ +LL | | #![const_continue] +... | +LL | | }; +LL | | } + | |_- not a loop + +error: aborting due to 14 previous errors + diff --git a/tests/ui/loop-match/invalid.rs b/tests/ui/loop-match/invalid.rs new file mode 100644 index 000000000000..2ddc19f4fc62 --- /dev/null +++ b/tests/ui/loop-match/invalid.rs @@ -0,0 +1,161 @@ +// Test that the correct error is emitted when `#[loop_match]` is applied to +// syntax it does not support. 
+#![allow(incomplete_features)] +#![feature(loop_match)] +#![crate_type = "lib"] + +enum State { + A, + B, + C, +} + +fn invalid_update() { + let mut fake = State::A; + let state = State::A; + #[loop_match] + loop { + fake = 'blk: { + //~^ ERROR invalid update of the `#[loop_match]` state + match state { + _ => State::B, + } + } + } +} + +fn invalid_scrutinee() { + let mut state = State::A; + #[loop_match] + loop { + state = 'blk: { + match State::A { + //~^ ERROR invalid match on `#[loop_match]` state + _ => State::B, + } + } + } +} + +fn bad_statements_1() { + let mut state = State::A; + #[loop_match] + loop { + 1; + //~^ ERROR statements are not allowed in this position within a `#[loop_match]` + state = 'blk: { + match State::A { + _ => State::B, + } + } + } +} + +fn bad_statements_2() { + let mut state = State::A; + #[loop_match] + loop { + state = 'blk: { + 1; + //~^ ERROR statements are not allowed in this position within a `#[loop_match]` + match State::A { + _ => State::B, + } + } + } +} + +fn bad_rhs_1() { + let mut state = State::A; + #[loop_match] + loop { + state = State::B + //~^ ERROR this expression must be a single `match` wrapped in a labeled block + } +} + +fn bad_rhs_2() { + let mut state = State::A; + #[loop_match] + loop { + state = 'blk: { + State::B + //~^ ERROR this expression must be a single `match` wrapped in a labeled block + } + } +} + +fn bad_rhs_3() { + let mut state = (); + #[loop_match] + loop { + state = 'blk: { + //~^ ERROR this expression must be a single `match` wrapped in a labeled block + } + } +} + +fn missing_assignment() { + #[loop_match] + loop { + () //~ ERROR expected a single assignment expression + } +} + +fn empty_loop_body() { + #[loop_match] + loop { + //~^ ERROR expected a single assignment expression + } +} + +fn break_without_value() { + let mut state = State::A; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + State::A => { + #[const_continue] + break 'blk; + //~^ ERROR mismatched types + } + _ 
=> break 'a, + } + } + } +} + +fn break_without_value_unit() { + let mut state = (); + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + () => { + #[const_continue] + break 'blk; + //~^ ERROR a `#[const_continue]` must break to a label with a value + } + } + } + } +} + +fn arm_has_guard(cond: bool) { + let mut state = State::A; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + State::A => { + #[const_continue] + break 'blk State::B; + } + State::B if cond => break 'a, + //~^ ERROR match arms that are part of a `#[loop_match]` cannot have guards + _ => break 'a, + } + } + } +} diff --git a/tests/ui/loop-match/invalid.stderr b/tests/ui/loop-match/invalid.stderr new file mode 100644 index 000000000000..51fdd024c6fa --- /dev/null +++ b/tests/ui/loop-match/invalid.stderr @@ -0,0 +1,91 @@ +error[E0308]: mismatched types + --> $DIR/invalid.rs:120:21 + | +LL | break 'blk; + | ^^^^^^^^^^ expected `State`, found `()` + | +help: give the `break` a value of the expected type + | +LL | break 'blk /* value */; + | +++++++++++ + +error: invalid update of the `#[loop_match]` state + --> $DIR/invalid.rs:18:9 + | +LL | fake = 'blk: { + | ^^^^ +LL | +LL | match state { + | ----- the assignment must update this variable + +error: invalid match on `#[loop_match]` state + --> $DIR/invalid.rs:32:19 + | +LL | match State::A { + | ^^^^^^^^ + | + = note: a local variable must be the scrutinee within a `#[loop_match]` + +error: statements are not allowed in this position within a `#[loop_match]` + --> $DIR/invalid.rs:44:9 + | +LL | 1; + | ^^ + +error: statements are not allowed in this position within a `#[loop_match]` + --> $DIR/invalid.rs:59:13 + | +LL | 1; + | ^^ + +error: this expression must be a single `match` wrapped in a labeled block + --> $DIR/invalid.rs:72:17 + | +LL | state = State::B + | ^^^^^^^^ + +error: this expression must be a single `match` wrapped in a labeled block + --> $DIR/invalid.rs:82:13 + | +LL | State::B + | ^^^^^^^^ + +error: this 
expression must be a single `match` wrapped in a labeled block + --> $DIR/invalid.rs:92:17 + | +LL | state = 'blk: { + | _________________^ +LL | | +LL | | } + | |_________^ + +error: expected a single assignment expression + --> $DIR/invalid.rs:101:9 + | +LL | () + | ^^ + +error: expected a single assignment expression + --> $DIR/invalid.rs:107:10 + | +LL | loop { + | __________^ +LL | | +LL | | } + | |_____^ + +error: a `#[const_continue]` must break to a label with a value + --> $DIR/invalid.rs:137:21 + | +LL | break 'blk; + | ^^^^^^^^^^ + +error: match arms that are part of a `#[loop_match]` cannot have guards + --> $DIR/invalid.rs:155:29 + | +LL | State::B if cond => break 'a, + | ^^^^ + +error: aborting due to 12 previous errors + +For more information about this error, try `rustc --explain E0308`. diff --git a/tests/ui/loop-match/loop-match.rs b/tests/ui/loop-match/loop-match.rs new file mode 100644 index 000000000000..f38bc01f3338 --- /dev/null +++ b/tests/ui/loop-match/loop-match.rs @@ -0,0 +1,45 @@ +// Test that a basic correct example of `#[loop_match]` with `#[const_continue]` +// works correctly. + +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +enum State { + A, + B, + C, +} + +fn main() { + let mut state = State::A; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + State::A => { + #[const_continue] + break 'blk State::B; + } + State::B => { + // Without special logic, the compiler believes this is a + // reassignment to an immutable variable because of the + // `loop`. So this tests that local variables work. 
+ let _a = 0; + + if true { + #[const_continue] + break 'blk State::C; + } else { + #[const_continue] + break 'blk State::A; + } + } + State::C => break 'a, + } + }; + } + + assert!(matches!(state, State::C)) +} diff --git a/tests/ui/loop-match/macro.rs b/tests/ui/loop-match/macro.rs new file mode 100644 index 000000000000..98c98b9b627b --- /dev/null +++ b/tests/ui/loop-match/macro.rs @@ -0,0 +1,48 @@ +// Test that macros can be defined in the labeled block. This should not trigger an error about +// statements not being allowed in that position, and should of course work as expected. + +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +enum State { + A, + B, + C, +} + +fn main() { + let mut state = State::A; + #[loop_match] + 'a: loop { + state = 'blk: { + macro_rules! const_continue { + ($e:expr) => { + #[const_continue] + break 'blk $e; + }; + } + match state { + State::A => { + const_continue!(State::B); + } + State::B => { + // Without special logic, the compiler believes this is a + // reassignment to an immutable variable because of the + // `loop`. So this tests that local variables work. + let _a = 0; + + if true { + const_continue!(State::C); + } else { + const_continue!(State::A); + } + } + State::C => break 'a, + } + }; + } + + assert!(matches!(state, State::C)) +} diff --git a/tests/ui/loop-match/nested.rs b/tests/ui/loop-match/nested.rs new file mode 100644 index 000000000000..aaddfae11def --- /dev/null +++ b/tests/ui/loop-match/nested.rs @@ -0,0 +1,83 @@ +// Test that a nested `#[loop_match]` works as expected, and that e.g. a +// `#[const_continue]` of the inner `#[loop_match]` does not interact with the +// outer `#[loop_match]`. 
+ +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +enum State1 { + A, + B, + C, +} + +enum State2 { + X, + Y, + Z, +} + +fn main() { + assert_eq!(run(), concat!("ab", "xyz", "xyz", "c")) +} + +fn run() -> String { + let mut accum = String::new(); + + let mut state1 = State1::A; + let mut state2 = State2::X; + + let mut first = true; + + #[loop_match] + 'a: loop { + state1 = 'blk1: { + match state1 { + State1::A => { + accum.push('a'); + #[const_continue] + break 'blk1 State1::B; + } + State1::B => { + accum.push('b'); + #[loop_match] + loop { + state2 = 'blk2: { + match state2 { + State2::X => { + accum.push('x'); + #[const_continue] + break 'blk2 State2::Y; + } + State2::Y => { + accum.push('y'); + #[const_continue] + break 'blk2 State2::Z; + } + State2::Z => { + accum.push('z'); + if first { + first = false; + #[const_continue] + break 'blk2 State2::X; + } else { + #[const_continue] + break 'blk1 State1::C; + } + } + } + } + } + } + State1::C => { + accum.push('c'); + break 'a; + } + } + } + } + + accum +} diff --git a/tests/ui/loop-match/or-patterns.rs b/tests/ui/loop-match/or-patterns.rs new file mode 100644 index 000000000000..775243b9c620 --- /dev/null +++ b/tests/ui/loop-match/or-patterns.rs @@ -0,0 +1,54 @@ +// Test that `#[loop_match]` supports or-patterns. 
+ +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum State { + A, + B, + C, + D, +} + +fn main() { + let mut states = vec![]; + let mut first = true; + let mut state = State::A; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + State::A => { + states.push(state); + if first { + #[const_continue] + break 'blk State::B; + } else { + #[const_continue] + break 'blk State::D; + } + } + State::B | State::D => { + states.push(state); + if first { + first = false; + #[const_continue] + break 'blk State::A; + } else { + #[const_continue] + break 'blk State::C; + } + } + State::C => { + states.push(state); + break 'a; + } + } + } + } + + assert_eq!(states, [State::A, State::B, State::A, State::D, State::C]); +} diff --git a/tests/ui/loop-match/unsupported-type.rs b/tests/ui/loop-match/unsupported-type.rs new file mode 100644 index 000000000000..9100a1103ab7 --- /dev/null +++ b/tests/ui/loop-match/unsupported-type.rs @@ -0,0 +1,27 @@ +// Test that the right error is emitted when the `#[loop_match]` state is an +// unsupported type. 
+ +#![allow(incomplete_features)] +#![feature(loop_match)] +#![crate_type = "lib"] + +fn unsupported_type() { + let mut state = Some(false); + #[loop_match] + 'a: loop { + state = 'blk: { + //~^ ERROR this `#[loop_match]` state value has type `Option`, which is not supported + match state { + Some(false) => { + #[const_continue] + break 'blk Some(true); + } + Some(true) => { + #[const_continue] + break 'blk None; + } + None => break 'a, + } + } + } +} diff --git a/tests/ui/loop-match/unsupported-type.stderr b/tests/ui/loop-match/unsupported-type.stderr new file mode 100644 index 000000000000..ede3d86796fd --- /dev/null +++ b/tests/ui/loop-match/unsupported-type.stderr @@ -0,0 +1,10 @@ +error: this `#[loop_match]` state value has type `Option`, which is not supported + --> $DIR/unsupported-type.rs:12:9 + | +LL | state = 'blk: { + | ^^^^^ + | + = note: only integers, floats, bool, char, and enums without fields are supported + +error: aborting due to 1 previous error + diff --git a/tests/ui/loop-match/unwind.rs b/tests/ui/loop-match/unwind.rs new file mode 100644 index 000000000000..39e2e4537b17 --- /dev/null +++ b/tests/ui/loop-match/unwind.rs @@ -0,0 +1,53 @@ +// Test that `#[const_continue]` correctly emits cleanup paths for drops. +// +// Here, we first drop `DropBomb`, causing an unwind. Then `ExitOnDrop` should +// be dropped, causing us to exit with `0` rather than with some non-zero value +// due to the panic, which is what causes the test to pass. 
+ +//@ run-pass +//@ needs-unwind + +#![allow(incomplete_features)] +#![feature(loop_match)] + +enum State { + A, + B, +} + +struct ExitOnDrop; + +impl Drop for ExitOnDrop { + fn drop(&mut self) { + std::process::exit(0); + } +} + +struct DropBomb; + +impl Drop for DropBomb { + fn drop(&mut self) { + panic!("this must unwind"); + } +} + +fn main() { + let mut state = State::A; + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + State::A => { + let _exit = ExitOnDrop; + let _bomb = DropBomb; + + #[const_continue] + break 'blk State::B; + } + State::B => break 'a, + } + }; + } + + unreachable!(); +} diff --git a/tests/ui/loop-match/valid-patterns.rs b/tests/ui/loop-match/valid-patterns.rs new file mode 100644 index 000000000000..4e0e4798a0bd --- /dev/null +++ b/tests/ui/loop-match/valid-patterns.rs @@ -0,0 +1,117 @@ +// Test that signed and unsigned integer patterns work with `#[loop_match]`. + +//@ run-pass + +#![allow(incomplete_features)] +#![feature(loop_match)] + +fn main() { + assert_eq!(integer(0), 2); + assert_eq!(integer(-1), 2); + assert_eq!(integer(2), 2); + + assert_eq!(boolean(true), false); + assert_eq!(boolean(false), false); + + assert_eq!(character('a'), 'b'); + assert_eq!(character('b'), 'b'); + assert_eq!(character('c'), 'd'); + assert_eq!(character('d'), 'd'); + + assert_eq!(test_f32(1.0), core::f32::consts::PI); + assert_eq!(test_f32(2.5), core::f32::consts::PI); + assert_eq!(test_f32(4.0), 4.0); + + assert_eq!(test_f64(1.0), core::f64::consts::PI); + assert_eq!(test_f64(2.5), core::f64::consts::PI); + assert_eq!(test_f64(4.0), 4.0); +} + +fn integer(mut state: i32) -> i32 { + #[loop_match] + 'a: loop { + state = 'blk: { + match state { + -1 => { + #[const_continue] + break 'blk 2; + } + 0 => { + #[const_continue] + break 'blk -1; + } + 2 => break 'a, + _ => unreachable!("weird value {:?}", state), + } + } + } + + state +} + +fn boolean(mut state: bool) -> bool { + #[loop_match] + loop { + state = 'blk: { + match state { + true => { 
+ #[const_continue] + break 'blk false; + } + false => return state, + } + } + } +} + +fn character(mut state: char) -> char { + #[loop_match] + loop { + state = 'blk: { + match state { + 'a' => { + #[const_continue] + break 'blk 'b'; + } + 'b' => return state, + 'c' => { + #[const_continue] + break 'blk 'd'; + } + _ => return state, + } + } + } +} + +fn test_f32(mut state: f32) -> f32 { + #[loop_match] + loop { + state = 'blk: { + match state { + 1.0 => { + #[const_continue] + break 'blk 2.5; + } + 2.0..3.0 => return core::f32::consts::PI, + _ => return state, + } + } + } +} + +fn test_f64(mut state: f64) -> f64 { + #[loop_match] + loop { + state = 'blk: { + match state { + 1.0 => { + #[const_continue] + break 'blk 2.5; + } + 2.0..3.0 => return core::f64::consts::PI, + _ => return state, + } + } + } +} diff --git a/tests/ui/macros/concat-bytes-error.rs b/tests/ui/macros/concat-bytes-error.rs index db5d3cab0bd8..8130fc54d8d5 100644 --- a/tests/ui/macros/concat-bytes-error.rs +++ b/tests/ui/macros/concat-bytes-error.rs @@ -1,20 +1,44 @@ +//@ edition: 2021 +// 2021 edition for C string literals + #![feature(concat_bytes)] fn main() { + // Identifiers concat_bytes!(pie); //~ ERROR expected a byte literal concat_bytes!(pie, pie); //~ ERROR expected a byte literal + + // String literals concat_bytes!("tnrsi", "tnri"); //~ ERROR cannot concatenate string literals + //~^ SUGGESTION b"tnrsi" + concat_bytes!(r"tnrsi", r"tnri"); //~ ERROR cannot concatenate string literals + //~^ SUGGESTION br"tnrsi" + concat_bytes!(r#"tnrsi"#, r###"tnri"###); //~ ERROR cannot concatenate string literals + //~^ SUGGESTION br#"tnrsi"# + concat_bytes!(c"tnrsi", c"tnri"); //~ ERROR cannot concatenate C string literals + //~^ SUGGESTION b"tnrsi\0" + concat_bytes!(cr"tnrsi", cr"tnri"); //~ ERROR cannot concatenate C string literals + concat_bytes!(cr#"tnrsi"#, cr###"tnri"###); //~ ERROR cannot concatenate C string literals + + // Other literals concat_bytes!(2.8); //~ ERROR cannot concatenate 
float literals concat_bytes!(300); //~ ERROR cannot concatenate numeric literals + //~^ SUGGESTION [300] concat_bytes!('a'); //~ ERROR cannot concatenate character literals + //~^ SUGGESTION b'a' concat_bytes!(true, false); //~ ERROR cannot concatenate boolean literals concat_bytes!(42, b"va", b'l'); //~ ERROR cannot concatenate numeric literals + //~^ SUGGESTION [42] concat_bytes!(42, b"va", b'l', [1, 2]); //~ ERROR cannot concatenate numeric literals + //~^ SUGGESTION [42] + + // Nested items concat_bytes!([ "hi", //~ ERROR cannot concatenate string literals ]); concat_bytes!([ 'a', //~ ERROR cannot concatenate character literals + //~^ SUGGESTION b'a' ]); concat_bytes!([ true, //~ ERROR cannot concatenate boolean literals @@ -38,6 +62,7 @@ fn main() { [5, 6, 7], //~ ERROR cannot concatenate doubly nested array ]); concat_bytes!(5u16); //~ ERROR cannot concatenate numeric literals + //~^ SUGGESTION [5u16] concat_bytes!([5u16]); //~ ERROR numeric literal is not a `u8` concat_bytes!([3; ()]); //~ ERROR repeat count is not a positive number concat_bytes!([3; -2]); //~ ERROR repeat count is not a positive number diff --git a/tests/ui/macros/concat-bytes-error.stderr b/tests/ui/macros/concat-bytes-error.stderr index 3f2c64922e34..447d7a663fdc 100644 --- a/tests/ui/macros/concat-bytes-error.stderr +++ b/tests/ui/macros/concat-bytes-error.stderr @@ -1,5 +1,5 @@ error: expected a byte literal - --> $DIR/concat-bytes-error.rs:4:19 + --> $DIR/concat-bytes-error.rs:8:19 | LL | concat_bytes!(pie); | ^^^ @@ -7,7 +7,7 @@ LL | concat_bytes!(pie); = note: only byte literals (like `b"foo"`, `b's'` and `[3, 4, 5]`) can be passed to `concat_bytes!()` error: expected a byte literal - --> $DIR/concat-bytes-error.rs:5:19 + --> $DIR/concat-bytes-error.rs:9:19 | LL | concat_bytes!(pie, pie); | ^^^ ^^^ @@ -15,85 +15,126 @@ LL | concat_bytes!(pie, pie); = note: only byte literals (like `b"foo"`, `b's'` and `[3, 4, 5]`) can be passed to `concat_bytes!()` error: cannot concatenate string 
literals - --> $DIR/concat-bytes-error.rs:6:19 + --> $DIR/concat-bytes-error.rs:12:19 | LL | concat_bytes!("tnrsi", "tnri"); | ^^^^^^^ help: try using a byte string: `b"tnrsi"` +error: cannot concatenate string literals + --> $DIR/concat-bytes-error.rs:14:19 + | +LL | concat_bytes!(r"tnrsi", r"tnri"); + | ^^^^^^^^ help: try using a byte string: `br"tnrsi"` + +error: cannot concatenate string literals + --> $DIR/concat-bytes-error.rs:16:19 + | +LL | concat_bytes!(r#"tnrsi"#, r###"tnri"###); + | ^^^^^^^^^^ help: try using a byte string: `br#"tnrsi"#` + +error: cannot concatenate C string literals + --> $DIR/concat-bytes-error.rs:18:19 + | +LL | concat_bytes!(c"tnrsi", c"tnri"); + | ^^^^^^^^ help: try using a null-terminated byte string: `b"tnrsi\0"` + | +note: concatenating C strings is ambiguous about including the '\0' + --> $DIR/concat-bytes-error.rs:18:19 + | +LL | concat_bytes!(c"tnrsi", c"tnri"); + | ^^^^^^^^ + = note: concatenating C strings is ambiguous about including the '\0' + +error: cannot concatenate C string literals + --> $DIR/concat-bytes-error.rs:20:19 + | +LL | concat_bytes!(cr"tnrsi", cr"tnri"); + | ^^^^^^^^^ + | + = note: concatenating C strings is ambiguous about including the '\0' + +error: cannot concatenate C string literals + --> $DIR/concat-bytes-error.rs:21:19 + | +LL | concat_bytes!(cr#"tnrsi"#, cr###"tnri"###); + | ^^^^^^^^^^^ + | + = note: concatenating C strings is ambiguous about including the '\0' + error: cannot concatenate float literals - --> $DIR/concat-bytes-error.rs:7:19 + --> $DIR/concat-bytes-error.rs:24:19 | LL | concat_bytes!(2.8); | ^^^ error: cannot concatenate numeric literals - --> $DIR/concat-bytes-error.rs:8:19 + --> $DIR/concat-bytes-error.rs:25:19 | LL | concat_bytes!(300); | ^^^ help: try wrapping the number in an array: `[300]` error: cannot concatenate character literals - --> $DIR/concat-bytes-error.rs:9:19 + --> $DIR/concat-bytes-error.rs:27:19 | LL | concat_bytes!('a'); | ^^^ help: try using a byte character: 
`b'a'` error: cannot concatenate boolean literals - --> $DIR/concat-bytes-error.rs:10:19 + --> $DIR/concat-bytes-error.rs:29:19 | LL | concat_bytes!(true, false); | ^^^^ error: cannot concatenate numeric literals - --> $DIR/concat-bytes-error.rs:11:19 + --> $DIR/concat-bytes-error.rs:30:19 | LL | concat_bytes!(42, b"va", b'l'); | ^^ help: try wrapping the number in an array: `[42]` error: cannot concatenate numeric literals - --> $DIR/concat-bytes-error.rs:12:19 + --> $DIR/concat-bytes-error.rs:32:19 | LL | concat_bytes!(42, b"va", b'l', [1, 2]); | ^^ help: try wrapping the number in an array: `[42]` error: cannot concatenate string literals - --> $DIR/concat-bytes-error.rs:14:9 + --> $DIR/concat-bytes-error.rs:37:9 | LL | "hi", | ^^^^ error: cannot concatenate character literals - --> $DIR/concat-bytes-error.rs:17:9 + --> $DIR/concat-bytes-error.rs:40:9 | LL | 'a', | ^^^ help: try using a byte character: `b'a'` error: cannot concatenate boolean literals - --> $DIR/concat-bytes-error.rs:20:9 + --> $DIR/concat-bytes-error.rs:44:9 | LL | true, | ^^^^ error: cannot concatenate boolean literals - --> $DIR/concat-bytes-error.rs:23:9 + --> $DIR/concat-bytes-error.rs:47:9 | LL | false, | ^^^^^ error: cannot concatenate float literals - --> $DIR/concat-bytes-error.rs:26:9 + --> $DIR/concat-bytes-error.rs:50:9 | LL | 2.6, | ^^^ error: numeric literal is out of bounds - --> $DIR/concat-bytes-error.rs:29:9 + --> $DIR/concat-bytes-error.rs:53:9 | LL | 265, | ^^^ error: expected a byte literal - --> $DIR/concat-bytes-error.rs:32:9 + --> $DIR/concat-bytes-error.rs:56:9 | LL | -33, | ^^^ @@ -101,7 +142,7 @@ LL | -33, = note: only byte literals (like `b"foo"`, `b's'` and `[3, 4, 5]`) can be passed to `concat_bytes!()` error: cannot concatenate doubly nested array - --> $DIR/concat-bytes-error.rs:35:9 + --> $DIR/concat-bytes-error.rs:59:9 | LL | b"hi!", | ^^^^^^ @@ -110,43 +151,43 @@ LL | b"hi!", = help: try flattening the array error: cannot concatenate doubly nested array - --> 
$DIR/concat-bytes-error.rs:38:9 + --> $DIR/concat-bytes-error.rs:62:9 | LL | [5, 6, 7], | ^^^^^^^^^ error: cannot concatenate numeric literals - --> $DIR/concat-bytes-error.rs:40:19 + --> $DIR/concat-bytes-error.rs:64:19 | LL | concat_bytes!(5u16); | ^^^^ help: try wrapping the number in an array: `[5u16]` error: numeric literal is not a `u8` - --> $DIR/concat-bytes-error.rs:41:20 + --> $DIR/concat-bytes-error.rs:66:20 | LL | concat_bytes!([5u16]); | ^^^^ error: repeat count is not a positive number - --> $DIR/concat-bytes-error.rs:42:23 + --> $DIR/concat-bytes-error.rs:67:23 | LL | concat_bytes!([3; ()]); | ^^ error: repeat count is not a positive number - --> $DIR/concat-bytes-error.rs:43:23 + --> $DIR/concat-bytes-error.rs:68:23 | LL | concat_bytes!([3; -2]); | ^^ error: repeat count is not a positive number - --> $DIR/concat-bytes-error.rs:44:25 + --> $DIR/concat-bytes-error.rs:69:25 | LL | concat_bytes!([pie; -2]); | ^^ error: expected a byte literal - --> $DIR/concat-bytes-error.rs:45:20 + --> $DIR/concat-bytes-error.rs:70:20 | LL | concat_bytes!([pie; 2]); | ^^^ @@ -154,28 +195,28 @@ LL | concat_bytes!([pie; 2]); = note: only byte literals (like `b"foo"`, `b's'` and `[3, 4, 5]`) can be passed to `concat_bytes!()` error: cannot concatenate float literals - --> $DIR/concat-bytes-error.rs:46:20 + --> $DIR/concat-bytes-error.rs:71:20 | LL | concat_bytes!([2.2; 0]); | ^^^ error: repeat count is not a positive number - --> $DIR/concat-bytes-error.rs:47:25 + --> $DIR/concat-bytes-error.rs:72:25 | LL | concat_bytes!([5.5; ()]); | ^^ error: cannot concatenate doubly nested array - --> $DIR/concat-bytes-error.rs:48:20 + --> $DIR/concat-bytes-error.rs:73:20 | LL | concat_bytes!([[1, 2, 3]; 3]); | ^^^^^^^^^ error: cannot concatenate doubly nested array - --> $DIR/concat-bytes-error.rs:49:20 + --> $DIR/concat-bytes-error.rs:74:20 | LL | concat_bytes!([[42; 2]; 3]); | ^^^^^^^ -error: aborting due to 28 previous errors +error: aborting due to 33 previous errors diff --git 
a/tests/ui/macros/issue-118048.rs b/tests/ui/macros/issue-118048.rs index 15a834fa2df4..3b3ab3b4fc93 100644 --- a/tests/ui/macros/issue-118048.rs +++ b/tests/ui/macros/issue-118048.rs @@ -6,5 +6,6 @@ macro_rules! foo { foo!(_); //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions +//~| ERROR the placeholder `_` is not allowed within types on item signatures for functions fn main() {} diff --git a/tests/ui/macros/issue-118048.stderr b/tests/ui/macros/issue-118048.stderr index 4dc5ef71fec6..f5468b341bce 100644 --- a/tests/ui/macros/issue-118048.stderr +++ b/tests/ui/macros/issue-118048.stderr @@ -2,20 +2,16 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures --> $DIR/issue-118048.rs:7:6 | LL | foo!(_); - | ^ - | | - | not allowed in type signatures - | not allowed in type signatures - | -help: use type parameters instead - | -LL ~ fn foo(_: $ty, _: $ty) {} -LL | } -LL | } -LL | -LL ~ foo!(T); - | + | ^ not allowed in type signatures -error: aborting due to 1 previous error +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/issue-118048.rs:7:6 + | +LL | foo!(_); + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error: aborting due to 2 previous errors For more information about this error, try `rustc --explain E0121`. 
diff --git a/tests/ui/macros/macro-comma-support-rpass.rs b/tests/ui/macros/macro-comma-support-rpass.rs index 5a4bac70b1ce..ef6c1ff6fd06 100644 --- a/tests/ui/macros/macro-comma-support-rpass.rs +++ b/tests/ui/macros/macro-comma-support-rpass.rs @@ -15,7 +15,6 @@ #![cfg_attr(core, no_std)] #![allow(deprecated)] // for deprecated `try!()` macro -#![feature(concat_idents)] #[cfg(std)] use std::fmt; #[cfg(core)] use core::fmt; @@ -79,17 +78,6 @@ fn concat() { let _ = concat!("hello", " world",); } -#[test] -fn concat_idents() { - fn foo() {} - fn foobar() {} - - concat_idents!(foo)(); - concat_idents!(foo,)(); - concat_idents!(foo, bar)(); - concat_idents!(foo, bar,)(); -} - #[test] fn debug_assert() { debug_assert!(true); diff --git a/tests/ui/macros/macro-metavar-expr-concat/empty-input.rs b/tests/ui/macros/macro-metavar-expr-concat/empty-input.rs new file mode 100644 index 000000000000..caad63c5f6b0 --- /dev/null +++ b/tests/ui/macros/macro-metavar-expr-concat/empty-input.rs @@ -0,0 +1,12 @@ +// Issue 50403 +// Ensure that `concat` can't create empty identifiers +// FIXME(macro_metavar_expr_concat): this error message could be improved + +macro_rules! empty { + () => { ${concat()} } //~ ERROR expected identifier or string literal + //~^ERROR expected expression +} + +fn main() { + let x = empty!(); +} diff --git a/tests/ui/macros/macro-metavar-expr-concat/empty-input.stderr b/tests/ui/macros/macro-metavar-expr-concat/empty-input.stderr new file mode 100644 index 000000000000..e95032dd2478 --- /dev/null +++ b/tests/ui/macros/macro-metavar-expr-concat/empty-input.stderr @@ -0,0 +1,19 @@ +error: expected identifier or string literal + --> $DIR/empty-input.rs:6:14 + | +LL | () => { ${concat()} } + | ^^^^^^^^^^ + +error: expected expression, found `$` + --> $DIR/empty-input.rs:6:13 + | +LL | () => { ${concat()} } + | ^ expected expression +... 
+LL | let x = empty!(); + | -------- in this macro invocation + | + = note: this error originates in the macro `empty` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 2 previous errors + diff --git a/tests/ui/macros/macro-reexport-removed.rs b/tests/ui/macros/macro-reexport-removed.rs index c1267f14cd86..4a054686d776 100644 --- a/tests/ui/macros/macro-reexport-removed.rs +++ b/tests/ui/macros/macro-reexport-removed.rs @@ -1,5 +1,4 @@ //@ aux-build:two_macros.rs -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" -> "you are using $$RUSTC_VERSION" #![feature(macro_reexport)] //~ ERROR feature has been removed diff --git a/tests/ui/macros/macro-reexport-removed.stderr b/tests/ui/macros/macro-reexport-removed.stderr index d4940eeb7755..8130fe0c4bda 100644 --- a/tests/ui/macros/macro-reexport-removed.stderr +++ b/tests/ui/macros/macro-reexport-removed.stderr @@ -1,14 +1,14 @@ error[E0557]: feature has been removed - --> $DIR/macro-reexport-removed.rs:4:12 + --> $DIR/macro-reexport-removed.rs:3:12 | LL | #![feature(macro_reexport)] | ^^^^^^^^^^^^^^ feature has been removed | - = note: removed in 1.0.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.0.0; see for more information = note: subsumed by `pub use` error: cannot find attribute `macro_reexport` in this scope - --> $DIR/macro-reexport-removed.rs:6:3 + --> $DIR/macro-reexport-removed.rs:5:3 | LL | #[macro_reexport(macro_one)] | ^^^^^^^^^^^^^^ help: a built-in attribute with a similar name exists: `macro_export` diff --git a/tests/ui/macros/macro-span-issue-116502.rs b/tests/ui/macros/macro-span-issue-116502.rs index 4c254289ee68..b5ae383efca0 100644 --- a/tests/ui/macros/macro-span-issue-116502.rs +++ b/tests/ui/macros/macro-span-issue-116502.rs @@ -5,6 +5,8 @@ fn bug() { macro_rules! 
m { () => { _ //~ ERROR the placeholder `_` is not allowed within types on item signatures for structs + //~^ ERROR the placeholder `_` is not allowed within types on item signatures for structs + //~| ERROR the placeholder `_` is not allowed within types on item signatures for structs }; } struct S(m!(), T) diff --git a/tests/ui/macros/macro-span-issue-116502.stderr b/tests/ui/macros/macro-span-issue-116502.stderr index 2a581f7031b9..68f8874f5d62 100644 --- a/tests/ui/macros/macro-span-issue-116502.stderr +++ b/tests/ui/macros/macro-span-issue-116502.stderr @@ -2,22 +2,35 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures --> $DIR/macro-span-issue-116502.rs:7:13 | LL | _ - | ^ - | | - | not allowed in type signatures - | not allowed in type signatures - | not allowed in type signatures + | ^ not allowed in type signatures ... -LL | struct S(m!(), T) - | ---- ---- in this macro invocation - | | - | in this macro invocation -LL | where LL | T: Trait; | ---- in this macro invocation | = note: this error originates in the macro `m` (in Nightly builds, run with -Z macro-backtrace for more info) -error: aborting due to 1 previous error +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/macro-span-issue-116502.rs:7:13 + | +LL | _ + | ^ not allowed in type signatures +... +LL | struct S(m!(), T) + | ---- in this macro invocation + | + = note: this error originates in the macro `m` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/macro-span-issue-116502.rs:7:13 + | +LL | _ + | ^ not allowed in type signatures +... 
+LL | struct S(m!(), T) + | ---- in this macro invocation + | + = note: this error originates in the macro `m` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 3 previous errors For more information about this error, try `rustc --explain E0121`. diff --git a/tests/ui/macros/macros-nonfatal-errors.rs b/tests/ui/macros/macros-nonfatal-errors.rs index 091d64ea5d9e..1349d7415105 100644 --- a/tests/ui/macros/macros-nonfatal-errors.rs +++ b/tests/ui/macros/macros-nonfatal-errors.rs @@ -3,9 +3,8 @@ // test that errors in a (selection) of macros don't kill compilation // immediately, so that we get more errors listed at a time. -#![feature(trace_macros, concat_idents)] +#![feature(trace_macros)] #![feature(stmt_expr_attributes)] -#![expect(deprecated)] // concat_idents is deprecated use std::arch::asm; @@ -105,8 +104,6 @@ fn main() { asm!(invalid); //~ ERROR llvm_asm!(invalid); //~ ERROR - concat_idents!("not", "idents"); //~ ERROR - option_env!(invalid); //~ ERROR env!(invalid); //~ ERROR env!(foo, abr, baz); //~ ERROR diff --git a/tests/ui/macros/macros-nonfatal-errors.stderr b/tests/ui/macros/macros-nonfatal-errors.stderr index 2f990cb24e2b..bc34bd1c8ec8 100644 --- a/tests/ui/macros/macros-nonfatal-errors.stderr +++ b/tests/ui/macros/macros-nonfatal-errors.stderr @@ -1,5 +1,5 @@ error: the `#[default]` attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:14:5 + --> $DIR/macros-nonfatal-errors.rs:13:5 | LL | #[default] | ^^^^^^^^^^ @@ -7,7 +7,7 @@ LL | #[default] = help: consider a manual implementation of `Default` error: the `#[default]` attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:19:36 + --> $DIR/macros-nonfatal-errors.rs:18:36 | LL | struct DefaultInnerAttrTupleStruct(#[default] ()); | ^^^^^^^^^^ @@ -15,7 +15,7 @@ LL | struct DefaultInnerAttrTupleStruct(#[default] ()); = help: consider a manual implementation of `Default` error: the `#[default]` 
attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:23:1 + --> $DIR/macros-nonfatal-errors.rs:22:1 | LL | #[default] | ^^^^^^^^^^ @@ -23,7 +23,7 @@ LL | #[default] = help: consider a manual implementation of `Default` error: the `#[default]` attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:27:1 + --> $DIR/macros-nonfatal-errors.rs:26:1 | LL | #[default] | ^^^^^^^^^^ @@ -31,7 +31,7 @@ LL | #[default] = help: consider a manual implementation of `Default` error: the `#[default]` attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:37:11 + --> $DIR/macros-nonfatal-errors.rs:36:11 | LL | Foo = #[default] 0, | ^^^^^^^^^^ @@ -39,7 +39,7 @@ LL | Foo = #[default] 0, = help: consider a manual implementation of `Default` error: the `#[default]` attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:38:14 + --> $DIR/macros-nonfatal-errors.rs:37:14 | LL | Bar([u8; #[default] 1]), | ^^^^^^^^^^ @@ -47,7 +47,7 @@ LL | Bar([u8; #[default] 1]), = help: consider a manual implementation of `Default` error[E0665]: `#[derive(Default)]` on enum with no `#[default]` - --> $DIR/macros-nonfatal-errors.rs:43:10 + --> $DIR/macros-nonfatal-errors.rs:42:10 | LL | #[derive(Default)] | ^^^^^^^ @@ -67,7 +67,7 @@ LL | #[default] Bar, | ++++++++++ error[E0665]: `#[derive(Default)]` on enum with no `#[default]` - --> $DIR/macros-nonfatal-errors.rs:49:10 + --> $DIR/macros-nonfatal-errors.rs:48:10 | LL | #[derive(Default)] | ^^^^^^^ @@ -78,7 +78,7 @@ LL | | } | |_- this enum needs a unit variant marked with `#[default]` error: multiple declared defaults - --> $DIR/macros-nonfatal-errors.rs:55:10 + --> $DIR/macros-nonfatal-errors.rs:54:10 | LL | #[derive(Default)] | ^^^^^^^ @@ -95,7 +95,7 @@ LL | Baz, = note: only one variant can be default error: `#[default]` attribute does not accept a value - --> $DIR/macros-nonfatal-errors.rs:67:5 + --> 
$DIR/macros-nonfatal-errors.rs:66:5 | LL | #[default = 1] | ^^^^^^^^^^^^^^ @@ -103,7 +103,7 @@ LL | #[default = 1] = help: try using `#[default]` error: multiple `#[default]` attributes - --> $DIR/macros-nonfatal-errors.rs:75:5 + --> $DIR/macros-nonfatal-errors.rs:74:5 | LL | #[default] | ---------- `#[default]` used here @@ -114,13 +114,13 @@ LL | Foo, | = note: only one `#[default]` attribute is needed help: try removing this - --> $DIR/macros-nonfatal-errors.rs:74:5 + --> $DIR/macros-nonfatal-errors.rs:73:5 | LL | #[default] | ^^^^^^^^^^ error: multiple `#[default]` attributes - --> $DIR/macros-nonfatal-errors.rs:85:5 + --> $DIR/macros-nonfatal-errors.rs:84:5 | LL | #[default] | ---------- `#[default]` used here @@ -132,7 +132,7 @@ LL | Foo, | = note: only one `#[default]` attribute is needed help: try removing these - --> $DIR/macros-nonfatal-errors.rs:82:5 + --> $DIR/macros-nonfatal-errors.rs:81:5 | LL | #[default] | ^^^^^^^^^^ @@ -142,7 +142,7 @@ LL | #[default] | ^^^^^^^^^^ error: the `#[default]` attribute may only be used on unit enum variants - --> $DIR/macros-nonfatal-errors.rs:92:5 + --> $DIR/macros-nonfatal-errors.rs:91:5 | LL | Foo {}, | ^^^ @@ -150,7 +150,7 @@ LL | Foo {}, = help: consider a manual implementation of `Default` error: default variant must be exhaustive - --> $DIR/macros-nonfatal-errors.rs:100:5 + --> $DIR/macros-nonfatal-errors.rs:99:5 | LL | #[non_exhaustive] | ----------------- declared `#[non_exhaustive]` here @@ -160,37 +160,31 @@ LL | Foo, = help: consider a manual implementation of `Default` error: asm template must be a string literal - --> $DIR/macros-nonfatal-errors.rs:105:10 + --> $DIR/macros-nonfatal-errors.rs:104:10 | LL | asm!(invalid); | ^^^^^^^ -error: `concat_idents!()` requires ident args - --> $DIR/macros-nonfatal-errors.rs:108:5 - | -LL | concat_idents!("not", "idents"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - error: argument must be a string literal - --> $DIR/macros-nonfatal-errors.rs:110:17 + --> 
$DIR/macros-nonfatal-errors.rs:107:17 | LL | option_env!(invalid); | ^^^^^^^ error: expected string literal - --> $DIR/macros-nonfatal-errors.rs:111:10 + --> $DIR/macros-nonfatal-errors.rs:108:10 | LL | env!(invalid); | ^^^^^^^ error: `env!()` takes 1 or 2 arguments - --> $DIR/macros-nonfatal-errors.rs:112:5 + --> $DIR/macros-nonfatal-errors.rs:109:5 | LL | env!(foo, abr, baz); | ^^^^^^^^^^^^^^^^^^^ error: environment variable `RUST_HOPEFULLY_THIS_DOESNT_EXIST` not defined at compile time - --> $DIR/macros-nonfatal-errors.rs:113:5 + --> $DIR/macros-nonfatal-errors.rs:110:5 | LL | env!("RUST_HOPEFULLY_THIS_DOESNT_EXIST"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -198,7 +192,7 @@ LL | env!("RUST_HOPEFULLY_THIS_DOESNT_EXIST"); = help: use `std::env::var("RUST_HOPEFULLY_THIS_DOESNT_EXIST")` to read the variable at run time error: format argument must be a string literal - --> $DIR/macros-nonfatal-errors.rs:115:13 + --> $DIR/macros-nonfatal-errors.rs:112:13 | LL | format!(invalid); | ^^^^^^^ @@ -209,43 +203,43 @@ LL | format!("{}", invalid); | +++++ error: argument must be a string literal - --> $DIR/macros-nonfatal-errors.rs:117:14 + --> $DIR/macros-nonfatal-errors.rs:114:14 | LL | include!(invalid); | ^^^^^^^ error: argument must be a string literal - --> $DIR/macros-nonfatal-errors.rs:119:18 + --> $DIR/macros-nonfatal-errors.rs:116:18 | LL | include_str!(invalid); | ^^^^^^^ error: couldn't read `$DIR/i'd be quite surprised if a file with this name existed`: $FILE_NOT_FOUND_MSG - --> $DIR/macros-nonfatal-errors.rs:120:5 + --> $DIR/macros-nonfatal-errors.rs:117:5 | LL | include_str!("i'd be quite surprised if a file with this name existed"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: argument must be a string literal - --> $DIR/macros-nonfatal-errors.rs:121:20 + --> $DIR/macros-nonfatal-errors.rs:118:20 | LL | include_bytes!(invalid); | ^^^^^^^ error: couldn't read `$DIR/i'd be quite surprised if a file with this name 
existed`: $FILE_NOT_FOUND_MSG - --> $DIR/macros-nonfatal-errors.rs:122:5 + --> $DIR/macros-nonfatal-errors.rs:119:5 | LL | include_bytes!("i'd be quite surprised if a file with this name existed"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: trace_macros! accepts only `true` or `false` - --> $DIR/macros-nonfatal-errors.rs:124:5 + --> $DIR/macros-nonfatal-errors.rs:121:5 | LL | trace_macros!(invalid); | ^^^^^^^^^^^^^^^^^^^^^^ error: default variant must be exhaustive - --> $DIR/macros-nonfatal-errors.rs:134:9 + --> $DIR/macros-nonfatal-errors.rs:131:9 | LL | #[non_exhaustive] | ----------------- declared `#[non_exhaustive]` here @@ -255,11 +249,11 @@ LL | Foo, = help: consider a manual implementation of `Default` error: cannot find macro `llvm_asm` in this scope - --> $DIR/macros-nonfatal-errors.rs:106:5 + --> $DIR/macros-nonfatal-errors.rs:105:5 | LL | llvm_asm!(invalid); | ^^^^^^^^ -error: aborting due to 29 previous errors +error: aborting due to 28 previous errors For more information about this error, try `rustc --explain E0665`. diff --git a/tests/ui/macros/missing-writer-issue-139830.rs b/tests/ui/macros/missing-writer-issue-139830.rs new file mode 100644 index 000000000000..da4608776c31 --- /dev/null +++ b/tests/ui/macros/missing-writer-issue-139830.rs @@ -0,0 +1,9 @@ +// Make sure we don't suggest a method change inside the `write!` macro. 
+// +// See + +fn main() { + let mut buf = String::new(); + let _ = write!(buf, "foo"); + //~^ ERROR cannot write into `String` +} diff --git a/tests/ui/macros/missing-writer-issue-139830.stderr b/tests/ui/macros/missing-writer-issue-139830.stderr new file mode 100644 index 000000000000..34dd61328e00 --- /dev/null +++ b/tests/ui/macros/missing-writer-issue-139830.stderr @@ -0,0 +1,23 @@ +error[E0599]: cannot write into `String` + --> $DIR/missing-writer-issue-139830.rs:7:20 + | +LL | let _ = write!(buf, "foo"); + | ^^^ + --> $SRC_DIR/core/src/fmt/mod.rs:LL:COL + | + = note: the method is available for `String` here + | +note: must implement `io::Write`, `fmt::Write`, or have a `write_fmt` method + --> $DIR/missing-writer-issue-139830.rs:7:20 + | +LL | let _ = write!(buf, "foo"); + | ^^^ + = help: items from traits can only be used if the trait is in scope +help: trait `Write` which provides `write_fmt` is implemented but not in scope; perhaps you want to import it + | +LL + use std::fmt::Write; + | + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0599`. 
diff --git a/tests/ui/macros/stringify.rs b/tests/ui/macros/stringify.rs index 3f3d9252adbe..c858051a7ebf 100644 --- a/tests/ui/macros/stringify.rs +++ b/tests/ui/macros/stringify.rs @@ -483,7 +483,6 @@ fn test_item() { c1!(item, [ impl Struct {} ], "impl Struct {}"); c1!(item, [ pub impl Trait for Struct {} ], "pub impl Trait for Struct {}"); c1!(item, [ impl const Trait for T {} ], "impl const Trait for T {}"); - c1!(item, [ impl ~const Struct {} ], "impl ~const Struct {}"); // ItemKind::MacCall c1!(item, [ mac!(); ], "mac!();"); @@ -730,7 +729,7 @@ fn test_ty() { c1!(ty, [ dyn Send + 'a ], "dyn Send + 'a"); c1!(ty, [ dyn 'a + Send ], "dyn 'a + Send"); c1!(ty, [ dyn ?Sized ], "dyn ?Sized"); - c1!(ty, [ dyn ~const Clone ], "dyn ~const Clone"); + c1!(ty, [ dyn [const] Clone ], "dyn [const] Clone"); c1!(ty, [ dyn for<'a> Send ], "dyn for<'a> Send"); // TyKind::ImplTrait @@ -738,7 +737,7 @@ fn test_ty() { c1!(ty, [ impl Send + 'a ], "impl Send + 'a"); c1!(ty, [ impl 'a + Send ], "impl 'a + Send"); c1!(ty, [ impl ?Sized ], "impl ?Sized"); - c1!(ty, [ impl ~const Clone ], "impl ~const Clone"); + c1!(ty, [ impl [const] Clone ], "impl [const] Clone"); c1!(ty, [ impl for<'a> Send ], "impl for<'a> Send"); // TyKind::Paren diff --git a/tests/ui/methods/dont-ice-on-object-lookup-w-error-region.stderr b/tests/ui/methods/dont-ice-on-object-lookup-w-error-region.stderr index 2c33941be433..00267ce359a1 100644 --- a/tests/ui/methods/dont-ice-on-object-lookup-w-error-region.stderr +++ b/tests/ui/methods/dont-ice-on-object-lookup-w-error-region.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'missing` --> $DIR/dont-ice-on-object-lookup-w-error-region.rs:6:20 | LL | fn project(x: Pin<&'missing mut dyn Future>) { - | - ^^^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'missing` here: `<'missing>` + | ^^^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'missing` here + | +LL | fn project<'missing>(x: Pin<&'missing mut 
dyn Future>) { + | ++++++++++ error: aborting due to 1 previous error diff --git a/tests/ui/methods/method-call-lifetime-args-unresolved.stderr b/tests/ui/methods/method-call-lifetime-args-unresolved.stderr index c72e7e0cdc37..d3bd74a49fb3 100644 --- a/tests/ui/methods/method-call-lifetime-args-unresolved.stderr +++ b/tests/ui/methods/method-call-lifetime-args-unresolved.stderr @@ -1,10 +1,13 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/method-call-lifetime-args-unresolved.rs:2:15 | -LL | fn main() { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | 0.clone::<'a>(); | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn main<'a>() { + | ++++ warning: cannot specify lifetime arguments explicitly if late bound lifetime parameters are present --> $DIR/method-call-lifetime-args-unresolved.rs:2:15 diff --git a/tests/ui/methods/suggest-convert-ptr-to-ref.stderr b/tests/ui/methods/suggest-convert-ptr-to-ref.stderr index 7d52b20121e9..8cb97ea458bc 100644 --- a/tests/ui/methods/suggest-convert-ptr-to-ref.stderr +++ b/tests/ui/methods/suggest-convert-ptr-to-ref.stderr @@ -2,7 +2,7 @@ error[E0599]: `*const u8` doesn't implement `std::fmt::Display` --> $DIR/suggest-convert-ptr-to-ref.rs:5:22 | LL | println!("{}", z.to_string()); - | ^^^^^^^^^ `*const u8` cannot be formatted with the default formatter + | ^^^^^^^^^ method cannot be called on `*const u8` due to unsatisfied trait bounds | note: the method `to_string` exists on the type `&u8` --> $SRC_DIR/alloc/src/string.rs:LL:COL @@ -11,13 +11,12 @@ note: the method `to_string` exists on the type `&u8` = note: the following trait bounds were not satisfied: `*const u8: std::fmt::Display` which is required by `*const u8: ToString` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead error[E0599]: `*mut u8` doesn't implement `std::fmt::Display` --> $DIR/suggest-convert-ptr-to-ref.rs:8:22 | LL | println!("{}", t.to_string()); 
- | ^^^^^^^^^ `*mut u8` cannot be formatted with the default formatter + | ^^^^^^^^^ method cannot be called on `*mut u8` due to unsatisfied trait bounds | note: the method `to_string` exists on the type `&&mut u8` --> $SRC_DIR/alloc/src/string.rs:LL:COL @@ -26,7 +25,6 @@ note: the method `to_string` exists on the type `&&mut u8` = note: the following trait bounds were not satisfied: `*mut u8: std::fmt::Display` which is required by `*mut u8: ToString` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead error[E0599]: no method named `make_ascii_lowercase` found for raw pointer `*mut u8` in the current scope --> $DIR/suggest-convert-ptr-to-ref.rs:9:7 diff --git a/tests/crashes/131451.rs b/tests/ui/mir/unreachable-loop-jump-threading.rs similarity index 74% rename from tests/crashes/131451.rs rename to tests/ui/mir/unreachable-loop-jump-threading.rs index cd5b44bad8a5..8403906bb5c0 100644 --- a/tests/crashes/131451.rs +++ b/tests/ui/mir/unreachable-loop-jump-threading.rs @@ -1,9 +1,10 @@ -//@ known-bug: #131451 +//@ build-pass //@ needs-rustc-debug-assertions //@ compile-flags: -Zmir-enable-passes=+GVN -Zmir-enable-passes=+JumpThreading --crate-type=lib pub fn fun(terminate: bool) { while true {} + //~^ WARN denote infinite loops with `loop { ... }` while !terminate {} } diff --git a/tests/ui/mir/unreachable-loop-jump-threading.stderr b/tests/ui/mir/unreachable-loop-jump-threading.stderr new file mode 100644 index 000000000000..21b174c80218 --- /dev/null +++ b/tests/ui/mir/unreachable-loop-jump-threading.stderr @@ -0,0 +1,10 @@ +warning: denote infinite loops with `loop { ... 
}` + --> $DIR/unreachable-loop-jump-threading.rs:6:5 + | +LL | while true {} + | ^^^^^^^^^^ help: use `loop` + | + = note: `#[warn(while_true)]` on by default + +warning: 1 warning emitted + diff --git a/tests/ui/mismatched_types/method-help-unsatisfied-bound.stderr b/tests/ui/mismatched_types/method-help-unsatisfied-bound.stderr index be3a3e2abf14..23bc9dc0f844 100644 --- a/tests/ui/mismatched_types/method-help-unsatisfied-bound.stderr +++ b/tests/ui/mismatched_types/method-help-unsatisfied-bound.stderr @@ -2,9 +2,8 @@ error[E0277]: `Foo` doesn't implement `Debug` --> $DIR/method-help-unsatisfied-bound.rs:5:7 | LL | a.unwrap(); - | ^^^^^^ `Foo` cannot be formatted using `{:?}` + | ^^^^^^ the trait `Debug` is not implemented for `Foo` | - = help: the trait `Debug` is not implemented for `Foo` = note: add `#[derive(Debug)]` to `Foo` or manually `impl Debug for Foo` note: required by a bound in `Result::::unwrap` --> $SRC_DIR/core/src/result.rs:LL:COL diff --git a/tests/ui/mismatched_types/transforming-option-ref-issue-127545.rs b/tests/ui/mismatched_types/transforming-option-ref-issue-127545.rs index f589e88f68e4..0632b822c55c 100644 --- a/tests/ui/mismatched_types/transforming-option-ref-issue-127545.rs +++ b/tests/ui/mismatched_types/transforming-option-ref-issue-127545.rs @@ -2,17 +2,17 @@ #![crate_type = "lib"] pub fn foo(arg: Option<&Vec>) -> Option<&[i32]> { - arg //~ ERROR 5:5: 5:8: mismatched types [E0308] + arg //~ ERROR mismatched types [E0308] } pub fn bar(arg: Option<&Vec>) -> &[i32] { - arg.unwrap_or(&[]) //~ ERROR 9:19: 9:22: mismatched types [E0308] + arg.unwrap_or(&[]) //~ ERROR mismatched types [E0308] } pub fn barzz<'a>(arg: Option<&'a Vec>, v: &'a [i32]) -> &'a [i32] { - arg.unwrap_or(v) //~ ERROR 13:19: 13:20: mismatched types [E0308] + arg.unwrap_or(v) //~ ERROR mismatched types [E0308] } pub fn convert_result(arg: Result<&Vec, ()>) -> &[i32] { - arg.unwrap_or(&[]) //~ ERROR 17:19: 17:22: mismatched types [E0308] + arg.unwrap_or(&[]) //~ ERROR 
mismatched types [E0308] } diff --git a/tests/ui/modules/issue-107649.stderr b/tests/ui/modules/issue-107649.stderr index 0d203c1aacba..802ac669a10e 100644 --- a/tests/ui/modules/issue-107649.stderr +++ b/tests/ui/modules/issue-107649.stderr @@ -2,11 +2,10 @@ error[E0277]: `Dummy` doesn't implement `Debug` --> $DIR/issue-107649.rs:105:5 | 105 | dbg!(lib::Dummy); - | ^^^^^^^^^^^^^^^^ `Dummy` cannot be formatted using `{:?}` + | ^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `Dummy` | - = help: the trait `Debug` is not implemented for `Dummy` = note: add `#[derive(Debug)]` to `Dummy` or manually `impl Debug for Dummy` - = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `dbg` (in Nightly builds, run with -Z macro-backtrace for more info) + = note: this error originates in the macro `dbg` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider annotating `Dummy` with `#[derive(Debug)]` --> $DIR/auxiliary/dummy_lib.rs:2:1 | diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/submodule/mod.rs b/tests/ui/modules/module_suggestion_when_module_not_found/submodule/mod.rs new file mode 100644 index 000000000000..cb924172efea --- /dev/null +++ b/tests/ui/modules/module_suggestion_when_module_not_found/submodule/mod.rs @@ -0,0 +1 @@ +//@ ignore-auxiliary diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/submodule2.rs b/tests/ui/modules/module_suggestion_when_module_not_found/submodule2.rs new file mode 100644 index 000000000000..cb924172efea --- /dev/null +++ b/tests/ui/modules/module_suggestion_when_module_not_found/submodule2.rs @@ -0,0 +1 @@ +//@ ignore-auxiliary diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/success.rs b/tests/ui/modules/module_suggestion_when_module_not_found/success.rs new file mode 100644 index 000000000000..888e6ab3f193 --- /dev/null +++ 
b/tests/ui/modules/module_suggestion_when_module_not_found/success.rs @@ -0,0 +1,4 @@ +//@ ignore-auxiliary + +use submodule3::ferris; // these modules are unresolved. +use submodule4::error; diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/success/compiletest-ignore-dir b/tests/ui/modules/module_suggestion_when_module_not_found/success/compiletest-ignore-dir new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/success/submodule3/mod.rs b/tests/ui/modules/module_suggestion_when_module_not_found/success/submodule3/mod.rs new file mode 100644 index 000000000000..8337712ea57f --- /dev/null +++ b/tests/ui/modules/module_suggestion_when_module_not_found/success/submodule3/mod.rs @@ -0,0 +1 @@ +// diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/success/submodule4.rs b/tests/ui/modules/module_suggestion_when_module_not_found/success/submodule4.rs new file mode 100644 index 000000000000..8337712ea57f --- /dev/null +++ b/tests/ui/modules/module_suggestion_when_module_not_found/success/submodule4.rs @@ -0,0 +1 @@ +// diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/suggestion.rs b/tests/ui/modules/module_suggestion_when_module_not_found/suggestion.rs new file mode 100644 index 000000000000..f4c24bff2288 --- /dev/null +++ b/tests/ui/modules/module_suggestion_when_module_not_found/suggestion.rs @@ -0,0 +1,7 @@ +//@ edition:2024 +use submodule::cat; //~ ERROR unresolved import `submodule` +use submodule2::help; //~ ERROR unresolved import `submodule2` +mod success; +fn main() {} +//~? ERROR unresolved import `submodule3` +//~? 
ERROR unresolved import `submodule4` diff --git a/tests/ui/modules/module_suggestion_when_module_not_found/suggestion.stderr b/tests/ui/modules/module_suggestion_when_module_not_found/suggestion.stderr new file mode 100644 index 000000000000..6375d71c2807 --- /dev/null +++ b/tests/ui/modules/module_suggestion_when_module_not_found/suggestion.stderr @@ -0,0 +1,49 @@ +error[E0432]: unresolved import `submodule` + --> $DIR/suggestion.rs:2:5 + | +LL | use submodule::cat; + | ^^^^^^^^^ use of unresolved module or unlinked crate `submodule` + | +help: to make use of source file $DIR/submodule/mod.rs, use `mod submodule` in this file to declare the module + | +LL + mod submodule; + | + +error[E0432]: unresolved import `submodule2` + --> $DIR/suggestion.rs:3:5 + | +LL | use submodule2::help; + | ^^^^^^^^^^ use of unresolved module or unlinked crate `submodule2` + | +help: to make use of source file $DIR/submodule2.rs, use `mod submodule2` in this file to declare the module + | +LL + mod submodule2; + | + +error[E0432]: unresolved import `submodule3` + --> $DIR/success.rs:3:5 + | +LL | use submodule3::ferris; // these modules are unresolved. + | ^^^^^^^^^^ use of unresolved module or unlinked crate `submodule3` + | +help: to make use of source file $DIR/success/submodule3/mod.rs, use `mod submodule3` in this file to declare the module + --> $DIR/suggestion.rs:2:1 + | +LL + mod submodule3; + | + +error[E0432]: unresolved import `submodule4` + --> $DIR/success.rs:4:5 + | +LL | use submodule4::error; + | ^^^^^^^^^^ use of unresolved module or unlinked crate `submodule4` + | +help: to make use of source file $DIR/success/submodule4.rs, use `mod submodule4` in this file to declare the module + --> $DIR/suggestion.rs:2:1 + | +LL + mod submodule4; + | + +error: aborting due to 4 previous errors + +For more information about this error, try `rustc --explain E0432`. 
diff --git a/tests/ui/modules_and_files_visibility/mod_file_disambig.stderr b/tests/ui/modules_and_files_visibility/mod_file_disambig.stderr index f82d613015f5..e71a6de2fb9b 100644 --- a/tests/ui/modules_and_files_visibility/mod_file_disambig.stderr +++ b/tests/ui/modules_and_files_visibility/mod_file_disambig.stderr @@ -12,7 +12,10 @@ error[E0433]: failed to resolve: use of unresolved module or unlinked crate `mod LL | assert_eq!(mod_file_aux::bar(), 10); | ^^^^^^^^^^^^ use of unresolved module or unlinked crate `mod_file_aux` | - = help: you might be missing a crate named `mod_file_aux` +help: to make use of source file $DIR/mod_file_aux.rs, use `mod mod_file_aux` in this file to declare the module + | +LL + mod mod_file_aux; + | error: aborting due to 2 previous errors diff --git a/tests/ui/moves/moves-based-on-type-capture-clause-bad.fixed b/tests/ui/moves/moves-based-on-type-capture-clause-bad.fixed new file mode 100644 index 000000000000..04a183ca96be --- /dev/null +++ b/tests/ui/moves/moves-based-on-type-capture-clause-bad.fixed @@ -0,0 +1,11 @@ +//@ run-rustfix +use std::thread; + +fn main() { + let x = "Hello world!".to_string(); + let value = x.clone(); + thread::spawn(move || { + println!("{}", value); + }); + println!("{}", x); //~ ERROR borrow of moved value +} diff --git a/tests/ui/moves/moves-based-on-type-capture-clause-bad.rs b/tests/ui/moves/moves-based-on-type-capture-clause-bad.rs index 9d7277c1c249..c9a7f2c8ed80 100644 --- a/tests/ui/moves/moves-based-on-type-capture-clause-bad.rs +++ b/tests/ui/moves/moves-based-on-type-capture-clause-bad.rs @@ -1,3 +1,4 @@ +//@ run-rustfix use std::thread; fn main() { diff --git a/tests/ui/moves/moves-based-on-type-capture-clause-bad.stderr b/tests/ui/moves/moves-based-on-type-capture-clause-bad.stderr index c2b9aeab2374..17049fe67318 100644 --- a/tests/ui/moves/moves-based-on-type-capture-clause-bad.stderr +++ b/tests/ui/moves/moves-based-on-type-capture-clause-bad.stderr @@ -1,5 +1,5 @@ error[E0382]: borrow 
of moved value: `x` - --> $DIR/moves-based-on-type-capture-clause-bad.rs:8:20 + --> $DIR/moves-based-on-type-capture-clause-bad.rs:9:20 | LL | let x = "Hello world!".to_string(); | - move occurs because `x` has type `String`, which does not implement the `Copy` trait @@ -12,6 +12,12 @@ LL | println!("{}", x); | ^ value borrowed here after move | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) +help: consider cloning the value before moving it into the closure + | +LL ~ let value = x.clone(); +LL ~ thread::spawn(move || { +LL ~ println!("{}", value); + | error: aborting due to 1 previous error diff --git a/tests/ui/no-capture-arc.rs b/tests/ui/moves/no-capture-arc.rs similarity index 100% rename from tests/ui/no-capture-arc.rs rename to tests/ui/moves/no-capture-arc.rs diff --git a/tests/ui/no-capture-arc.stderr b/tests/ui/moves/no-capture-arc.stderr similarity index 79% rename from tests/ui/no-capture-arc.stderr rename to tests/ui/moves/no-capture-arc.stderr index 9c1f5c65066f..6d4a867fa88d 100644 --- a/tests/ui/no-capture-arc.stderr +++ b/tests/ui/moves/no-capture-arc.stderr @@ -13,6 +13,12 @@ LL | assert_eq!((*arc_v)[2], 3); | ^^^^^ value borrowed here after move | = note: borrow occurs due to deref coercion to `Vec` +help: consider cloning the value before moving it into the closure + | +LL ~ let value = arc_v.clone(); +LL ~ thread::spawn(move|| { +LL ~ assert_eq!((*value)[3], 4); + | error: aborting due to 1 previous error diff --git a/tests/ui/moves/no-reuse-move-arc.fixed b/tests/ui/moves/no-reuse-move-arc.fixed new file mode 100644 index 000000000000..a5dac8cc14bf --- /dev/null +++ b/tests/ui/moves/no-reuse-move-arc.fixed @@ -0,0 +1,17 @@ +//@ run-rustfix +use std::sync::Arc; +use std::thread; + +fn main() { + let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let arc_v = Arc::new(v); + + let value = arc_v.clone(); + 
thread::spawn(move|| { + assert_eq!((*value)[3], 4); + }); + + assert_eq!((*arc_v)[2], 3); //~ ERROR borrow of moved value: `arc_v` + + println!("{:?}", *arc_v); +} diff --git a/tests/ui/no-reuse-move-arc.rs b/tests/ui/moves/no-reuse-move-arc.rs similarity index 95% rename from tests/ui/no-reuse-move-arc.rs rename to tests/ui/moves/no-reuse-move-arc.rs index 9c957a4e01b4..0d67aa56489c 100644 --- a/tests/ui/no-reuse-move-arc.rs +++ b/tests/ui/moves/no-reuse-move-arc.rs @@ -1,3 +1,4 @@ +//@ run-rustfix use std::sync::Arc; use std::thread; diff --git a/tests/ui/no-reuse-move-arc.stderr b/tests/ui/moves/no-reuse-move-arc.stderr similarity index 75% rename from tests/ui/no-reuse-move-arc.stderr rename to tests/ui/moves/no-reuse-move-arc.stderr index 61f4837dc0e6..aff979af905e 100644 --- a/tests/ui/no-reuse-move-arc.stderr +++ b/tests/ui/moves/no-reuse-move-arc.stderr @@ -1,5 +1,5 @@ error[E0382]: borrow of moved value: `arc_v` - --> $DIR/no-reuse-move-arc.rs:12:18 + --> $DIR/no-reuse-move-arc.rs:13:18 | LL | let arc_v = Arc::new(v); | ----- move occurs because `arc_v` has type `Arc>`, which does not implement the `Copy` trait @@ -13,6 +13,12 @@ LL | assert_eq!((*arc_v)[2], 3); | ^^^^^ value borrowed here after move | = note: borrow occurs due to deref coercion to `Vec` +help: consider cloning the value before moving it into the closure + | +LL ~ let value = arc_v.clone(); +LL ~ thread::spawn(move|| { +LL ~ assert_eq!((*value)[3], 4); + | error: aborting due to 1 previous error diff --git a/tests/ui/never_type/never-type-fallback-option.rs b/tests/ui/never_type/never-type-fallback-option.rs new file mode 100644 index 000000000000..9c8103aa0a4c --- /dev/null +++ b/tests/ui/never_type/never-type-fallback-option.rs @@ -0,0 +1,22 @@ +//@ run-pass + +#![allow(warnings)] + +//! Tests type inference fallback to `!` (never type) in `Option` context. +//! +//! Regression test for issues: +//! - https://github.com/rust-lang/rust/issues/39808 +//! 
- https://github.com/rust-lang/rust/issues/39984 +//! +//! Here the type of `c` is `Option`, where `?T` is unconstrained. +//! Because there is data-flow from the `{ return; }` block, which +//! diverges and hence has type `!`, into `c`, we will default `?T` to +//! `!`, and hence this code compiles rather than failing and requiring +//! a type annotation. + +fn main() { + let c = Some({ + return; + }); + c.unwrap(); +} diff --git a/tests/ui/nll/user-annotations/region-error-ice-109072.stderr b/tests/ui/nll/user-annotations/region-error-ice-109072.stderr index d90971bed25b..42551b87f623 100644 --- a/tests/ui/nll/user-annotations/region-error-ice-109072.stderr +++ b/tests/ui/nll/user-annotations/region-error-ice-109072.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'missing` --> $DIR/region-error-ice-109072.rs:8:9 | LL | impl Lt<'missing> for () { - | - ^^^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'missing` here: `<'missing>` + | ^^^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'missing` here + | +LL | impl<'missing> Lt<'missing> for () { + | ++++++++++ error[E0261]: use of undeclared lifetime name `'missing` --> $DIR/region-error-ice-109072.rs:9:15 diff --git a/tests/ui/no_std/simple-runs.rs b/tests/ui/no_std/simple-runs.rs index 8931ac7ed11b..af44dfec311e 100644 --- a/tests/ui/no_std/simple-runs.rs +++ b/tests/ui/no_std/simple-runs.rs @@ -4,6 +4,7 @@ //@ compile-flags: -Cpanic=abort //@ ignore-wasm different `main` convention +#![feature(lang_items)] #![no_std] #![no_main] @@ -35,6 +36,17 @@ fn panic_handler(_info: &PanicInfo<'_>) -> ! 
{ loop {} } +#[lang = "eh_personality"] +extern "C" fn rust_eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { + loop {} +} + #[no_mangle] extern "C" fn main(_argc: c_int, _argv: *const *const c_char) -> c_int { 0 diff --git a/tests/ui/on-unimplemented/no-debug.stderr b/tests/ui/on-unimplemented/no-debug.stderr index 97d67dbd82e7..5b0b060d40ef 100644 --- a/tests/ui/on-unimplemented/no-debug.stderr +++ b/tests/ui/on-unimplemented/no-debug.stderr @@ -2,7 +2,9 @@ error[E0277]: `Foo` doesn't implement `Debug` --> $DIR/no-debug.rs:10:27 | LL | println!("{:?} {:?}", Foo, Bar); - | ^^^ `Foo` cannot be formatted using `{:?}` + | ---- ^^^ `Foo` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = help: the trait `Debug` is not implemented for `Foo` = note: add `#[derive(Debug)]` to `Foo` or manually `impl Debug for Foo` @@ -17,7 +19,9 @@ error[E0277]: `Bar` doesn't implement `Debug` --> $DIR/no-debug.rs:10:32 | LL | println!("{:?} {:?}", Foo, Bar); - | ^^^ `Bar` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^^^ `Bar` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = help: the trait `Debug` is not implemented for `Bar` = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) @@ -26,7 +30,9 @@ error[E0277]: `Foo` doesn't implement `std::fmt::Display` --> $DIR/no-debug.rs:11:23 | LL | println!("{} {}", Foo, Bar); - | ^^^ `Foo` cannot be formatted with the default formatter + | -- ^^^ `Foo` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `Foo` = note: in format strings you may be able to 
use `{:?}` (or {:#?} for pretty-print) instead @@ -36,7 +42,9 @@ error[E0277]: `Bar` doesn't implement `std::fmt::Display` --> $DIR/no-debug.rs:11:28 | LL | println!("{} {}", Foo, Bar); - | ^^^ `Bar` cannot be formatted with the default formatter + | -- ^^^ `Bar` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `Bar` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead diff --git a/tests/ui/panic-runtime/auxiliary/depends.rs b/tests/ui/panic-runtime/auxiliary/depends.rs deleted file mode 100644 index 7a35619b6813..000000000000 --- a/tests/ui/panic-runtime/auxiliary/depends.rs +++ /dev/null @@ -1,8 +0,0 @@ -//@ no-prefer-dynamic - -#![feature(panic_runtime)] -#![crate_type = "rlib"] -#![panic_runtime] -#![no_std] - -extern crate needs_panic_runtime; diff --git a/tests/ui/panic-runtime/auxiliary/needs-panic-runtime.rs b/tests/ui/panic-runtime/auxiliary/needs-panic-runtime.rs deleted file mode 100644 index fbafee0c2417..000000000000 --- a/tests/ui/panic-runtime/auxiliary/needs-panic-runtime.rs +++ /dev/null @@ -1,6 +0,0 @@ -//@ no-prefer-dynamic - -#![feature(needs_panic_runtime)] -#![crate_type = "rlib"] -#![needs_panic_runtime] -#![no_std] diff --git a/tests/ui/panic-runtime/incompatible-type.rs b/tests/ui/panic-runtime/incompatible-type.rs index 4cbcfec11c96..f82c23d68c2c 100644 --- a/tests/ui/panic-runtime/incompatible-type.rs +++ b/tests/ui/panic-runtime/incompatible-type.rs @@ -21,4 +21,12 @@ pub fn test(_: DropMe) { } #[rustc_std_internal_symbol] -pub unsafe extern "C" fn rust_eh_personality() {} +pub unsafe extern "C" fn rust_eh_personality( + _version: i32, + _actions: i32, + _exception_class: u64, + _exception_object: *mut (), + _context: *mut (), +) -> i32 { + loop {} +} diff --git a/tests/ui/panic-runtime/runtime-depend-on-needs-runtime.rs b/tests/ui/panic-runtime/runtime-depend-on-needs-runtime.rs deleted file 
mode 100644 index eb00c071702c..000000000000 --- a/tests/ui/panic-runtime/runtime-depend-on-needs-runtime.rs +++ /dev/null @@ -1,9 +0,0 @@ -//@ dont-check-compiler-stderr -//@ aux-build:needs-panic-runtime.rs -//@ aux-build:depends.rs - -extern crate depends; - -fn main() {} - -//~? ERROR the crate `depends` cannot depend on a crate that needs a panic runtime, but it depends on `needs_panic_runtime` diff --git a/tests/ui/parser/bad-lit-suffixes.rs b/tests/ui/parser/bad-lit-suffixes.rs index f29dc53d322b..4e8edf4d46ea 100644 --- a/tests/ui/parser/bad-lit-suffixes.rs +++ b/tests/ui/parser/bad-lit-suffixes.rs @@ -33,7 +33,6 @@ fn f() {} #[must_use = "string"suffix] //~^ ERROR suffixes on string literals are invalid -//~| ERROR malformed `must_use` attribute input fn g() {} #[link(name = "string"suffix)] diff --git a/tests/ui/parser/bad-lit-suffixes.stderr b/tests/ui/parser/bad-lit-suffixes.stderr index 86ef35bf7833..416143e496af 100644 --- a/tests/ui/parser/bad-lit-suffixes.stderr +++ b/tests/ui/parser/bad-lit-suffixes.stderr @@ -22,29 +22,14 @@ error: suffixes on string literals are invalid LL | #[must_use = "string"suffix] | ^^^^^^^^^^^^^^ invalid suffix `suffix` -error: malformed `must_use` attribute input - --> $DIR/bad-lit-suffixes.rs:34:1 - | -LL | #[must_use = "string"suffix] - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | -help: the following are the possible correct uses - | -LL - #[must_use = "string"suffix] -LL + #[must_use = "reason"] - | -LL - #[must_use = "string"suffix] -LL + #[must_use] - | - error: suffixes on string literals are invalid - --> $DIR/bad-lit-suffixes.rs:39:15 + --> $DIR/bad-lit-suffixes.rs:38:15 | LL | #[link(name = "string"suffix)] | ^^^^^^^^^^^^^^ invalid suffix `suffix` error: invalid suffix `suffix` for number literal - --> $DIR/bad-lit-suffixes.rs:43:41 + --> $DIR/bad-lit-suffixes.rs:42:41 | LL | #[rustc_layout_scalar_valid_range_start(0suffix)] | ^^^^^^^ invalid suffix `suffix` @@ -165,5 +150,5 @@ LL | 1.0e10suffix; | = help: valid suffixes 
are `f32` and `f64` -error: aborting due to 21 previous errors; 2 warnings emitted +error: aborting due to 20 previous errors; 2 warnings emitted diff --git a/tests/ui/parser/bounds-type.rs b/tests/ui/parser/bounds-type.rs index ec0e83c314e1..1bd67bbba6b0 100644 --- a/tests/ui/parser/bounds-type.rs +++ b/tests/ui/parser/bounds-type.rs @@ -10,10 +10,10 @@ struct S< T: Tr +, // OK T: ?'a, //~ ERROR `?` may only modify trait bounds, not lifetime bounds - T: ~const Tr, // OK - T: ~const ?Tr, //~ ERROR `~const` trait not allowed with `?` trait polarity modifier - T: ~const Tr + 'a, // OK - T: ~const 'a, //~ ERROR `~const` may only modify trait bounds, not lifetime bounds + T: [const] Tr, // OK + T: [const] ?Tr, //~ ERROR `[const]` trait not allowed with `?` trait polarity modifier + T: [const] Tr + 'a, // OK + T: [const] 'a, //~ ERROR `[const]` may only modify trait bounds, not lifetime bounds T: const 'a, //~ ERROR `const` may only modify trait bounds, not lifetime bounds T: async Tr, // OK diff --git a/tests/ui/parser/bounds-type.stderr b/tests/ui/parser/bounds-type.stderr index 09c35c12b000..0d929c76f027 100644 --- a/tests/ui/parser/bounds-type.stderr +++ b/tests/ui/parser/bounds-type.stderr @@ -12,19 +12,19 @@ error: `?` may only modify trait bounds, not lifetime bounds LL | T: ?'a, | ^ -error: `~const` trait not allowed with `?` trait polarity modifier - --> $DIR/bounds-type.rs:14:15 +error: `[const]` trait not allowed with `?` trait polarity modifier + --> $DIR/bounds-type.rs:14:16 | -LL | T: ~const ?Tr, - | ------ ^ +LL | T: [const] ?Tr, + | ------- ^ | | - | there is not a well-defined meaning for a `~const ?` trait + | there is not a well-defined meaning for a `[const] ?` trait -error: `~const` may only modify trait bounds, not lifetime bounds - --> $DIR/bounds-type.rs:16:8 +error: `[const]` may only modify trait bounds, not lifetime bounds + --> $DIR/bounds-type.rs:16:6 | -LL | T: ~const 'a, - | ^^^^^^ +LL | T: [const] 'a, + | ^^^^^^^^^ error: `const` may only 
modify trait bounds, not lifetime bounds --> $DIR/bounds-type.rs:17:8 diff --git a/tests/ui/parser/issues/issue-105366.fixed b/tests/ui/parser/issues/issue-105366.fixed index 7157b647524d..95419dc07f2c 100644 --- a/tests/ui/parser/issues/issue-105366.fixed +++ b/tests/ui/parser/issues/issue-105366.fixed @@ -1,5 +1,6 @@ //@ run-rustfix +#[allow(dead_code)] struct Foo; impl From for Foo { diff --git a/tests/ui/parser/issues/issue-105366.rs b/tests/ui/parser/issues/issue-105366.rs index dc3cb8b343d3..3278b7379912 100644 --- a/tests/ui/parser/issues/issue-105366.rs +++ b/tests/ui/parser/issues/issue-105366.rs @@ -1,5 +1,6 @@ //@ run-rustfix +#[allow(dead_code)] struct Foo; fn From for Foo { diff --git a/tests/ui/parser/issues/issue-105366.stderr b/tests/ui/parser/issues/issue-105366.stderr index d8c79a0e0eaf..225e436b4aa8 100644 --- a/tests/ui/parser/issues/issue-105366.stderr +++ b/tests/ui/parser/issues/issue-105366.stderr @@ -1,5 +1,5 @@ error: you might have meant to write `impl` instead of `fn` - --> $DIR/issue-105366.rs:5:1 + --> $DIR/issue-105366.rs:6:1 | LL | fn From for Foo { | ^^ diff --git a/tests/ui/parser/recover/recover-field-semi.rs b/tests/ui/parser/recover/recover-field-semi.rs index b703578860ec..b6f235f8ad1c 100644 --- a/tests/ui/parser/recover/recover-field-semi.rs +++ b/tests/ui/parser/recover/recover-field-semi.rs @@ -3,7 +3,7 @@ struct Foo { //~^ ERROR struct fields are separated by `,` } -union Bar { //~ ERROR +union Bar { foo: i32; //~^ ERROR union fields are separated by `,` } @@ -13,4 +13,6 @@ enum Baz { //~^ ERROR struct fields are separated by `,` } -fn main() {} +fn main() { + let _ = Foo { foo: "" }; //~ ERROR mismatched types +} diff --git a/tests/ui/parser/recover/recover-field-semi.stderr b/tests/ui/parser/recover/recover-field-semi.stderr index 3cf4847488c0..9b1a34e134b6 100644 --- a/tests/ui/parser/recover/recover-field-semi.stderr +++ b/tests/ui/parser/recover/recover-field-semi.stderr @@ -22,14 +22,12 @@ LL | Qux { foo: i32; } | | 
| while parsing this struct -error: unions cannot have zero fields - --> $DIR/recover-field-semi.rs:6:1 +error[E0308]: mismatched types + --> $DIR/recover-field-semi.rs:17:24 | -LL | / union Bar { -LL | | foo: i32; -LL | | -LL | | } - | |_^ +LL | let _ = Foo { foo: "" }; + | ^^ expected `i32`, found `&str` error: aborting due to 4 previous errors +For more information about this error, try `rustc --explain E0308`. diff --git a/tests/ui/parser/trait-object-delimiters.rs b/tests/ui/parser/trait-object-delimiters.rs index 8f6221c1b943..1cbd2ff1bdfb 100644 --- a/tests/ui/parser/trait-object-delimiters.rs +++ b/tests/ui/parser/trait-object-delimiters.rs @@ -8,7 +8,7 @@ fn foo2(_: &dyn (Drop + AsRef)) {} //~ ERROR incorrect parentheses around t fn foo2_no_space(_: &dyn(Drop + AsRef)) {} //~ ERROR incorrect parentheses around trait bounds fn foo3(_: &dyn {Drop + AsRef}) {} //~ ERROR expected parameter name, found `{` -//~^ ERROR expected one of `!`, `(`, `)`, `*`, `,`, `?`, `async`, `const`, `for`, `use`, `~`, lifetime, or path, found `{` +//~^ ERROR expected one of `!`, `(`, `)`, `*`, `,`, `?`, `[`, `async`, `const`, `for`, `use`, `~`, lifetime, or path, found `{` //~| ERROR at least one trait is required for an object type fn foo4(_: &dyn >) {} //~ ERROR expected identifier, found `<` diff --git a/tests/ui/parser/trait-object-delimiters.stderr b/tests/ui/parser/trait-object-delimiters.stderr index be130ac7ab23..16d5392eec84 100644 --- a/tests/ui/parser/trait-object-delimiters.stderr +++ b/tests/ui/parser/trait-object-delimiters.stderr @@ -39,11 +39,11 @@ error: expected parameter name, found `{` LL | fn foo3(_: &dyn {Drop + AsRef}) {} | ^ expected parameter name -error: expected one of `!`, `(`, `)`, `*`, `,`, `?`, `async`, `const`, `for`, `use`, `~`, lifetime, or path, found `{` +error: expected one of `!`, `(`, `)`, `*`, `,`, `?`, `[`, `async`, `const`, `for`, `use`, `~`, lifetime, or path, found `{` --> $DIR/trait-object-delimiters.rs:10:17 | LL | fn foo3(_: &dyn 
{Drop + AsRef}) {} - | -^ expected one of 13 possible tokens + | -^ expected one of 14 possible tokens | | | help: missing `,` diff --git a/tests/ui/pin-ergonomics/borrow-unpin.pinned.stderr b/tests/ui/pin-ergonomics/borrow-unpin.pinned.stderr new file mode 100644 index 000000000000..cc438461a5d1 --- /dev/null +++ b/tests/ui/pin-ergonomics/borrow-unpin.pinned.stderr @@ -0,0 +1,238 @@ +error[E0382]: use of moved value: `foo` + --> $DIR/borrow-unpin.rs:39:14 + | +LL | let foo = Foo::default(); + | --- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | foo_pin_mut(&pin mut foo); + | --- value moved here +LL | foo_move(foo); + | ^^^ value used here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | foo_pin_mut(&pin mut foo); + | --- you could clone this value + +error[E0382]: use of moved value: `foo` + --> $DIR/borrow-unpin.rs:43:14 + | +LL | let foo = Foo::default(); + | --- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | let x = &pin mut foo; + | --- value moved here +LL | foo_move(foo); + | ^^^ value used here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... 
+LL | let x = &pin mut foo; + | --- you could clone this value + +error[E0382]: use of moved value: `foo` + --> $DIR/borrow-unpin.rs:52:14 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | foo_pin_mut(&pin mut foo); // ok + | --- value moved here +LL | foo_move(foo); + | ^^^ value used here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | foo_pin_mut(&pin mut foo); // ok + | --- you could clone this value + +error[E0382]: use of moved value: `foo` + --> $DIR/borrow-unpin.rs:56:14 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | let x = &pin mut foo; // ok + | --- value moved here +LL | foo_move(foo); + | ^^^ value used here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin mut foo; // ok + | --- you could clone this value + +error[E0505]: cannot move out of `foo` because it is borrowed + --> $DIR/borrow-unpin.rs:68:14 + | +LL | let foo = Foo::default(); + | --- binding `foo` declared here +LL | let x = &pin const foo; // ok + | -------------- borrow of `foo` occurs here +LL | foo_move(foo); + | ^^^ move out of `foo` occurs here +LL | +LL | foo_pin_ref(x); + | - borrow later used here + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... 
+LL | let x = &pin const foo; // ok + | --- you could clone this value + +error[E0382]: borrow of moved value: `foo` + --> $DIR/borrow-unpin.rs:76:13 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | foo_pin_mut(&pin mut foo); // ok + | --- value moved here +LL | foo_ref(&foo); + | ^^^^ value borrowed here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | foo_pin_mut(&pin mut foo); // ok + | --- you could clone this value + +error[E0382]: borrow of moved value: `foo` + --> $DIR/borrow-unpin.rs:80:13 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | let x = &pin mut foo; // ok + | --- value moved here +LL | foo_ref(&foo); + | ^^^^ value borrowed here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin mut foo; // ok + | --- you could clone this value + +error[E0382]: use of moved value: `foo` + --> $DIR/borrow-unpin.rs:99:26 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | foo_pin_mut(&pin mut foo); // ok + | --- value moved here +LL | foo_pin_mut(&pin mut foo); + | ^^^ value used here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... 
+LL | foo_pin_mut(&pin mut foo); // ok + | --- you could clone this value + +error[E0382]: use of moved value: `foo` + --> $DIR/borrow-unpin.rs:103:26 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | let x = &pin mut foo; // ok + | --- value moved here +LL | foo_pin_mut(&pin mut foo); + | ^^^ value used here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin mut foo; // ok + | --- you could clone this value + +error[E0505]: cannot move out of `foo` because it is borrowed + --> $DIR/borrow-unpin.rs:115:26 + | +LL | let mut foo = Foo::default(); + | ------- binding `foo` declared here +LL | let x = &pin const foo; // ok + | -------------- borrow of `foo` occurs here +LL | foo_pin_mut(&pin mut foo); + | ^^^ move out of `foo` occurs here +LL | +LL | foo_pin_ref(x); + | - borrow later used here + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin const foo; // ok + | --- you could clone this value + +error[E0382]: borrow of moved value: `foo` + --> $DIR/borrow-unpin.rs:123:17 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | foo_pin_mut(&pin mut foo); // ok + | --- value moved here +LL | foo_pin_ref(&pin const foo); + | ^^^^^^^^^^^^^^ value borrowed here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... 
+LL | foo_pin_mut(&pin mut foo); // ok + | --- you could clone this value + +error[E0382]: borrow of moved value: `foo` + --> $DIR/borrow-unpin.rs:127:17 + | +LL | let mut foo = Foo::default(); + | ------- move occurs because `foo` has type `Foo`, which does not implement the `Copy` trait +LL | let x = &pin mut foo; // ok + | --- value moved here +LL | foo_pin_ref(&pin const foo); + | ^^^^^^^^^^^^^^ value borrowed here after move + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:16:1 + | +LL | struct Foo(PhantomPinned); + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin mut foo; // ok + | --- you could clone this value + +error: aborting due to 12 previous errors + +Some errors have detailed explanations: E0382, E0505. +For more information about an error, try `rustc --explain E0382`. diff --git a/tests/ui/pin-ergonomics/borrow-unpin.rs b/tests/ui/pin-ergonomics/borrow-unpin.rs new file mode 100644 index 000000000000..61e69bab12bc --- /dev/null +++ b/tests/ui/pin-ergonomics/borrow-unpin.rs @@ -0,0 +1,143 @@ +//@ revisions: unpin pinned +#![feature(pin_ergonomics)] +#![allow(dead_code, incomplete_features)] + +// For now, in order to ensure soundness, we move the place in `&pin mut place` +// if `place` is not `Unpin`. +// In the next step, we borrow the place instead of moving it, after that we +// have to makes sure `&pin mut place` and `&pin const place` cannot violate +// the mut-xor-share rules. 
+ +use std::pin::Pin; +use std::marker::PhantomPinned; + +#[cfg(pinned)] +#[derive(Default)] +struct Foo(PhantomPinned); + +#[cfg(unpin)] +#[derive(Default)] +struct Foo; + +fn foo_mut(_: &mut Foo) { +} + +fn foo_ref(_: &Foo) { +} + +fn foo_pin_mut(_: Pin<&mut Foo>) { +} + +fn foo_pin_ref(_: Pin<&Foo>) { +} + +fn foo_move(_: Foo) {} + +fn immutable_pin_mut_then_move() { + let foo = Foo::default(); + foo_pin_mut(&pin mut foo); //[unpin]~ ERROR cannot borrow `foo` as mutable, as it is not declared as mutable + foo_move(foo); //[pinned]~ ERROR use of moved value: `foo` + + let foo = Foo::default(); + let x = &pin mut foo; //[unpin]~ ERROR cannot borrow `foo` as mutable, as it is not declared as mutable + foo_move(foo); //[pinned]~ ERROR use of moved value: `foo` + //[unpin]~^ ERROR cannot move out of `foo` because it is borrowed + foo_pin_mut(x); // +} + + +fn pin_mut_then_move() { + let mut foo = Foo::default(); + foo_pin_mut(&pin mut foo); // ok + foo_move(foo); //[pinned]~ ERROR use of moved value: `foo` + + let mut foo = Foo::default(); + let x = &pin mut foo; // ok + foo_move(foo); //[pinned]~ ERROR use of moved value: `foo` + //[unpin]~^ ERROR cannot move out of `foo` because it is borrowed + foo_pin_mut(x); // +} + +fn pin_ref_then_move() { + let foo = Foo::default(); + foo_pin_ref(&pin const foo); // ok + foo_move(foo); // ok + + let foo = Foo::default(); + let x = &pin const foo; // ok + foo_move(foo); //[pinned]~ ERROR cannot move out of `foo` because it is borrowed + //[unpin]~^ ERROR cannot move out of `foo` because it is borrowed + foo_pin_ref(x); +} + +fn pin_mut_then_ref() { + let mut foo = Foo::default(); + foo_pin_mut(&pin mut foo); // ok + foo_ref(&foo); //[pinned]~ ERROR borrow of moved value: `foo` + + let mut foo = Foo::default(); + let x = &pin mut foo; // ok + foo_ref(&foo); //[pinned]~ ERROR borrow of moved value: `foo` + //[unpin]~^ ERROR cannot borrow `foo` as immutable because it is also borrowed as mutable + foo_pin_mut(x); +} + +fn 
pin_ref_then_ref() { + let mut foo = Foo::default(); + foo_pin_ref(&pin const foo); // ok + foo_ref(&foo); // ok + + let mut foo = Foo::default(); + let x = &pin const foo; // ok + foo_ref(&foo); // ok + foo_pin_ref(x); +} + +fn pin_mut_then_pin_mut() { + let mut foo = Foo::default(); + foo_pin_mut(&pin mut foo); // ok + foo_pin_mut(&pin mut foo); //[pinned]~ ERROR use of moved value: `foo` + + let mut foo = Foo::default(); + let x = &pin mut foo; // ok + foo_pin_mut(&pin mut foo); //[pinned]~ ERROR use of moved value: `foo` + //[unpin]~^ ERROR cannot borrow `foo` as mutable more than once at a time + foo_pin_mut(x); +} + +fn pin_ref_then_pin_mut() { + let mut foo = Foo::default(); + foo_pin_ref(&pin const foo); // ok + foo_pin_mut(&pin mut foo); // ok + + let mut foo = Foo::default(); + let x = &pin const foo; // ok + foo_pin_mut(&pin mut foo); //[pinned]~ ERROR cannot move out of `foo` because it is borrowed + //[unpin]~^ ERROR cannot borrow `foo` as mutable because it is also borrowed as immutable + foo_pin_ref(x); +} + +fn pin_mut_then_pin_ref() { + let mut foo = Foo::default(); + foo_pin_mut(&pin mut foo); // ok + foo_pin_ref(&pin const foo); //[pinned]~ ERROR borrow of moved value: `foo` + + let mut foo = Foo::default(); + let x = &pin mut foo; // ok + foo_pin_ref(&pin const foo); //[pinned]~ ERROR borrow of moved value: `foo` + //[unpin]~^ ERROR cannot borrow `foo` as immutable because it is also borrowed as mutable + foo_pin_mut(x); +} + +fn pin_ref_then_pin_ref() { + let mut foo = Foo::default(); + foo_pin_ref(&pin const foo); // ok + foo_pin_ref(&pin const foo); // ok + + let mut foo = Foo::default(); + let x = &pin const foo; // ok + foo_pin_ref(&pin const foo); // ok + foo_pin_ref(x); +} + +fn main() {} diff --git a/tests/ui/pin-ergonomics/borrow-unpin.unpin.stderr b/tests/ui/pin-ergonomics/borrow-unpin.unpin.stderr new file mode 100644 index 000000000000..bf9921343ee7 --- /dev/null +++ b/tests/ui/pin-ergonomics/borrow-unpin.unpin.stderr @@ -0,0 +1,136 
@@ +error[E0596]: cannot borrow `foo` as mutable, as it is not declared as mutable + --> $DIR/borrow-unpin.rs:38:17 + | +LL | foo_pin_mut(&pin mut foo); + | ^^^^^^^^^^^^ cannot borrow as mutable + | +help: consider changing this to be mutable + | +LL | let mut foo = Foo::default(); + | +++ + +error[E0596]: cannot borrow `foo` as mutable, as it is not declared as mutable + --> $DIR/borrow-unpin.rs:42:13 + | +LL | let x = &pin mut foo; + | ^^^^^^^^^^^^ cannot borrow as mutable + | +help: consider changing this to be mutable + | +LL | let mut foo = Foo::default(); + | +++ + +error[E0505]: cannot move out of `foo` because it is borrowed + --> $DIR/borrow-unpin.rs:43:14 + | +LL | let foo = Foo::default(); + | --- binding `foo` declared here +LL | let x = &pin mut foo; + | ------------ borrow of `foo` occurs here +LL | foo_move(foo); + | ^^^ move out of `foo` occurs here +LL | +LL | foo_pin_mut(x); // + | - borrow later used here + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:20:1 + | +LL | struct Foo; + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin mut foo; + | --- you could clone this value + +error[E0505]: cannot move out of `foo` because it is borrowed + --> $DIR/borrow-unpin.rs:56:14 + | +LL | let mut foo = Foo::default(); + | ------- binding `foo` declared here +LL | let x = &pin mut foo; // ok + | ------------ borrow of `foo` occurs here +LL | foo_move(foo); + | ^^^ move out of `foo` occurs here +LL | +LL | foo_pin_mut(x); // + | - borrow later used here + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:20:1 + | +LL | struct Foo; + | ^^^^^^^^^^ consider implementing `Clone` for this type +... 
+LL | let x = &pin mut foo; // ok + | --- you could clone this value + +error[E0505]: cannot move out of `foo` because it is borrowed + --> $DIR/borrow-unpin.rs:68:14 + | +LL | let foo = Foo::default(); + | --- binding `foo` declared here +LL | let x = &pin const foo; // ok + | -------------- borrow of `foo` occurs here +LL | foo_move(foo); + | ^^^ move out of `foo` occurs here +LL | +LL | foo_pin_ref(x); + | - borrow later used here + | +note: if `Foo` implemented `Clone`, you could clone the value + --> $DIR/borrow-unpin.rs:20:1 + | +LL | struct Foo; + | ^^^^^^^^^^ consider implementing `Clone` for this type +... +LL | let x = &pin const foo; // ok + | --- you could clone this value + +error[E0502]: cannot borrow `foo` as immutable because it is also borrowed as mutable + --> $DIR/borrow-unpin.rs:80:13 + | +LL | let x = &pin mut foo; // ok + | ------------ mutable borrow occurs here +LL | foo_ref(&foo); + | ^^^^ immutable borrow occurs here +LL | +LL | foo_pin_mut(x); + | - mutable borrow later used here + +error[E0499]: cannot borrow `foo` as mutable more than once at a time + --> $DIR/borrow-unpin.rs:103:17 + | +LL | let x = &pin mut foo; // ok + | ------------ first mutable borrow occurs here +LL | foo_pin_mut(&pin mut foo); + | ^^^^^^^^^^^^ second mutable borrow occurs here +LL | +LL | foo_pin_mut(x); + | - first borrow later used here + +error[E0502]: cannot borrow `foo` as mutable because it is also borrowed as immutable + --> $DIR/borrow-unpin.rs:115:17 + | +LL | let x = &pin const foo; // ok + | -------------- immutable borrow occurs here +LL | foo_pin_mut(&pin mut foo); + | ^^^^^^^^^^^^ mutable borrow occurs here +LL | +LL | foo_pin_ref(x); + | - immutable borrow later used here + +error[E0502]: cannot borrow `foo` as immutable because it is also borrowed as mutable + --> $DIR/borrow-unpin.rs:127:17 + | +LL | let x = &pin mut foo; // ok + | ------------ mutable borrow occurs here +LL | foo_pin_ref(&pin const foo); + | ^^^^^^^^^^^^^^ immutable borrow 
occurs here +LL | +LL | foo_pin_mut(x); + | - mutable borrow later used here + +error: aborting due to 9 previous errors + +Some errors have detailed explanations: E0499, E0502, E0505, E0596. +For more information about an error, try `rustc --explain E0499`. diff --git a/tests/ui/pin-ergonomics/borrow.rs b/tests/ui/pin-ergonomics/borrow.rs new file mode 100644 index 000000000000..f221165848ba --- /dev/null +++ b/tests/ui/pin-ergonomics/borrow.rs @@ -0,0 +1,38 @@ +//@ check-pass +#![feature(pin_ergonomics)] +#![allow(dead_code, incomplete_features)] + +// Makes sure we can handle `&pin mut place` and `&pin const place` as sugar for +// `std::pin::pin!(place)` and `Pin::new(&place)`. + +use std::pin::Pin; + +struct Foo; + +fn foo_pin_mut(_: Pin<&mut Foo>) { +} + +fn foo_pin_ref(_: Pin<&Foo>) { +} + +fn bar() { + let mut x: Pin<&mut _> = &pin mut Foo; + foo_pin_mut(x.as_mut()); + foo_pin_mut(x.as_mut()); + foo_pin_ref(x); + + let x: Pin<&_> = &pin const Foo; + + foo_pin_ref(x); + foo_pin_ref(x); +} + +fn baz(mut x: Foo, y: Foo) { + let _x = &pin mut x; + let _x = x; // ok because `Foo: Unpin` and thus `&pin mut x` doesn't move `x` + + let _y = &pin const y; + let _y = y; // ok because `&pin const y` dosn't move `y` +} + +fn main() {} diff --git a/tests/ui/async-await/pin-ergonomics/coerce-non-pointer-pin.rs b/tests/ui/pin-ergonomics/coerce-non-pointer-pin.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/coerce-non-pointer-pin.rs rename to tests/ui/pin-ergonomics/coerce-non-pointer-pin.rs diff --git a/tests/ui/async-await/pin-ergonomics/coerce-non-pointer-pin.stderr b/tests/ui/pin-ergonomics/coerce-non-pointer-pin.stderr similarity index 100% rename from tests/ui/async-await/pin-ergonomics/coerce-non-pointer-pin.stderr rename to tests/ui/pin-ergonomics/coerce-non-pointer-pin.stderr diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-arg.rs b/tests/ui/pin-ergonomics/reborrow-arg.rs similarity index 100% rename from 
tests/ui/async-await/pin-ergonomics/reborrow-arg.rs rename to tests/ui/pin-ergonomics/reborrow-arg.rs diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-const-as-mut.rs b/tests/ui/pin-ergonomics/reborrow-const-as-mut.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/reborrow-const-as-mut.rs rename to tests/ui/pin-ergonomics/reborrow-const-as-mut.rs diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-const-as-mut.stderr b/tests/ui/pin-ergonomics/reborrow-const-as-mut.stderr similarity index 100% rename from tests/ui/async-await/pin-ergonomics/reborrow-const-as-mut.stderr rename to tests/ui/pin-ergonomics/reborrow-const-as-mut.stderr diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-once.rs b/tests/ui/pin-ergonomics/reborrow-once.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/reborrow-once.rs rename to tests/ui/pin-ergonomics/reborrow-once.rs diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-once.stderr b/tests/ui/pin-ergonomics/reborrow-once.stderr similarity index 100% rename from tests/ui/async-await/pin-ergonomics/reborrow-once.stderr rename to tests/ui/pin-ergonomics/reborrow-once.stderr diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-self.rs b/tests/ui/pin-ergonomics/reborrow-self.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/reborrow-self.rs rename to tests/ui/pin-ergonomics/reborrow-self.rs diff --git a/tests/ui/async-await/pin-ergonomics/reborrow-shorter.rs b/tests/ui/pin-ergonomics/reborrow-shorter.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/reborrow-shorter.rs rename to tests/ui/pin-ergonomics/reborrow-shorter.rs diff --git a/tests/ui/async-await/pin-ergonomics/sugar-ambiguity.rs b/tests/ui/pin-ergonomics/sugar-ambiguity.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/sugar-ambiguity.rs rename to tests/ui/pin-ergonomics/sugar-ambiguity.rs diff --git 
a/tests/ui/async-await/pin-ergonomics/sugar-no-const.rs b/tests/ui/pin-ergonomics/sugar-no-const.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/sugar-no-const.rs rename to tests/ui/pin-ergonomics/sugar-no-const.rs diff --git a/tests/ui/async-await/pin-ergonomics/sugar-no-const.stderr b/tests/ui/pin-ergonomics/sugar-no-const.stderr similarity index 100% rename from tests/ui/async-await/pin-ergonomics/sugar-no-const.stderr rename to tests/ui/pin-ergonomics/sugar-no-const.stderr diff --git a/tests/ui/async-await/pin-ergonomics/sugar-self.rs b/tests/ui/pin-ergonomics/sugar-self.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/sugar-self.rs rename to tests/ui/pin-ergonomics/sugar-self.rs diff --git a/tests/ui/async-await/pin-ergonomics/sugar.rs b/tests/ui/pin-ergonomics/sugar.rs similarity index 100% rename from tests/ui/async-await/pin-ergonomics/sugar.rs rename to tests/ui/pin-ergonomics/sugar.rs diff --git a/tests/ui/print-calling-conventions.stdout b/tests/ui/print-calling-conventions.stdout index 7b5ae4956606..b8b939e1c04e 100644 --- a/tests/ui/print-calling-conventions.stdout +++ b/tests/ui/print-calling-conventions.stdout @@ -1,6 +1,4 @@ C -C-cmse-nonsecure-call -C-cmse-nonsecure-entry C-unwind Rust aapcs @@ -9,6 +7,8 @@ avr-interrupt avr-non-blocking-interrupt cdecl cdecl-unwind +cmse-nonsecure-call +cmse-nonsecure-entry custom efiapi fastcall @@ -20,6 +20,7 @@ riscv-interrupt-m riscv-interrupt-s rust-call rust-cold +rust-invalid stdcall stdcall-unwind system diff --git a/tests/ui/range/range-1.stderr b/tests/ui/range/range-1.stderr index 37669dd3f47c..8878ba143097 100644 --- a/tests/ui/range/range-1.stderr +++ b/tests/ui/range/range-1.stderr @@ -10,16 +10,6 @@ error[E0277]: the trait bound `bool: Step` is not satisfied LL | for i in false..true {} | ^^^^^^^^^^^ the trait `Step` is not implemented for `bool` | - = help: the following other types implement trait `Step`: - Char - Ipv4Addr - Ipv6Addr - char - i128 - 
i16 - i32 - i64 - and 8 others = note: required for `std::ops::Range` to implement `Iterator` = note: required for `std::ops::Range` to implement `IntoIterator` diff --git a/tests/ui/recursion/recursive-static-definition.rs b/tests/ui/recursion/recursive-static-definition.rs index 55db6a86bf1f..4f0624eb1623 100644 --- a/tests/ui/recursion/recursive-static-definition.rs +++ b/tests/ui/recursion/recursive-static-definition.rs @@ -1,5 +1,5 @@ pub static FOO: u32 = FOO; -//~^ ERROR encountered static that tried to initialize itself with itself +//~^ ERROR encountered static that tried to access itself during initialization #[derive(Copy, Clone)] pub union Foo { @@ -7,6 +7,6 @@ pub union Foo { } pub static BAR: Foo = BAR; -//~^ ERROR encountered static that tried to initialize itself with itself +//~^ ERROR encountered static that tried to access itself during initialization fn main() {} diff --git a/tests/ui/recursion/recursive-static-definition.stderr b/tests/ui/recursion/recursive-static-definition.stderr index ce93c41bc67c..1e4005832cbb 100644 --- a/tests/ui/recursion/recursive-static-definition.stderr +++ b/tests/ui/recursion/recursive-static-definition.stderr @@ -1,10 +1,10 @@ -error[E0080]: encountered static that tried to initialize itself with itself +error[E0080]: encountered static that tried to access itself during initialization --> $DIR/recursive-static-definition.rs:1:23 | LL | pub static FOO: u32 = FOO; | ^^^ evaluation of `FOO` failed here -error[E0080]: encountered static that tried to initialize itself with itself +error[E0080]: encountered static that tried to access itself during initialization --> $DIR/recursive-static-definition.rs:9:23 | LL | pub static BAR: Foo = BAR; diff --git a/tests/ui/regions/regions-in-enums.stderr b/tests/ui/regions/regions-in-enums.stderr index 66537653291c..449763e8b591 100644 --- a/tests/ui/regions/regions-in-enums.stderr +++ b/tests/ui/regions/regions-in-enums.stderr @@ -1,18 +1,24 @@ error[E0261]: use of undeclared 
lifetime name `'foo` --> $DIR/regions-in-enums.rs:13:9 | -LL | enum No0 { - | - help: consider introducing lifetime `'foo` here: `<'foo>` LL | X5(&'foo usize) | ^^^^ undeclared lifetime + | +help: consider introducing lifetime `'foo` here + | +LL | enum No0<'foo> { + | ++++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-in-enums.rs:17:9 | -LL | enum No1 { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | X6(&'a usize) | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | enum No1<'a> { + | ++++ error: aborting due to 2 previous errors diff --git a/tests/ui/regions/regions-in-structs.stderr b/tests/ui/regions/regions-in-structs.stderr index 5dfdc2ee93b4..c34b1ffca64f 100644 --- a/tests/ui/regions/regions-in-structs.stderr +++ b/tests/ui/regions/regions-in-structs.stderr @@ -1,19 +1,24 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-in-structs.rs:10:9 | -LL | struct StructDecl { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | a: &'a isize, | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | struct StructDecl<'a> { + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-in-structs.rs:11:9 | -LL | struct StructDecl { - | - help: consider introducing lifetime `'a` here: `<'a>` -LL | a: &'a isize, LL | b: &'a isize, | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | struct StructDecl<'a> { + | ++++ error: aborting due to 2 previous errors diff --git a/tests/ui/regions/regions-name-undeclared.stderr b/tests/ui/regions/regions-name-undeclared.stderr index 532603de5f78..06e6f4299dea 100644 --- a/tests/ui/regions/regions-name-undeclared.stderr +++ b/tests/ui/regions/regions-name-undeclared.stderr @@ -50,9 +50,12 @@ LL | fn bar<'a>(x: &'a isize) { | -- lifetime parameter from outer item ... 
LL | type X = Option<&'a isize>; - | - ^^ use of generic parameter from outer item - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ use of generic parameter from outer item + | +help: consider introducing lifetime `'a` here + | +LL | type X<'a> = Option<&'a isize>; + | ++++ error[E0401]: can't use generic parameters from outer item --> $DIR/regions-name-undeclared.rs:28:13 @@ -60,10 +63,13 @@ error[E0401]: can't use generic parameters from outer item LL | fn bar<'a>(x: &'a isize) { | -- lifetime parameter from outer item ... -LL | enum E { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | E1(&'a isize) | ^^ use of generic parameter from outer item + | +help: consider introducing lifetime `'a` here + | +LL | enum E<'a> { + | ++++ error[E0401]: can't use generic parameters from outer item --> $DIR/regions-name-undeclared.rs:31:13 @@ -71,10 +77,13 @@ error[E0401]: can't use generic parameters from outer item LL | fn bar<'a>(x: &'a isize) { | -- lifetime parameter from outer item ... -LL | struct S { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | f: &'a isize | ^^ use of generic parameter from outer item + | +help: consider introducing lifetime `'a` here + | +LL | struct S<'a> { + | ++++ error[E0401]: can't use generic parameters from outer item --> $DIR/regions-name-undeclared.rs:33:14 @@ -83,17 +92,23 @@ LL | fn bar<'a>(x: &'a isize) { | -- lifetime parameter from outer item ... 
LL | fn f(a: &'a isize) { } - | - ^^ use of generic parameter from outer item - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ use of generic parameter from outer item + | +help: consider introducing lifetime `'a` here + | +LL | fn f<'a>(a: &'a isize) { } + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-name-undeclared.rs:41:17 | LL | fn fn_types(a: &'a isize, - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn fn_types<'a>(a: &'a isize, + | ++++ error[E0261]: use of undeclared lifetime name `'b` --> $DIR/regions-name-undeclared.rs:43:36 @@ -129,11 +144,13 @@ LL | fn fn_types<'b>(a: &'a isize, error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-name-undeclared.rs:47:17 | -LL | fn fn_types(a: &'a isize, - | - help: consider introducing lifetime `'a` here: `<'a>` -... LL | c: &'a isize) | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn fn_types<'a>(a: &'a isize, + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-name-undeclared.rs:53:31 diff --git a/tests/ui/regions/regions-undeclared.stderr b/tests/ui/regions/regions-undeclared.stderr index 6bfde5524ac4..2bc0f1848032 100644 --- a/tests/ui/regions/regions-undeclared.stderr +++ b/tests/ui/regions/regions-undeclared.stderr @@ -7,35 +7,46 @@ LL | static c_x: &'blk isize = &22; error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-undeclared.rs:4:10 | -LL | enum EnumDecl { - | - help: consider introducing lifetime `'a` here: `<'a>` LL | Foo(&'a isize), | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | enum EnumDecl<'a> { + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-undeclared.rs:5:10 | -LL | enum EnumDecl { - | - help: consider introducing lifetime `'a` here: `<'a>` -LL | Foo(&'a 
isize), LL | Bar(&'a isize), | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | enum EnumDecl<'a> { + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-undeclared.rs:8:15 | LL | fn fnDecl(x: &'a isize, - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `<'a>` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn fnDecl<'a>(x: &'a isize, + | ++++ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/regions-undeclared.rs:9:15 | -LL | fn fnDecl(x: &'a isize, - | - help: consider introducing lifetime `'a` here: `<'a>` LL | y: &'a isize) | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn fnDecl<'a>(x: &'a isize, + | ++++ error: aborting due to 5 previous errors diff --git a/tests/ui/repr/repr.stderr b/tests/ui/repr/repr.stderr index f3b11398eaa4..9e5813322783 100644 --- a/tests/ui/repr/repr.stderr +++ b/tests/ui/repr/repr.stderr @@ -5,7 +5,7 @@ LL | #[repr] | ^^^^^^^ | | | expected this to be a list - | help: must be of the form: `#[repr(C)]` + | help: must be of the form: `#[repr(C | Rust | align(...) | packed(...) | | transparent)]` error[E0539]: malformed `repr` attribute input --> $DIR/repr.rs:4:1 @@ -14,7 +14,7 @@ LL | #[repr = "B"] | ^^^^^^^^^^^^^ | | | expected this to be a list - | help: must be of the form: `#[repr(C)]` + | help: must be of the form: `#[repr(C | Rust | align(...) | packed(...) | | transparent)]` error[E0539]: malformed `repr` attribute input --> $DIR/repr.rs:7:1 @@ -23,7 +23,7 @@ LL | #[repr = "C"] | ^^^^^^^^^^^^^ | | | expected this to be a list - | help: must be of the form: `#[repr(C)]` + | help: must be of the form: `#[repr(C | Rust | align(...) | packed(...) 
| | transparent)]` error: aborting due to 3 previous errors diff --git a/tests/ui/rfcs/rfc-2091-track-caller/error-odd-syntax.stderr b/tests/ui/rfcs/rfc-2091-track-caller/error-odd-syntax.stderr index e22d812c8b03..6088945b829c 100644 --- a/tests/ui/rfcs/rfc-2091-track-caller/error-odd-syntax.stderr +++ b/tests/ui/rfcs/rfc-2091-track-caller/error-odd-syntax.stderr @@ -1,8 +1,12 @@ -error: malformed `track_caller` attribute input +error[E0565]: malformed `track_caller` attribute input --> $DIR/error-odd-syntax.rs:1:1 | LL | #[track_caller(1)] - | ^^^^^^^^^^^^^^^^^^ help: must be of the form: `#[track_caller]` + | ^^^^^^^^^^^^^^---^ + | | | + | | didn't expect any arguments here + | help: must be of the form: `#[track_caller]` error: aborting due to 1 previous error +For more information about this error, try `rustc --explain E0565`. diff --git a/tests/ui/rfcs/rfc-2091-track-caller/error-with-naked.stderr b/tests/ui/rfcs/rfc-2091-track-caller/error-with-naked.stderr index d3cafbc63508..303608061388 100644 --- a/tests/ui/rfcs/rfc-2091-track-caller/error-with-naked.stderr +++ b/tests/ui/rfcs/rfc-2091-track-caller/error-with-naked.stderr @@ -1,17 +1,17 @@ error[E0736]: attribute incompatible with `#[unsafe(naked)]` - --> $DIR/error-with-naked.rs:5:1 + --> $DIR/error-with-naked.rs:5:3 | LL | #[track_caller] - | ^^^^^^^^^^^^^^^ the `track_caller` attribute is incompatible with `#[unsafe(naked)]` + | ^^^^^^^^^^^^ the `track_caller` attribute is incompatible with `#[unsafe(naked)]` LL | LL | #[unsafe(naked)] | ---------------- function marked with `#[unsafe(naked)]` here error[E0736]: attribute incompatible with `#[unsafe(naked)]` - --> $DIR/error-with-naked.rs:17:5 + --> $DIR/error-with-naked.rs:17:7 | LL | #[track_caller] - | ^^^^^^^^^^^^^^^ the `track_caller` attribute is incompatible with `#[unsafe(naked)]` + | ^^^^^^^^^^^^ the `track_caller` attribute is incompatible with `#[unsafe(naked)]` LL | LL | #[unsafe(naked)] | ---------------- function marked with 
`#[unsafe(naked)]` here diff --git a/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-ref-impl.rs b/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-ref-impl.rs new file mode 100644 index 000000000000..c6e38c0758d7 --- /dev/null +++ b/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-ref-impl.rs @@ -0,0 +1,17 @@ +/// Check that only `&X: Debug` is required, not `X: Debug` +//@check-pass + +use std::fmt::Debug; +use std::fmt::Formatter; + +struct X; + +impl Debug for &X { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + f.write_str("X") + } +} + +fn main() { + dbg!(X); +} diff --git a/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.rs b/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.rs index f2fb62d76f3d..fe71f106fdf9 100644 --- a/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.rs +++ b/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.rs @@ -1,4 +1,7 @@ // Test ensuring that `dbg!(expr)` requires the passed type to implement `Debug`. +// +// `dbg!` shouldn't tell the user about format literal syntax; the user didn't write one. 
+//@ forbid-output: cannot be formatted using struct NotDebug; diff --git a/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.stderr b/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.stderr index 7ec018a95cc7..4e0ae9184150 100644 --- a/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.stderr +++ b/tests/ui/rfcs/rfc-2361-dbg-macro/dbg-macro-requires-debug.stderr @@ -1,12 +1,11 @@ error[E0277]: `NotDebug` doesn't implement `Debug` - --> $DIR/dbg-macro-requires-debug.rs:6:23 + --> $DIR/dbg-macro-requires-debug.rs:9:23 | LL | let _: NotDebug = dbg!(NotDebug); - | ^^^^^^^^^^^^^^ `NotDebug` cannot be formatted using `{:?}` + | ^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `NotDebug` | - = help: the trait `Debug` is not implemented for `NotDebug` = note: add `#[derive(Debug)]` to `NotDebug` or manually `impl Debug for NotDebug` - = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `dbg` (in Nightly builds, run with -Z macro-backtrace for more info) + = note: this error originates in the macro `dbg` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider annotating `NotDebug` with `#[derive(Debug)]` | LL + #[derive(Debug)] diff --git a/tests/ui/rustdoc/renamed-features-rustdoc_internals.rs b/tests/ui/rustdoc/renamed-features-rustdoc_internals.rs index 2257130280dc..739c624d0c6f 100644 --- a/tests/ui/rustdoc/renamed-features-rustdoc_internals.rs +++ b/tests/ui/rustdoc/renamed-features-rustdoc_internals.rs @@ -1,5 +1,3 @@ -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" 
-> "you are using $$RUSTC_VERSION" - #![feature(doc_keyword)] //~ ERROR #![feature(doc_primitive)] //~ ERROR #![crate_type = "lib"] diff --git a/tests/ui/rustdoc/renamed-features-rustdoc_internals.stderr b/tests/ui/rustdoc/renamed-features-rustdoc_internals.stderr index 9c664da8ee6c..0608a8b58a21 100644 --- a/tests/ui/rustdoc/renamed-features-rustdoc_internals.stderr +++ b/tests/ui/rustdoc/renamed-features-rustdoc_internals.stderr @@ -1,19 +1,19 @@ error[E0557]: feature has been removed - --> $DIR/renamed-features-rustdoc_internals.rs:3:12 + --> $DIR/renamed-features-rustdoc_internals.rs:1:12 | LL | #![feature(doc_keyword)] | ^^^^^^^^^^^ feature has been removed | - = note: removed in 1.58.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.58.0; see for more information = note: merged into `#![feature(rustdoc_internals)]` error[E0557]: feature has been removed - --> $DIR/renamed-features-rustdoc_internals.rs:4:12 + --> $DIR/renamed-features-rustdoc_internals.rs:2:12 | LL | #![feature(doc_primitive)] | ^^^^^^^^^^^^^ feature has been removed | - = note: removed in 1.58.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.58.0; see for more information = note: merged into `#![feature(rustdoc_internals)]` error: aborting due to 2 previous errors diff --git a/tests/ui/sanitizer/cfi/invalid-attr-encoding.rs b/tests/ui/sanitizer/cfi/invalid-attr-encoding.rs index 7ef6bd2f0acc..23ffabad62fe 100644 --- a/tests/ui/sanitizer/cfi/invalid-attr-encoding.rs +++ b/tests/ui/sanitizer/cfi/invalid-attr-encoding.rs @@ -7,5 +7,5 @@ #![no_core] #![no_main] -#[cfi_encoding] //~ERROR 10:1: 10:16: malformed `cfi_encoding` attribute input +#[cfi_encoding] //~ ERROR malformed `cfi_encoding` attribute input pub struct Type1(i32); diff --git a/tests/ui/self/self-infer.stderr b/tests/ui/self/self-infer.stderr index c6bdff22b697..f9db559390f5 100644 --- a/tests/ui/self/self-infer.stderr +++ b/tests/ui/self/self-infer.stderr @@ -3,24 
+3,12 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures | LL | fn f(self: _) {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn f(self: _) {} -LL + fn f(self: T) {} - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/self-infer.rs:5:17 | LL | fn g(self: &_) {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn g(self: &_) {} -LL + fn g(self: &T) {} - | error: aborting due to 2 previous errors diff --git a/tests/ui/sized-hierarchy/reject-dyn-pointeesized.rs b/tests/ui/sized-hierarchy/reject-dyn-pointeesized.rs new file mode 100644 index 000000000000..ece1702679d0 --- /dev/null +++ b/tests/ui/sized-hierarchy/reject-dyn-pointeesized.rs @@ -0,0 +1,16 @@ +#![feature(sized_hierarchy)] + +use std::marker::PointeeSized; + +type Foo = dyn PointeeSized; +//~^ ERROR `PointeeSized` cannot be used with trait objects + +fn foo(f: &Foo) {} + +fn main() { + foo(&()); + + let x = main; + let y: Box = x; +//~^ ERROR `PointeeSized` cannot be used with trait objects +} diff --git a/tests/ui/sized-hierarchy/reject-dyn-pointeesized.stderr b/tests/ui/sized-hierarchy/reject-dyn-pointeesized.stderr new file mode 100644 index 000000000000..a833c6952fdc --- /dev/null +++ b/tests/ui/sized-hierarchy/reject-dyn-pointeesized.stderr @@ -0,0 +1,14 @@ +error: `PointeeSized` cannot be used with trait objects + --> $DIR/reject-dyn-pointeesized.rs:5:12 + | +LL | type Foo = dyn PointeeSized; + | ^^^^^^^^^^^^^^^^ + +error: `PointeeSized` cannot be used with trait objects + --> $DIR/reject-dyn-pointeesized.rs:14:16 + | +LL | let y: Box = x; + | ^^^^^^^^^^^^^^^^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/span/issue-71363.stderr b/tests/ui/span/issue-71363.stderr index 90b623e89cff..31069914daac 100644 --- a/tests/ui/span/issue-71363.stderr +++ b/tests/ui/span/issue-71363.stderr @@ -2,10 +2,8 @@ error[E0277]: `MyError` 
doesn't implement `std::fmt::Display` --> $DIR/issue-71363.rs:4:28 | 4 | impl std::error::Error for MyError {} - | ^^^^^^^ `MyError` cannot be formatted with the default formatter + | ^^^^^^^ the trait `std::fmt::Display` is not implemented for `MyError` | - = help: the trait `std::fmt::Display` is not implemented for `MyError` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `std::error::Error` --> $SRC_DIR/core/src/error.rs:LL:COL @@ -13,9 +11,8 @@ error[E0277]: `MyError` doesn't implement `Debug` --> $DIR/issue-71363.rs:4:28 | 4 | impl std::error::Error for MyError {} - | ^^^^^^^ `MyError` cannot be formatted using `{:?}` + | ^^^^^^^ the trait `Debug` is not implemented for `MyError` | - = help: the trait `Debug` is not implemented for `MyError` = note: add `#[derive(Debug)]` to `MyError` or manually `impl Debug for MyError` note: required by a bound in `std::error::Error` --> $SRC_DIR/core/src/error.rs:LL:COL diff --git a/tests/ui/specialization/const_trait_impl.rs b/tests/ui/specialization/const_trait_impl.rs index d842601a6b7b..2df92dfad3be 100644 --- a/tests/ui/specialization/const_trait_impl.rs +++ b/tests/ui/specialization/const_trait_impl.rs @@ -10,7 +10,7 @@ pub unsafe trait Sup { #[rustc_specialization_trait] #[const_trait] -pub unsafe trait Sub: ~const Sup {} +pub unsafe trait Sub: [const] Sup {} unsafe impl const Sup for u8 { default fn foo() -> u32 { @@ -31,19 +31,19 @@ pub trait A { fn a() -> u32; } -impl const A for T { +impl const A for T { default fn a() -> u32 { 2 } } -impl const A for T { +impl const A for T { default fn a() -> u32 { 3 } } -impl const A for T { +impl const A for T { fn a() -> u32 { T::foo() } diff --git a/tests/ui/specialization/const_trait_impl.stderr b/tests/ui/specialization/const_trait_impl.stderr index 3e1260ff09c9..d36a0a1c2dc8 100644 --- a/tests/ui/specialization/const_trait_impl.stderr +++ b/tests/ui/specialization/const_trait_impl.stderr @@ -1,57 
+1,57 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const_trait_impl.rs:34:9 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const_trait_impl.rs:34:7 | -LL | impl const A for T { - | ^^^^^^ can't be applied to `Default` +LL | impl const A for T { + | ^^^^^^^^^ can't be applied to `Default` | -note: `Default` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Default` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/default.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const_trait_impl.rs:40:9 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const_trait_impl.rs:40:7 | -LL | impl const A for T { - | ^^^^^^ can't be applied to `Default` +LL | impl const A for T { + | ^^^^^^^^^ can't be applied to `Default` | -note: `Default` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Default` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/default.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const_trait_impl.rs:46:9 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const_trait_impl.rs:46:7 | -LL | impl const A for T { - | ^^^^^^ can't be applied to `Default` +LL | impl const A for T { + | ^^^^^^^^^ can't be applied to `Default` | -note: `Default` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Default` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/default.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const_trait_impl.rs:40:9 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const_trait_impl.rs:40:7 | -LL | impl const A for T { - | ^^^^^^ can't be applied to `Default` 
+LL | impl const A for T { + | ^^^^^^^^^ can't be applied to `Default` | -note: `Default` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Default` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/default.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const_trait_impl.rs:34:9 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const_trait_impl.rs:34:7 | -LL | impl const A for T { - | ^^^^^^ can't be applied to `Default` +LL | impl const A for T { + | ^^^^^^^^^ can't be applied to `Default` | -note: `Default` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Default` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/default.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const_trait_impl.rs:46:9 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const_trait_impl.rs:46:7 | -LL | impl const A for T { - | ^^^^^^ can't be applied to `Default` +LL | impl const A for T { + | ^^^^^^^^^ can't be applied to `Default` | -note: `Default` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Default` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/default.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/statics/check-immutable-mut-slices.rs b/tests/ui/statics/check-immutable-mut-slices.rs index 8f9680778aa0..19545a1c9255 100644 --- a/tests/ui/statics/check-immutable-mut-slices.rs +++ b/tests/ui/statics/check-immutable-mut-slices.rs @@ -1,6 +1,6 @@ // Checks that immutable static items can't have 
mutable slices static TEST: &'static mut [isize] = &mut []; -//~^ ERROR mutable references are not allowed +//~^ ERROR mutable borrows of temporaries pub fn main() { } diff --git a/tests/ui/statics/check-immutable-mut-slices.stderr b/tests/ui/statics/check-immutable-mut-slices.stderr index 5cb35a7c21eb..a9486fc9d781 100644 --- a/tests/ui/statics/check-immutable-mut-slices.stderr +++ b/tests/ui/statics/check-immutable-mut-slices.stderr @@ -1,8 +1,12 @@ -error[E0764]: mutable references are not allowed in the final value of statics +error[E0764]: mutable borrows of temporaries that have their lifetime extended until the end of the program are not allowed --> $DIR/check-immutable-mut-slices.rs:3:37 | LL | static TEST: &'static mut [isize] = &mut []; - | ^^^^^^^ + | ^^^^^^^ this mutable borrow refers to such a temporary + | + = note: Temporaries in constants and statics can have their lifetime extended until the end of the program + = note: To avoid accidentally creating global mutable state, such temporaries must be immutable + = help: If you really want global mutable state, try replacing the temporary by an interior mutable `static` or a `static mut` error: aborting due to 1 previous error diff --git a/tests/ui/statics/missing_lifetime.stderr b/tests/ui/statics/missing_lifetime.stderr index e23b27f7a6a6..102670c36428 100644 --- a/tests/ui/statics/missing_lifetime.stderr +++ b/tests/ui/statics/missing_lifetime.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'reborrow` --> $DIR/missing_lifetime.rs:4:15 | LL | struct Slice(&'reborrow [&'static [u8]]); - | - ^^^^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'reborrow` here: `<'reborrow>` + | ^^^^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'reborrow` here + | +LL | struct Slice<'reborrow>(&'reborrow [&'static [u8]]); + | +++++++++++ error: aborting due to 1 previous error diff --git a/tests/ui/statics/read_before_init.rs 
b/tests/ui/statics/read_before_init.rs new file mode 100644 index 000000000000..32cc2554e1a1 --- /dev/null +++ b/tests/ui/statics/read_before_init.rs @@ -0,0 +1,24 @@ +//! This test checks the one code path that does not go through +//! the regular CTFE memory access (as an optimization). We forgot +//! to duplicate the static item self-initialization check, allowing +//! reading from the uninitialized static memory before it was +//! initialized at the end of the static initializer. +//! +//! https://github.com/rust-lang/rust/issues/142532 + +use std::mem::MaybeUninit; + +pub static X: (i32, MaybeUninit) = (1, foo(&X.0, 1)); +//~^ ERROR: encountered static that tried to access itself during initialization +pub static Y: (i32, MaybeUninit) = (1, foo(&Y.0, 0)); +//~^ ERROR: encountered static that tried to access itself during initialization + +const fn foo(x: &i32, num: usize) -> MaybeUninit { + let mut temp = MaybeUninit::::uninit(); + unsafe { + std::ptr::copy(x, temp.as_mut_ptr(), num); + } + temp +} + +fn main() {} diff --git a/tests/ui/statics/read_before_init.stderr b/tests/ui/statics/read_before_init.stderr new file mode 100644 index 000000000000..239568c12050 --- /dev/null +++ b/tests/ui/statics/read_before_init.stderr @@ -0,0 +1,31 @@ +error[E0080]: encountered static that tried to access itself during initialization + --> $DIR/read_before_init.rs:11:45 + | +LL | pub static X: (i32, MaybeUninit) = (1, foo(&X.0, 1)); + | ^^^^^^^^^^^^ evaluation of `X` failed inside this call + | +note: inside `foo` + --> $DIR/read_before_init.rs:19:9 + | +LL | std::ptr::copy(x, temp.as_mut_ptr(), num); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +note: inside `std::ptr::copy::` + --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL + +error[E0080]: encountered static that tried to access itself during initialization + --> $DIR/read_before_init.rs:13:45 + | +LL | pub static Y: (i32, MaybeUninit) = (1, foo(&Y.0, 0)); + | ^^^^^^^^^^^^ evaluation of `Y` failed inside this call + | +note: 
inside `foo` + --> $DIR/read_before_init.rs:19:9 + | +LL | std::ptr::copy(x, temp.as_mut_ptr(), num); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +note: inside `std::ptr::copy::` + --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0080`. diff --git a/tests/ui/stats/input-stats.rs b/tests/ui/stats/input-stats.rs index e760e2894e31..4e8e25eb7367 100644 --- a/tests/ui/stats/input-stats.rs +++ b/tests/ui/stats/input-stats.rs @@ -1,6 +1,7 @@ //@ check-pass //@ compile-flags: -Zinput-stats //@ only-64bit +//@ needs-asm-support // layout randomization affects the hir stat output //@ needs-deterministic-layouts // @@ -49,5 +50,7 @@ fn main() { _ => {} } - unsafe { asm!("mov rdi, 1"); } + // NOTE(workingjubilee): do GPUs support NOPs? remove this cfg if they do + #[cfg(not(any(target_arch = "nvptx64", target_arch = "amdgpu")))] + unsafe { asm!("nop"); } } diff --git a/tests/ui/stats/input-stats.stderr b/tests/ui/stats/input-stats.stderr index 88f91bef30b1..eb038bbcaf1a 100644 --- a/tests/ui/stats/input-stats.stderr +++ b/tests/ui/stats/input-stats.stderr @@ -1,52 +1,7 @@ -ast-stats POST EXPANSION AST STATS +ast-stats ================================================================ +ast-stats POST EXPANSION AST STATS: input_stats ast-stats Name Accumulated Size Count Item Size ast-stats ---------------------------------------------------------------- -ast-stats Crate 40 (NN.N%) 1 40 -ast-stats GenericArgs 40 (NN.N%) 1 40 -ast-stats - AngleBracketed 40 (NN.N%) 1 -ast-stats ExprField 48 (NN.N%) 1 48 -ast-stats WherePredicate 72 (NN.N%) 1 72 -ast-stats - BoundPredicate 72 (NN.N%) 1 -ast-stats ForeignItem 80 (NN.N%) 1 80 -ast-stats - Fn 80 (NN.N%) 1 -ast-stats Arm 96 (NN.N%) 2 48 -ast-stats Local 96 (NN.N%) 1 96 -ast-stats FnDecl 120 (NN.N%) 5 24 -ast-stats InlineAsm 120 (NN.N%) 1 120 -ast-stats Attribute 128 (NN.N%) 4 32 -ast-stats - DocComment 32 (NN.N%) 1 -ast-stats - Normal 96 
(NN.N%) 3 -ast-stats Param 160 (NN.N%) 4 40 -ast-stats Stmt 160 (NN.N%) 5 32 -ast-stats - Let 32 (NN.N%) 1 -ast-stats - Semi 32 (NN.N%) 1 -ast-stats - Expr 96 (NN.N%) 3 -ast-stats Block 192 (NN.N%) 6 32 -ast-stats FieldDef 208 (NN.N%) 2 104 -ast-stats Variant 208 (NN.N%) 2 104 -ast-stats AssocItem 320 (NN.N%) 4 80 -ast-stats - Fn 160 (NN.N%) 2 -ast-stats - Type 160 (NN.N%) 2 -ast-stats GenericBound 352 (NN.N%) 4 88 -ast-stats - Trait 352 (NN.N%) 4 -ast-stats GenericParam 480 (NN.N%) 5 96 -ast-stats Pat 504 (NN.N%) 7 72 -ast-stats - Struct 72 (NN.N%) 1 -ast-stats - Wild 72 (NN.N%) 1 -ast-stats - Ident 360 (NN.N%) 5 -ast-stats Expr 648 (NN.N%) 9 72 -ast-stats - InlineAsm 72 (NN.N%) 1 -ast-stats - Match 72 (NN.N%) 1 -ast-stats - Path 72 (NN.N%) 1 -ast-stats - Struct 72 (NN.N%) 1 -ast-stats - Lit 144 (NN.N%) 2 -ast-stats - Block 216 (NN.N%) 3 -ast-stats PathSegment 864 (NN.N%) 36 24 -ast-stats Ty 896 (NN.N%) 14 64 -ast-stats - Ptr 64 (NN.N%) 1 -ast-stats - Ref 64 (NN.N%) 1 -ast-stats - ImplicitSelf 128 (NN.N%) 2 -ast-stats - Path 640 (NN.N%) 10 ast-stats Item 1_584 (NN.N%) 11 144 ast-stats - Enum 144 (NN.N%) 1 ast-stats - ExternCrate 144 (NN.N%) 1 @@ -55,57 +10,61 @@ ast-stats - Impl 144 (NN.N%) 1 ast-stats - Trait 144 (NN.N%) 1 ast-stats - Fn 288 (NN.N%) 2 ast-stats - Use 576 (NN.N%) 4 +ast-stats Ty 896 (NN.N%) 14 64 +ast-stats - Ptr 64 (NN.N%) 1 +ast-stats - Ref 64 (NN.N%) 1 +ast-stats - ImplicitSelf 128 (NN.N%) 2 +ast-stats - Path 640 (NN.N%) 10 +ast-stats PathSegment 888 (NN.N%) 37 24 +ast-stats Expr 648 (NN.N%) 9 72 +ast-stats - InlineAsm 72 (NN.N%) 1 +ast-stats - Match 72 (NN.N%) 1 +ast-stats - Path 72 (NN.N%) 1 +ast-stats - Struct 72 (NN.N%) 1 +ast-stats - Lit 144 (NN.N%) 2 +ast-stats - Block 216 (NN.N%) 3 +ast-stats Pat 504 (NN.N%) 7 72 +ast-stats - Struct 72 (NN.N%) 1 +ast-stats - Wild 72 (NN.N%) 1 +ast-stats - Ident 360 (NN.N%) 5 +ast-stats GenericParam 480 (NN.N%) 5 96 +ast-stats GenericBound 352 (NN.N%) 4 88 +ast-stats - Trait 352 (NN.N%) 4 +ast-stats 
AssocItem 320 (NN.N%) 4 80 +ast-stats - Fn 160 (NN.N%) 2 +ast-stats - Type 160 (NN.N%) 2 +ast-stats Variant 208 (NN.N%) 2 104 +ast-stats FieldDef 208 (NN.N%) 2 104 +ast-stats Block 192 (NN.N%) 6 32 +ast-stats Stmt 160 (NN.N%) 5 32 +ast-stats - Let 32 (NN.N%) 1 +ast-stats - Semi 32 (NN.N%) 1 +ast-stats - Expr 96 (NN.N%) 3 +ast-stats Param 160 (NN.N%) 4 40 +ast-stats Attribute 160 (NN.N%) 5 32 +ast-stats - DocComment 32 (NN.N%) 1 +ast-stats - Normal 128 (NN.N%) 4 +ast-stats InlineAsm 120 (NN.N%) 1 120 +ast-stats FnDecl 120 (NN.N%) 5 24 +ast-stats Local 96 (NN.N%) 1 96 +ast-stats Arm 96 (NN.N%) 2 48 +ast-stats ForeignItem 80 (NN.N%) 1 80 +ast-stats - Fn 80 (NN.N%) 1 +ast-stats WherePredicate 72 (NN.N%) 1 72 +ast-stats - BoundPredicate 72 (NN.N%) 1 +ast-stats ExprField 48 (NN.N%) 1 48 +ast-stats GenericArgs 40 (NN.N%) 1 40 +ast-stats - AngleBracketed 40 (NN.N%) 1 +ast-stats Crate 40 (NN.N%) 1 40 ast-stats ---------------------------------------------------------------- -ast-stats Total 7_416 127 -ast-stats -hir-stats HIR STATS +ast-stats Total 7_472 129 +ast-stats ================================================================ +hir-stats ================================================================ +hir-stats HIR STATS: input_stats hir-stats Name Accumulated Size Count Item Size hir-stats ---------------------------------------------------------------- -hir-stats ForeignItemRef 24 (NN.N%) 1 24 -hir-stats Lifetime 28 (NN.N%) 1 28 -hir-stats Mod 32 (NN.N%) 1 32 -hir-stats ExprField 40 (NN.N%) 1 40 -hir-stats TraitItemRef 56 (NN.N%) 2 28 -hir-stats GenericArg 64 (NN.N%) 4 16 -hir-stats - Type 16 (NN.N%) 1 -hir-stats - Lifetime 48 (NN.N%) 3 -hir-stats Param 64 (NN.N%) 2 32 -hir-stats Body 72 (NN.N%) 3 24 -hir-stats ImplItemRef 72 (NN.N%) 2 36 -hir-stats InlineAsm 72 (NN.N%) 1 72 -hir-stats Local 72 (NN.N%) 1 72 -hir-stats WherePredicate 72 (NN.N%) 3 24 -hir-stats - BoundPredicate 72 (NN.N%) 3 -hir-stats Arm 80 (NN.N%) 2 40 -hir-stats Stmt 96 (NN.N%) 3 32 -hir-stats - 
Expr 32 (NN.N%) 1 -hir-stats - Let 32 (NN.N%) 1 -hir-stats - Semi 32 (NN.N%) 1 -hir-stats FnDecl 120 (NN.N%) 3 40 -hir-stats FieldDef 128 (NN.N%) 2 64 -hir-stats GenericArgs 144 (NN.N%) 3 48 -hir-stats Variant 144 (NN.N%) 2 72 -hir-stats Attribute 160 (NN.N%) 4 40 -hir-stats GenericBound 256 (NN.N%) 4 64 -hir-stats - Trait 256 (NN.N%) 4 -hir-stats Block 288 (NN.N%) 6 48 -hir-stats Pat 360 (NN.N%) 5 72 -hir-stats - Struct 72 (NN.N%) 1 -hir-stats - Wild 72 (NN.N%) 1 -hir-stats - Binding 216 (NN.N%) 3 -hir-stats GenericParam 400 (NN.N%) 5 80 -hir-stats Generics 560 (NN.N%) 10 56 -hir-stats Ty 720 (NN.N%) 15 48 -hir-stats - Ptr 48 (NN.N%) 1 -hir-stats - Ref 48 (NN.N%) 1 -hir-stats - Path 624 (NN.N%) 13 -hir-stats Expr 768 (NN.N%) 12 64 -hir-stats - InlineAsm 64 (NN.N%) 1 -hir-stats - Match 64 (NN.N%) 1 -hir-stats - Path 64 (NN.N%) 1 -hir-stats - Struct 64 (NN.N%) 1 -hir-stats - Lit 128 (NN.N%) 2 -hir-stats - Block 384 (NN.N%) 6 +hir-stats PathSegment 1_776 (NN.N%) 37 48 +hir-stats Path 1_040 (NN.N%) 26 40 hir-stats Item 968 (NN.N%) 11 88 hir-stats - Enum 88 (NN.N%) 1 hir-stats - ExternCrate 88 (NN.N%) 1 @@ -114,8 +73,51 @@ hir-stats - Impl 88 (NN.N%) 1 hir-stats - Trait 88 (NN.N%) 1 hir-stats - Fn 176 (NN.N%) 2 hir-stats - Use 352 (NN.N%) 4 -hir-stats Path 1_040 (NN.N%) 26 40 -hir-stats PathSegment 1_776 (NN.N%) 37 48 +hir-stats Expr 768 (NN.N%) 12 64 +hir-stats - InlineAsm 64 (NN.N%) 1 +hir-stats - Match 64 (NN.N%) 1 +hir-stats - Path 64 (NN.N%) 1 +hir-stats - Struct 64 (NN.N%) 1 +hir-stats - Lit 128 (NN.N%) 2 +hir-stats - Block 384 (NN.N%) 6 +hir-stats Ty 720 (NN.N%) 15 48 +hir-stats - Ptr 48 (NN.N%) 1 +hir-stats - Ref 48 (NN.N%) 1 +hir-stats - Path 624 (NN.N%) 13 +hir-stats Generics 560 (NN.N%) 10 56 +hir-stats GenericParam 400 (NN.N%) 5 80 +hir-stats Pat 360 (NN.N%) 5 72 +hir-stats - Struct 72 (NN.N%) 1 +hir-stats - Wild 72 (NN.N%) 1 +hir-stats - Binding 216 (NN.N%) 3 +hir-stats Block 288 (NN.N%) 6 48 +hir-stats GenericBound 256 (NN.N%) 4 64 +hir-stats - Trait 256 
(NN.N%) 4 +hir-stats Attribute 200 (NN.N%) 5 40 +hir-stats Variant 144 (NN.N%) 2 72 +hir-stats GenericArgs 144 (NN.N%) 3 48 +hir-stats FieldDef 128 (NN.N%) 2 64 +hir-stats FnDecl 120 (NN.N%) 3 40 +hir-stats Stmt 96 (NN.N%) 3 32 +hir-stats - Expr 32 (NN.N%) 1 +hir-stats - Let 32 (NN.N%) 1 +hir-stats - Semi 32 (NN.N%) 1 +hir-stats Arm 80 (NN.N%) 2 40 +hir-stats WherePredicate 72 (NN.N%) 3 24 +hir-stats - BoundPredicate 72 (NN.N%) 3 +hir-stats Local 72 (NN.N%) 1 72 +hir-stats InlineAsm 72 (NN.N%) 1 72 +hir-stats ImplItemRef 72 (NN.N%) 2 36 +hir-stats Body 72 (NN.N%) 3 24 +hir-stats Param 64 (NN.N%) 2 32 +hir-stats GenericArg 64 (NN.N%) 4 16 +hir-stats - Type 16 (NN.N%) 1 +hir-stats - Lifetime 48 (NN.N%) 3 +hir-stats TraitItemRef 56 (NN.N%) 2 28 +hir-stats ExprField 40 (NN.N%) 1 40 +hir-stats Mod 32 (NN.N%) 1 32 +hir-stats Lifetime 28 (NN.N%) 1 28 +hir-stats ForeignItemRef 24 (NN.N%) 1 24 hir-stats ---------------------------------------------------------------- -hir-stats Total 8_676 172 -hir-stats +hir-stats Total 8_716 173 +hir-stats ================================================================ diff --git a/tests/ui/stats/macro-stats.stderr b/tests/ui/stats/macro-stats.stderr index f87e34622b92..00c6b55c6a23 100644 --- a/tests/ui/stats/macro-stats.stderr +++ b/tests/ui/stats/macro-stats.stderr @@ -2,25 +2,25 @@ macro-stats ==================================================================== macro-stats MACRO EXPANSION STATS: macro_stats macro-stats Macro Name Uses Lines Avg Lines Bytes Avg Bytes macro-stats ----------------------------------------------------------------------------------- -macro-stats #[derive(Clone)] 8 56 7.0 1_660 207.5 -macro-stats #[derive(PartialOrd)] 1 16 16.0 654 654.0 -macro-stats #[derive(Hash)] 2 15 7.5 547 273.5 -macro-stats #[derive(Ord)] 1 14 14.0 489 489.0 -macro-stats q! 
1 24 24.0 435 435.0 -macro-stats #[derive(Default)] 2 14 7.0 367 183.5 -macro-stats #[derive(Eq)] 1 10 10.0 312 312.0 -macro-stats #[derive(Debug)] 1 7 7.0 261 261.0 -macro-stats #[derive(PartialEq)] 1 8 8.0 247 247.0 -macro-stats #[derive(Copy)] 1 1 1.0 46 46.0 -macro-stats p! 1 2 2.0 28 28.0 -macro-stats trait_impl_tys! 1 1 1.0 11 11.0 -macro-stats foreign_item! 1 0 0.0 6 6.0 -macro-stats impl_const! 1 0 0.0 4 4.0 -macro-stats trait_tys! 1 1 1.0 3 3.0 -macro-stats u32! 1 0 0.0 -3 -3.0 -macro-stats none! 1 0 0.0 -3 -3.0 -macro-stats n99! 2 0 0.0 -8 -4.0 +macro-stats #[derive(Clone)] 8 64 8.0 1_788 223.5 +macro-stats #[derive(PartialOrd)] 1 17 17.0 675 675.0 +macro-stats #[derive(Hash)] 2 17 8.5 577 288.5 +macro-stats q! 1 26 26.0 519 519.0 +macro-stats #[derive(Ord)] 1 15 15.0 503 503.0 +macro-stats #[derive(Default)] 2 16 8.0 403 201.5 +macro-stats #[derive(Eq)] 1 11 11.0 325 325.0 +macro-stats #[derive(Debug)] 1 8 8.0 277 277.0 +macro-stats #[derive(PartialEq)] 1 9 9.0 267 267.0 +macro-stats #[derive(Copy)] 1 2 2.0 61 61.0 +macro-stats p! 1 3 3.0 32 32.0 +macro-stats trait_impl_tys! 1 2 2.0 28 28.0 +macro-stats foreign_item! 1 1 1.0 21 21.0 macro-stats this_is_a_really_really_long_macro_name! -macro-stats 1 0 0.0 -30 -30.0 -macro-stats #[test] 1 -6 -6.0 -158 -158.0 +macro-stats 1 1 1.0 18 18.0 +macro-stats impl_const! 1 1 1.0 17 17.0 +macro-stats trait_tys! 1 2 2.0 15 15.0 +macro-stats n99! 2 2 1.0 4 2.0 +macro-stats none! 1 1 1.0 4 4.0 +macro-stats u32! 
1 1 1.0 3 3.0 +macro-stats #[test] 1 1 1.0 0 0.0 macro-stats =================================================================================== diff --git a/tests/ui/structs-enums/recover-enum-with-bad-where.rs b/tests/ui/structs-enums/recover-enum-with-bad-where.rs new file mode 100644 index 000000000000..cf7747d710b5 --- /dev/null +++ b/tests/ui/structs-enums/recover-enum-with-bad-where.rs @@ -0,0 +1,8 @@ +pub enum Foo +where: +//~^ ERROR unexpected colon after `where` + T: Missing, {} +//~^ ERROR cannot find trait `Missing` in this scope +// (evidence that we continue parsing after the erroneous colon) + +fn main() {} diff --git a/tests/ui/structs-enums/recover-enum-with-bad-where.stderr b/tests/ui/structs-enums/recover-enum-with-bad-where.stderr new file mode 100644 index 000000000000..30b73f59e8c0 --- /dev/null +++ b/tests/ui/structs-enums/recover-enum-with-bad-where.stderr @@ -0,0 +1,15 @@ +error: unexpected colon after `where` + --> $DIR/recover-enum-with-bad-where.rs:2:6 + | +LL | where: + | ^ help: remove the colon + +error[E0405]: cannot find trait `Missing` in this scope + --> $DIR/recover-enum-with-bad-where.rs:4:8 + | +LL | T: Missing, {} + | ^^^^^^^ not found in this scope + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0405`. 
diff --git a/tests/ui/suggestions/bad-infer-in-trait-impl.stderr b/tests/ui/suggestions/bad-infer-in-trait-impl.stderr index 68d8f5402e44..8b7d67ac0412 100644 --- a/tests/ui/suggestions/bad-infer-in-trait-impl.stderr +++ b/tests/ui/suggestions/bad-infer-in-trait-impl.stderr @@ -3,12 +3,6 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures | LL | fn bar(s: _) {} | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn bar(s: _) {} -LL + fn bar(s: T) {} - | error[E0050]: method `bar` has 1 parameter but the declaration in trait `Foo::bar` has 0 --> $DIR/bad-infer-in-trait-impl.rs:6:15 diff --git a/tests/ui/suggestions/bound-suggestions.stderr b/tests/ui/suggestions/bound-suggestions.stderr index f23e086afe4e..ec1d23fac458 100644 --- a/tests/ui/suggestions/bound-suggestions.stderr +++ b/tests/ui/suggestions/bound-suggestions.stderr @@ -2,7 +2,9 @@ error[E0277]: `impl Sized` doesn't implement `Debug` --> $DIR/bound-suggestions.rs:9:22 | LL | println!("{:?}", t); - | ^ `impl Sized` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^ `impl Sized` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider restricting opaque type `impl Sized` with trait `Debug` @@ -14,7 +16,9 @@ error[E0277]: `T` doesn't implement `Debug` --> $DIR/bound-suggestions.rs:15:22 | LL | println!("{:?}", t); - | ^ `T` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^ `T` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly 
builds, run with -Z macro-backtrace for more info) help: consider restricting type parameter `T` with trait `Debug` @@ -26,7 +30,9 @@ error[E0277]: `T` doesn't implement `Debug` --> $DIR/bound-suggestions.rs:21:22 | LL | println!("{:?}", t); - | ^ `T` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^ `T` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider further restricting type parameter `T` with trait `Debug` @@ -38,7 +44,9 @@ error[E0277]: `Y` doesn't implement `Debug` --> $DIR/bound-suggestions.rs:27:30 | LL | println!("{:?} {:?}", x, y); - | ^ `Y` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^ `Y` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider further restricting type parameter `Y` with trait `Debug` @@ -50,7 +58,9 @@ error[E0277]: `X` doesn't implement `Debug` --> $DIR/bound-suggestions.rs:33:22 | LL | println!("{:?}", x); - | ^ `X` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^ `X` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider further restricting type parameter `X` with trait `Debug` @@ -62,7 +72,9 @@ error[E0277]: `X` doesn't implement `Debug` --> 
$DIR/bound-suggestions.rs:39:22 | LL | println!("{:?}", x); - | ^ `X` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^ `X` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) help: consider further restricting type parameter `X` with trait `Debug` diff --git a/tests/ui/suggestions/derive-macro-missing-bounds.stderr b/tests/ui/suggestions/derive-macro-missing-bounds.stderr index 68c8204d1e18..b28f39ced542 100644 --- a/tests/ui/suggestions/derive-macro-missing-bounds.stderr +++ b/tests/ui/suggestions/derive-macro-missing-bounds.stderr @@ -4,9 +4,8 @@ error[E0277]: `a::Inner` doesn't implement `Debug` LL | #[derive(Debug)] | ----- in this derive macro expansion LL | struct Outer(Inner); - | ^^^^^^^^ `a::Inner` cannot be formatted using `{:?}` + | ^^^^^^^^ the trait `Debug` is not implemented for `a::Inner` | - = help: the trait `Debug` is not implemented for `a::Inner` = note: add `#[derive(Debug)]` to `a::Inner` or manually `impl Debug for a::Inner` help: consider annotating `a::Inner` with `#[derive(Debug)]` | diff --git a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs index 10b4781eb049..97a0e005f86a 100644 --- a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs +++ b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs @@ -4,19 +4,16 @@ trait A: Sized { fn f(a: A) -> A; //~^ ERROR expected a type, found a trait //~| ERROR expected a type, found a trait - //~| ERROR associated item referring to unboxed trait object for its own trait } trait B { fn f(b: B) -> B; //~^ ERROR expected a type, found a trait //~| ERROR 
expected a type, found a trait - //~| ERROR associated item referring to unboxed trait object for its own trait } trait C { fn f(&self, c: C) -> C; //~^ ERROR expected a type, found a trait //~| ERROR expected a type, found a trait - //~| ERROR associated item referring to unboxed trait object for its own trait } fn main() {} diff --git a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.stderr b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.stderr index e189012d15c9..c4dab4691f49 100644 --- a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.stderr +++ b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021-without-dyn.stderr @@ -26,22 +26,8 @@ help: `A` is dyn-incompatible, use `impl A` to return an opaque type, as long as LL | fn f(a: A) -> impl A; | ++++ -error: associated item referring to unboxed trait object for its own trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:4:13 - | -LL | trait A: Sized { - | - in this trait -LL | fn f(a: A) -> A; - | ^ ^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn f(a: A) -> A; -LL + fn f(a: Self) -> Self; - | - error[E0782]: expected a type, found a trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:10:13 + --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:9:13 | LL | fn f(b: B) -> B; | ^ @@ -58,7 +44,7 @@ LL | fn f(b: impl B) -> B; | ++++ error[E0782]: expected a type, found a trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:10:19 + --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:9:19 | LL | fn f(b: B) -> B; | ^ @@ -68,22 +54,8 @@ help: `B` is dyn-incompatible, use `impl B` to return an opaque type, as long as LL | fn f(b: B) -> impl B; | ++++ -error: associated item referring to unboxed trait object for its own trait - --> 
$DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:10:13 - | -LL | trait B { - | - in this trait -LL | fn f(b: B) -> B; - | ^ ^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn f(b: B) -> B; -LL + fn f(b: Self) -> Self; - | - error[E0782]: expected a type, found a trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:16:20 + --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:14:20 | LL | fn f(&self, c: C) -> C; | ^ @@ -100,7 +72,7 @@ LL | fn f(&self, c: impl C) -> C; | ++++ error[E0782]: expected a type, found a trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:16:26 + --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:14:26 | LL | fn f(&self, c: C) -> C; | ^ @@ -110,20 +82,6 @@ help: `C` is dyn-incompatible, use `impl C` to return an opaque type, as long as LL | fn f(&self, c: C) -> impl C; | ++++ -error: associated item referring to unboxed trait object for its own trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021-without-dyn.rs:16:20 - | -LL | trait C { - | - in this trait -LL | fn f(&self, c: C) -> C; - | ^ ^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn f(&self, c: C) -> C; -LL + fn f(&self, c: Self) -> Self; - | - -error: aborting due to 9 previous errors +error: aborting due to 6 previous errors For more information about this error, try `rustc --explain E0782`. 
diff --git a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.rs b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.rs index 747926c400ae..a798b1bd5787 100644 --- a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.rs +++ b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.rs @@ -2,13 +2,11 @@ #![allow(bare_trait_objects)] trait A: Sized { fn f(a: dyn A) -> dyn A; - //~^ ERROR associated item referring to unboxed trait object for its own trait - //~| ERROR the trait `A` is not dyn compatible + //~^ ERROR the trait `A` is not dyn compatible } trait B { fn f(a: dyn B) -> dyn B; - //~^ ERROR associated item referring to unboxed trait object for its own trait - //~| ERROR the trait `B` is not dyn compatible + //~^ ERROR the trait `B` is not dyn compatible } trait C { fn f(&self, a: dyn C) -> dyn C; diff --git a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.stderr b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.stderr index 2e3919db1b75..4ccf65b68bf7 100644 --- a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.stderr +++ b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self-2021.stderr @@ -1,17 +1,3 @@ -error: associated item referring to unboxed trait object for its own trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:4:13 - | -LL | trait A: Sized { - | - in this trait -LL | fn f(a: dyn A) -> dyn A; - | ^^^^^ ^^^^^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn f(a: dyn A) -> dyn A; -LL + fn f(a: Self) -> Self; - | - error[E0038]: the trait `A` is not dyn compatible --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:4:13 | @@ -26,30 +12,21 @@ LL | trait A: Sized { | - ^^^^^ ...because it requires `Self: Sized` | | | this trait is not dyn compatible... 
- -error: associated item referring to unboxed trait object for its own trait - --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:9:13 - | -LL | trait B { - | - in this trait -LL | fn f(a: dyn B) -> dyn B; - | ^^^^^ ^^^^^ - | help: you might have meant to use `Self` to refer to the implementing type | -LL - fn f(a: dyn B) -> dyn B; -LL + fn f(a: Self) -> Self; +LL - fn f(a: dyn A) -> dyn A; +LL + fn f(a: Self) -> dyn A; | error[E0038]: the trait `B` is not dyn compatible - --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:9:13 + --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:8:13 | LL | fn f(a: dyn B) -> dyn B; | ^^^^^ `B` is not dyn compatible | note: for a trait to be dyn compatible it needs to allow building a vtable for more information, visit - --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:9:8 + --> $DIR/dyn-incompatible-trait-should-use-self-2021.rs:8:8 | LL | trait B { | - this trait is not dyn compatible... @@ -63,7 +40,12 @@ help: alternatively, consider constraining `f` so it does not apply to trait obj | LL | fn f(a: dyn B) -> dyn B where Self: Sized; | +++++++++++++++++ +help: you might have meant to use `Self` to refer to the implementing type + | +LL - fn f(a: dyn B) -> dyn B; +LL + fn f(a: Self) -> dyn B; + | -error: aborting due to 4 previous errors +error: aborting due to 2 previous errors For more information about this error, try `rustc --explain E0038`. 
diff --git a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.rs b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.rs index 63fe5ebaea49..d8e9d381dbda 100644 --- a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.rs +++ b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.rs @@ -1,12 +1,10 @@ trait A: Sized { fn f(a: dyn A) -> dyn A; - //~^ ERROR associated item referring to unboxed trait object for its own trait - //~| ERROR the trait `A` is not dyn compatible + //~^ ERROR the trait `A` is not dyn compatible } trait B { fn f(a: dyn B) -> dyn B; - //~^ ERROR associated item referring to unboxed trait object for its own trait - //~| ERROR the trait `B` is not dyn compatible + //~^ ERROR the trait `B` is not dyn compatible } trait C { fn f(&self, a: dyn C) -> dyn C; diff --git a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.stderr b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.stderr index e8384afed7a1..bda1d01e23ff 100644 --- a/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.stderr +++ b/tests/ui/suggestions/dyn-incompatible-trait-should-use-self.stderr @@ -1,17 +1,3 @@ -error: associated item referring to unboxed trait object for its own trait - --> $DIR/dyn-incompatible-trait-should-use-self.rs:2:13 - | -LL | trait A: Sized { - | - in this trait -LL | fn f(a: dyn A) -> dyn A; - | ^^^^^ ^^^^^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn f(a: dyn A) -> dyn A; -LL + fn f(a: Self) -> Self; - | - error[E0038]: the trait `A` is not dyn compatible --> $DIR/dyn-incompatible-trait-should-use-self.rs:2:13 | @@ -26,30 +12,21 @@ LL | trait A: Sized { | - ^^^^^ ...because it requires `Self: Sized` | | | this trait is not dyn compatible... 
- -error: associated item referring to unboxed trait object for its own trait - --> $DIR/dyn-incompatible-trait-should-use-self.rs:7:13 - | -LL | trait B { - | - in this trait -LL | fn f(a: dyn B) -> dyn B; - | ^^^^^ ^^^^^ - | help: you might have meant to use `Self` to refer to the implementing type | -LL - fn f(a: dyn B) -> dyn B; -LL + fn f(a: Self) -> Self; +LL - fn f(a: dyn A) -> dyn A; +LL + fn f(a: Self) -> dyn A; | error[E0038]: the trait `B` is not dyn compatible - --> $DIR/dyn-incompatible-trait-should-use-self.rs:7:13 + --> $DIR/dyn-incompatible-trait-should-use-self.rs:6:13 | LL | fn f(a: dyn B) -> dyn B; | ^^^^^ `B` is not dyn compatible | note: for a trait to be dyn compatible it needs to allow building a vtable for more information, visit - --> $DIR/dyn-incompatible-trait-should-use-self.rs:7:8 + --> $DIR/dyn-incompatible-trait-should-use-self.rs:6:8 | LL | trait B { | - this trait is not dyn compatible... @@ -63,7 +40,12 @@ help: alternatively, consider constraining `f` so it does not apply to trait obj | LL | fn f(a: dyn B) -> dyn B where Self: Sized; | +++++++++++++++++ +help: you might have meant to use `Self` to refer to the implementing type + | +LL - fn f(a: dyn B) -> dyn B; +LL + fn f(a: Self) -> dyn B; + | -error: aborting due to 4 previous errors +error: aborting due to 2 previous errors For more information about this error, try `rustc --explain E0038`. 
diff --git a/tests/ui/suggestions/impl-trait-with-missing-bounds.stderr b/tests/ui/suggestions/impl-trait-with-missing-bounds.stderr index d0ce7c9ed4e8..b3f1865dd309 100644 --- a/tests/ui/suggestions/impl-trait-with-missing-bounds.stderr +++ b/tests/ui/suggestions/impl-trait-with-missing-bounds.stderr @@ -2,11 +2,10 @@ error[E0277]: `::Item` doesn't implement `Debug` --> $DIR/impl-trait-with-missing-bounds.rs:6:13 | LL | qux(constraint); - | --- ^^^^^^^^^^ `::Item` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | --- ^^^^^^^^^^ the trait `Debug` is not implemented for `::Item` | | | required by a bound introduced by this call | - = help: the trait `Debug` is not implemented for `::Item` note: required by a bound in `qux` --> $DIR/impl-trait-with-missing-bounds.rs:50:16 | @@ -22,11 +21,10 @@ error[E0277]: `::Item` doesn't implement `Debug` --> $DIR/impl-trait-with-missing-bounds.rs:14:13 | LL | qux(constraint); - | --- ^^^^^^^^^^ `::Item` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | --- ^^^^^^^^^^ the trait `Debug` is not implemented for `::Item` | | | required by a bound introduced by this call | - = help: the trait `Debug` is not implemented for `::Item` note: required by a bound in `qux` --> $DIR/impl-trait-with-missing-bounds.rs:50:16 | @@ -42,11 +40,10 @@ error[E0277]: `::Item` doesn't implement `Debug` --> $DIR/impl-trait-with-missing-bounds.rs:22:13 | LL | qux(constraint); - | --- ^^^^^^^^^^ `::Item` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | --- ^^^^^^^^^^ the trait `Debug` is not implemented for `::Item` | | | required by a bound introduced by this call | - = help: the trait `Debug` is not implemented for `::Item` note: required by a bound in `qux` --> $DIR/impl-trait-with-missing-bounds.rs:50:16 | @@ -62,11 +59,10 @@ error[E0277]: `::Item` doesn't implement `Debug` --> $DIR/impl-trait-with-missing-bounds.rs:30:13 | LL | qux(constraint); - | --- ^^^^^^^^^^ `::Item` cannot 
be formatted using `{:?}` because it doesn't implement `Debug` + | --- ^^^^^^^^^^ the trait `Debug` is not implemented for `::Item` | | | required by a bound introduced by this call | - = help: the trait `Debug` is not implemented for `::Item` note: required by a bound in `qux` --> $DIR/impl-trait-with-missing-bounds.rs:50:16 | @@ -82,11 +78,10 @@ error[E0277]: `::Item` doesn't impl --> $DIR/impl-trait-with-missing-bounds.rs:37:13 | LL | qux(constraint); - | --- ^^^^^^^^^^ `::Item` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | --- ^^^^^^^^^^ the trait `Debug` is not implemented for `::Item` | | | required by a bound introduced by this call | - = help: the trait `Debug` is not implemented for `::Item` note: required by a bound in `qux` --> $DIR/impl-trait-with-missing-bounds.rs:50:16 | @@ -102,11 +97,10 @@ error[E0277]: `::Item` doesn't implement `Debug` --> $DIR/impl-trait-with-missing-bounds.rs:45:13 | LL | qux(constraint); - | --- ^^^^^^^^^^ `::Item` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | --- ^^^^^^^^^^ the trait `Debug` is not implemented for `::Item` | | | required by a bound introduced by this call | - = help: the trait `Debug` is not implemented for `::Item` note: required by a bound in `qux` --> $DIR/impl-trait-with-missing-bounds.rs:50:16 | diff --git a/tests/ui/suggestions/issue-105645.rs b/tests/ui/suggestions/issue-105645.rs index 681ce1c6e37a..f3ca8ccbb3c7 100644 --- a/tests/ui/suggestions/issue-105645.rs +++ b/tests/ui/suggestions/issue-105645.rs @@ -2,7 +2,7 @@ fn main() { let mut buf = [0u8; 50]; let mut bref = buf.as_slice(); foo(&mut bref); - //~^ ERROR 4:9: 4:18: the trait bound `&[u8]: std::io::Write` is not satisfied [E0277] + //~^ ERROR the trait bound `&[u8]: std::io::Write` is not satisfied [E0277] } fn foo(_: &mut impl std::io::Write) {} diff --git a/tests/ui/suggestions/issue-116434-2015.rs b/tests/ui/suggestions/issue-116434-2015.rs index bad9d02321cf..e0438cdef253 100644 --- 
a/tests/ui/suggestions/issue-116434-2015.rs +++ b/tests/ui/suggestions/issue-116434-2015.rs @@ -11,6 +11,7 @@ trait Foo { //~| HELP if this is a dyn-compatible trait, use `dyn` //~| ERROR the trait `Clone` is not dyn compatible [E0038] //~| HELP there is an associated type with the same name + //~| HELP use `Self` to refer to the implementing type } trait DbHandle: Sized {} @@ -26,6 +27,7 @@ trait DbInterface { //~| HELP if this is a dyn-compatible trait, use `dyn` //~| ERROR the trait `DbHandle` is not dyn compatible [E0038] //~| HELP there is an associated type with the same name + //~| HELP use `Self` to refer to the implementing type } fn main() {} diff --git a/tests/ui/suggestions/issue-116434-2015.stderr b/tests/ui/suggestions/issue-116434-2015.stderr index a0a99cc560db..cad5812da663 100644 --- a/tests/ui/suggestions/issue-116434-2015.stderr +++ b/tests/ui/suggestions/issue-116434-2015.stderr @@ -35,13 +35,18 @@ LL | fn foo() -> Clone; = note: the trait is not dyn compatible because it requires `Self: Sized` = note: for a trait to be dyn compatible it needs to allow building a vtable for more information, visit +help: you might have meant to use `Self` to refer to the implementing type + | +LL - fn foo() -> Clone; +LL + fn foo() -> Self; + | help: there is an associated type with the same name | LL | fn foo() -> Self::Clone; | ++++++ warning: trait objects without an explicit `dyn` are deprecated - --> $DIR/issue-116434-2015.rs:20:20 + --> $DIR/issue-116434-2015.rs:21:20 | LL | fn handle() -> DbHandle; | ^^^^^^^^ @@ -54,7 +59,7 @@ LL | fn handle() -> dyn DbHandle; | +++ warning: trait objects without an explicit `dyn` are deprecated - --> $DIR/issue-116434-2015.rs:20:20 + --> $DIR/issue-116434-2015.rs:21:20 | LL | fn handle() -> DbHandle; | ^^^^^^^^ @@ -68,19 +73,24 @@ LL | fn handle() -> dyn DbHandle; | +++ error[E0038]: the trait `DbHandle` is not dyn compatible - --> $DIR/issue-116434-2015.rs:20:20 + --> $DIR/issue-116434-2015.rs:21:20 | LL | fn handle() 
-> DbHandle; | ^^^^^^^^ `DbHandle` is not dyn compatible | note: for a trait to be dyn compatible it needs to allow building a vtable for more information, visit - --> $DIR/issue-116434-2015.rs:16:17 + --> $DIR/issue-116434-2015.rs:17:17 | LL | trait DbHandle: Sized {} | -------- ^^^^^ ...because it requires `Self: Sized` | | | this trait is not dyn compatible... +help: you might have meant to use `Self` to refer to the implementing type + | +LL - fn handle() -> DbHandle; +LL + fn handle() -> Self; + | help: there is an associated type with the same name | LL | fn handle() -> Self::DbHandle; diff --git a/tests/ui/suggestions/issue-81098.stderr b/tests/ui/suggestions/issue-81098.stderr index 4dc47a202824..36948469a311 100644 --- a/tests/ui/suggestions/issue-81098.stderr +++ b/tests/ui/suggestions/issue-81098.stderr @@ -2,23 +2,17 @@ error[E0277]: `()` doesn't implement `std::fmt::Display` --> $DIR/issue-81098.rs:3:13 | LL | fn wat() -> impl core::fmt::Display { - | ^^^^^^^^^^^^^^^^^^^^^^^ `()` cannot be formatted with the default formatter - | - = help: the trait `std::fmt::Display` is not implemented for `()` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead + | ^^^^^^^^^^^^^^^^^^^^^^^ the trait `std::fmt::Display` is not implemented for `()` error[E0277]: `()` doesn't implement `std::fmt::Display` --> $DIR/issue-81098.rs:9:12 | LL | fn ok() -> impl core::fmt::Display { - | ^^^^^^^^^^^^^^^^^^^^^^^ `()` cannot be formatted with the default formatter + | ^^^^^^^^^^^^^^^^^^^^^^^ the trait `std::fmt::Display` is not implemented for `()` LL | 1; | -- help: remove this semicolon | | | this expression has type `{integer}`, which implements `std::fmt::Display` - | - = help: the trait `std::fmt::Display` is not implemented for `()` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead error: aborting due to 2 previous errors diff --git a/tests/ui/suggestions/issue-97760.stderr 
b/tests/ui/suggestions/issue-97760.stderr index ddd143b967c4..1084ea7c9e0e 100644 --- a/tests/ui/suggestions/issue-97760.stderr +++ b/tests/ui/suggestions/issue-97760.stderr @@ -2,7 +2,10 @@ error[E0277]: `::Item` doesn't implement `std --> $DIR/issue-97760.rs:4:20 | LL | println!("{x}"); - | ^ `::Item` cannot be formatted with the default formatter + | -^- + | || + | |`::Item` cannot be formatted with the default formatter + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `::Item` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead diff --git a/tests/ui/suggestions/lifetimes/missing-lifetimes-in-signature.stderr b/tests/ui/suggestions/lifetimes/missing-lifetimes-in-signature.stderr index 0aa33d3b6fb1..ab067f2439c6 100644 --- a/tests/ui/suggestions/lifetimes/missing-lifetimes-in-signature.stderr +++ b/tests/ui/suggestions/lifetimes/missing-lifetimes-in-signature.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'a` --> $DIR/missing-lifetimes-in-signature.rs:37:11 | LL | fn baz(g: G, dest: &mut T) -> impl FnOnce() + '_ - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'a` here: `'a,` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'a` here + | +LL | fn baz<'a, G: 'a, T>(g: G, dest: &mut T) -> impl FnOnce() + '_ + | +++ error[E0700]: hidden type for `impl FnOnce()` captures lifetime that does not appear in bounds --> $DIR/missing-lifetimes-in-signature.rs:19:5 diff --git a/tests/ui/suggestions/missing-bound-in-derive-copy-impl-3.stderr b/tests/ui/suggestions/missing-bound-in-derive-copy-impl-3.stderr index 3f8b6f93e1f4..e3375b67c86d 100644 --- a/tests/ui/suggestions/missing-bound-in-derive-copy-impl-3.stderr +++ b/tests/ui/suggestions/missing-bound-in-derive-copy-impl-3.stderr @@ -21,7 +21,7 @@ error[E0277]: `K` doesn't implement `Debug` --> $DIR/missing-bound-in-derive-copy-impl-3.rs:12:14 | LL | pub loc: 
Vector2, - | ^^^^^^^^^^ `K` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^ the trait `Debug` is not implemented for `K` | note: required by a bound in `Vector2` --> $DIR/missing-bound-in-derive-copy-impl-3.rs:5:23 @@ -40,7 +40,7 @@ LL | #[derive(Debug, Copy, Clone)] | ----- in this derive macro expansion LL | pub struct AABB{ LL | pub loc: Vector2, - | ^^^^^^^^^^^^^^^^^^^ `K` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `K` | help: consider further restricting type parameter `K` with trait `Debug` | @@ -54,7 +54,7 @@ LL | #[derive(Debug, Copy, Clone)] | ----- in this derive macro expansion ... LL | pub size: Vector2 - | ^^^^^^^^^^^^^^^^^^^^ `K` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `K` | help: consider further restricting type parameter `K` with trait `Debug` | diff --git a/tests/ui/suggestions/missing-bound-in-derive-copy-impl.stderr b/tests/ui/suggestions/missing-bound-in-derive-copy-impl.stderr index 3766e3e2c7b5..645d6ebb3961 100644 --- a/tests/ui/suggestions/missing-bound-in-derive-copy-impl.stderr +++ b/tests/ui/suggestions/missing-bound-in-derive-copy-impl.stderr @@ -21,7 +21,7 @@ error[E0277]: `K` doesn't implement `Debug` --> $DIR/missing-bound-in-derive-copy-impl.rs:11:14 | LL | pub loc: Vector2, - | ^^^^^^^^^^ `K` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^ the trait `Debug` is not implemented for `K` | note: required by a bound in `Vector2` --> $DIR/missing-bound-in-derive-copy-impl.rs:4:23 @@ -78,7 +78,7 @@ LL | #[derive(Debug, Copy, Clone)] | ----- in this derive macro expansion LL | pub struct AABB { LL | pub loc: Vector2, - | ^^^^^^^^^^^^^^^^^^^ `K` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `K` | 
help: consider restricting type parameter `K` with trait `Debug` | @@ -111,7 +111,7 @@ LL | #[derive(Debug, Copy, Clone)] | ----- in this derive macro expansion ... LL | pub size: Vector2, - | ^^^^^^^^^^^^^^^^^^^^ `K` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `K` | help: consider restricting type parameter `K` with trait `Debug` | diff --git a/tests/ui/suggestions/option-content-move3.stderr b/tests/ui/suggestions/option-content-move3.stderr index a20dcce1ee31..faaf8a9df9d7 100644 --- a/tests/ui/suggestions/option-content-move3.stderr +++ b/tests/ui/suggestions/option-content-move3.stderr @@ -79,7 +79,7 @@ LL | let x = var; | variable moved due to use in closure | move occurs because `var` has type `NotCopyableButCloneable`, which does not implement the `Copy` trait | -help: clone the value before moving it into the closure +help: consider cloning the value before moving it into the closure | LL ~ { LL + let value = var.clone(); diff --git a/tests/ui/suggestions/path-display.stderr b/tests/ui/suggestions/path-display.stderr index 46d0b35825bc..0c7271b3c1c3 100644 --- a/tests/ui/suggestions/path-display.stderr +++ b/tests/ui/suggestions/path-display.stderr @@ -2,18 +2,23 @@ error[E0277]: `Path` doesn't implement `std::fmt::Display` --> $DIR/path-display.rs:5:20 | LL | println!("{}", path); - | ^^^^ `Path` cannot be formatted with the default formatter; call `.display()` on it + | -- ^^^^ `Path` cannot be formatted with the default formatter; call `.display()` on it + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `Path` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead = note: call `.display()` or `.to_string_lossy()` to safely print paths, as they may contain non-Unicode data + = note: required for `&Path` to implement `std::fmt::Display` = note: this error originates in 
the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) error[E0277]: `PathBuf` doesn't implement `std::fmt::Display` --> $DIR/path-display.rs:9:20 | LL | println!("{}", path); - | ^^^^ `PathBuf` cannot be formatted with the default formatter; call `.display()` on it + | -- ^^^^ `PathBuf` cannot be formatted with the default formatter; call `.display()` on it + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `PathBuf` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead diff --git a/tests/ui/suggestions/return-bindings.stderr b/tests/ui/suggestions/return-bindings.stderr index 8e396d17dc07..651998043e10 100644 --- a/tests/ui/suggestions/return-bindings.stderr +++ b/tests/ui/suggestions/return-bindings.stderr @@ -62,12 +62,16 @@ LL ~ error[E0308]: `if` and `else` have incompatible types --> $DIR/return-bindings.rs:30:9 | -LL | let s = if let Some(s) = opt_str { - | ______________________________________- -LL | | } else { - | |_____- expected because of this -LL | String::new() - | ^^^^^^^^^^^^^ expected `()`, found `String` +LL | let s = if let Some(s) = opt_str { + | ______________- - + | | ______________________________________| +LL | || } else { + | ||_____- expected because of this +LL | | String::new() + | | ^^^^^^^^^^^^^ expected `()`, found `String` +LL | | +LL | | }; + | |______- `if` and `else` have incompatible types | help: consider returning the local binding `s` | diff --git a/tests/ui/suggestions/suggest-full-enum-variant-for-local-module.rs b/tests/ui/suggestions/suggest-full-enum-variant-for-local-module.rs index 1dfc0786668f..807fba0ab7e7 100644 --- a/tests/ui/suggestions/suggest-full-enum-variant-for-local-module.rs +++ b/tests/ui/suggestions/suggest-full-enum-variant-for-local-module.rs @@ -6,5 +6,5 @@ mod option { } fn main() { - let _: option::O<()> = 
(); //~ ERROR 9:28: 9:30: mismatched types [E0308] + let _: option::O<()> = (); //~ ERROR mismatched types [E0308] } diff --git a/tests/ui/syntax-extension-minor.rs b/tests/ui/syntax-extension-minor.rs deleted file mode 100644 index 826990a89a53..000000000000 --- a/tests/ui/syntax-extension-minor.rs +++ /dev/null @@ -1,15 +0,0 @@ -//@ run-pass - -#![feature(concat_idents)] -#![expect(deprecated)] // concat_idents is deprecated - -pub fn main() { - struct Foo; - let _: concat_idents!(F, oo) = Foo; // Test that `concat_idents!` can be used in type positions - - let asdf_fdsa = "<.<".to_string(); - // concat_idents should have call-site hygiene. - assert!(concat_idents!(asd, f_f, dsa) == "<.<".to_string()); - - assert_eq!(stringify!(use_mention_distinction), "use_mention_distinction"); -} diff --git a/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.riscv.stderr b/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.riscv.stderr index 2dca0c220332..0b2d71f97d0d 100644 --- a/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.riscv.stderr +++ b/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.riscv.stderr @@ -7,13 +7,5 @@ warning: unstable feature specified for `-Ctarget-feature`: `d` | = note: this feature is not stably supported; its behavior can change in the future -warning: unstable feature specified for `-Ctarget-feature`: `f` - | - = note: this feature is not stably supported; its behavior can change in the future - -warning: unstable feature specified for `-Ctarget-feature`: `zicsr` - | - = note: this feature is not stably supported; its behavior can change in the future - -warning: 4 warnings emitted +warning: 2 warnings emitted diff --git a/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.rs b/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.rs index 302cceccf693..1006b078bab3 100644 --- 
a/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.rs +++ b/tests/ui/target-feature/abi-incompatible-target-feature-flag-enable.rs @@ -24,5 +24,3 @@ pub trait Freeze {} //~? WARN must be disabled to ensure that the ABI of the current target can be implemented correctly //~? WARN unstable feature specified for `-Ctarget-feature` -//[riscv]~? WARN unstable feature specified for `-Ctarget-feature` -//[riscv]~? WARN unstable feature specified for `-Ctarget-feature` diff --git a/tests/ui/target-feature/retpoline-target-feature-flag.by_feature1.stderr b/tests/ui/target-feature/retpoline-target-feature-flag.by_feature1.stderr index 2a0f5f01aef6..79e89823c517 100644 --- a/tests/ui/target-feature/retpoline-target-feature-flag.by_feature1.stderr +++ b/tests/ui/target-feature/retpoline-target-feature-flag.by_feature1.stderr @@ -1,4 +1,4 @@ -warning: target feature `retpoline-external-thunk` cannot be enabled with `-Ctarget-feature`: use `retpoline-external-thunk` target modifier flag instead +warning: target feature `retpoline-external-thunk` cannot be enabled with `-Ctarget-feature`: use `-Zretpoline-external-thunk` compiler flag instead | = note: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
= note: for more information, see issue #116344 diff --git a/tests/ui/target-feature/retpoline-target-feature-flag.by_feature2.stderr b/tests/ui/target-feature/retpoline-target-feature-flag.by_feature2.stderr index f7b6cb164477..f5ff15df6329 100644 --- a/tests/ui/target-feature/retpoline-target-feature-flag.by_feature2.stderr +++ b/tests/ui/target-feature/retpoline-target-feature-flag.by_feature2.stderr @@ -1,4 +1,4 @@ -warning: target feature `retpoline-indirect-branches` cannot be enabled with `-Ctarget-feature`: use `retpoline` target modifier flag instead +warning: target feature `retpoline-indirect-branches` cannot be enabled with `-Ctarget-feature`: use `-Zretpoline` compiler flag instead | = note: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! = note: for more information, see issue #116344 diff --git a/tests/ui/target-feature/retpoline-target-feature-flag.by_feature3.stderr b/tests/ui/target-feature/retpoline-target-feature-flag.by_feature3.stderr index 4f2cd1d1a522..158cca08a762 100644 --- a/tests/ui/target-feature/retpoline-target-feature-flag.by_feature3.stderr +++ b/tests/ui/target-feature/retpoline-target-feature-flag.by_feature3.stderr @@ -1,4 +1,4 @@ -warning: target feature `retpoline-indirect-calls` cannot be enabled with `-Ctarget-feature`: use `retpoline` target modifier flag instead +warning: target feature `retpoline-indirect-calls` cannot be enabled with `-Ctarget-feature`: use `-Zretpoline` compiler flag instead | = note: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! 
= note: for more information, see issue #116344 diff --git a/tests/ui/target-feature/retpoline-target-feature-flag.rs b/tests/ui/target-feature/retpoline-target-feature-flag.rs index de3c44c3ed0e..05c85860385a 100644 --- a/tests/ui/target-feature/retpoline-target-feature-flag.rs +++ b/tests/ui/target-feature/retpoline-target-feature-flag.rs @@ -16,6 +16,6 @@ #![no_core] extern crate minicore; -//[by_feature1]~? WARN target feature `retpoline-external-thunk` cannot be enabled with `-Ctarget-feature`: use `retpoline-external-thunk` target modifier flag instead -//[by_feature2]~? WARN target feature `retpoline-indirect-branches` cannot be enabled with `-Ctarget-feature`: use `retpoline` target modifier flag instead -//[by_feature3]~? WARN target feature `retpoline-indirect-calls` cannot be enabled with `-Ctarget-feature`: use `retpoline` target modifier flag instead +//[by_feature1]~? WARN target feature `retpoline-external-thunk` cannot be enabled with `-Ctarget-feature` +//[by_feature2]~? WARN target feature `retpoline-indirect-branches` cannot be enabled with `-Ctarget-feature` +//[by_feature3]~? 
WARN target feature `retpoline-indirect-calls` cannot be enabled with `-Ctarget-feature` diff --git a/tests/ui/thir-print/thir-tree-loop-match.rs b/tests/ui/thir-print/thir-tree-loop-match.rs new file mode 100644 index 000000000000..8c5f2244d546 --- /dev/null +++ b/tests/ui/thir-print/thir-tree-loop-match.rs @@ -0,0 +1,22 @@ +//@ check-pass +//@ compile-flags: -Zunpretty=thir-tree + +#![allow(incomplete_features)] +#![feature(loop_match)] + +fn boolean(mut state: bool) -> bool { + #[loop_match] + loop { + state = 'blk: { + match state { + true => { + #[const_continue] + break 'blk false; + } + false => return state, + } + } + } +} + +fn main() {} diff --git a/tests/ui/thir-print/thir-tree-loop-match.stdout b/tests/ui/thir-print/thir-tree-loop-match.stdout new file mode 100644 index 000000000000..828b93da6beb --- /dev/null +++ b/tests/ui/thir-print/thir-tree-loop-match.stdout @@ -0,0 +1,301 @@ +DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean): +params: [ + Param { + ty: bool + ty_span: Some($DIR/thir-tree-loop-match.rs:7:23: 7:27 (#0)) + self_kind: None + hir_id: Some(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).1)) + param: Some( + Pat: { + ty: bool + span: $DIR/thir-tree-loop-match.rs:7:12: 7:21 (#0) + kind: PatKind { + Binding { + name: "state" + mode: BindingMode(No, Mut) + var: LocalVarId(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).2)) + ty: bool + is_primary: true + subpattern: None + } + } + } + ) + } +] +body: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(28)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:7:37: 20:2 (#0) + kind: + Scope { + region_scope: Node(28) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).28)) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(28)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:7:37: 20:2 (#0) + kind: + Block { + targeted_by_break: false + span: 
$DIR/thir-tree-loop-match.rs:7:37: 20:2 (#0) + region_scope: Node(3) + safety_mode: Safe + stmts: [] + expr: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(28)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:9:5: 19:6 (#0) + kind: + Scope { + region_scope: Node(4) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).4)) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(28)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:9:5: 19:6 (#0) + kind: + NeverToAny { + source: + Expr { + ty: ! + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(28)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:9:5: 19:6 (#0) + kind: + LoopMatch { + state: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(5)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:10:9: 10:14 (#0) + kind: + Scope { + region_scope: Node(7) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).7)) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(5)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:10:9: 10:14 (#0) + kind: + VarRef { + id: LocalVarId(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).2)) + } + } + } + } + region_scope: Node(10) + match_span: $DIR/thir-tree-loop-match.rs:11:13: 17:14 (#0) + arms: [ + Arm { + pattern: + Pat: { + ty: bool + span: $DIR/thir-tree-loop-match.rs:12:17: 12:21 (#0) + kind: PatKind { + Constant { + value: Ty(bool, true) + } + } + } + guard: None + body: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(16)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:12:25: 15:18 (#0) + kind: + Scope { + region_scope: Node(17) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).17)) + value: + Expr { + ty: bool + temp_lifetime: 
TempLifetime { temp_lifetime: Some(Node(16)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:12:25: 15:18 (#0) + kind: + NeverToAny { + source: + Expr { + ty: ! + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(16)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:12:25: 15:18 (#0) + kind: + Block { + targeted_by_break: false + span: $DIR/thir-tree-loop-match.rs:12:25: 15:18 (#0) + region_scope: Node(18) + safety_mode: Safe + stmts: [ + Stmt { + kind: Expr { + scope: Node(21) + expr: + Expr { + ty: ! + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(21)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:14:21: 14:37 (#0) + kind: + Scope { + region_scope: Node(19) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).19)) + value: + Expr { + ty: ! + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(21)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:14:21: 14:37 (#0) + kind: + ConstContinue ( + label: Node(10) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(21)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:14:32: 14:37 (#0) + kind: + Scope { + region_scope: Node(20) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).20)) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(21)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:14:32: 14:37 (#0) + kind: + Literal( lit: Spanned { node: Bool(false), span: $DIR/thir-tree-loop-match.rs:14:32: 14:37 (#0) }, neg: false) + + } + } + } + ) + } + } + } + } + } + ] + expr: [] + } + } + } + } + } + } + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).16)) + scope: Node(16) + span: $DIR/thir-tree-loop-match.rs:12:17: 15:18 (#0) + } + Arm { + pattern: + Pat: { + ty: bool + span: $DIR/thir-tree-loop-match.rs:16:17: 16:22 (#0) + kind: PatKind { 
+ Constant { + value: Ty(bool, false) + } + } + } + guard: None + body: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(24)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:16:26: 16:38 (#0) + kind: + Scope { + region_scope: Node(25) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).25)) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(24)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:16:26: 16:38 (#0) + kind: + NeverToAny { + source: + Expr { + ty: ! + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(24)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:16:26: 16:38 (#0) + kind: + Return { + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(24)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:16:33: 16:38 (#0) + kind: + Scope { + region_scope: Node(26) + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).26)) + value: + Expr { + ty: bool + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(24)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:16:33: 16:38 (#0) + kind: + VarRef { + id: LocalVarId(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).2)) + } + } + } + } + } + } + } + } + } + } + lint_level: Explicit(HirId(DefId(0:3 ~ thir_tree_loop_match[3c53]::boolean).24)) + scope: Node(24) + span: $DIR/thir-tree-loop-match.rs:16:17: 16:38 (#0) + } + ] + } + } + } + } + } + } + } + } + } + } + + +DefId(0:4 ~ thir_tree_loop_match[3c53]::main): +params: [ +] +body: + Expr { + ty: () + temp_lifetime: TempLifetime { temp_lifetime: Some(Node(2)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:22:11: 22:13 (#0) + kind: + Scope { + region_scope: Node(2) + lint_level: Explicit(HirId(DefId(0:4 ~ thir_tree_loop_match[3c53]::main).2)) + value: + Expr { + ty: () + temp_lifetime: 
TempLifetime { temp_lifetime: Some(Node(2)), backwards_incompatible: None } + span: $DIR/thir-tree-loop-match.rs:22:11: 22:13 (#0) + kind: + Block { + targeted_by_break: false + span: $DIR/thir-tree-loop-match.rs:22:11: 22:13 (#0) + region_scope: Node(1) + safety_mode: Safe + stmts: [] + expr: [] + } + } + } + } + + diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-0.rs b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-0.rs index 9141d327aee8..ff1ce949f097 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-0.rs +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-0.rs @@ -6,15 +6,15 @@ #[const_trait] trait Trait { - type Assoc: ~const Trait; + type Assoc: [const] Trait; fn func() -> i32; } -const fn unqualified() -> i32 { +const fn unqualified() -> i32 { T::Assoc::func() } -const fn qualified() -> i32 { +const fn qualified() -> i32 { ::Assoc::func() } diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-1.rs b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-1.rs index 19e86b50d332..5773f2281c39 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-1.rs +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-1.rs @@ -5,7 +5,7 @@ #[const_trait] trait Trait { - type Assoc: ~const Trait; + type Assoc: [const] Trait; fn func() -> i32; } diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.current.stderr b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.current.stderr index 4cd87002e491..a0474e65efeb 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.current.stderr +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.current.stderr @@ -1,10 +1,10 @@ -error[E0277]: the trait bound `U: ~const Other` is not satisfied +error[E0277]: the trait bound `U: [const] Other` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail-2.rs:24:5 | LL | T::Assoc::::func(); | 
^^^^^^^^^^^^^ -error[E0277]: the trait bound `U: ~const Other` is not satisfied +error[E0277]: the trait bound `U: [const] Other` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail-2.rs:26:5 | LL | ::Assoc::::func(); diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.next.stderr b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.next.stderr index 4cd87002e491..a0474e65efeb 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.next.stderr +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.next.stderr @@ -1,10 +1,10 @@ -error[E0277]: the trait bound `U: ~const Other` is not satisfied +error[E0277]: the trait bound `U: [const] Other` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail-2.rs:24:5 | LL | T::Assoc::::func(); | ^^^^^^^^^^^^^ -error[E0277]: the trait bound `U: ~const Other` is not satisfied +error[E0277]: the trait bound `U: [const] Other` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail-2.rs:26:5 | LL | ::Assoc::::func(); diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.rs b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.rs index e1c30b536112..5338c27bedca 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.rs +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail-2.rs @@ -1,7 +1,7 @@ //@ revisions: current next //@[next] compile-flags: -Znext-solver -// Check that `~const` item bounds only hold if the where clauses on the +// Check that `[const]` item bounds only hold if the where clauses on the // associated type are also const. // i.e. check that we validate the const conditions for the associated type // when considering one of implied const bounds. 
@@ -10,9 +10,9 @@ #[const_trait] trait Trait { - type Assoc: ~const Trait + type Assoc: [const] Trait where - U: ~const Other; + U: [const] Other; fn func(); } @@ -20,14 +20,14 @@ trait Trait { #[const_trait] trait Other {} -const fn fails() { +const fn fails() { T::Assoc::::func(); - //~^ ERROR the trait bound `U: ~const Other` is not satisfied + //~^ ERROR the trait bound `U: [const] Other` is not satisfied ::Assoc::::func(); - //~^ ERROR the trait bound `U: ~const Other` is not satisfied + //~^ ERROR the trait bound `U: [const] Other` is not satisfied } -const fn works() { +const fn works() { T::Assoc::::func(); ::Assoc::::func(); } diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.current.stderr b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.current.stderr index 9c29a894749e..20b01d06e8d5 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.current.stderr +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.current.stderr @@ -1,10 +1,10 @@ -error[E0277]: the trait bound `T: ~const Trait` is not satisfied +error[E0277]: the trait bound `T: [const] Trait` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail.rs:17:5 | LL | T::Assoc::func(); | ^^^^^^^^ -error[E0277]: the trait bound `T: ~const Trait` is not satisfied +error[E0277]: the trait bound `T: [const] Trait` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail.rs:19:5 | LL | ::Assoc::func(); diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.next.stderr b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.next.stderr index 9c29a894749e..20b01d06e8d5 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.next.stderr +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.next.stderr @@ -1,10 +1,10 @@ -error[E0277]: the trait bound `T: ~const Trait` is not satisfied +error[E0277]: the trait bound `T: [const] Trait` is not satisfied --> 
$DIR/assoc-type-const-bound-usage-fail.rs:17:5 | LL | T::Assoc::func(); | ^^^^^^^^ -error[E0277]: the trait bound `T: ~const Trait` is not satisfied +error[E0277]: the trait bound `T: [const] Trait` is not satisfied --> $DIR/assoc-type-const-bound-usage-fail.rs:19:5 | LL | ::Assoc::func(); diff --git a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.rs b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.rs index 3761fea19684..4940b3a1aa6c 100644 --- a/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.rs +++ b/tests/ui/traits/const-traits/assoc-type-const-bound-usage-fail.rs @@ -1,7 +1,7 @@ //@ revisions: current next //@[next] compile-flags: -Znext-solver -// Check that `~const` item bounds only hold if the parent trait is `~const`. +// Check that `[const]` item bounds only hold if the parent trait is `[const]`. // i.e. check that we validate the const conditions for the associated type // when considering one of implied const bounds. @@ -9,18 +9,18 @@ #[const_trait] trait Trait { - type Assoc: ~const Trait; + type Assoc: [const] Trait; fn func(); } const fn unqualified() { T::Assoc::func(); - //~^ ERROR the trait bound `T: ~const Trait` is not satisfied + //~^ ERROR the trait bound `T: [const] Trait` is not satisfied ::Assoc::func(); - //~^ ERROR the trait bound `T: ~const Trait` is not satisfied + //~^ ERROR the trait bound `T: [const] Trait` is not satisfied } -const fn works() { +const fn works() { T::Assoc::func(); ::Assoc::func(); } diff --git a/tests/ui/traits/const-traits/assoc-type.current.stderr b/tests/ui/traits/const-traits/assoc-type.current.stderr index 7526369194b4..1e58efeedeea 100644 --- a/tests/ui/traits/const-traits/assoc-type.current.stderr +++ b/tests/ui/traits/const-traits/assoc-type.current.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `NonConstAdd: ~const Add` is not satisfied +error[E0277]: the trait bound `NonConstAdd: [const] Add` is not satisfied --> $DIR/assoc-type.rs:37:16 | LL | type 
Bar = NonConstAdd; @@ -7,8 +7,8 @@ LL | type Bar = NonConstAdd; note: required by a bound in `Foo::Bar` --> $DIR/assoc-type.rs:33:15 | -LL | type Bar: ~const Add; - | ^^^^^^^^^^ required by this bound in `Foo::Bar` +LL | type Bar: [const] Add; + | ^^^^^^^^^^^ required by this bound in `Foo::Bar` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/assoc-type.next.stderr b/tests/ui/traits/const-traits/assoc-type.next.stderr index 7526369194b4..1e58efeedeea 100644 --- a/tests/ui/traits/const-traits/assoc-type.next.stderr +++ b/tests/ui/traits/const-traits/assoc-type.next.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `NonConstAdd: ~const Add` is not satisfied +error[E0277]: the trait bound `NonConstAdd: [const] Add` is not satisfied --> $DIR/assoc-type.rs:37:16 | LL | type Bar = NonConstAdd; @@ -7,8 +7,8 @@ LL | type Bar = NonConstAdd; note: required by a bound in `Foo::Bar` --> $DIR/assoc-type.rs:33:15 | -LL | type Bar: ~const Add; - | ^^^^^^^^^^ required by this bound in `Foo::Bar` +LL | type Bar: [const] Add; + | ^^^^^^^^^^^ required by this bound in `Foo::Bar` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/assoc-type.rs b/tests/ui/traits/const-traits/assoc-type.rs index a169b61994cb..1faef1b0a325 100644 --- a/tests/ui/traits/const-traits/assoc-type.rs +++ b/tests/ui/traits/const-traits/assoc-type.rs @@ -30,12 +30,12 @@ impl Add for NonConstAdd { #[const_trait] trait Foo { - type Bar: ~const Add; + type Bar: [const] Add; } impl const Foo for NonConstAdd { type Bar = NonConstAdd; - //~^ ERROR the trait bound `NonConstAdd: ~const Add` is not satisfied + //~^ ERROR the trait bound `NonConstAdd: [const] Add` is not satisfied } #[const_trait] diff --git a/tests/ui/traits/const-traits/auxiliary/minicore.rs b/tests/ui/traits/const-traits/auxiliary/minicore.rs index 073337b2ac6d..d2133bbbcaea 100644 --- a/tests/ui/traits/const-traits/auxiliary/minicore.rs +++ 
b/tests/ui/traits/const-traits/auxiliary/minicore.rs @@ -86,14 +86,14 @@ enum ControlFlow { #[const_trait] #[lang = "fn"] #[rustc_paren_sugar] -pub trait Fn: ~const FnMut { +pub trait Fn: [const] FnMut { extern "rust-call" fn call(&self, args: Args) -> Self::Output; } #[const_trait] #[lang = "fn_mut"] #[rustc_paren_sugar] -pub trait FnMut: ~const FnOnce { +pub trait FnMut: [const] FnOnce { extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output; } @@ -142,7 +142,7 @@ pub trait Drop { #[const_trait] pub trait Residual { - type TryType: ~const Try + Try; + type TryType: [const] Try + Try; } const fn size_of() -> usize { @@ -183,7 +183,7 @@ pub unsafe trait SliceIndex { impl const Index for [T] where - I: ~const SliceIndex<[T]>, + I: [const] SliceIndex<[T]>, { type Output = I::Output; @@ -195,7 +195,7 @@ where impl const Index for [T; N] where - [T]: ~const Index, + [T]: [const] Index, { type Output = <[T] as Index>::Output; @@ -265,7 +265,7 @@ use Option::*; const fn as_deref(opt: &Option) -> Option<&T::Target> where - T: ~const Deref, + T: [const] Deref, { match opt { Option::Some(t) => Option::Some(t.deref()), @@ -285,7 +285,7 @@ pub trait From: Sized { impl const Into for T where - U: ~const From, + U: [const] From, { fn into(self) -> U { U::from(self) @@ -323,7 +323,7 @@ pub trait PartialEq: PointeeSized { impl const PartialEq<&B> for &A where - A: ~const PartialEq, + A: [const] PartialEq, { fn eq(&self, other: &&B) -> bool { PartialEq::eq(*self, *other) @@ -373,7 +373,7 @@ impl<'a, T: PointeeSized> Pin<&'a T> { impl Pin

    { const fn as_ref(&self) -> Pin<&P::Target> where - P: ~const Deref, + P: [const] Deref, { unsafe { Pin::new_unchecked(&*self.pointer) } } @@ -403,7 +403,7 @@ impl Option { } } -impl const Deref for Pin

    { +impl const Deref for Pin

    { type Target = P::Target; fn deref(&self) -> &P::Target { Pin::get_ref(Pin::as_ref(self)) @@ -467,7 +467,7 @@ pub trait Clone: Sized { fn clone(&self) -> Self; fn clone_from(&mut self, source: &Self) where - Self: ~const Destruct, + Self: [const] Destruct, { *self = source.clone() } @@ -476,7 +476,7 @@ pub trait Clone: Sized { #[lang = "structural_peq"] pub trait StructuralPartialEq {} -pub const fn drop(_: T) {} +pub const fn drop(_: T) {} #[rustc_intrinsic] const fn const_eval_select( diff --git a/tests/ui/traits/const-traits/call-const-closure.rs b/tests/ui/traits/const-traits/call-const-closure.rs index 21f4374b8d53..70dfaf724c9b 100644 --- a/tests/ui/traits/const-traits/call-const-closure.rs +++ b/tests/ui/traits/const-traits/call-const-closure.rs @@ -15,7 +15,7 @@ impl Bar for () { const FOO: () = { (const || ().foo())(); - //~^ ERROR the trait bound `(): ~const Bar` is not satisfied + //~^ ERROR the trait bound `(): [const] Bar` is not satisfied // FIXME(const_trait_impl): The constness environment for const closures is wrong. 
}; diff --git a/tests/ui/traits/const-traits/call-const-closure.stderr b/tests/ui/traits/const-traits/call-const-closure.stderr index fe7c115aaab4..4bb8b2e9777e 100644 --- a/tests/ui/traits/const-traits/call-const-closure.stderr +++ b/tests/ui/traits/const-traits/call-const-closure.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `(): ~const Bar` is not satisfied +error[E0277]: the trait bound `(): [const] Bar` is not satisfied --> $DIR/call-const-closure.rs:17:18 | LL | (const || ().foo())(); diff --git a/tests/ui/traits/const-traits/call-const-in-tilde-const.rs b/tests/ui/traits/const-traits/call-const-in-conditionally-const.rs similarity index 86% rename from tests/ui/traits/const-traits/call-const-in-tilde-const.rs rename to tests/ui/traits/const-traits/call-const-in-conditionally-const.rs index b6d1517499d6..4e8c2cd171e6 100644 --- a/tests/ui/traits/const-traits/call-const-in-tilde-const.rs +++ b/tests/ui/traits/const-traits/call-const-in-conditionally-const.rs @@ -5,7 +5,7 @@ fn foo(); } -const fn foo() { +const fn foo() { const { T::foo() } //~^ ERROR the trait bound `T: const Foo` is not satisfied } diff --git a/tests/ui/traits/const-traits/call-const-in-tilde-const.stderr b/tests/ui/traits/const-traits/call-const-in-conditionally-const.stderr similarity index 80% rename from tests/ui/traits/const-traits/call-const-in-tilde-const.stderr rename to tests/ui/traits/const-traits/call-const-in-conditionally-const.stderr index b9dabceb5de4..f14b640ca315 100644 --- a/tests/ui/traits/const-traits/call-const-in-tilde-const.stderr +++ b/tests/ui/traits/const-traits/call-const-in-conditionally-const.stderr @@ -1,5 +1,5 @@ error[E0277]: the trait bound `T: const Foo` is not satisfied - --> $DIR/call-const-in-tilde-const.rs:9:13 + --> $DIR/call-const-in-conditionally-const.rs:9:13 | LL | const { T::foo() } | ^ diff --git a/tests/ui/traits/const-traits/call-const-trait-method-fail.rs b/tests/ui/traits/const-traits/call-const-trait-method-fail.rs index 
e06d04db8040..c03d3e950b0b 100644 --- a/tests/ui/traits/const-traits/call-const-trait-method-fail.rs +++ b/tests/ui/traits/const-traits/call-const-trait-method-fail.rs @@ -24,7 +24,7 @@ pub const fn add_i32(a: i32, b: i32) -> i32 { pub const fn add_u32(a: u32, b: u32) -> u32 { a.plus(b) - //~^ ERROR the trait bound `u32: ~const Plus` + //~^ ERROR the trait bound `u32: [const] Plus` } fn main() {} diff --git a/tests/ui/traits/const-traits/call-const-trait-method-fail.stderr b/tests/ui/traits/const-traits/call-const-trait-method-fail.stderr index 64850335c2ab..4aaf53344c90 100644 --- a/tests/ui/traits/const-traits/call-const-trait-method-fail.stderr +++ b/tests/ui/traits/const-traits/call-const-trait-method-fail.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `u32: ~const Plus` is not satisfied +error[E0277]: the trait bound `u32: [const] Plus` is not satisfied --> $DIR/call-const-trait-method-fail.rs:26:5 | LL | a.plus(b) diff --git a/tests/ui/traits/const-traits/call-const-trait-method-pass.rs b/tests/ui/traits/const-traits/call-const-trait-method-pass.rs index 3004647ede07..d66a11490c59 100644 --- a/tests/ui/traits/const-traits/call-const-trait-method-pass.rs +++ b/tests/ui/traits/const-traits/call-const-trait-method-pass.rs @@ -1,6 +1,5 @@ -//@ known-bug: #110395 - #![feature(const_trait_impl, const_ops)] +//@ check-pass struct Int(i32); diff --git a/tests/ui/traits/const-traits/call-const-trait-method-pass.stderr b/tests/ui/traits/const-traits/call-const-trait-method-pass.stderr deleted file mode 100644 index 7746f103ac36..000000000000 --- a/tests/ui/traits/const-traits/call-const-trait-method-pass.stderr +++ /dev/null @@ -1,20 +0,0 @@ -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/call-const-trait-method-pass.rs:15:12 - | -LL | impl const PartialEq for Int { - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: 
adding a non-const method body in the future would be a breaking change - -error[E0015]: cannot call non-const method `::eq` in constant functions - --> $DIR/call-const-trait-method-pass.rs:20:15 - | -LL | !self.eq(other) - | ^^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error: aborting due to 2 previous errors - -For more information about this error, try `rustc --explain E0015`. diff --git a/tests/ui/traits/const-traits/call-generic-in-impl.rs b/tests/ui/traits/const-traits/call-generic-in-impl.rs index 6149dc3d1263..f38590fa3c01 100644 --- a/tests/ui/traits/const-traits/call-generic-in-impl.rs +++ b/tests/ui/traits/const-traits/call-generic-in-impl.rs @@ -1,5 +1,4 @@ -//@ known-bug: #110395 -// FIXME(const_trait_impl) check-pass +//@ check-pass #![feature(const_trait_impl)] #[const_trait] @@ -7,7 +6,7 @@ trait MyPartialEq { fn eq(&self, other: &Self) -> bool; } -impl const MyPartialEq for T { +impl const MyPartialEq for T { fn eq(&self, other: &Self) -> bool { PartialEq::eq(self, other) } diff --git a/tests/ui/traits/const-traits/call-generic-in-impl.stderr b/tests/ui/traits/const-traits/call-generic-in-impl.stderr deleted file mode 100644 index a45dfd95b4a4..000000000000 --- a/tests/ui/traits/const-traits/call-generic-in-impl.stderr +++ /dev/null @@ -1,30 +0,0 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-in-impl.rs:10:9 - | -LL | impl const MyPartialEq for T { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-in-impl.rs:10:9 - | -LL | impl const MyPartialEq for T { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> 
$SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error[E0015]: cannot call non-const method `::eq` in constant functions - --> $DIR/call-generic-in-impl.rs:12:9 - | -LL | PartialEq::eq(self, other) - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error: aborting due to 3 previous errors - -For more information about this error, try `rustc --explain E0015`. diff --git a/tests/ui/traits/const-traits/call-generic-method-chain.rs b/tests/ui/traits/const-traits/call-generic-method-chain.rs index 74beab71208a..1ad71c424a3b 100644 --- a/tests/ui/traits/const-traits/call-generic-method-chain.rs +++ b/tests/ui/traits/const-traits/call-generic-method-chain.rs @@ -1,8 +1,7 @@ //! Basic test for calling methods on generic type parameters in `const fn`. -//@ known-bug: #110395 //@ compile-flags: -Znext-solver -// FIXME(const_trait_impl) check-pass +//@ check-pass #![feature(const_trait_impl)] @@ -17,11 +16,11 @@ impl const PartialEq for S { } } -const fn equals_self(t: &T) -> bool { +const fn equals_self(t: &T) -> bool { *t == *t } -const fn equals_self_wrapper(t: &T) -> bool { +const fn equals_self_wrapper(t: &T) -> bool { equals_self(t) } diff --git a/tests/ui/traits/const-traits/call-generic-method-chain.stderr b/tests/ui/traits/const-traits/call-generic-method-chain.stderr deleted file mode 100644 index 40b4f14733f0..000000000000 --- a/tests/ui/traits/const-traits/call-generic-method-chain.stderr +++ /dev/null @@ -1,66 +0,0 @@ -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/call-generic-method-chain.rs:11:12 - | -LL | impl const PartialEq for S { - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: adding a non-const method body in the future would be a breaking change - 
-error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-chain.rs:20:25 - | -LL | const fn equals_self(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-chain.rs:20:25 - | -LL | const fn equals_self(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-chain.rs:24:33 - | -LL | const fn equals_self_wrapper(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-chain.rs:24:33 - | -LL | const fn equals_self_wrapper(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/call-generic-method-chain.rs:21:5 - | -LL | *t == *t - | ^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const method `::eq` in constant functions - --> $DIR/call-generic-method-chain.rs:16:15 - | -LL | !self.eq(other) - | ^^^^^^^^^ - | - = note: calls in constant functions are 
limited to constant functions, tuple structs and tuple variants - -error: aborting due to 7 previous errors - -For more information about this error, try `rustc --explain E0015`. diff --git a/tests/ui/traits/const-traits/call-generic-method-dup-bound.rs b/tests/ui/traits/const-traits/call-generic-method-dup-bound.rs index ec615d8484cd..58f293b5ac5c 100644 --- a/tests/ui/traits/const-traits/call-generic-method-dup-bound.rs +++ b/tests/ui/traits/const-traits/call-generic-method-dup-bound.rs @@ -1,6 +1,5 @@ //@ compile-flags: -Znext-solver -//@ known-bug: #110395 -// FIXME(const_trait_impl) check-pass +//@ check-pass #![feature(const_trait_impl)] @@ -15,16 +14,16 @@ impl const PartialEq for S { } } -// This duplicate bound should not result in ambiguities. It should be equivalent to a single ~const -// bound. -const fn equals_self(t: &T) -> bool { +// This duplicate bound should not result in ambiguities. +// It should be equivalent to a single [const] bound. +const fn equals_self(t: &T) -> bool { *t == *t } trait A: PartialEq {} impl A for T {} -const fn equals_self2(t: &T) -> bool { +const fn equals_self2(t: &T) -> bool { *t == *t } diff --git a/tests/ui/traits/const-traits/call-generic-method-dup-bound.stderr b/tests/ui/traits/const-traits/call-generic-method-dup-bound.stderr deleted file mode 100644 index c74f5cf786c1..000000000000 --- a/tests/ui/traits/const-traits/call-generic-method-dup-bound.stderr +++ /dev/null @@ -1,74 +0,0 @@ -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/call-generic-method-dup-bound.rs:9:12 - | -LL | impl const PartialEq for S { - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: adding a non-const method body in the future would be a breaking change - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-dup-bound.rs:20:37 - | -LL | const fn 
equals_self(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-dup-bound.rs:20:37 - | -LL | const fn equals_self(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-dup-bound.rs:27:30 - | -LL | const fn equals_self2(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-dup-bound.rs:27:30 - | -LL | const fn equals_self2(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/call-generic-method-dup-bound.rs:21:5 - | -LL | *t == *t - | ^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const method `::eq` in constant functions - --> $DIR/call-generic-method-dup-bound.rs:14:15 - | -LL | !self.eq(other) - | ^^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const operator in constant 
functions - --> $DIR/call-generic-method-dup-bound.rs:28:5 - | -LL | *t == *t - | ^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error: aborting due to 8 previous errors - -For more information about this error, try `rustc --explain E0015`. diff --git a/tests/ui/traits/const-traits/call-generic-method-fail.rs b/tests/ui/traits/const-traits/call-generic-method-fail.rs index 66881334a298..4528f3b122f9 100644 --- a/tests/ui/traits/const-traits/call-generic-method-fail.rs +++ b/tests/ui/traits/const-traits/call-generic-method-fail.rs @@ -3,7 +3,7 @@ pub const fn equals_self(t: &T) -> bool { *t == *t - //~^ ERROR cannot call non-const operator in constant functions + //~^ ERROR the trait bound `T: [const] PartialEq` is not satisfied } fn main() {} diff --git a/tests/ui/traits/const-traits/call-generic-method-fail.stderr b/tests/ui/traits/const-traits/call-generic-method-fail.stderr index 6bacb986fef0..a2fba141f7b8 100644 --- a/tests/ui/traits/const-traits/call-generic-method-fail.stderr +++ b/tests/ui/traits/const-traits/call-generic-method-fail.stderr @@ -1,11 +1,9 @@ -error[E0015]: cannot call non-const operator in constant functions +error[E0277]: the trait bound `T: [const] PartialEq` is not satisfied --> $DIR/call-generic-method-fail.rs:5:5 | LL | *t == *t | ^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants error: aborting due to 1 previous error -For more information about this error, try `rustc --explain E0015`. +For more information about this error, try `rustc --explain E0277`. 
diff --git a/tests/ui/traits/const-traits/call-generic-method-nonconst.rs b/tests/ui/traits/const-traits/call-generic-method-nonconst.rs index 446a74eb7b7d..0efc8a954ded 100644 --- a/tests/ui/traits/const-traits/call-generic-method-nonconst.rs +++ b/tests/ui/traits/const-traits/call-generic-method-nonconst.rs @@ -14,7 +14,7 @@ impl Foo for S { } } -const fn equals_self(t: &T) -> bool { +const fn equals_self(t: &T) -> bool { true } diff --git a/tests/ui/traits/const-traits/call-generic-method-nonconst.stderr b/tests/ui/traits/const-traits/call-generic-method-nonconst.stderr index 11bbe8bbb408..9c1e0fee9e71 100644 --- a/tests/ui/traits/const-traits/call-generic-method-nonconst.stderr +++ b/tests/ui/traits/const-traits/call-generic-method-nonconst.stderr @@ -9,8 +9,8 @@ LL | pub const EQ: bool = equals_self(&S); note: required by a bound in `equals_self` --> $DIR/call-generic-method-nonconst.rs:17:25 | -LL | const fn equals_self(t: &T) -> bool { - | ^^^^^^^^^^ required by this bound in `equals_self` +LL | const fn equals_self(t: &T) -> bool { + | ^^^^^^^^^^^ required by this bound in `equals_self` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/call-generic-method-pass.rs b/tests/ui/traits/const-traits/call-generic-method-pass.rs index af793b8da031..aa52a7b9e473 100644 --- a/tests/ui/traits/const-traits/call-generic-method-pass.rs +++ b/tests/ui/traits/const-traits/call-generic-method-pass.rs @@ -1,8 +1,7 @@ //! Basic test for calling methods on generic type parameters in `const fn`. 
//@ compile-flags: -Znext-solver -//@ known-bug: #110395 -// FIXME(const_trait_impl) check-pass +//@ check-pass #![feature(const_trait_impl)] @@ -17,7 +16,7 @@ impl const PartialEq for S { } } -const fn equals_self(t: &T) -> bool { +const fn equals_self(t: &T) -> bool { *t == *t } diff --git a/tests/ui/traits/const-traits/call-generic-method-pass.stderr b/tests/ui/traits/const-traits/call-generic-method-pass.stderr deleted file mode 100644 index 1a33ff5ab450..000000000000 --- a/tests/ui/traits/const-traits/call-generic-method-pass.stderr +++ /dev/null @@ -1,47 +0,0 @@ -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/call-generic-method-pass.rs:11:12 - | -LL | impl const PartialEq for S { - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: adding a non-const method body in the future would be a breaking change - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-pass.rs:20:25 - | -LL | const fn equals_self(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/call-generic-method-pass.rs:20:25 - | -LL | const fn equals_self(t: &T) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/call-generic-method-pass.rs:21:5 - | -LL | *t == *t - | ^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error[E0015]: 
cannot call non-const method `::eq` in constant functions - --> $DIR/call-generic-method-pass.rs:16:15 - | -LL | !self.eq(other) - | ^^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error: aborting due to 5 previous errors - -For more information about this error, try `rustc --explain E0015`. diff --git a/tests/ui/traits/const-traits/tilde-const-and-const-params.rs b/tests/ui/traits/const-traits/conditionally-const-and-const-params.rs similarity index 71% rename from tests/ui/traits/const-traits/tilde-const-and-const-params.rs rename to tests/ui/traits/const-traits/conditionally-const-and-const-params.rs index 428223d92c01..29553884b21c 100644 --- a/tests/ui/traits/const-traits/tilde-const-and-const-params.rs +++ b/tests/ui/traits/const-traits/conditionally-const-and-const-params.rs @@ -5,8 +5,8 @@ struct Foo; impl Foo { - fn add(self) -> Foo<{ A::add(N) }> { - //~^ ERROR `~const` is not allowed here + fn add(self) -> Foo<{ A::add(N) }> { + //~^ ERROR `[const]` is not allowed here //~| ERROR the trait bound `A: const Add42` is not satisfied Foo } @@ -23,8 +23,8 @@ impl const Add42 for () { } } -fn bar(_: Foo) -> Foo<{ A::add(N) }> { - //~^ ERROR `~const` is not allowed here +fn bar(_: Foo) -> Foo<{ A::add(N) }> { + //~^ ERROR `[const]` is not allowed here //~| ERROR the trait bound `A: const Add42` is not satisfied Foo } diff --git a/tests/ui/traits/const-traits/conditionally-const-and-const-params.stderr b/tests/ui/traits/const-traits/conditionally-const-and-const-params.stderr new file mode 100644 index 000000000000..f450bc6c9ab6 --- /dev/null +++ b/tests/ui/traits/const-traits/conditionally-const-and-const-params.stderr @@ -0,0 +1,39 @@ +error: `[const]` is not allowed here + --> $DIR/conditionally-const-and-const-params.rs:8:13 + | +LL | fn add(self) -> Foo<{ A::add(N) }> { + | ^^^^^^^^^ + | +note: this function is not `const`, so it cannot have `[const]` trait bounds + --> 
$DIR/conditionally-const-and-const-params.rs:8:8 + | +LL | fn add(self) -> Foo<{ A::add(N) }> { + | ^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-and-const-params.rs:26:9 + | +LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> { + | ^^^^^^^^^ + | +note: this function is not `const`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-and-const-params.rs:26:4 + | +LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> { + | ^^^ + +error[E0277]: the trait bound `A: const Add42` is not satisfied + --> $DIR/conditionally-const-and-const-params.rs:26:62 + | +LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> { + | ^ + +error[E0277]: the trait bound `A: const Add42` is not satisfied + --> $DIR/conditionally-const-and-const-params.rs:8:45 + | +LL | fn add(self) -> Foo<{ A::add(N) }> { + | ^ + +error: aborting due to 4 previous errors + +For more information about this error, try `rustc --explain E0277`. diff --git a/tests/ui/traits/const-traits/tilde-const-assoc-fn-in-trait-impl.rs b/tests/ui/traits/const-traits/conditionally-const-assoc-fn-in-trait-impl.rs similarity index 83% rename from tests/ui/traits/const-traits/tilde-const-assoc-fn-in-trait-impl.rs rename to tests/ui/traits/const-traits/conditionally-const-assoc-fn-in-trait-impl.rs index 73b2bdc4e3f8..7f01c0b7a5c9 100644 --- a/tests/ui/traits/const-traits/tilde-const-assoc-fn-in-trait-impl.rs +++ b/tests/ui/traits/const-traits/conditionally-const-assoc-fn-in-trait-impl.rs @@ -5,11 +5,11 @@ #[const_trait] trait Main { - fn compute() -> u32; + fn compute() -> u32; } impl const Main for () { - fn compute() -> u32 { + fn compute() -> u32 { T::generate() } } diff --git a/tests/ui/traits/const-traits/tilde-const-in-struct-args.rs b/tests/ui/traits/const-traits/conditionally-const-in-struct-args.rs similarity index 84% rename from tests/ui/traits/const-traits/tilde-const-in-struct-args.rs rename to tests/ui/traits/const-traits/conditionally-const-in-struct-args.rs index e7ec3d31eb91..0c644694585a 100644 
--- a/tests/ui/traits/const-traits/tilde-const-in-struct-args.rs +++ b/tests/ui/traits/const-traits/conditionally-const-in-struct-args.rs @@ -11,7 +11,7 @@ trait Trait {} const fn f< T: Trait< { - struct I>(U); + struct I>(U); 0 }, >, diff --git a/tests/ui/traits/const-traits/tilde-const-inherent-assoc-const-fn.rs b/tests/ui/traits/const-traits/conditionally-const-inherent-assoc-const-fn.rs similarity index 81% rename from tests/ui/traits/const-traits/tilde-const-inherent-assoc-const-fn.rs rename to tests/ui/traits/const-traits/conditionally-const-inherent-assoc-const-fn.rs index 0e010695587f..56478a6674b0 100644 --- a/tests/ui/traits/const-traits/tilde-const-inherent-assoc-const-fn.rs +++ b/tests/ui/traits/const-traits/conditionally-const-inherent-assoc-const-fn.rs @@ -10,7 +10,7 @@ trait Foo { struct Bar(T); impl Bar { - const fn foo(&self) where T: ~const Foo { + const fn foo(&self) where T: [const] Foo { self.0.foo() } } diff --git a/tests/ui/traits/const-traits/conditionally-const-invalid-places.rs b/tests/ui/traits/const-traits/conditionally-const-invalid-places.rs new file mode 100644 index 000000000000..52627004fb24 --- /dev/null +++ b/tests/ui/traits/const-traits/conditionally-const-invalid-places.rs @@ -0,0 +1,61 @@ +#![feature(const_trait_impl)] + +#[const_trait] +trait Trait {} + +// Regression test for issue #90052. 
+fn non_const_function() {} //~ ERROR `[const]` is not allowed + +struct Struct { field: T } //~ ERROR `[const]` is not allowed here +struct TupleStruct(T); //~ ERROR `[const]` is not allowed here +struct UnitStruct; //~ ERROR `[const]` is not allowed here +//~^ ERROR parameter `T` is never used + +enum Enum { Variant(T) } //~ ERROR `[const]` is not allowed here + +union Union { field: T } //~ ERROR `[const]` is not allowed here +//~^ ERROR field must implement `Copy` + +type Type = T; //~ ERROR `[const]` is not allowed here + +const CONSTANT: () = (); //~ ERROR `[const]` is not allowed here +//~^ ERROR generic const items are experimental + +trait NonConstTrait { + type Type: [const] Trait; + //~^ ERROR `[const]` is not allowed + //~| ERROR `[const]` is not allowed + fn non_const_function(); //~ ERROR `[const]` is not allowed + const CONSTANT: (); //~ ERROR `[const]` is not allowed + //~^ ERROR generic const items are experimental +} + +impl NonConstTrait for () { + type Type = (); //~ ERROR `[const]` is not allowed + //~^ ERROR overflow evaluating the requirement `(): Trait` + fn non_const_function() {} //~ ERROR `[const]` is not allowed + const CONSTANT: () = (); //~ ERROR `[const]` is not allowed + //~^ ERROR generic const items are experimental +} + +struct Implementor; + +impl Implementor { + type Type = (); //~ ERROR `[const]` is not allowed + //~^ ERROR inherent associated types are unstable + fn non_const_function() {} //~ ERROR `[const]` is not allowed + const CONSTANT: () = (); //~ ERROR `[const]` is not allowed + //~^ ERROR generic const items are experimental +} + +// non-const traits +trait Child0: [const] Trait {} //~ ERROR `[const]` is not allowed +trait Child1 where Self: [const] Trait {} //~ ERROR `[const]` is not allowed + +// non-const impl +impl Trait for T {} //~ ERROR `[const]` is not allowed + +// inherent impl (regression test for issue #117004) +impl Struct {} //~ ERROR `[const]` is not allowed + +fn main() {} diff --git 
a/tests/ui/traits/const-traits/conditionally-const-invalid-places.stderr b/tests/ui/traits/const-traits/conditionally-const-invalid-places.stderr new file mode 100644 index 000000000000..62319689861b --- /dev/null +++ b/tests/ui/traits/const-traits/conditionally-const-invalid-places.stderr @@ -0,0 +1,310 @@ +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:7:24 + | +LL | fn non_const_function() {} + | ^^^^^^^^^ + | +note: this function is not `const`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:7:4 + | +LL | fn non_const_function() {} + | ^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:9:16 + | +LL | struct Struct { field: T } + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:10:21 + | +LL | struct TupleStruct(T); + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:11:20 + | +LL | struct UnitStruct; + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:14:12 + | +LL | enum Enum { Variant(T) } + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:16:14 + | +LL | union Union { field: T } + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:19:12 + | +LL | type Type = T; + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:21:17 + | +LL | const CONSTANT: () = (); + | ^^^^^^^^^ + | + = 
note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:25:16 + | +LL | type Type: [const] Trait; + | ^^^^^^^^^ + | +note: associated types in non-`#[const_trait]` traits cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:25:5 + | +LL | type Type: [const] Trait; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:25:32 + | +LL | type Type: [const] Trait; + | ^^^^^^^^^ + | +note: associated types in non-`#[const_trait]` traits cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:25:5 + | +LL | type Type: [const] Trait; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:28:28 + | +LL | fn non_const_function(); + | ^^^^^^^^^ + | +note: this function is not `const`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:28:8 + | +LL | fn non_const_function(); + | ^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:29:21 + | +LL | const CONSTANT: (); + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:34:16 + | +LL | type Type = (); + | ^^^^^^^^^ + | +note: associated types in non-const impls cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:34:5 + | +LL | type Type = (); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:36:28 + | +LL | fn non_const_function() {} + | ^^^^^^^^^ + | +note: this function is not `const`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:36:8 + | +LL | fn non_const_function() {} + | 
^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:37:21 + | +LL | const CONSTANT: () = (); + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:44:16 + | +LL | type Type = (); + | ^^^^^^^^^ + | +note: inherent associated types cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:44:5 + | +LL | type Type = (); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:46:28 + | +LL | fn non_const_function() {} + | ^^^^^^^^^ + | +note: this function is not `const`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:46:8 + | +LL | fn non_const_function() {} + | ^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:47:21 + | +LL | const CONSTANT: () = (); + | ^^^^^^^^^ + | + = note: this item cannot have `[const]` trait bounds + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:52:13 + | +LL | trait Child0: [const] Trait {} + | ^^^^^^^^^ + | +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:52:1 + | +LL | trait Child0: [const] Trait {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:53:24 + | +LL | trait Child1 where Self: [const] Trait {} + | ^^^^^^^^^ + | +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:53:1 + | +LL | trait Child1 where Self: [const] Trait {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:56:7 + | +LL | impl Trait for T {} + | 
^^^^^^^^^ + | +note: this impl is not `const`, so it cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:56:1 + | +LL | impl Trait for T {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: `[const]` is not allowed here + --> $DIR/conditionally-const-invalid-places.rs:59:7 + | +LL | impl Struct {} + | ^^^^^^^^^ + | +note: inherent impls cannot have `[const]` trait bounds + --> $DIR/conditionally-const-invalid-places.rs:59:1 + | +LL | impl Struct {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error[E0658]: generic const items are experimental + --> $DIR/conditionally-const-invalid-places.rs:21:15 + | +LL | const CONSTANT: () = (); + | ^^^^^^^^^^^^^^^^^^ + | + = note: see issue #113521 for more information + = help: add `#![feature(generic_const_items)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: generic const items are experimental + --> $DIR/conditionally-const-invalid-places.rs:29:19 + | +LL | const CONSTANT: (); + | ^^^^^^^^^^^^^^^^^^ + | + = note: see issue #113521 for more information + = help: add `#![feature(generic_const_items)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: generic const items are experimental + --> $DIR/conditionally-const-invalid-places.rs:37:19 + | +LL | const CONSTANT: () = (); + | ^^^^^^^^^^^^^^^^^^ + | + = note: see issue #113521 for more information + = help: add `#![feature(generic_const_items)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0658]: generic const items are experimental + --> $DIR/conditionally-const-invalid-places.rs:47:19 + | +LL | const CONSTANT: () = (); + | ^^^^^^^^^^^^^^^^^^ + | + = note: see issue #113521 for more information + = help: add `#![feature(generic_const_items)]` to the crate attributes 
to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0392]: type parameter `T` is never used + --> $DIR/conditionally-const-invalid-places.rs:11:19 + | +LL | struct UnitStruct; + | ^ unused type parameter + | + = help: consider removing `T`, referring to it in a field, or using a marker such as `PhantomData` + +error[E0740]: field must implement `Copy` or be wrapped in `ManuallyDrop<...>` to be used in a union + --> $DIR/conditionally-const-invalid-places.rs:16:33 + | +LL | union Union { field: T } + | ^^^^^^^^ + | + = note: union fields must not have drop side-effects, which is currently enforced via either `Copy` or `ManuallyDrop<...>` +help: wrap the field type in `ManuallyDrop<...>` + | +LL | union Union { field: std::mem::ManuallyDrop } + | +++++++++++++++++++++++ + + +error[E0275]: overflow evaluating the requirement `(): Trait` + --> $DIR/conditionally-const-invalid-places.rs:34:35 + | +LL | type Type = (); + | ^^ + | +note: required by a bound in `NonConstTrait::Type` + --> $DIR/conditionally-const-invalid-places.rs:25:34 + | +LL | type Type: [const] Trait; + | ^^^^^^^^^^^^^ required by this bound in `NonConstTrait::Type` + +error[E0658]: inherent associated types are unstable + --> $DIR/conditionally-const-invalid-places.rs:44:5 + | +LL | type Type = (); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: see issue #8995 for more information + = help: add `#![feature(inherent_associated_types)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error: aborting due to 30 previous errors + +Some errors have detailed explanations: E0275, E0392, E0658, E0740. +For more information about an error, try `rustc --explain E0275`. 
diff --git a/tests/ui/traits/const-traits/tilde-const-trait-assoc-tys.rs b/tests/ui/traits/const-traits/conditionally-const-trait-bound-assoc-tys.rs similarity index 71% rename from tests/ui/traits/const-traits/tilde-const-trait-assoc-tys.rs rename to tests/ui/traits/const-traits/conditionally-const-trait-bound-assoc-tys.rs index 53ddb5c0cdfc..b0bd8466f66b 100644 --- a/tests/ui/traits/const-traits/tilde-const-trait-assoc-tys.rs +++ b/tests/ui/traits/const-traits/conditionally-const-trait-bound-assoc-tys.rs @@ -4,11 +4,11 @@ #[const_trait] trait Trait { - type Assoc; + type Assoc; } impl const Trait for () { - type Assoc = T; + type Assoc = T; } #[const_trait] diff --git a/tests/ui/traits/const-traits/conditionally-const-trait-bound-syntax.rs b/tests/ui/traits/const-traits/conditionally-const-trait-bound-syntax.rs new file mode 100644 index 000000000000..89950c65ef64 --- /dev/null +++ b/tests/ui/traits/const-traits/conditionally-const-trait-bound-syntax.rs @@ -0,0 +1,9 @@ +//@ compile-flags: -Z parse-crate-root-only +//@ check-pass + +#![feature(const_trait_impl)] + +struct S< + T: for<'a> [const] Tr<'a> + 'static + [const] std::ops::Add, + T: for<'a: 'b> [const] m::Trait<'a>, +>; diff --git a/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.rs b/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.rs index c735f855bcea..941112727083 100644 --- a/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.rs +++ b/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.rs @@ -8,8 +8,8 @@ trait MyTrait { } trait OtherTrait { - fn do_something_else() where Self: ~const MyTrait; - //~^ ERROR `~const` is not allowed here + fn do_something_else() where Self: [const] MyTrait; + //~^ ERROR `[const]` is not allowed here } struct MyStruct(T); @@ -19,8 +19,8 @@ impl const MyTrait for u32 { } impl MyStruct { - pub fn foo(&self) where T: ~const MyTrait { - //~^ ERROR `~const` is not allowed here + pub fn foo(&self) where T: 
[const] MyTrait { + //~^ ERROR `[const]` is not allowed here self.0.do_something(); } } diff --git a/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.stderr b/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.stderr index 50ab52ade49c..c0af644d3deb 100644 --- a/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.stderr +++ b/tests/ui/traits/const-traits/const-bound-on-not-const-associated-fn.stderr @@ -1,25 +1,25 @@ -error: `~const` is not allowed here - --> $DIR/const-bound-on-not-const-associated-fn.rs:11:40 +error: `[const]` is not allowed here + --> $DIR/const-bound-on-not-const-associated-fn.rs:11:38 | -LL | fn do_something_else() where Self: ~const MyTrait; - | ^^^^^^ +LL | fn do_something_else() where Self: [const] MyTrait; + | ^^^^^^^^^ | -note: this function is not `const`, so it cannot have `~const` trait bounds +note: this function is not `const`, so it cannot have `[const]` trait bounds --> $DIR/const-bound-on-not-const-associated-fn.rs:11:8 | -LL | fn do_something_else() where Self: ~const MyTrait; +LL | fn do_something_else() where Self: [const] MyTrait; | ^^^^^^^^^^^^^^^^^ -error: `~const` is not allowed here - --> $DIR/const-bound-on-not-const-associated-fn.rs:22:32 +error: `[const]` is not allowed here + --> $DIR/const-bound-on-not-const-associated-fn.rs:22:30 | -LL | pub fn foo(&self) where T: ~const MyTrait { - | ^^^^^^ +LL | pub fn foo(&self) where T: [const] MyTrait { + | ^^^^^^^^^ | -note: this function is not `const`, so it cannot have `~const` trait bounds +note: this function is not `const`, so it cannot have `[const]` trait bounds --> $DIR/const-bound-on-not-const-associated-fn.rs:22:12 | -LL | pub fn foo(&self) where T: ~const MyTrait { +LL | pub fn foo(&self) where T: [const] MyTrait { | ^^^ error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/const-bounds-non-const-trait.rs b/tests/ui/traits/const-traits/const-bounds-non-const-trait.rs index 
e446eb154814..ae31d9ae0ac0 100644 --- a/tests/ui/traits/const-traits/const-bounds-non-const-trait.rs +++ b/tests/ui/traits/const-traits/const-bounds-non-const-trait.rs @@ -3,9 +3,9 @@ trait NonConst {} -const fn perform() {} -//~^ ERROR `~const` can only be applied to `#[const_trait]` traits -//~| ERROR `~const` can only be applied to `#[const_trait]` traits +const fn perform() {} +//~^ ERROR `[const]` can only be applied to `#[const_trait]` traits +//~| ERROR `[const]` can only be applied to `#[const_trait]` traits fn operate() {} //~^ ERROR `const` can only be applied to `#[const_trait]` traits diff --git a/tests/ui/traits/const-traits/const-bounds-non-const-trait.stderr b/tests/ui/traits/const-traits/const-bounds-non-const-trait.stderr index f97d3a9181e0..2ff5fb74031b 100644 --- a/tests/ui/traits/const-traits/const-bounds-non-const-trait.stderr +++ b/tests/ui/traits/const-traits/const-bounds-non-const-trait.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-bounds-non-const-trait.rs:6:21 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-bounds-non-const-trait.rs:6:19 | -LL | const fn perform() {} - | ^^^^^^ can't be applied to `NonConst` +LL | const fn perform() {} + | ^^^^^^^^^ can't be applied to `NonConst` | help: mark `NonConst` as `#[const_trait]` to allow it to have `const` implementations | LL | #[const_trait] trait NonConst {} | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-bounds-non-const-trait.rs:6:21 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-bounds-non-const-trait.rs:6:19 | -LL | const fn perform() {} - | ^^^^^^ can't be applied to `NonConst` +LL | const fn perform() {} + | ^^^^^^^^^ can't be applied to `NonConst` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `NonConst` as `#[const_trait]` to allow it to have `const` implementations 
diff --git a/tests/ui/traits/const-traits/const-closure-parse-not-item.rs b/tests/ui/traits/const-traits/const-closure-parse-not-item.rs index b1b0e68b90db..35127eda5c03 100644 --- a/tests/ui/traits/const-traits/const-closure-parse-not-item.rs +++ b/tests/ui/traits/const-traits/const-closure-parse-not-item.rs @@ -4,7 +4,7 @@ #![feature(const_trait_impl, const_closures)] #![allow(incomplete_features)] -const fn test() -> impl ~const Fn() { +const fn test() -> impl [const] Fn() { const move || {} } diff --git a/tests/ui/traits/const-traits/const-closure-parse-not-item.stderr b/tests/ui/traits/const-traits/const-closure-parse-not-item.stderr index 57afa2257b7d..cc9d9bd60229 100644 --- a/tests/ui/traits/const-traits/const-closure-parse-not-item.stderr +++ b/tests/ui/traits/const-traits/const-closure-parse-not-item.stderr @@ -1,29 +1,29 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closure-parse-not-item.rs:7:25 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-parse-not-item.rs:7:20 | -LL | const fn test() -> impl ~const Fn() { - | ^^^^^^ can't be applied to `Fn` +LL | const fn test() -> impl [const] Fn() { + | ^^^^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closure-parse-not-item.rs:7:25 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-parse-not-item.rs:7:20 | -LL | const fn test() -> impl ~const Fn() { - | ^^^^^^ can't be applied to `Fn` +LL | const fn test() -> impl [const] Fn() { + | ^^^^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` 
because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closure-parse-not-item.rs:7:25 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-parse-not-item.rs:7:20 | -LL | const fn test() -> impl ~const Fn() { - | ^^^^^^ can't be applied to `Fn` +LL | const fn test() -> impl [const] Fn() { + | ^^^^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/traits/const-traits/const-closure-trait-method-fail.rs b/tests/ui/traits/const-traits/const-closure-trait-method-fail.rs index 8c6286426d32..cbcc4aa7c3cd 100644 --- a/tests/ui/traits/const-traits/const-closure-trait-method-fail.rs +++ b/tests/ui/traits/const-traits/const-closure-trait-method-fail.rs @@ -11,7 +11,7 @@ impl Tr for () { fn a(self) -> i32 { 42 } } -const fn need_const_closure i32>(x: T) -> i32 { +const fn need_const_closure i32>(x: T) -> i32 { x(()) } diff --git a/tests/ui/traits/const-traits/const-closure-trait-method-fail.stderr b/tests/ui/traits/const-traits/const-closure-trait-method-fail.stderr index 2a97846ccb44..7a146b9d8a11 100644 --- a/tests/ui/traits/const-traits/const-closure-trait-method-fail.stderr +++ b/tests/ui/traits/const-traits/const-closure-trait-method-fail.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closure-trait-method-fail.rs:14:32 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-trait-method-fail.rs:14:30 | -LL | const fn need_const_closure i32>(x: T) -> 
i32 { - | ^^^^^^ can't be applied to `FnOnce` +LL | const fn need_const_closure i32>(x: T) -> i32 { + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closure-trait-method-fail.rs:14:32 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-trait-method-fail.rs:14:30 | -LL | const fn need_const_closure i32>(x: T) -> i32 { - | ^^^^^^ can't be applied to `FnOnce` +LL | const fn need_const_closure i32>(x: T) -> i32 { + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/traits/const-traits/const-closure-trait-method.rs b/tests/ui/traits/const-traits/const-closure-trait-method.rs index ebee4daefbea..831d6e27946d 100644 --- a/tests/ui/traits/const-traits/const-closure-trait-method.rs +++ b/tests/ui/traits/const-traits/const-closure-trait-method.rs @@ -11,7 +11,7 @@ impl const Tr for () { fn a(self) -> i32 { 42 } } -const fn need_const_closure i32>(x: T) -> i32 { +const fn need_const_closure i32>(x: T) -> i32 { x(()) } diff --git a/tests/ui/traits/const-traits/const-closure-trait-method.stderr b/tests/ui/traits/const-traits/const-closure-trait-method.stderr index 9c63b7e63a65..6c003f87ada6 100644 --- a/tests/ui/traits/const-traits/const-closure-trait-method.stderr +++ b/tests/ui/traits/const-traits/const-closure-trait-method.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> 
$DIR/const-closure-trait-method.rs:14:32 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-trait-method.rs:14:30 | -LL | const fn need_const_closure i32>(x: T) -> i32 { - | ^^^^^^ can't be applied to `FnOnce` +LL | const fn need_const_closure i32>(x: T) -> i32 { + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closure-trait-method.rs:14:32 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closure-trait-method.rs:14:30 | -LL | const fn need_const_closure i32>(x: T) -> i32 { - | ^^^^^^ can't be applied to `FnOnce` +LL | const fn need_const_closure i32>(x: T) -> i32 { + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/traits/const-traits/const-closures.rs b/tests/ui/traits/const-traits/const-closures.rs index 98f8d039cd64..2f6f4dc4ba33 100644 --- a/tests/ui/traits/const-traits/const-closures.rs +++ b/tests/ui/traits/const-traits/const-closures.rs @@ -5,9 +5,9 @@ const fn answer_p1(f: &F) -> u8 where - F: ~const FnOnce() -> u8, - F: ~const FnMut() -> u8, - F: ~const Fn() -> u8, + F: [const] FnOnce() -> u8, + F: [const] FnMut() -> u8, + F: [const] Fn() -> u8, { f() * 7 } @@ -20,7 +20,7 @@ const fn answer_p2() -> u8 { answer_p1(&three) } -const fn answer u8>(f: &F) -> u8 { +const fn answer u8>(f: &F) -> u8 { f() + f() } diff --git 
a/tests/ui/traits/const-traits/const-closures.stderr b/tests/ui/traits/const-traits/const-closures.stderr index 92f3ba208207..c76a73418a53 100644 --- a/tests/ui/traits/const-traits/const-closures.stderr +++ b/tests/ui/traits/const-traits/const-closures.stderr @@ -1,76 +1,76 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:8:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:8:10 | -LL | F: ~const FnOnce() -> u8, - | ^^^^^^ can't be applied to `FnOnce` +LL | F: [const] FnOnce() -> u8, + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:9:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:9:10 | -LL | F: ~const FnMut() -> u8, - | ^^^^^^ can't be applied to `FnMut` +LL | F: [const] FnMut() -> u8, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:10:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:10:10 | -LL | F: ~const Fn() -> u8, - | ^^^^^^ can't be applied to `Fn` +LL | F: [const] Fn() -> u8, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL 
-error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:8:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:8:10 | -LL | F: ~const FnOnce() -> u8, - | ^^^^^^ can't be applied to `FnOnce` +LL | F: [const] FnOnce() -> u8, + | ^^^^^^^^^ can't be applied to `FnOnce` | -note: `FnOnce` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnOnce` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:9:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:9:10 | -LL | F: ~const FnMut() -> u8, - | ^^^^^^ can't be applied to `FnMut` +LL | F: [const] FnMut() -> u8, + | ^^^^^^^^^ can't be applied to `FnMut` | -note: `FnMut` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `FnMut` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:10:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:10:10 | -LL | F: ~const Fn() -> u8, - | ^^^^^^ can't be applied to `Fn` +LL | F: [const] Fn() -> u8, + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` 
traits - --> $DIR/const-closures.rs:23:20 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:23:18 | -LL | const fn answer u8>(f: &F) -> u8 { - | ^^^^^^ can't be applied to `Fn` +LL | const fn answer u8>(f: &F) -> u8 { + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-closures.rs:23:20 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/const-closures.rs:23:18 | -LL | const fn answer u8>(f: &F) -> u8 { - | ^^^^^^ can't be applied to `Fn` +LL | const fn answer u8>(f: &F) -> u8 { + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/traits/const-traits/const-cond-for-rpitit.rs b/tests/ui/traits/const-traits/const-cond-for-rpitit.rs index 50bf93f9a032..da83e054dd9b 100644 --- a/tests/ui/traits/const-traits/const-cond-for-rpitit.rs +++ b/tests/ui/traits/const-traits/const-cond-for-rpitit.rs @@ -6,15 +6,15 @@ #[const_trait] pub trait Foo { - fn method(self) -> impl ~const Bar; + fn method(self) -> impl [const] Bar; } #[const_trait] pub trait Bar {} struct A(T); -impl const Foo for A where A: ~const Bar { - fn method(self) -> impl ~const Bar { +impl const Foo for A where A: [const] Bar { + fn method(self) -> impl [const] Bar { self } } diff --git a/tests/ui/traits/const-traits/const-default-method-bodies.rs b/tests/ui/traits/const-traits/const-default-method-bodies.rs index 0ef11a7f0c93..27e828c7ab91 100644 --- 
a/tests/ui/traits/const-traits/const-default-method-bodies.rs +++ b/tests/ui/traits/const-traits/const-default-method-bodies.rs @@ -23,7 +23,7 @@ impl const ConstDefaultFn for ConstImpl { const fn test() { NonConstImpl.a(); - //~^ ERROR the trait bound `NonConstImpl: ~const ConstDefaultFn` is not satisfied + //~^ ERROR the trait bound `NonConstImpl: [const] ConstDefaultFn` is not satisfied ConstImpl.a(); } diff --git a/tests/ui/traits/const-traits/const-default-method-bodies.stderr b/tests/ui/traits/const-traits/const-default-method-bodies.stderr index 903f7d37f9d8..03ca6f1d5115 100644 --- a/tests/ui/traits/const-traits/const-default-method-bodies.stderr +++ b/tests/ui/traits/const-traits/const-default-method-bodies.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `NonConstImpl: ~const ConstDefaultFn` is not satisfied +error[E0277]: the trait bound `NonConstImpl: [const] ConstDefaultFn` is not satisfied --> $DIR/const-default-method-bodies.rs:25:18 | LL | NonConstImpl.a(); diff --git a/tests/ui/traits/const-traits/const-drop-bound.rs b/tests/ui/traits/const-traits/const-drop-bound.rs index 4819da7c3a40..7fa9b10fa040 100644 --- a/tests/ui/traits/const-traits/const-drop-bound.rs +++ b/tests/ui/traits/const-traits/const-drop-bound.rs @@ -5,7 +5,7 @@ use std::marker::Destruct; -const fn foo(res: Result) -> Option where E: ~const Destruct { +const fn foo(res: Result) -> Option where E: [const] Destruct { match res { Ok(t) => Some(t), Err(_e) => None, @@ -16,8 +16,8 @@ pub struct Foo(T); const fn baz(res: Result, Foo>) -> Option> where - T: ~const Destruct, - E: ~const Destruct, + T: [const] Destruct, + E: [const] Destruct, { foo(res) } diff --git a/tests/ui/traits/const-traits/const-drop-fail-2.precise.stderr b/tests/ui/traits/const-traits/const-drop-fail-2.precise.stderr index 76207ea0939b..c2309ea6e122 100644 --- a/tests/ui/traits/const-traits/const-drop-fail-2.precise.stderr +++ b/tests/ui/traits/const-traits/const-drop-fail-2.precise.stderr @@ -5,17 +5,17 @@ LL 
| const _: () = check::>( | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | note: required for `ConstDropImplWithBounds` to implement `const Drop` - --> $DIR/const-drop-fail-2.rs:25:25 + --> $DIR/const-drop-fail-2.rs:25:26 | -LL | impl const Drop for ConstDropImplWithBounds { - | -------- ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | impl const Drop for ConstDropImplWithBounds { + | --------- ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ | | | unsatisfied trait bound introduced here note: required by a bound in `check` --> $DIR/const-drop-fail-2.rs:21:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/const-drop-fail-2.rs b/tests/ui/traits/const-traits/const-drop-fail-2.rs index 1bcc87e90703..3f98a9f715e8 100644 --- a/tests/ui/traits/const-traits/const-drop-fail-2.rs +++ b/tests/ui/traits/const-traits/const-drop-fail-2.rs @@ -18,11 +18,11 @@ trait A { fn a() { } } impl A for NonTrivialDrop {} -const fn check(_: T) {} +const fn check(_: T) {} struct ConstDropImplWithBounds(PhantomData); -impl const Drop for ConstDropImplWithBounds { +impl const Drop for ConstDropImplWithBounds { fn drop(&mut self) { T::a(); } @@ -35,7 +35,7 @@ const _: () = check::>( struct ConstDropImplWithNonConstBounds(PhantomData); -impl const Drop for ConstDropImplWithNonConstBounds { +impl const Drop for ConstDropImplWithNonConstBounds { fn drop(&mut self) { T::a(); } diff --git a/tests/ui/traits/const-traits/const-drop-fail-2.stock.stderr b/tests/ui/traits/const-traits/const-drop-fail-2.stock.stderr index 76207ea0939b..c2309ea6e122 100644 --- a/tests/ui/traits/const-traits/const-drop-fail-2.stock.stderr +++ b/tests/ui/traits/const-traits/const-drop-fail-2.stock.stderr @@ -5,17 +5,17 @@ LL | const _: () = check::>( | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | note: required for `ConstDropImplWithBounds` to implement 
`const Drop` - --> $DIR/const-drop-fail-2.rs:25:25 + --> $DIR/const-drop-fail-2.rs:25:26 | -LL | impl const Drop for ConstDropImplWithBounds { - | -------- ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | impl const Drop for ConstDropImplWithBounds { + | --------- ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ | | | unsatisfied trait bound introduced here note: required by a bound in `check` --> $DIR/const-drop-fail-2.rs:21:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/const-drop-fail.new_precise.stderr b/tests/ui/traits/const-traits/const-drop-fail.new_precise.stderr index f38e642bb63e..9c49ee56b0f4 100644 --- a/tests/ui/traits/const-traits/const-drop-fail.new_precise.stderr +++ b/tests/ui/traits/const-traits/const-drop-fail.new_precise.stderr @@ -10,8 +10,8 @@ LL | NonTrivialDrop, note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error[E0277]: the trait bound `NonTrivialDrop: const Destruct` is not satisfied --> $DIR/const-drop-fail.rs:35:5 @@ -25,8 +25,8 @@ LL | ConstImplWithDropGlue(NonTrivialDrop), note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/const-drop-fail.new_stock.stderr b/tests/ui/traits/const-traits/const-drop-fail.new_stock.stderr index f38e642bb63e..9c49ee56b0f4 100644 --- a/tests/ui/traits/const-traits/const-drop-fail.new_stock.stderr +++ b/tests/ui/traits/const-traits/const-drop-fail.new_stock.stderr @@ 
-10,8 +10,8 @@ LL | NonTrivialDrop, note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error[E0277]: the trait bound `NonTrivialDrop: const Destruct` is not satisfied --> $DIR/const-drop-fail.rs:35:5 @@ -25,8 +25,8 @@ LL | ConstImplWithDropGlue(NonTrivialDrop), note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/const-drop-fail.old_precise.stderr b/tests/ui/traits/const-traits/const-drop-fail.old_precise.stderr index f38e642bb63e..9c49ee56b0f4 100644 --- a/tests/ui/traits/const-traits/const-drop-fail.old_precise.stderr +++ b/tests/ui/traits/const-traits/const-drop-fail.old_precise.stderr @@ -10,8 +10,8 @@ LL | NonTrivialDrop, note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error[E0277]: the trait bound `NonTrivialDrop: const Destruct` is not satisfied --> $DIR/const-drop-fail.rs:35:5 @@ -25,8 +25,8 @@ LL | ConstImplWithDropGlue(NonTrivialDrop), note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/const-drop-fail.old_stock.stderr b/tests/ui/traits/const-traits/const-drop-fail.old_stock.stderr index f38e642bb63e..9c49ee56b0f4 100644 --- 
a/tests/ui/traits/const-traits/const-drop-fail.old_stock.stderr +++ b/tests/ui/traits/const-traits/const-drop-fail.old_stock.stderr @@ -10,8 +10,8 @@ LL | NonTrivialDrop, note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error[E0277]: the trait bound `NonTrivialDrop: const Destruct` is not satisfied --> $DIR/const-drop-fail.rs:35:5 @@ -25,8 +25,8 @@ LL | ConstImplWithDropGlue(NonTrivialDrop), note: required by a bound in `check` --> $DIR/const-drop-fail.rs:24:19 | -LL | const fn check(_: T) {} - | ^^^^^^^^^^^^^^^ required by this bound in `check` +LL | const fn check(_: T) {} + | ^^^^^^^^^^^^^^^^ required by this bound in `check` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/const-drop-fail.rs b/tests/ui/traits/const-traits/const-drop-fail.rs index a7f3d5654de9..4513d71f6134 100644 --- a/tests/ui/traits/const-traits/const-drop-fail.rs +++ b/tests/ui/traits/const-traits/const-drop-fail.rs @@ -21,7 +21,7 @@ impl const Drop for ConstImplWithDropGlue { fn drop(&mut self) {} } -const fn check(_: T) {} +const fn check(_: T) {} macro_rules! check_all { ($($exp:expr),*$(,)?) => {$( diff --git a/tests/ui/traits/const-traits/const-drop.rs b/tests/ui/traits/const-traits/const-drop.rs index e2d87aeff47f..5df3a77f73ab 100644 --- a/tests/ui/traits/const-traits/const-drop.rs +++ b/tests/ui/traits/const-traits/const-drop.rs @@ -16,7 +16,7 @@ impl<'a> const Drop for S<'a> { } } -const fn a(_: T) {} +const fn a(_: T) {} //FIXME ~^ ERROR destructor of const fn b() -> u8 { @@ -108,7 +108,7 @@ fn main() { } } - // These types should pass because ~const in a non-const context should have no effect. + // These types should pass because [const] in a non-const context should have no effect. 
a(HasDropGlue(Box::new(0))); a(HasDropImpl); diff --git a/tests/ui/traits/const-traits/const-impl-trait.rs b/tests/ui/traits/const-traits/const-impl-trait.rs index d7fe43ef37ce..dc960422a4a4 100644 --- a/tests/ui/traits/const-traits/const-impl-trait.rs +++ b/tests/ui/traits/const-traits/const-impl-trait.rs @@ -8,23 +8,23 @@ use std::marker::Destruct; -const fn cmp(a: &impl ~const PartialEq) -> bool { +const fn cmp(a: &impl [const] PartialEq) -> bool { a == a } const fn wrap( - x: impl ~const PartialEq + ~const Destruct, -) -> impl ~const PartialEq + ~const Destruct { + x: impl [const] PartialEq + [const] Destruct, +) -> impl [const] PartialEq + [const] Destruct { x } #[const_trait] trait Foo { - fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; + fn huh() -> impl [const] PartialEq + [const] Destruct + Copy; } impl const Foo for () { - fn huh() -> impl ~const PartialEq + ~const Destruct + Copy { + fn huh() -> impl [const] PartialEq + [const] Destruct + Copy { 123 } } @@ -43,16 +43,16 @@ trait T {} struct S; impl const T for S {} -const fn rpit() -> impl ~const T { +const fn rpit() -> impl [const] T { S } -const fn apit(_: impl ~const T + ~const Destruct) {} +const fn apit(_: impl [const] T + [const] Destruct) {} -const fn rpit_assoc_bound() -> impl IntoIterator { +const fn rpit_assoc_bound() -> impl IntoIterator { Some(S) } -const fn apit_assoc_bound(_: impl IntoIterator + ~const Destruct) {} +const fn apit_assoc_bound(_: impl IntoIterator + [const] Destruct) {} fn main() {} diff --git a/tests/ui/traits/const-traits/const-impl-trait.stderr b/tests/ui/traits/const-traits/const-impl-trait.stderr index 6783cec3960d..cbb68d8c9839 100644 --- a/tests/ui/traits/const-traits/const-impl-trait.stderr +++ b/tests/ui/traits/const-traits/const-impl-trait.stderr @@ -1,197 +1,17 @@ -error[E0635]: unknown feature `const_cmp` - --> $DIR/const-impl-trait.rs:7:30 +error[E0277]: the trait bound `(): const PartialEq` is not satisfied + --> $DIR/const-impl-trait.rs:34:17 | -LL 
| #![feature(const_trait_impl, const_cmp, const_destruct)] - | ^^^^^^^^^ - -error: `~const` can only be applied to `#[const_trait]` traits +LL | assert!(cmp(&())); + | --- ^^^ + | | + | required by a bound introduced by this call + | +note: required by a bound in `cmp` --> $DIR/const-impl-trait.rs:11:23 | -LL | const fn cmp(a: &impl ~const PartialEq) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL +LL | const fn cmp(a: &impl [const] PartialEq) -> bool { + | ^^^^^^^^^^^^^^^^^ required by this bound in `cmp` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:11:23 - | -LL | const fn cmp(a: &impl ~const PartialEq) -> bool { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` +error: aborting due to 1 previous error -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:16:13 - | -LL | x: impl ~const PartialEq + ~const Destruct, - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:16:13 - | -LL | x: impl ~const PartialEq + ~const Destruct, - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:17:11 - | -LL | ) -> impl ~const 
PartialEq + ~const Destruct { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:17:11 - | -LL | ) -> impl ~const PartialEq + ~const Destruct { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:23:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:27:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:17:11 - | -LL | ) -> impl ~const PartialEq + ~const Destruct { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:27:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` 
can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:27:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy { - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:23:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:23:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:23:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> 
$DIR/const-impl-trait.rs:23:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/const-impl-trait.rs:23:22 - | -LL | fn huh() -> impl ~const PartialEq + ~const Destruct + Copy; - | ^^^^^^ can't be applied to `PartialEq` - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` - -error[E0015]: cannot call non-const operator in constants - --> $DIR/const-impl-trait.rs:35:13 - | -LL | assert!(wrap(123) == wrap(123)); - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = note: calls in constants are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const operator in constants - --> $DIR/const-impl-trait.rs:36:13 - | -LL | assert!(wrap(123) != wrap(456)); - | ^^^^^^^^^^^^^^^^^^^^^^ - | - = note: calls in constants are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const operator in constants - --> $DIR/const-impl-trait.rs:38:13 - | -LL | assert!(x == x); - | ^^^^^^ - | - = note: calls in constants are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/const-impl-trait.rs:12:5 - | -LL | a == a - | ^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error: aborting due to 21 previous errors - -Some errors have detailed explanations: E0015, E0635. -For more information about an error, try `rustc --explain E0015`. 
+For more information about this error, try `rustc --explain E0277`. diff --git a/tests/ui/traits/const-traits/const-in-closure.rs b/tests/ui/traits/const-traits/const-in-closure.rs index ebc17a50c866..0657c5af5883 100644 --- a/tests/ui/traits/const-traits/const-in-closure.rs +++ b/tests/ui/traits/const-traits/const-in-closure.rs @@ -3,13 +3,14 @@ #![feature(const_trait_impl)] -#[const_trait] trait Trait { +#[const_trait] +trait Trait { fn method(); } const fn foo() { let _ = || { - // Make sure this doesn't enforce `T: ~const Trait` + // Make sure this doesn't enforce `T: [const] Trait` T::method(); }; } @@ -17,7 +18,9 @@ const fn foo() { fn bar() { let _ = || { // Make sure unconditionally const bounds propagate from parent. - const { T::method(); }; + const { + T::method(); + }; }; } diff --git a/tests/ui/traits/const-traits/const-opaque.no.stderr b/tests/ui/traits/const-traits/const-opaque.no.stderr index 47e692936e04..acf19ba96ab3 100644 --- a/tests/ui/traits/const-traits/const-opaque.no.stderr +++ b/tests/ui/traits/const-traits/const-opaque.no.stderr @@ -9,8 +9,8 @@ LL | let opaque = bar(()); note: required by a bound in `bar` --> $DIR/const-opaque.rs:26:17 | -LL | const fn bar(t: T) -> impl ~const Foo { - | ^^^^^^^^^^ required by this bound in `bar` +LL | const fn bar(t: T) -> impl [const] Foo { + | ^^^^^^^^^^^ required by this bound in `bar` error[E0277]: the trait bound `(): const Foo` is not satisfied --> $DIR/const-opaque.rs:33:12 diff --git a/tests/ui/traits/const-traits/const-opaque.rs b/tests/ui/traits/const-traits/const-opaque.rs index 96cdd7d9f261..56ebf0aefccf 100644 --- a/tests/ui/traits/const-traits/const-opaque.rs +++ b/tests/ui/traits/const-traits/const-opaque.rs @@ -9,7 +9,7 @@ trait Foo { fn method(&self); } -impl const Foo for (T,) { +impl const Foo for (T,) { fn method(&self) {} } @@ -23,7 +23,7 @@ impl Foo for () { fn method(&self) {} } -const fn bar(t: T) -> impl ~const Foo { +const fn bar(t: T) -> impl [const] Foo { (t,) } diff --git 
a/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.rs b/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.rs index 2dac1970835d..ece87529c3e3 100644 --- a/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.rs +++ b/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.rs @@ -7,12 +7,12 @@ trait Trait {} fn main() { let _: &dyn const Trait; //~ ERROR const trait bounds are not allowed in trait object types - let _: &dyn ~const Trait; //~ ERROR `~const` is not allowed here + let _: &dyn [const] Trait; //~ ERROR `[const]` is not allowed here } // Regression test for issue #119525. trait NonConst {} const fn handle(_: &dyn const NonConst) {} //~^ ERROR const trait bounds are not allowed in trait object types -const fn take(_: &dyn ~const NonConst) {} -//~^ ERROR `~const` is not allowed here +const fn take(_: &dyn [const] NonConst) {} +//~^ ERROR `[const]` is not allowed here diff --git a/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.stderr b/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.stderr index bd29b4b860b6..c58e2765168b 100644 --- a/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.stderr +++ b/tests/ui/traits/const-traits/const-trait-bounds-trait-objects.stderr @@ -4,13 +4,13 @@ error: const trait bounds are not allowed in trait object types LL | let _: &dyn const Trait; | ^^^^^^^^^^^ -error: `~const` is not allowed here - --> $DIR/const-trait-bounds-trait-objects.rs:10:17 +error: `[const]` is not allowed here + --> $DIR/const-trait-bounds-trait-objects.rs:10:13 | -LL | let _: &dyn ~const Trait; - | ^^^^^^ +LL | let _: &dyn [const] Trait; + | ^^^^^^^^^^^ | - = note: trait objects cannot have `~const` trait bounds + = note: trait objects cannot have `[const]` trait bounds error: const trait bounds are not allowed in trait object types --> $DIR/const-trait-bounds-trait-objects.rs:15:25 @@ -18,13 +18,13 @@ error: const trait bounds are not allowed in trait object types LL | 
const fn handle(_: &dyn const NonConst) {} | ^^^^^^^^^^^^^^ -error: `~const` is not allowed here - --> $DIR/const-trait-bounds-trait-objects.rs:17:23 +error: `[const]` is not allowed here + --> $DIR/const-trait-bounds-trait-objects.rs:17:19 | -LL | const fn take(_: &dyn ~const NonConst) {} - | ^^^^^^ +LL | const fn take(_: &dyn [const] NonConst) {} + | ^^^^^^^^^^^ | - = note: trait objects cannot have `~const` trait bounds + = note: trait objects cannot have `[const]` trait bounds error: aborting due to 4 previous errors diff --git a/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.rs b/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.rs index b563b78f78a1..5376baf15e0f 100644 --- a/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.rs +++ b/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.rs @@ -6,14 +6,13 @@ // Regression test for issue #125877. //@ compile-flags: -Znext-solver -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" 
-> "you are using $$RUSTC_VERSION" #![feature(const_trait_impl, effects)] //~^ ERROR feature has been removed #[const_trait] trait Main { - fn compute() -> u32; + fn compute() -> u32; } impl const Main for () { diff --git a/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.stderr b/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.stderr index a04f98e68a63..736fde33ce3a 100644 --- a/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.stderr +++ b/tests/ui/traits/const-traits/const-trait-impl-parameter-mismatch.stderr @@ -1,16 +1,16 @@ error[E0557]: feature has been removed - --> $DIR/const-trait-impl-parameter-mismatch.rs:11:30 + --> $DIR/const-trait-impl-parameter-mismatch.rs:10:30 | LL | #![feature(const_trait_impl, effects)] | ^^^^^^^ feature has been removed | - = note: removed in 1.84.0 (you are using $RUSTC_VERSION); see for more information + = note: removed in 1.84.0; see for more information = note: removed, redundant with `#![feature(const_trait_impl)]` error[E0049]: associated function `compute` has 0 type parameters but its trait declaration has 1 type parameter - --> $DIR/const-trait-impl-parameter-mismatch.rs:20:16 + --> $DIR/const-trait-impl-parameter-mismatch.rs:19:16 | -LL | fn compute() -> u32; +LL | fn compute() -> u32; | - expected 1 type parameter ... 
LL | fn compute<'x>() -> u32 { diff --git a/tests/ui/traits/const-traits/const_derives/derive-const-use.stderr b/tests/ui/traits/const-traits/const_derives/derive-const-use.stderr index 8297911a3f3c..ce61eb9a1ab9 100644 --- a/tests/ui/traits/const-traits/const_derives/derive-const-use.stderr +++ b/tests/ui/traits/const-traits/const_derives/derive-const-use.stderr @@ -1,9 +1,3 @@ -error[E0635]: unknown feature `const_cmp` - --> $DIR/derive-const-use.rs:3:30 - | -LL | #![feature(const_trait_impl, const_cmp, const_default_impls, derive_const)] - | ^^^^^^^^^ - error[E0635]: unknown feature `const_default_impls` --> $DIR/derive-const-use.rs:3:41 | @@ -28,23 +22,13 @@ LL | #[derive_const(Default, PartialEq)] = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` = note: adding a non-const method body in the future would be a breaking change -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/derive-const-use.rs:11:12 - | -LL | impl const PartialEq for A { - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: adding a non-const method body in the future would be a breaking change - -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/derive-const-use.rs:15:25 +error[E0277]: the trait bound `(): [const] PartialEq` is not satisfied + --> $DIR/derive-const-use.rs:16:14 | LL | #[derive_const(Default, PartialEq)] - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: adding a non-const method body in the future would be a breaking change + | --------- in this derive macro expansion +LL | pub struct S((), A); + | ^^ error[E0015]: cannot call non-const associated function `::default` in constants --> $DIR/derive-const-use.rs:18:35 @@ -54,14 +38,6 @@ LL | const _: () = 
assert!(S((), A) == S::default()); | = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const operator in constants - --> $DIR/derive-const-use.rs:18:23 - | -LL | const _: () = assert!(S((), A) == S::default()); - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: calls in constants are limited to constant functions, tuple structs and tuple variants - error[E0015]: cannot call non-const associated function `<() as Default>::default` in constant functions --> $DIR/derive-const-use.rs:16:14 | @@ -82,27 +58,7 @@ LL | pub struct S((), A); | = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/derive-const-use.rs:16:14 - | -LL | #[derive_const(Default, PartialEq)] - | --------- in this derive macro expansion -LL | pub struct S((), A); - | ^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants +error: aborting due to 7 previous errors -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/derive-const-use.rs:16:18 - | -LL | #[derive_const(Default, PartialEq)] - | --------- in this derive macro expansion -LL | pub struct S((), A); - | ^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error: aborting due to 12 previous errors - -Some errors have detailed explanations: E0015, E0635. +Some errors have detailed explanations: E0015, E0277, E0635. For more information about an error, try `rustc --explain E0015`. 
diff --git a/tests/ui/traits/const-traits/const_derives/derive-const-with-params.rs b/tests/ui/traits/const-traits/const_derives/derive-const-with-params.rs index 18b224af2780..b39f97b59382 100644 --- a/tests/ui/traits/const-traits/const_derives/derive-const-with-params.rs +++ b/tests/ui/traits/const-traits/const_derives/derive-const-with-params.rs @@ -1,5 +1,4 @@ -//@ known-bug: #110395 -// FIXME(const_trait_impl) check-pass +//@ check-pass #![feature(derive_const)] #![feature(const_trait_impl)] diff --git a/tests/ui/traits/const-traits/const_derives/derive-const-with-params.stderr b/tests/ui/traits/const-traits/const_derives/derive-const-with-params.stderr deleted file mode 100644 index d1dbf62d5666..000000000000 --- a/tests/ui/traits/const-traits/const_derives/derive-const-with-params.stderr +++ /dev/null @@ -1,35 +0,0 @@ -error: const `impl` for trait `PartialEq` which is not marked with `#[const_trait]` - --> $DIR/derive-const-with-params.rs:7:16 - | -LL | #[derive_const(PartialEq)] - | ^^^^^^^^^ this trait is not `const` - | - = note: marking a trait with `#[const_trait]` ensures all default method bodies are `const` - = note: adding a non-const method body in the future would be a breaking change - -error: `~const` can only be applied to `#[const_trait]` traits - | -note: `PartialEq` can't be used with `~const` because it isn't annotated with `#[const_trait]` - --> $SRC_DIR/core/src/cmp.rs:LL:COL - -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/derive-const-with-params.rs:8:23 - | -LL | #[derive_const(PartialEq)] - | --------- in this derive macro expansion -LL | pub struct Reverse(T); - | ^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/derive-const-with-params.rs:11:5 - | -LL | a == b - | ^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple 
structs and tuple variants - -error: aborting due to 4 previous errors - -For more information about this error, try `rustc --explain E0015`. diff --git a/tests/ui/traits/const-traits/cross-crate.gatednc.stderr b/tests/ui/traits/const-traits/cross-crate.gatednc.stderr index 4d5abf643a8c..1da519151182 100644 --- a/tests/ui/traits/const-traits/cross-crate.gatednc.stderr +++ b/tests/ui/traits/const-traits/cross-crate.gatednc.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `cross_crate::NonConst: ~const cross_crate::MyTrait` is not satisfied +error[E0277]: the trait bound `cross_crate::NonConst: [const] cross_crate::MyTrait` is not satisfied --> $DIR/cross-crate.rs:19:14 | LL | NonConst.func(); diff --git a/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.rs b/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.rs index 96acdc300e0d..ea97f755d55c 100644 --- a/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.rs +++ b/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.rs @@ -4,13 +4,13 @@ trait Tr {} impl Tr for () {} -const fn foo() where T: ~const Tr {} +const fn foo() where T: [const] Tr {} #[const_trait] pub trait Foo { fn foo() { foo::<()>(); - //~^ ERROR the trait bound `(): ~const Tr` is not satisfied + //~^ ERROR the trait bound `(): [const] Tr` is not satisfied } } diff --git a/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.stderr b/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.stderr index b3017523b27d..2e236cecfb47 100644 --- a/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.stderr +++ b/tests/ui/traits/const-traits/default-method-body-is-const-body-checking.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `(): ~const Tr` is not satisfied +error[E0277]: the trait bound `(): [const] Tr` is not satisfied --> $DIR/default-method-body-is-const-body-checking.rs:12:15 | LL | foo::<()>(); @@ -7,8 
+7,8 @@ LL | foo::<()>(); note: required by a bound in `foo` --> $DIR/default-method-body-is-const-body-checking.rs:7:28 | -LL | const fn foo() where T: ~const Tr {} - | ^^^^^^^^^ required by this bound in `foo` +LL | const fn foo() where T: [const] Tr {} + | ^^^^^^^^^^ required by this bound in `foo` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.rs b/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.rs index b3beba08237c..eb2c472e3bf7 100644 --- a/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.rs +++ b/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.rs @@ -7,7 +7,7 @@ pub trait Tr { fn b(&self) { ().a() - //~^ ERROR the trait bound `(): ~const Tr` is not satisfied + //~^ ERROR the trait bound `(): [const] Tr` is not satisfied } } diff --git a/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.stderr b/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.stderr index 2bd71c940e73..2dc2d4846174 100644 --- a/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.stderr +++ b/tests/ui/traits/const-traits/default-method-body-is-const-same-trait-ck.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `(): ~const Tr` is not satisfied +error[E0277]: the trait bound `(): [const] Tr` is not satisfied --> $DIR/default-method-body-is-const-same-trait-ck.rs:9:12 | LL | ().a() diff --git a/tests/ui/traits/const-traits/dont-ice-on-const-pred-for-bounds.rs b/tests/ui/traits/const-traits/dont-ice-on-const-pred-for-bounds.rs index 2295c2c3857c..d39e661ed920 100644 --- a/tests/ui/traits/const-traits/dont-ice-on-const-pred-for-bounds.rs +++ b/tests/ui/traits/const-traits/dont-ice-on-const-pred-for-bounds.rs @@ -13,7 +13,7 @@ trait Trait { type Assoc: const Trait; } -const fn needs_trait() {} +const fn needs_trait() {} fn test() { const { needs_trait::() }; diff --git 
a/tests/ui/traits/const-traits/dont-prefer-param-env-for-infer-self-ty.rs b/tests/ui/traits/const-traits/dont-prefer-param-env-for-infer-self-ty.rs index 08dcd7d80b34..f1fc98d72a54 100644 --- a/tests/ui/traits/const-traits/dont-prefer-param-env-for-infer-self-ty.rs +++ b/tests/ui/traits/const-traits/dont-prefer-param-env-for-infer-self-ty.rs @@ -5,11 +5,11 @@ #[const_trait] trait Foo {} -impl const Foo for (T,) where T: ~const Foo {} +impl const Foo for (T,) where T: [const] Foo {} -const fn needs_const_foo(_: impl ~const Foo + Copy) {} +const fn needs_const_foo(_: impl [const] Foo + Copy) {} -const fn test(t: T) { +const fn test(t: T) { needs_const_foo((t,)); } diff --git a/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.rs b/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.rs index f4b01efe9590..414b80ca0daa 100644 --- a/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.rs +++ b/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.rs @@ -7,7 +7,7 @@ trait Trait { type Out; } -const fn needs_const(_: &T) {} +const fn needs_const(_: &T) {} const IN_CONST: () = { needs_const(&()); diff --git a/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.stderr b/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.stderr index cd68cdaf8a2b..740a05be06ba 100644 --- a/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.stderr +++ b/tests/ui/traits/const-traits/double-error-for-unimplemented-trait.stderr @@ -14,8 +14,8 @@ LL | trait Trait { note: required by a bound in `needs_const` --> $DIR/double-error-for-unimplemented-trait.rs:10:25 | -LL | const fn needs_const(_: &T) {} - | ^^^^^^^^^^^^ required by this bound in `needs_const` +LL | const fn needs_const(_: &T) {} + | ^^^^^^^^^^^^^ required by this bound in `needs_const` error[E0277]: the trait bound `(): Trait` is not satisfied --> $DIR/double-error-for-unimplemented-trait.rs:18:15 @@ -33,8 +33,8 @@ LL | trait Trait { 
note: required by a bound in `needs_const` --> $DIR/double-error-for-unimplemented-trait.rs:10:25 | -LL | const fn needs_const(_: &T) {} - | ^^^^^^^^^^^^ required by this bound in `needs_const` +LL | const fn needs_const(_: &T) {} + | ^^^^^^^^^^^^^ required by this bound in `needs_const` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/tilde-twice.rs b/tests/ui/traits/const-traits/duplicate-constness.rs similarity index 50% rename from tests/ui/traits/const-traits/tilde-twice.rs rename to tests/ui/traits/const-traits/duplicate-constness.rs index d341513b8a81..4b13abe3cf2c 100644 --- a/tests/ui/traits/const-traits/tilde-twice.rs +++ b/tests/ui/traits/const-traits/duplicate-constness.rs @@ -2,5 +2,5 @@ #![feature(const_trait_impl)] -struct S; -//~^ ERROR expected identifier, found `~` +struct S; +//~^ ERROR expected identifier, found `]` diff --git a/tests/ui/traits/const-traits/duplicate-constness.stderr b/tests/ui/traits/const-traits/duplicate-constness.stderr new file mode 100644 index 000000000000..27f69cd2386c --- /dev/null +++ b/tests/ui/traits/const-traits/duplicate-constness.stderr @@ -0,0 +1,8 @@ +error: expected identifier, found `]` + --> $DIR/duplicate-constness.rs:5:27 + | +LL | struct S; + | ^ expected identifier + +error: aborting due to 1 previous error + diff --git a/tests/ui/traits/const-traits/eval-bad-signature.rs b/tests/ui/traits/const-traits/eval-bad-signature.rs index 97c573ea6528..66e296d43880 100644 --- a/tests/ui/traits/const-traits/eval-bad-signature.rs +++ b/tests/ui/traits/const-traits/eval-bad-signature.rs @@ -7,7 +7,7 @@ trait Value { fn value() -> u32; } -const fn get_value() -> u32 { +const fn get_value() -> u32 { T::value() } diff --git a/tests/ui/traits/const-traits/feature-gate.rs b/tests/ui/traits/const-traits/feature-gate.rs index 921dfb054e30..5ad56ddcd334 100644 --- a/tests/ui/traits/const-traits/feature-gate.rs +++ b/tests/ui/traits/const-traits/feature-gate.rs @@ -10,12 +10,12 @@ trait T {} 
impl const T for S {} //[stock]~^ ERROR const trait impls are experimental -const fn f() {} //[stock]~ ERROR const trait impls are experimental +const fn f() {} //[stock]~ ERROR const trait impls are experimental fn g() {} //[stock]~ ERROR const trait impls are experimental macro_rules! discard { ($ty:ty) => {} } -discard! { impl ~const T } //[stock]~ ERROR const trait impls are experimental +discard! { impl [const] T } //[stock]~ ERROR const trait impls are experimental discard! { impl const T } //[stock]~ ERROR const trait impls are experimental fn main() {} diff --git a/tests/ui/traits/const-traits/feature-gate.stock.stderr b/tests/ui/traits/const-traits/feature-gate.stock.stderr index 78157d570563..37d76e7f3879 100644 --- a/tests/ui/traits/const-traits/feature-gate.stock.stderr +++ b/tests/ui/traits/const-traits/feature-gate.stock.stderr @@ -9,10 +9,10 @@ LL | impl const T for S {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: const trait impls are experimental - --> $DIR/feature-gate.rs:13:15 + --> $DIR/feature-gate.rs:13:13 | -LL | const fn f() {} - | ^^^^^^ +LL | const fn f() {} + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable @@ -29,10 +29,10 @@ LL | fn g() {} = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: const trait impls are experimental - --> $DIR/feature-gate.rs:18:17 + --> $DIR/feature-gate.rs:18:12 | -LL | discard! { impl ~const T } - | ^^^^^^ +LL | discard! 
{ impl [const] T } + | ^^^^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable diff --git a/tests/ui/traits/const-traits/function-pointer-does-not-require-const.rs b/tests/ui/traits/const-traits/function-pointer-does-not-require-const.rs index 61826e9977e8..8acd195e546b 100644 --- a/tests/ui/traits/const-traits/function-pointer-does-not-require-const.rs +++ b/tests/ui/traits/const-traits/function-pointer-does-not-require-const.rs @@ -6,7 +6,7 @@ pub trait Test {} impl Test for () {} -pub const fn test() {} +pub const fn test() {} pub const fn min_by_i32() -> fn() { test::<()> diff --git a/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.rs b/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.rs index 4cb013b93230..026f2c0d6032 100644 --- a/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.rs +++ b/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.rs @@ -1,9 +1,9 @@ #![feature(const_trait_impl)] -const fn test() -> impl ~const Fn() { - //~^ ERROR `~const` can only be applied to `#[const_trait]` traits - //~| ERROR `~const` can only be applied to `#[const_trait]` traits - //~| ERROR `~const` can only be applied to `#[const_trait]` traits +const fn test() -> impl [const] Fn() { + //~^ ERROR `[const]` can only be applied to `#[const_trait]` traits + //~| ERROR `[const]` can only be applied to `#[const_trait]` traits + //~| ERROR `[const]` can only be applied to `#[const_trait]` traits const move || { //~ ERROR const closures are experimental let sl: &[u8] = b"foo"; @@ -11,7 +11,6 @@ const fn test() -> impl ~const Fn() { [first, remainder @ ..] 
=> { assert_eq!(first, &b'f'); //~^ ERROR cannot call non-const function - //~| ERROR cannot call non-const operator } [] => panic!(), } diff --git a/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.stderr b/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.stderr index 8d9371bf9f69..f340eaab0e33 100644 --- a/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.stderr +++ b/tests/ui/traits/const-traits/ice-112822-expected-type-for-param.stderr @@ -8,44 +8,35 @@ LL | const move || { = help: add `#![feature(const_closures)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/ice-112822-expected-type-for-param.rs:3:25 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/ice-112822-expected-type-for-param.rs:3:20 | -LL | const fn test() -> impl ~const Fn() { - | ^^^^^^ can't be applied to `Fn` +LL | const fn test() -> impl [const] Fn() { + | ^^^^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/ice-112822-expected-type-for-param.rs:3:25 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/ice-112822-expected-type-for-param.rs:3:20 | -LL | const fn test() -> impl ~const Fn() { - | ^^^^^^ can't be applied to `Fn` +LL | const fn test() -> impl [const] Fn() { + | ^^^^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due 
to `-Z deduplicate-diagnostics=no` -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/ice-112822-expected-type-for-param.rs:3:25 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/ice-112822-expected-type-for-param.rs:3:20 | -LL | const fn test() -> impl ~const Fn() { - | ^^^^^^ can't be applied to `Fn` +LL | const fn test() -> impl [const] Fn() { + | ^^^^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` -error[E0015]: cannot call non-const operator in constant functions - --> $DIR/ice-112822-expected-type-for-param.rs:12:17 - | -LL | assert_eq!(first, &b'f'); - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants - = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) - error[E0015]: cannot call non-const function `core::panicking::assert_failed::<&u8, &u8>` in constant functions --> $DIR/ice-112822-expected-type-for-param.rs:12:17 | @@ -55,7 +46,7 @@ LL | assert_eq!(first, &b'f'); = note: calls in constant functions are limited to constant functions, tuple structs and tuple variants = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) -error: aborting due to 6 previous errors +error: aborting due to 5 previous errors Some errors have detailed explanations: E0015, E0658. For more information about an error, try `rustc --explain E0015`. 
diff --git a/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.rs b/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.rs index fadcaa398167..f1dbd9471617 100644 --- a/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.rs +++ b/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.rs @@ -1,8 +1,8 @@ #![allow(incomplete_features)] #![feature(generic_const_exprs, const_trait_impl)] -const fn with_positive() {} -//~^ ERROR `~const` can only be applied to `#[const_trait]` traits -//~| ERROR `~const` can only be applied to `#[const_trait]` traits +const fn with_positive() {} +//~^ ERROR `[const]` can only be applied to `#[const_trait]` traits +//~| ERROR `[const]` can only be applied to `#[const_trait]` traits pub fn main() {} diff --git a/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.stderr b/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.stderr index 821b257af880..d8d73173ec4c 100644 --- a/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.stderr +++ b/tests/ui/traits/const-traits/ice-123664-unexpected-bound-var.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/ice-123664-unexpected-bound-var.rs:4:27 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/ice-123664-unexpected-bound-var.rs:4:25 | -LL | const fn with_positive() {} - | ^^^^^^ can't be applied to `Fn` +LL | const fn with_positive() {} + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/ice-123664-unexpected-bound-var.rs:4:27 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/ice-123664-unexpected-bound-var.rs:4:25 | -LL | const fn with_positive() {} - | 
^^^^^^ can't be applied to `Fn` +LL | const fn with_positive() {} + | ^^^^^^^^^ can't be applied to `Fn` | -note: `Fn` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `Fn` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/ops/function.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.rs b/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.rs index d6df1714314a..ea4db0515cd4 100644 --- a/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.rs +++ b/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.rs @@ -7,7 +7,7 @@ trait Foo {} impl const Foo for i32 {} -impl const Foo for T where T: ~const Foo {} +impl const Foo for T where T: [const] Foo {} //~^ ERROR conflicting implementations of trait `Foo` for type `i32` fn main() {} diff --git a/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.stderr b/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.stderr index 183c2c2cdf4a..5b417dcfe2cb 100644 --- a/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.stderr +++ b/tests/ui/traits/const-traits/ice-124857-combine-effect-const-infer-vars.stderr @@ -4,8 +4,8 @@ error[E0119]: conflicting implementations of trait `Foo` for type `i32` LL | impl const Foo for i32 {} | ---------------------- first implementation here LL | -LL | impl const Foo for T where T: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `i32` +LL | impl const Foo for T where T: [const] Foo {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `i32` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/impl-conditionally-const-trait.rs 
b/tests/ui/traits/const-traits/impl-conditionally-const-trait.rs new file mode 100644 index 000000000000..f3783c9e69b2 --- /dev/null +++ b/tests/ui/traits/const-traits/impl-conditionally-const-trait.rs @@ -0,0 +1,12 @@ +//! This test ensures that we can only implement `const Trait` for a type +//! and not have the conditionally const syntax in that position. + +#![feature(const_trait_impl)] + +struct S; +trait T {} + +impl [const] T for S {} +//~^ ERROR expected identifier, found `]` + +fn main() {} diff --git a/tests/ui/traits/const-traits/impl-conditionally-const-trait.stderr b/tests/ui/traits/const-traits/impl-conditionally-const-trait.stderr new file mode 100644 index 000000000000..fc8db61b9409 --- /dev/null +++ b/tests/ui/traits/const-traits/impl-conditionally-const-trait.stderr @@ -0,0 +1,8 @@ +error: expected identifier, found `]` + --> $DIR/impl-conditionally-const-trait.rs:9:12 + | +LL | impl [const] T for S {} + | ^ expected identifier + +error: aborting due to 1 previous error + diff --git a/tests/ui/traits/const-traits/impl-tilde-const-trait.rs b/tests/ui/traits/const-traits/impl-tilde-const-trait.rs deleted file mode 100644 index 05b26465c5b0..000000000000 --- a/tests/ui/traits/const-traits/impl-tilde-const-trait.rs +++ /dev/null @@ -1,9 +0,0 @@ -#![feature(const_trait_impl)] - -struct S; -trait T {} - -impl ~const T for S {} -//~^ ERROR expected a trait, found type - -fn main() {} diff --git a/tests/ui/traits/const-traits/impl-tilde-const-trait.stderr b/tests/ui/traits/const-traits/impl-tilde-const-trait.stderr deleted file mode 100644 index 4695728f8caa..000000000000 --- a/tests/ui/traits/const-traits/impl-tilde-const-trait.stderr +++ /dev/null @@ -1,8 +0,0 @@ -error: expected a trait, found type - --> $DIR/impl-tilde-const-trait.rs:6:6 - | -LL | impl ~const T for S {} - | ^^^^^^^^ - -error: aborting due to 1 previous error - diff --git a/tests/ui/traits/const-traits/inherent-impl-const-bounds.rs 
b/tests/ui/traits/const-traits/inherent-impl-const-bounds.rs index 5ead1353bcd9..941f05428037 100644 --- a/tests/ui/traits/const-traits/inherent-impl-const-bounds.rs +++ b/tests/ui/traits/const-traits/inherent-impl-const-bounds.rs @@ -12,7 +12,7 @@ impl const A for S {} impl const B for S {} impl S { - const fn a() where T: ~const B { + const fn a() where T: [const] B { } } diff --git a/tests/ui/traits/const-traits/issue-100222.rs b/tests/ui/traits/const-traits/issue-100222.rs index 55722d35075a..4c93272b224f 100644 --- a/tests/ui/traits/const-traits/issue-100222.rs +++ b/tests/ui/traits/const-traits/issue-100222.rs @@ -11,21 +11,28 @@ pub trait Index { } #[cfg_attr(any(ny, yy), const_trait)] -pub trait IndexMut where Self: Index { +pub trait IndexMut +where + Self: Index, +{ const C: ::Output; type Assoc = ::Output; fn foo(&mut self, x: ::Output) -> ::Output; } -impl Index for () { type Output = (); } +impl Index for () { + type Output = (); +} #[cfg(not(any(nn, yn)))] impl const IndexMut for <() as Index>::Output { const C: ::Output = (); type Assoc = ::Output; fn foo(&mut self, x: ::Output) -> ::Output - where ::Output:, - {} + where + ::Output:, + { + } } #[cfg(any(nn, yn))] @@ -33,8 +40,10 @@ impl IndexMut for <() as Index>::Output { const C: ::Output = (); type Assoc = ::Output; fn foo(&mut self, x: ::Output) -> ::Output - where ::Output:, - {} + where + ::Output:, + { + } } const C: <() as Index>::Output = (); diff --git a/tests/ui/traits/const-traits/issue-92111.rs b/tests/ui/traits/const-traits/issue-92111.rs index c8db5cc9e7ad..2450136793e0 100644 --- a/tests/ui/traits/const-traits/issue-92111.rs +++ b/tests/ui/traits/const-traits/issue-92111.rs @@ -14,7 +14,7 @@ pub struct S(i32); impl Tr for S {} -const fn a(t: T) {} +const fn a(t: T) {} fn main() { a(S(0)); diff --git a/tests/ui/traits/const-traits/issue-92230-wf-super-trait-env.rs b/tests/ui/traits/const-traits/issue-92230-wf-super-trait-env.rs index a3edc5ff8b10..0eb7f54d596f 100644 --- 
a/tests/ui/traits/const-traits/issue-92230-wf-super-trait-env.rs +++ b/tests/ui/traits/const-traits/issue-92230-wf-super-trait-env.rs @@ -10,7 +10,7 @@ pub trait Super {} #[const_trait] pub trait Sub: Super {} -impl const Super for &A where A: ~const Super {} -impl const Sub for &A where A: ~const Sub {} +impl const Super for &A where A: [const] Super {} +impl const Sub for &A where A: [const] Sub {} fn main() {} diff --git a/tests/ui/traits/const-traits/item-bound-entailment-fails.rs b/tests/ui/traits/const-traits/item-bound-entailment-fails.rs index f4bfcbda0ac4..029597ea1f03 100644 --- a/tests/ui/traits/const-traits/item-bound-entailment-fails.rs +++ b/tests/ui/traits/const-traits/item-bound-entailment-fails.rs @@ -2,27 +2,27 @@ #![feature(const_trait_impl)] #[const_trait] trait Foo { - type Assoc: ~const Bar + type Assoc: [const] Bar where - T: ~const Bar; + T: [const] Bar; } #[const_trait] trait Bar {} struct N(T); impl Bar for N where T: Bar {} struct C(T); -impl const Bar for C where T: ~const Bar {} +impl const Bar for C where T: [const] Bar {} impl const Foo for u32 { type Assoc = N - //~^ ERROR the trait bound `N: ~const Bar` is not satisfied + //~^ ERROR the trait bound `N: [const] Bar` is not satisfied where - T: ~const Bar; + T: [const] Bar; } impl const Foo for i32 { type Assoc = C - //~^ ERROR the trait bound `T: ~const Bar` is not satisfied + //~^ ERROR the trait bound `T: [const] Bar` is not satisfied where T: Bar; } diff --git a/tests/ui/traits/const-traits/item-bound-entailment-fails.stderr b/tests/ui/traits/const-traits/item-bound-entailment-fails.stderr index 7e72dc9abaa2..8e5894a32966 100644 --- a/tests/ui/traits/const-traits/item-bound-entailment-fails.stderr +++ b/tests/ui/traits/const-traits/item-bound-entailment-fails.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `N: ~const Bar` is not satisfied +error[E0277]: the trait bound `N: [const] Bar` is not satisfied --> $DIR/item-bound-entailment-fails.rs:17:21 | LL | type Assoc = N @@ 
-7,25 +7,25 @@ LL | type Assoc = N note: required by a bound in `Foo::Assoc` --> $DIR/item-bound-entailment-fails.rs:5:20 | -LL | type Assoc: ~const Bar - | ^^^^^^^^^^ required by this bound in `Foo::Assoc` +LL | type Assoc: [const] Bar + | ^^^^^^^^^^^ required by this bound in `Foo::Assoc` -error[E0277]: the trait bound `T: ~const Bar` is not satisfied +error[E0277]: the trait bound `T: [const] Bar` is not satisfied --> $DIR/item-bound-entailment-fails.rs:24:21 | LL | type Assoc = C | ^^^^ | -note: required for `C` to implement `~const Bar` +note: required for `C` to implement `[const] Bar` --> $DIR/item-bound-entailment-fails.rs:14:15 | -LL | impl const Bar for C where T: ~const Bar {} - | ^^^ ^^^^ ---------- unsatisfied trait bound introduced here +LL | impl const Bar for C where T: [const] Bar {} + | ^^^ ^^^^ ----------- unsatisfied trait bound introduced here note: required by a bound in `Foo::Assoc` --> $DIR/item-bound-entailment-fails.rs:5:20 | -LL | type Assoc: ~const Bar - | ^^^^^^^^^^ required by this bound in `Foo::Assoc` +LL | type Assoc: [const] Bar + | ^^^^^^^^^^^ required by this bound in `Foo::Assoc` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/item-bound-entailment.rs b/tests/ui/traits/const-traits/item-bound-entailment.rs index 11db57be8151..6e053adb3850 100644 --- a/tests/ui/traits/const-traits/item-bound-entailment.rs +++ b/tests/ui/traits/const-traits/item-bound-entailment.rs @@ -4,16 +4,16 @@ #![feature(const_trait_impl)] #[const_trait] trait Foo { - type Assoc: ~const Bar + type Assoc: [const] Bar where - T: ~const Bar; + T: [const] Bar; } #[const_trait] trait Bar {} struct N(T); impl Bar for N where T: Bar {} struct C(T); -impl const Bar for C where T: ~const Bar {} +impl const Bar for C where T: [const] Bar {} impl Foo for u32 { type Assoc = N @@ -24,7 +24,7 @@ impl Foo for u32 { impl const Foo for i32 { type Assoc = C where - T: ~const Bar; + T: [const] Bar; } fn main() {} diff --git 
a/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.rs b/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.rs index 820d3d63b62d..a5f6ae198f61 100644 --- a/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.rs +++ b/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.rs @@ -1,20 +1,24 @@ -// Ensure that we don't consider `const Trait` and `~const Trait` to +// Ensure that we don't consider `const Trait` to // match the macro fragment specifier `ty` as that would be a breaking // change theoretically speaking. Syntactically trait object types can // be "bare", i.e., lack the prefix `dyn`. // By contrast, `?Trait` *does* match `ty` and therefore an arm like // `?$Trait:path` would never be reached. // See `parser/macro/mbe-bare-trait-object-maybe-trait-bound.rs`. - -//@ check-pass +// `[const] Trait` is already an error for a `ty` fragment, +// so we do not need to prevent that. macro_rules! check { - ($Type:ty) => { compile_error!("ty"); }; + ($Type:ty) => { + compile_error!("ty"); + }; (const $Trait:path) => {}; - (~const $Trait:path) => {}; + ([const] $Trait:path) => {}; } check! { const Trait } -check! { ~const Trait } +check! { [const] Trait } +//~^ ERROR: expected identifier, found `]` +//~| ERROR: const trait impls are experimental fn main() {} diff --git a/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.stderr b/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.stderr new file mode 100644 index 000000000000..56dad5301a4a --- /dev/null +++ b/tests/ui/traits/const-traits/mbe-bare-trait-objects-const-trait-bounds.stderr @@ -0,0 +1,22 @@ +error: expected identifier, found `]` + --> $DIR/mbe-bare-trait-objects-const-trait-bounds.rs:20:16 + | +LL | ($Type:ty) => { + | -------- while parsing argument for this `ty` macro fragment +... +LL | check! 
{ [const] Trait } + | ^ expected identifier + +error[E0658]: const trait impls are experimental + --> $DIR/mbe-bare-trait-objects-const-trait-bounds.rs:20:11 + | +LL | check! { [const] Trait } + | ^^^^^ + | + = note: see issue #67792 for more information + = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/traits/const-traits/minicore-deref-fail.rs b/tests/ui/traits/const-traits/minicore-deref-fail.rs index f4a7678a0092..d9b33fa040a0 100644 --- a/tests/ui/traits/const-traits/minicore-deref-fail.rs +++ b/tests/ui/traits/const-traits/minicore-deref-fail.rs @@ -11,10 +11,12 @@ use minicore::*; struct Ty; impl Deref for Ty { type Target = (); - fn deref(&self) -> &Self::Target { &() } + fn deref(&self) -> &Self::Target { + &() + } } const fn foo() { *Ty; - //~^ ERROR the trait bound `Ty: ~const minicore::Deref` is not satisfied + //~^ ERROR the trait bound `Ty: [const] minicore::Deref` is not satisfied } diff --git a/tests/ui/traits/const-traits/minicore-deref-fail.stderr b/tests/ui/traits/const-traits/minicore-deref-fail.stderr index a1f840114fc8..4329b235756b 100644 --- a/tests/ui/traits/const-traits/minicore-deref-fail.stderr +++ b/tests/ui/traits/const-traits/minicore-deref-fail.stderr @@ -1,5 +1,5 @@ -error[E0277]: the trait bound `Ty: ~const minicore::Deref` is not satisfied - --> $DIR/minicore-deref-fail.rs:18:5 +error[E0277]: the trait bound `Ty: [const] minicore::Deref` is not satisfied + --> $DIR/minicore-deref-fail.rs:20:5 | LL | *Ty; | ^^^ diff --git a/tests/ui/traits/const-traits/minicore-drop-fail.rs b/tests/ui/traits/const-traits/minicore-drop-fail.rs index 274e5db21c4f..f3e7c7df4d41 100644 --- a/tests/ui/traits/const-traits/minicore-drop-fail.rs +++ b/tests/ui/traits/const-traits/minicore-drop-fail.rs 
@@ -19,7 +19,7 @@ impl Drop for NotDropImpl { impl Foo for () {} struct Conditional(T); -impl const Drop for Conditional where T: ~const Foo { +impl const Drop for Conditional where T: [const] Foo { fn drop(&mut self) {} } diff --git a/tests/ui/traits/const-traits/minicore-fn-fail.rs b/tests/ui/traits/const-traits/minicore-fn-fail.rs index ae1cbc6ca588..d4cd41a51ca5 100644 --- a/tests/ui/traits/const-traits/minicore-fn-fail.rs +++ b/tests/ui/traits/const-traits/minicore-fn-fail.rs @@ -8,14 +8,14 @@ extern crate minicore; use minicore::*; -const fn call_indirect(t: &T) { t() } +const fn call_indirect(t: &T) { t() } #[const_trait] trait Foo {} impl Foo for () {} -const fn foo() {} +const fn foo() {} const fn test() { call_indirect(&foo::<()>); - //~^ ERROR the trait bound `(): ~const Foo` is not satisfied + //~^ ERROR the trait bound `(): [const] Foo` is not satisfied } diff --git a/tests/ui/traits/const-traits/minicore-fn-fail.stderr b/tests/ui/traits/const-traits/minicore-fn-fail.stderr index 03c7ade87c01..c02a067774b5 100644 --- a/tests/ui/traits/const-traits/minicore-fn-fail.stderr +++ b/tests/ui/traits/const-traits/minicore-fn-fail.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `(): ~const Foo` is not satisfied +error[E0277]: the trait bound `(): [const] Foo` is not satisfied --> $DIR/minicore-fn-fail.rs:19:19 | LL | call_indirect(&foo::<()>); @@ -9,8 +9,8 @@ LL | call_indirect(&foo::<()>); note: required by a bound in `call_indirect` --> $DIR/minicore-fn-fail.rs:11:27 | -LL | const fn call_indirect(t: &T) { t() } - | ^^^^^^^^^^^ required by this bound in `call_indirect` +LL | const fn call_indirect(t: &T) { t() } + | ^^^^^^^^^^^^ required by this bound in `call_indirect` error: aborting due to 1 previous error diff --git a/tests/ui/traits/const-traits/minicore-works.rs b/tests/ui/traits/const-traits/minicore-works.rs index c79b4fc07dfd..ef08e84c02b8 100644 --- a/tests/ui/traits/const-traits/minicore-works.rs +++ 
b/tests/ui/traits/const-traits/minicore-works.rs @@ -21,7 +21,9 @@ const fn test_op() { let _y = Custom + Custom; } -const fn call_indirect(t: &T) { t() } +const fn call_indirect(t: &T) { + t() +} const fn call() { call_indirect(&call); diff --git a/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.rs b/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.rs index aaab8e819a39..5f47778a1404 100644 --- a/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.rs +++ b/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.rs @@ -1,13 +1,13 @@ #![feature(const_trait_impl)] -const fn maybe_const_maybe() {} -//~^ ERROR `~const` trait not allowed with `?` trait polarity modifier +const fn maybe_const_maybe() {} +//~^ ERROR `[const]` trait not allowed with `?` trait polarity modifier fn const_maybe() {} //~^ ERROR `const` trait not allowed with `?` trait polarity modifier -const fn maybe_const_negative() {} -//~^ ERROR `~const` trait not allowed with `!` trait polarity modifier +const fn maybe_const_negative() {} +//~^ ERROR `[const]` trait not allowed with `!` trait polarity modifier //~| ERROR negative bounds are not supported fn const_negative() {} diff --git a/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.stderr b/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.stderr index 18e4d160f5f4..429131f905f0 100644 --- a/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.stderr +++ b/tests/ui/traits/const-traits/mutually-exclusive-trait-bound-modifiers.stderr @@ -1,10 +1,10 @@ -error: `~const` trait not allowed with `?` trait polarity modifier - --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:3:38 +error: `[const]` trait not allowed with `?` trait polarity modifier + --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:3:39 | -LL | const fn maybe_const_maybe() {} - | ------ ^ +LL | const fn maybe_const_maybe() {} + | ------- ^ | | - | there 
is not a well-defined meaning for a `~const ?` trait + | there is not a well-defined meaning for a `[const] ?` trait error: `const` trait not allowed with `?` trait polarity modifier --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:6:25 @@ -14,13 +14,13 @@ LL | fn const_maybe() {} | | | there is not a well-defined meaning for a `const ?` trait -error: `~const` trait not allowed with `!` trait polarity modifier - --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:9:41 +error: `[const]` trait not allowed with `!` trait polarity modifier + --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:9:42 | -LL | const fn maybe_const_negative() {} - | ------ ^ +LL | const fn maybe_const_negative() {} + | ------- ^ | | - | there is not a well-defined meaning for a `~const !` trait + | there is not a well-defined meaning for a `[const] !` trait error: `const` trait not allowed with `!` trait polarity modifier --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:13:28 @@ -31,10 +31,10 @@ LL | fn const_negative() {} | there is not a well-defined meaning for a `const !` trait error: negative bounds are not supported - --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:9:41 + --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:9:42 | -LL | const fn maybe_const_negative() {} - | ^ +LL | const fn maybe_const_negative() {} + | ^ error: negative bounds are not supported --> $DIR/mutually-exclusive-trait-bound-modifiers.rs:13:28 diff --git a/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.rs b/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.rs index 8f11c8a6e557..86e3e5f769f8 100644 --- a/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.rs +++ b/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.rs @@ -7,7 +7,7 @@ trait Convert { fn to(self) -> T; } -impl const Convert for A where B: ~const From { +impl const Convert for A where B: [const] From { fn to(self) -> B { B::from(self) } diff --git 
a/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.stderr b/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.stderr index 190af5e7c2dd..8211b2b49bfd 100644 --- a/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.stderr +++ b/tests/ui/traits/const-traits/non-const-op-in-closure-in-const.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/non-const-op-in-closure-in-const.rs:10:44 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/non-const-op-in-closure-in-const.rs:10:42 | -LL | impl const Convert for A where B: ~const From { - | ^^^^^^ can't be applied to `From` +LL | impl const Convert for A where B: [const] From { + | ^^^^^^^^^ can't be applied to `From` | -note: `From` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `From` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/convert/mod.rs:LL:COL -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/non-const-op-in-closure-in-const.rs:10:44 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/non-const-op-in-closure-in-const.rs:10:42 | -LL | impl const Convert for A where B: ~const From { - | ^^^^^^ can't be applied to `From` +LL | impl const Convert for A where B: [const] From { + | ^^^^^^^^^ can't be applied to `From` | -note: `From` can't be used with `~const` because it isn't annotated with `#[const_trait]` +note: `From` can't be used with `[const]` because it isn't annotated with `#[const_trait]` --> $SRC_DIR/core/src/convert/mod.rs:LL:COL = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` diff --git a/tests/ui/traits/const-traits/overlap-const-with-nonconst.min_spec.stderr b/tests/ui/traits/const-traits/overlap-const-with-nonconst.min_spec.stderr index bd822970ad1e..ed671bee63ab 100644 --- 
a/tests/ui/traits/const-traits/overlap-const-with-nonconst.min_spec.stderr +++ b/tests/ui/traits/const-traits/overlap-const-with-nonconst.min_spec.stderr @@ -3,8 +3,8 @@ error[E0119]: conflicting implementations of trait `Foo` for type `(_,)` | LL | / impl const Foo for T LL | | where -LL | | T: ~const Bar, - | |__________________- first implementation here +LL | | T: [const] Bar, + | |___________________- first implementation here ... LL | impl Foo for (T,) { | ^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `(_,)` diff --git a/tests/ui/traits/const-traits/overlap-const-with-nonconst.rs b/tests/ui/traits/const-traits/overlap-const-with-nonconst.rs index eb66d03faa63..f45690b2f78b 100644 --- a/tests/ui/traits/const-traits/overlap-const-with-nonconst.rs +++ b/tests/ui/traits/const-traits/overlap-const-with-nonconst.rs @@ -15,7 +15,7 @@ trait Foo { } impl const Foo for T where - T: ~const Bar, + T: [const] Bar, { default fn method(&self) {} } @@ -27,7 +27,7 @@ impl Foo for (T,) { } } -const fn dispatch(t: T) { +const fn dispatch(t: T) { t.method(); } diff --git a/tests/ui/traits/const-traits/overlap-const-with-nonconst.spec.stderr b/tests/ui/traits/const-traits/overlap-const-with-nonconst.spec.stderr index cbdcb45f6beb..35f4d9184cf5 100644 --- a/tests/ui/traits/const-traits/overlap-const-with-nonconst.spec.stderr +++ b/tests/ui/traits/const-traits/overlap-const-with-nonconst.spec.stderr @@ -13,8 +13,8 @@ error[E0119]: conflicting implementations of trait `Foo` for type `(_,)` | LL | / impl const Foo for T LL | | where -LL | | T: ~const Bar, - | |__________________- first implementation here +LL | | T: [const] Bar, + | |___________________- first implementation here ... 
LL | impl Foo for (T,) { | ^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `(_,)` diff --git a/tests/ui/traits/const-traits/predicate-entailment-fails.rs b/tests/ui/traits/const-traits/predicate-entailment-fails.rs index 266a49f9e386..0e6c277fd822 100644 --- a/tests/ui/traits/const-traits/predicate-entailment-fails.rs +++ b/tests/ui/traits/const-traits/predicate-entailment-fails.rs @@ -6,9 +6,9 @@ impl const Bar for () {} #[const_trait] trait TildeConst { - type Bar where T: ~const Bar; + type Bar where T: [const] Bar; - fn foo() where T: ~const Bar; + fn foo() where T: [const] Bar; } impl TildeConst for () { type Bar = () where T: const Bar; @@ -32,10 +32,10 @@ impl NeverConst for i32 { //~^ ERROR impl has stricter requirements than trait } impl const NeverConst for u32 { - type Bar = () where T: ~const Bar; + type Bar = () where T: [const] Bar; //~^ ERROR impl has stricter requirements than trait - fn foo() where T: ~const Bar {} + fn foo() where T: [const] Bar {} //~^ ERROR impl has stricter requirements than trait } diff --git a/tests/ui/traits/const-traits/predicate-entailment-fails.stderr b/tests/ui/traits/const-traits/predicate-entailment-fails.stderr index dfdc4d232508..cba7c979a42a 100644 --- a/tests/ui/traits/const-traits/predicate-entailment-fails.stderr +++ b/tests/ui/traits/const-traits/predicate-entailment-fails.stderr @@ -1,7 +1,7 @@ error[E0276]: impl has stricter requirements than trait --> $DIR/predicate-entailment-fails.rs:14:31 | -LL | type Bar where T: ~const Bar; +LL | type Bar where T: [const] Bar; | ----------- definition of `Bar` from trait ... 
LL | type Bar = () where T: const Bar; @@ -10,8 +10,8 @@ LL | type Bar = () where T: const Bar; error[E0276]: impl has stricter requirements than trait --> $DIR/predicate-entailment-fails.rs:17:26 | -LL | fn foo() where T: ~const Bar; - | -------------------------------- definition of `foo` from trait +LL | fn foo() where T: [const] Bar; + | --------------------------------- definition of `foo` from trait ... LL | fn foo() where T: const Bar {} | ^^^^^^^^^ impl has extra requirement `T: const Bar` @@ -40,8 +40,8 @@ error[E0276]: impl has stricter requirements than trait LL | type Bar where T: Bar; | ----------- definition of `Bar` from trait ... -LL | type Bar = () where T: ~const Bar; - | ^^^^^^^^^^ impl has extra requirement `T: ~const Bar` +LL | type Bar = () where T: [const] Bar; + | ^^^^^^^^^^^ impl has extra requirement `T: [const] Bar` error[E0276]: impl has stricter requirements than trait --> $DIR/predicate-entailment-fails.rs:38:26 @@ -49,8 +49,8 @@ error[E0276]: impl has stricter requirements than trait LL | fn foo() where T: Bar; | ------------------------- definition of `foo` from trait ... 
-LL | fn foo() where T: ~const Bar {} - | ^^^^^^^^^^ impl has extra requirement `T: ~const Bar` +LL | fn foo() where T: [const] Bar {} + | ^^^^^^^^^^^ impl has extra requirement `T: [const] Bar` error: aborting due to 6 previous errors diff --git a/tests/ui/traits/const-traits/predicate-entailment-passes.rs b/tests/ui/traits/const-traits/predicate-entailment-passes.rs index 28ae21891f38..fe8714831866 100644 --- a/tests/ui/traits/const-traits/predicate-entailment-passes.rs +++ b/tests/ui/traits/const-traits/predicate-entailment-passes.rs @@ -7,7 +7,7 @@ impl const Bar for () {} #[const_trait] trait TildeConst { - fn foo() where T: ~const Bar; + fn foo() where T: [const] Bar; } impl TildeConst for () { fn foo() where T: Bar {} @@ -21,7 +21,7 @@ impl AlwaysConst for i32 { fn foo() where T: Bar {} } impl const AlwaysConst for u32 { - fn foo() where T: ~const Bar {} + fn foo() where T: [const] Bar {} } fn main() {} diff --git a/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.rs b/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.rs index 5af9ee8614fd..212d869d94d3 100644 --- a/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.rs +++ b/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.rs @@ -1,5 +1,5 @@ -// Tests that trait bounds on specializing trait impls must be `~const` if the -// same bound is present on the default impl and is `~const` there. +// Tests that trait bounds on specializing trait impls must be `[const]` if the +// same bound is present on the default impl and is `[const]` there. 
//@ known-bug: #110395 // FIXME(const_trait_impl) ^ should error @@ -20,14 +20,14 @@ trait Bar { impl const Bar for T where - T: ~const Foo, + T: [const] Foo, { default fn bar() {} } impl Bar for T where - T: Foo, //FIXME ~ ERROR missing `~const` qualifier + T: Foo, //FIXME ~ ERROR missing `[const]` qualifier T: Specialize, { fn bar() {} @@ -40,7 +40,7 @@ trait Baz { impl const Baz for T where - T: ~const Foo, + T: [const] Foo, { default fn baz() {} } diff --git a/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.stderr b/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.stderr index 9166b8ca5d22..074e6237cc20 100644 --- a/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.stderr +++ b/tests/ui/traits/const-traits/specialization/const-default-bound-non-const-specialized-bound.stderr @@ -3,12 +3,12 @@ error[E0119]: conflicting implementations of trait `Bar` | LL | / impl const Bar for T LL | | where -LL | | T: ~const Foo, - | |__________________- first implementation here +LL | | T: [const] Foo, + | |___________________- first implementation here ... LL | / impl Bar for T LL | | where -LL | | T: Foo, //FIXME ~ ERROR missing `~const` qualifier +LL | | T: Foo, //FIXME ~ ERROR missing `[const]` qualifier LL | | T: Specialize, | |__________________^ conflicting implementation @@ -17,8 +17,8 @@ error[E0119]: conflicting implementations of trait `Baz` | LL | / impl const Baz for T LL | | where -LL | | T: ~const Foo, - | |__________________- first implementation here +LL | | T: [const] Foo, + | |___________________- first implementation here ... 
LL | / impl const Baz for T //FIXME ~ ERROR conflicting implementations of trait `Baz` LL | | where diff --git a/tests/ui/traits/const-traits/specialization/const-default-const-specialized.rs b/tests/ui/traits/const-traits/specialization/const-default-const-specialized.rs index 89ad61c3c31c..6991b7deda31 100644 --- a/tests/ui/traits/const-traits/specialization/const-default-const-specialized.rs +++ b/tests/ui/traits/const-traits/specialization/const-default-const-specialized.rs @@ -11,7 +11,7 @@ trait Value { fn value() -> u32; } -const fn get_value() -> u32 { +const fn get_value() -> u32 { T::value() } diff --git a/tests/ui/traits/const-traits/specialization/issue-95187-same-trait-bound-different-constness.rs b/tests/ui/traits/const-traits/specialization/issue-95187-same-trait-bound-different-constness.rs index d97469edaf97..754f1c6d09d5 100644 --- a/tests/ui/traits/const-traits/specialization/issue-95187-same-trait-bound-different-constness.rs +++ b/tests/ui/traits/const-traits/specialization/issue-95187-same-trait-bound-different-constness.rs @@ -1,4 +1,4 @@ -// Tests that `T: ~const Foo` in a specializing impl is treated as equivalent to +// Tests that `T: [const] Foo` in a specializing impl is treated as equivalent to // `T: Foo` in the default impl for the purposes of specialization (i.e., it // does not think that the user is attempting to specialize on trait `Foo`). 
@@ -28,7 +28,7 @@ where impl const Bar for T where - T: ~const Foo, + T: [const] Foo, T: Specialize, { fn bar() {} @@ -48,7 +48,7 @@ where impl const Baz for T where - T: ~const Foo, + T: [const] Foo, T: Specialize, { fn baz() {} diff --git a/tests/ui/traits/const-traits/specialization/non-const-default-const-specialized.rs b/tests/ui/traits/const-traits/specialization/non-const-default-const-specialized.rs index e9b494bc2c0d..b1a1b4a23995 100644 --- a/tests/ui/traits/const-traits/specialization/non-const-default-const-specialized.rs +++ b/tests/ui/traits/const-traits/specialization/non-const-default-const-specialized.rs @@ -11,7 +11,7 @@ trait Value { fn value() -> u32; } -const fn get_value() -> u32 { +const fn get_value() -> u32 { T::value() } diff --git a/tests/ui/traits/const-traits/specialization/issue-95186-specialize-on-tilde-const.rs b/tests/ui/traits/const-traits/specialization/specialize-on-conditionally-const.rs similarity index 72% rename from tests/ui/traits/const-traits/specialization/issue-95186-specialize-on-tilde-const.rs rename to tests/ui/traits/const-traits/specialization/specialize-on-conditionally-const.rs index d80370aee820..0106bb13875a 100644 --- a/tests/ui/traits/const-traits/specialization/issue-95186-specialize-on-tilde-const.rs +++ b/tests/ui/traits/const-traits/specialization/specialize-on-conditionally-const.rs @@ -1,4 +1,5 @@ -// Tests that `~const` trait bounds can be used to specialize const trait impls. +// Tests that `[const]` trait bounds can be used to specialize const trait impls. 
+// cc #95186 //@ check-pass @@ -21,7 +22,7 @@ impl const Foo for T { impl const Foo for T where - T: ~const Specialize, + T: [const] Specialize, { fn foo() {} } @@ -33,15 +34,15 @@ trait Bar { impl const Bar for T where - T: ~const Foo, + T: [const] Foo, { default fn bar() {} } impl const Bar for T where - T: ~const Foo, - T: ~const Specialize, + T: [const] Foo, + T: [const] Specialize, { fn bar() {} } diff --git a/tests/ui/traits/const-traits/specializing-constness-2.rs b/tests/ui/traits/const-traits/specializing-constness-2.rs index c1fe42b97512..86c2cee9fedb 100644 --- a/tests/ui/traits/const-traits/specializing-constness-2.rs +++ b/tests/ui/traits/const-traits/specializing-constness-2.rs @@ -17,7 +17,7 @@ impl A for T { } } -impl const A for T { +impl const A for T { fn a() -> u32 { 3 } @@ -25,7 +25,7 @@ impl const A for T { const fn generic() { ::a(); - //FIXME ~^ ERROR: the trait bound `T: ~const Sup` is not satisfied + //FIXME ~^ ERROR: the trait bound `T: [const] Sup` is not satisfied } fn main() {} diff --git a/tests/ui/traits/const-traits/specializing-constness-2.stderr b/tests/ui/traits/const-traits/specializing-constness-2.stderr index edba836aac35..850e6939daeb 100644 --- a/tests/ui/traits/const-traits/specializing-constness-2.stderr +++ b/tests/ui/traits/const-traits/specializing-constness-2.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `T: ~const A` is not satisfied +error[E0277]: the trait bound `T: [const] A` is not satisfied --> $DIR/specializing-constness-2.rs:27:6 | LL | ::a(); diff --git a/tests/ui/traits/const-traits/specializing-constness.rs b/tests/ui/traits/const-traits/specializing-constness.rs index 94b6da7124d2..b64d8b21b24b 100644 --- a/tests/ui/traits/const-traits/specializing-constness.rs +++ b/tests/ui/traits/const-traits/specializing-constness.rs @@ -14,7 +14,7 @@ pub trait A { #[const_trait] pub trait Spec {} -impl const A for T { +impl const A for T { default fn a() -> u32 { 2 } diff --git 
a/tests/ui/traits/const-traits/specializing-constness.stderr b/tests/ui/traits/const-traits/specializing-constness.stderr index 2ca70b53e4e2..f411ebcdfcac 100644 --- a/tests/ui/traits/const-traits/specializing-constness.stderr +++ b/tests/ui/traits/const-traits/specializing-constness.stderr @@ -1,8 +1,8 @@ error[E0119]: conflicting implementations of trait `A` --> $DIR/specializing-constness.rs:23:1 | -LL | impl const A for T { - | ---------------------------------- first implementation here +LL | impl const A for T { + | ----------------------------------- first implementation here ... LL | impl A for T { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation diff --git a/tests/ui/traits/const-traits/staged-api.rs b/tests/ui/traits/const-traits/staged-api.rs index bf09a5f78038..d24b26be569c 100644 --- a/tests/ui/traits/const-traits/staged-api.rs +++ b/tests/ui/traits/const-traits/staged-api.rs @@ -23,7 +23,7 @@ impl const MyTrait for Foo { } #[rustc_allow_const_fn_unstable(const_trait_impl, unstable)] -const fn conditionally_const() { +const fn conditionally_const() { T::func(); } diff --git a/tests/ui/traits/const-traits/super-traits-fail-2.nn.stderr b/tests/ui/traits/const-traits/super-traits-fail-2.nn.stderr index 8f88e3aa8bc6..11f73cbf0c9c 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-2.nn.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-2.nn.stderr @@ -1,31 +1,31 @@ -error: `~const` is not allowed here - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` is not allowed here + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> $DIR/super-traits-fail-2.rs:11:1 | -LL | trait Bar: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | trait Bar: [const] Foo {} + | 
^^^^^^^^^^^^^^^^^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations | LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -33,11 +33,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations diff --git a/tests/ui/traits/const-traits/super-traits-fail-2.ny.stderr b/tests/ui/traits/const-traits/super-traits-fail-2.ny.stderr index 087e80de788e..1767672e1804 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-2.ny.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-2.ny.stderr @@ -1,19 +1,19 
@@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations | LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -21,11 +21,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -33,11 +33,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> 
$DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -45,11 +45,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations diff --git a/tests/ui/traits/const-traits/super-traits-fail-2.rs b/tests/ui/traits/const-traits/super-traits-fail-2.rs index 6cc9d7394767..781dacb81a19 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-2.rs +++ b/tests/ui/traits/const-traits/super-traits-fail-2.rs @@ -8,17 +8,17 @@ trait Foo { } #[cfg_attr(any(yy, ny), const_trait)] -trait Bar: ~const Foo {} -//[ny,nn]~^ ERROR: `~const` can only be applied to `#[const_trait]` -//[ny,nn]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[ny,nn]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[ny]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[ny]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[yn,nn]~^^^^^^ ERROR: `~const` is not allowed here +trait Bar: [const] Foo {} +//[ny,nn]~^ ERROR: `[const]` can only be applied to `#[const_trait]` +//[ny,nn]~| ERROR: `[const]` can only be applied to `#[const_trait]` +//[ny,nn]~| ERROR: `[const]` can only be applied to `#[const_trait]` 
+//[ny]~| ERROR: `[const]` can only be applied to `#[const_trait]` +//[ny]~| ERROR: `[const]` can only be applied to `#[const_trait]` +//[yn,nn]~^^^^^^ ERROR: `[const]` is not allowed here const fn foo(x: &T) { x.a(); - //[yy,yn]~^ ERROR the trait bound `T: ~const Foo` + //[yy,yn]~^ ERROR the trait bound `T: [const] Foo` //[nn,ny]~^^ ERROR cannot call non-const method `::a` in constant functions } diff --git a/tests/ui/traits/const-traits/super-traits-fail-2.yn.stderr b/tests/ui/traits/const-traits/super-traits-fail-2.yn.stderr index ee49810bacec..63c33a00234a 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-2.yn.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-2.yn.stderr @@ -1,16 +1,16 @@ -error: `~const` is not allowed here - --> $DIR/super-traits-fail-2.rs:11:12 +error: `[const]` is not allowed here + --> $DIR/super-traits-fail-2.rs:11:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> $DIR/super-traits-fail-2.rs:11:1 | -LL | trait Bar: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^ -error[E0277]: the trait bound `T: ~const Foo` is not satisfied +error[E0277]: the trait bound `T: [const] Foo` is not satisfied --> $DIR/super-traits-fail-2.rs:20:7 | LL | x.a(); diff --git a/tests/ui/traits/const-traits/super-traits-fail-2.yy.stderr b/tests/ui/traits/const-traits/super-traits-fail-2.yy.stderr index a213273c1c78..4ae4bbde99bb 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-2.yy.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-2.yy.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `T: ~const Foo` is not satisfied +error[E0277]: the trait bound `T: [const] Foo` is not satisfied --> $DIR/super-traits-fail-2.rs:20:7 | LL | x.a(); diff --git 
a/tests/ui/traits/const-traits/super-traits-fail-3.nnn.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.nnn.stderr index a5ef716a62a5..c6a06d074c93 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.nnn.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.nnn.stderr @@ -1,51 +1,51 @@ -error: `~const` is not allowed here - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` is not allowed here + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> $DIR/super-traits-fail-3.rs:23:1 | -LL | trait Bar: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^ error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:23:12 + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:32:17 + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ +LL | const fn foo(x: &T) { + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - 
| ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[const_trait]` to allow it to have `const` implementations | LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -53,11 +53,11 @@ help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[ LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -65,27 +65,27 @@ help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[ LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied 
to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | help: enable `#![feature(const_trait_impl)]` in your crate and mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: enable `#![feature(const_trait_impl)]` in your crate and mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ error[E0015]: cannot call non-const method `::a` in constant functions diff --git a/tests/ui/traits/const-traits/super-traits-fail-3.nny.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.nny.stderr index a5ef716a62a5..c6a06d074c93 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.nny.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.nny.stderr @@ -1,51 +1,51 @@ -error: `~const` is not allowed here - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` is not allowed here + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> $DIR/super-traits-fail-3.rs:23:1 | -LL | trait Bar: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^ error[E0658]: const 
trait impls are experimental - --> $DIR/super-traits-fail-3.rs:23:12 + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:32:17 + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ +LL | const fn foo(x: &T) { + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[const_trait]` to allow it to have `const` implementations | LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -53,11 +53,11 @@ help: enable 
`#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[ LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -65,27 +65,27 @@ help: enable `#![feature(const_trait_impl)]` in your crate and mark `Foo` as `#[ LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | help: enable `#![feature(const_trait_impl)]` in your crate and mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: enable `#![feature(const_trait_impl)]` in your crate and mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | 
#[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ error[E0015]: cannot call non-const method `::a` in constant functions diff --git a/tests/ui/traits/const-traits/super-traits-fail-3.nyn.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.nyn.stderr index 024db4b6d68d..feca029aa6cb 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.nyn.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.nyn.stderr @@ -1,18 +1,18 @@ error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:23:12 + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:32:17 + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ +LL | const fn foo(x: &T) { + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable diff --git a/tests/ui/traits/const-traits/super-traits-fail-3.nyy.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.nyy.stderr index 024db4b6d68d..feca029aa6cb 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.nyy.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.nyy.stderr @@ -1,18 +1,18 @@ error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:23:12 + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider 
upgrading it if it is out of date error[E0658]: const trait impls are experimental - --> $DIR/super-traits-fail-3.rs:32:17 + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ +LL | const fn foo(x: &T) { + | ^^^^^^^^^ | = note: see issue #67792 for more information = help: add `#![feature(const_trait_impl)]` to the crate attributes to enable diff --git a/tests/ui/traits/const-traits/super-traits-fail-3.rs b/tests/ui/traits/const-traits/super-traits-fail-3.rs index d7e0cdc26edd..5370f607decb 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.rs +++ b/tests/ui/traits/const-traits/super-traits-fail-3.rs @@ -20,21 +20,21 @@ trait Foo { #[cfg_attr(any(yyy, yny, nyy, nyn), const_trait)] //[nyy,nyn]~^ ERROR: `const_trait` is a temporary placeholder for marking a trait that is suitable for `const` `impls` and all default bodies as `const`, which may be removed or renamed in the future -trait Bar: ~const Foo {} -//[yny,ynn,nny,nnn]~^ ERROR: `~const` can only be applied to `#[const_trait]` -//[yny,ynn,nny,nnn]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[yny,ynn,nny,nnn]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[yny]~^^^^ ERROR: `~const` can only be applied to `#[const_trait]` -//[yny]~| ERROR: `~const` can only be applied to `#[const_trait]` -//[yyn,ynn,nny,nnn]~^^^^^^ ERROR: `~const` is not allowed here +trait Bar: [const] Foo {} +//[yny,ynn,nny,nnn]~^ ERROR: `[const]` can only be applied to `#[const_trait]` +//[yny,ynn,nny,nnn]~| ERROR: `[const]` can only be applied to `#[const_trait]` +//[yny,ynn,nny,nnn]~| ERROR: `[const]` can only be applied to `#[const_trait]` +//[yny]~^^^^ ERROR: `[const]` can only be applied to `#[const_trait]` +//[yny]~| ERROR: `[const]` can only be applied to `#[const_trait]` +//[yyn,ynn,nny,nnn]~^^^^^^ ERROR: `[const]` is not allowed here //[nyy,nyn,nny,nnn]~^^^^^^^ ERROR: const trait impls are experimental -const fn foo(x: &T) { - //[yyn,ynn,nny,nnn]~^ ERROR: `~const` 
can only be applied to `#[const_trait]` - //[yyn,ynn,nny,nnn]~| ERROR: `~const` can only be applied to `#[const_trait]` +const fn foo(x: &T) { + //[yyn,ynn,nny,nnn]~^ ERROR: `[const]` can only be applied to `#[const_trait]` + //[yyn,ynn,nny,nnn]~| ERROR: `[const]` can only be applied to `#[const_trait]` //[nyy,nyn,nny,nnn]~^^^ ERROR: const trait impls are experimental x.a(); - //[yyn]~^ ERROR: the trait bound `T: ~const Foo` is not satisfied + //[yyn]~^ ERROR: the trait bound `T: [const] Foo` is not satisfied //[ynn,yny,nny,nnn]~^^ ERROR: cannot call non-const method `::a` in constant functions //[nyy,nyn]~^^^ ERROR: cannot call conditionally-const method `::a` in constant functions } diff --git a/tests/ui/traits/const-traits/super-traits-fail-3.ynn.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.ynn.stderr index f22bdd472e53..d9112c91776d 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.ynn.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.ynn.stderr @@ -1,31 +1,31 @@ -error: `~const` is not allowed here - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` is not allowed here + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> $DIR/super-traits-fail-3.rs:23:1 | -LL | trait Bar: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | help: mark `Foo` as `#[const_trait]` to allow it 
to have `const` implementations | LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -33,11 +33,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -45,27 +45,27 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | help: mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` 
traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ error[E0015]: cannot call non-const method `::a` in constant functions diff --git a/tests/ui/traits/const-traits/super-traits-fail-3.yny.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.yny.stderr index 14b50815b8e9..3520b61a81c9 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.yny.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.yny.stderr @@ -1,19 +1,19 @@ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations | LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -21,11 +21,11 @@ help: mark `Foo` as 
`#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -33,11 +33,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations @@ -45,11 +45,11 @@ help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations LL | #[const_trait] trait Foo { | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ can't be applied to `Foo` +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ can't be applied to `Foo` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Foo` as `#[const_trait]` to allow it to have `const` implementations diff --git 
a/tests/ui/traits/const-traits/super-traits-fail-3.yyn.stderr b/tests/ui/traits/const-traits/super-traits-fail-3.yyn.stderr index 3270611dace2..d714118df622 100644 --- a/tests/ui/traits/const-traits/super-traits-fail-3.yyn.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail-3.yyn.stderr @@ -1,39 +1,39 @@ -error: `~const` is not allowed here - --> $DIR/super-traits-fail-3.rs:23:12 +error: `[const]` is not allowed here + --> $DIR/super-traits-fail-3.rs:23:10 | -LL | trait Bar: ~const Foo {} - | ^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^ | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds +note: this trait is not a `#[const_trait]`, so it cannot have `[const]` trait bounds --> $DIR/super-traits-fail-3.rs:23:1 | -LL | trait Bar: ~const Foo {} - | ^^^^^^^^^^^^^^^^^^^^^^^^ +LL | trait Bar: [const] Foo {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | help: mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | #[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ -error: `~const` can only be applied to `#[const_trait]` traits - --> $DIR/super-traits-fail-3.rs:32:17 +error: `[const]` can only be applied to `#[const_trait]` traits + --> $DIR/super-traits-fail-3.rs:32:15 | -LL | const fn foo(x: &T) { - | ^^^^^^ can't be applied to `Bar` +LL | const fn foo(x: &T) { + | ^^^^^^^^^ can't be applied to `Bar` | = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` help: mark `Bar` as `#[const_trait]` to allow it to have `const` implementations | -LL | #[const_trait] trait Bar: ~const Foo {} +LL | 
#[const_trait] trait Bar: [const] Foo {} | ++++++++++++++ -error[E0277]: the trait bound `T: ~const Foo` is not satisfied +error[E0277]: the trait bound `T: [const] Foo` is not satisfied --> $DIR/super-traits-fail-3.rs:36:7 | LL | x.a(); diff --git a/tests/ui/traits/const-traits/super-traits-fail.rs b/tests/ui/traits/const-traits/super-traits-fail.rs index 9fd6263118bd..15e05be4d862 100644 --- a/tests/ui/traits/const-traits/super-traits-fail.rs +++ b/tests/ui/traits/const-traits/super-traits-fail.rs @@ -7,7 +7,7 @@ trait Foo { fn a(&self); } #[const_trait] -trait Bar: ~const Foo {} +trait Bar: [const] Foo {} struct S; impl Foo for S { diff --git a/tests/ui/traits/const-traits/super-traits-fail.stderr b/tests/ui/traits/const-traits/super-traits-fail.stderr index 1f453edf0359..e19aa30cf95c 100644 --- a/tests/ui/traits/const-traits/super-traits-fail.stderr +++ b/tests/ui/traits/const-traits/super-traits-fail.stderr @@ -1,4 +1,4 @@ -error[E0277]: the trait bound `S: ~const Foo` is not satisfied +error[E0277]: the trait bound `S: [const] Foo` is not satisfied --> $DIR/super-traits-fail.rs:17:20 | LL | impl const Bar for S {} diff --git a/tests/ui/traits/const-traits/super-traits.rs b/tests/ui/traits/const-traits/super-traits.rs index 73ddc037cd79..b5fd985ae439 100644 --- a/tests/ui/traits/const-traits/super-traits.rs +++ b/tests/ui/traits/const-traits/super-traits.rs @@ -8,7 +8,7 @@ trait Foo { } #[const_trait] -trait Bar: ~const Foo {} +trait Bar: [const] Foo {} struct S; impl const Foo for S { @@ -17,7 +17,7 @@ impl const Foo for S { impl const Bar for S {} -const fn foo(t: &T) { +const fn foo(t: &T) { t.a(); } diff --git a/tests/ui/traits/const-traits/syntactical-unstable.rs b/tests/ui/traits/const-traits/syntactical-unstable.rs index e192e80fabd9..5c542d327f15 100644 --- a/tests/ui/traits/const-traits/syntactical-unstable.rs +++ b/tests/ui/traits/const-traits/syntactical-unstable.rs @@ -1,6 +1,6 @@ //@ aux-build:staged-api.rs -// Ensure that we enforce const 
stability of traits in `~const`/`const` bounds. +// Ensure that we enforce const stability of traits in `[const]`/`const` bounds. #![feature(const_trait_impl)] @@ -10,19 +10,19 @@ extern crate staged_api; use staged_api::MyTrait; #[const_trait] -trait Foo: ~const MyTrait { +trait Foo: [const] MyTrait { //~^ ERROR use of unstable const library feature `unstable` - type Item: ~const MyTrait; + type Item: [const] MyTrait; //~^ ERROR use of unstable const library feature `unstable` } -const fn where_clause() where T: ~const MyTrait {} +const fn where_clause() where T: [const] MyTrait {} //~^ ERROR use of unstable const library feature `unstable` -const fn nested() where T: Deref {} +const fn nested() where T: Deref {} //~^ ERROR use of unstable const library feature `unstable` -const fn rpit() -> impl ~const MyTrait { Local } +const fn rpit() -> impl [const] MyTrait { Local } //~^ ERROR use of unstable const library feature `unstable` struct Local; diff --git a/tests/ui/traits/const-traits/syntactical-unstable.stderr b/tests/ui/traits/const-traits/syntactical-unstable.stderr index a2ce2f2b6e9d..657773d91217 100644 --- a/tests/ui/traits/const-traits/syntactical-unstable.stderr +++ b/tests/ui/traits/const-traits/syntactical-unstable.stderr @@ -1,43 +1,43 @@ error[E0658]: use of unstable const library feature `unstable` - --> $DIR/syntactical-unstable.rs:13:19 + --> $DIR/syntactical-unstable.rs:13:20 | -LL | trait Foo: ~const MyTrait { - | ------ ^^^^^^^ - | | - | trait is not stable as const yet +LL | trait Foo: [const] MyTrait { + | --------- ^^^^^^^ + | | + | trait is not stable as const yet | = help: add `#![feature(unstable)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: use of unstable const library feature `unstable` - --> $DIR/syntactical-unstable.rs:19:44 + --> $DIR/syntactical-unstable.rs:19:45 | -LL | const fn where_clause() where T: ~const MyTrait {} - | ------ ^^^^^^^ 
- | | - | trait is not stable as const yet +LL | const fn where_clause() where T: [const] MyTrait {} + | --------- ^^^^^^^ + | | + | trait is not stable as const yet | = help: add `#![feature(unstable)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: use of unstable const library feature `unstable` - --> $DIR/syntactical-unstable.rs:22:52 + --> $DIR/syntactical-unstable.rs:22:53 | -LL | const fn nested() where T: Deref {} - | ------ ^^^^^^^ - | | - | trait is not stable as const yet +LL | const fn nested() where T: Deref {} + | --------- ^^^^^^^ + | | + | trait is not stable as const yet | = help: add `#![feature(unstable)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: use of unstable const library feature `unstable` - --> $DIR/syntactical-unstable.rs:25:32 + --> $DIR/syntactical-unstable.rs:25:33 | -LL | const fn rpit() -> impl ~const MyTrait { Local } - | ------ ^^^^^^^ - | | - | trait is not stable as const yet +LL | const fn rpit() -> impl [const] MyTrait { Local } + | ------------ ^^^^^^^ + | | + | trait is not stable as const yet | = help: add `#![feature(unstable)]` to the crate attributes to enable = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date @@ -52,12 +52,12 @@ LL | impl const MyTrait for Local { = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0658]: use of unstable const library feature `unstable` - --> $DIR/syntactical-unstable.rs:15:23 + --> $DIR/syntactical-unstable.rs:15:24 | -LL | type Item: ~const MyTrait; - | ------ ^^^^^^^ - | | - | trait is not stable as const yet +LL | type Item: [const] MyTrait; + | --------- ^^^^^^^ + | | + | trait is not stable as const yet | = help: add `#![feature(unstable)]` to the crate attributes to enable = note: this compiler was built on 
YYYY-MM-DD; consider upgrading it if it is out of date diff --git a/tests/ui/traits/const-traits/syntax.rs b/tests/ui/traits/const-traits/syntax.rs index cfac6e0a93e3..b8e0f46e4f8f 100644 --- a/tests/ui/traits/const-traits/syntax.rs +++ b/tests/ui/traits/const-traits/syntax.rs @@ -1,8 +1,9 @@ //@ compile-flags: -Z parse-crate-root-only -//@ check-pass -#![feature(const_trait_bound_opt_out)] #![feature(const_trait_impl)] -// For now, this parses since an error does not occur until AST lowering. -impl ~const T {} +// This is going down the slice/array parsing route +impl [const] T {} +//~^ ERROR: expected identifier, found `]` + +impl const T {} diff --git a/tests/ui/traits/const-traits/syntax.stderr b/tests/ui/traits/const-traits/syntax.stderr new file mode 100644 index 000000000000..2e9807866b03 --- /dev/null +++ b/tests/ui/traits/const-traits/syntax.stderr @@ -0,0 +1,8 @@ +error: expected identifier, found `]` + --> $DIR/syntax.rs:6:12 + | +LL | impl [const] T {} + | ^ expected identifier + +error: aborting due to 1 previous error + diff --git a/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr b/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr deleted file mode 100644 index 95e684bd0c47..000000000000 --- a/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr +++ /dev/null @@ -1,39 +0,0 @@ -error: `~const` is not allowed here - --> $DIR/tilde-const-and-const-params.rs:8:15 - | -LL | fn add(self) -> Foo<{ A::add(N) }> { - | ^^^^^^ - | -note: this function is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-and-const-params.rs:8:8 - | -LL | fn add(self) -> Foo<{ A::add(N) }> { - | ^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-and-const-params.rs:26:11 - | -LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> { - | ^^^^^^ - | -note: this function is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-and-const-params.rs:26:4 - | -LL | fn bar(_: Foo) -> Foo<{ 
A::add(N) }> { - | ^^^ - -error[E0277]: the trait bound `A: const Add42` is not satisfied - --> $DIR/tilde-const-and-const-params.rs:26:61 - | -LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> { - | ^ - -error[E0277]: the trait bound `A: const Add42` is not satisfied - --> $DIR/tilde-const-and-const-params.rs:8:44 - | -LL | fn add(self) -> Foo<{ A::add(N) }> { - | ^ - -error: aborting due to 4 previous errors - -For more information about this error, try `rustc --explain E0277`. diff --git a/tests/ui/traits/const-traits/tilde-const-invalid-places.rs b/tests/ui/traits/const-traits/tilde-const-invalid-places.rs deleted file mode 100644 index 9d220686771e..000000000000 --- a/tests/ui/traits/const-traits/tilde-const-invalid-places.rs +++ /dev/null @@ -1,61 +0,0 @@ -#![feature(const_trait_impl)] - -#[const_trait] -trait Trait {} - -// Regression test for issue #90052. -fn non_const_function() {} //~ ERROR `~const` is not allowed - -struct Struct { field: T } //~ ERROR `~const` is not allowed here -struct TupleStruct(T); //~ ERROR `~const` is not allowed here -struct UnitStruct; //~ ERROR `~const` is not allowed here -//~^ ERROR parameter `T` is never used - -enum Enum { Variant(T) } //~ ERROR `~const` is not allowed here - -union Union { field: T } //~ ERROR `~const` is not allowed here -//~^ ERROR field must implement `Copy` - -type Type = T; //~ ERROR `~const` is not allowed here - -const CONSTANT: () = (); //~ ERROR `~const` is not allowed here -//~^ ERROR generic const items are experimental - -trait NonConstTrait { - type Type: ~const Trait; - //~^ ERROR `~const` is not allowed - //~| ERROR `~const` is not allowed - fn non_const_function(); //~ ERROR `~const` is not allowed - const CONSTANT: (); //~ ERROR `~const` is not allowed - //~^ ERROR generic const items are experimental -} - -impl NonConstTrait for () { - type Type = (); //~ ERROR `~const` is not allowed - //~^ ERROR overflow evaluating the requirement `(): Trait` - fn non_const_function() {} //~ ERROR `~const` is 
not allowed - const CONSTANT: () = (); //~ ERROR `~const` is not allowed - //~^ ERROR generic const items are experimental -} - -struct Implementor; - -impl Implementor { - type Type = (); //~ ERROR `~const` is not allowed - //~^ ERROR inherent associated types are unstable - fn non_const_function() {} //~ ERROR `~const` is not allowed - const CONSTANT: () = (); //~ ERROR `~const` is not allowed - //~^ ERROR generic const items are experimental -} - -// non-const traits -trait Child0: ~const Trait {} //~ ERROR `~const` is not allowed -trait Child1 where Self: ~const Trait {} //~ ERROR `~const` is not allowed - -// non-const impl -impl Trait for T {} //~ ERROR `~const` is not allowed - -// inherent impl (regression test for issue #117004) -impl Struct {} //~ ERROR `~const` is not allowed - -fn main() {} diff --git a/tests/ui/traits/const-traits/tilde-const-invalid-places.stderr b/tests/ui/traits/const-traits/tilde-const-invalid-places.stderr deleted file mode 100644 index 8151b9aaa23d..000000000000 --- a/tests/ui/traits/const-traits/tilde-const-invalid-places.stderr +++ /dev/null @@ -1,310 +0,0 @@ -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:7:26 - | -LL | fn non_const_function() {} - | ^^^^^^ - | -note: this function is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:7:4 - | -LL | fn non_const_function() {} - | ^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:9:18 - | -LL | struct Struct { field: T } - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:10:23 - | -LL | struct TupleStruct(T); - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:11:22 - | -LL | struct UnitStruct; - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - 
-error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:14:14 - | -LL | enum Enum { Variant(T) } - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:16:16 - | -LL | union Union { field: T } - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:19:14 - | -LL | type Type = T; - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:21:19 - | -LL | const CONSTANT: () = (); - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:25:18 - | -LL | type Type: ~const Trait; - | ^^^^^^ - | -note: associated types in non-`#[const_trait]` traits cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:25:5 - | -LL | type Type: ~const Trait; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:25:33 - | -LL | type Type: ~const Trait; - | ^^^^^^ - | -note: associated types in non-`#[const_trait]` traits cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:25:5 - | -LL | type Type: ~const Trait; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:28:30 - | -LL | fn non_const_function(); - | ^^^^^^ - | -note: this function is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:28:8 - | -LL | fn non_const_function(); - | ^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:29:23 - | -LL | const CONSTANT: (); - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> 
$DIR/tilde-const-invalid-places.rs:34:18 - | -LL | type Type = (); - | ^^^^^^ - | -note: associated types in non-const impls cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:34:5 - | -LL | type Type = (); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:36:30 - | -LL | fn non_const_function() {} - | ^^^^^^ - | -note: this function is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:36:8 - | -LL | fn non_const_function() {} - | ^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:37:23 - | -LL | const CONSTANT: () = (); - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:44:18 - | -LL | type Type = (); - | ^^^^^^ - | -note: inherent associated types cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:44:5 - | -LL | type Type = (); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:46:30 - | -LL | fn non_const_function() {} - | ^^^^^^ - | -note: this function is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:46:8 - | -LL | fn non_const_function() {} - | ^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:47:23 - | -LL | const CONSTANT: () = (); - | ^^^^^^ - | - = note: this item cannot have `~const` trait bounds - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:52:15 - | -LL | trait Child0: ~const Trait {} - | ^^^^^^ - | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:52:1 - | -LL | trait Child0: ~const Trait {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> 
$DIR/tilde-const-invalid-places.rs:53:26 - | -LL | trait Child1 where Self: ~const Trait {} - | ^^^^^^ - | -note: this trait is not a `#[const_trait]`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:53:1 - | -LL | trait Child1 where Self: ~const Trait {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:56:9 - | -LL | impl Trait for T {} - | ^^^^^^ - | -note: this impl is not `const`, so it cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:56:1 - | -LL | impl Trait for T {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: `~const` is not allowed here - --> $DIR/tilde-const-invalid-places.rs:59:9 - | -LL | impl Struct {} - | ^^^^^^ - | -note: inherent impls cannot have `~const` trait bounds - --> $DIR/tilde-const-invalid-places.rs:59:1 - | -LL | impl Struct {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error[E0658]: generic const items are experimental - --> $DIR/tilde-const-invalid-places.rs:21:15 - | -LL | const CONSTANT: () = (); - | ^^^^^^^^^^^^^^^^^ - | - = note: see issue #113521 for more information - = help: add `#![feature(generic_const_items)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: generic const items are experimental - --> $DIR/tilde-const-invalid-places.rs:29:19 - | -LL | const CONSTANT: (); - | ^^^^^^^^^^^^^^^^^ - | - = note: see issue #113521 for more information - = help: add `#![feature(generic_const_items)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: generic const items are experimental - --> $DIR/tilde-const-invalid-places.rs:37:19 - | -LL | const CONSTANT: () = (); - | ^^^^^^^^^^^^^^^^^ - | - = note: see issue #113521 for more information - = help: add `#![feature(generic_const_items)]` to the crate attributes to 
enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: generic const items are experimental - --> $DIR/tilde-const-invalid-places.rs:47:19 - | -LL | const CONSTANT: () = (); - | ^^^^^^^^^^^^^^^^^ - | - = note: see issue #113521 for more information - = help: add `#![feature(generic_const_items)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0392]: type parameter `T` is never used - --> $DIR/tilde-const-invalid-places.rs:11:19 - | -LL | struct UnitStruct; - | ^ unused type parameter - | - = help: consider removing `T`, referring to it in a field, or using a marker such as `PhantomData` - -error[E0740]: field must implement `Copy` or be wrapped in `ManuallyDrop<...>` to be used in a union - --> $DIR/tilde-const-invalid-places.rs:16:32 - | -LL | union Union { field: T } - | ^^^^^^^^ - | - = note: union fields must not have drop side-effects, which is currently enforced via either `Copy` or `ManuallyDrop<...>` -help: wrap the field type in `ManuallyDrop<...>` - | -LL | union Union { field: std::mem::ManuallyDrop } - | +++++++++++++++++++++++ + - -error[E0275]: overflow evaluating the requirement `(): Trait` - --> $DIR/tilde-const-invalid-places.rs:34:34 - | -LL | type Type = (); - | ^^ - | -note: required by a bound in `NonConstTrait::Type` - --> $DIR/tilde-const-invalid-places.rs:25:33 - | -LL | type Type: ~const Trait; - | ^^^^^^^^^^^^ required by this bound in `NonConstTrait::Type` - -error[E0658]: inherent associated types are unstable - --> $DIR/tilde-const-invalid-places.rs:44:5 - | -LL | type Type = (); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #8995 for more information - = help: add `#![feature(inherent_associated_types)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 30 previous errors - 
-Some errors have detailed explanations: E0275, E0392, E0658, E0740. -For more information about an error, try `rustc --explain E0275`. diff --git a/tests/ui/traits/const-traits/tilde-const-syntax.rs b/tests/ui/traits/const-traits/tilde-const-syntax.rs deleted file mode 100644 index f9944c426cce..000000000000 --- a/tests/ui/traits/const-traits/tilde-const-syntax.rs +++ /dev/null @@ -1,9 +0,0 @@ -//@ compile-flags: -Z parse-crate-root-only -//@ check-pass - -#![feature(const_trait_impl)] - -struct S< - T: for<'a> ~const Tr<'a> + 'static + ~const std::ops::Add, - T: for<'a: 'b> ~const m::Trait<'a>, ->; diff --git a/tests/ui/traits/const-traits/tilde-twice.stderr b/tests/ui/traits/const-traits/tilde-twice.stderr deleted file mode 100644 index a809736a4f82..000000000000 --- a/tests/ui/traits/const-traits/tilde-twice.stderr +++ /dev/null @@ -1,8 +0,0 @@ -error: expected identifier, found `~` - --> $DIR/tilde-twice.rs:5:20 - | -LL | struct S; - | ^ expected identifier - -error: aborting due to 1 previous error - diff --git a/tests/ui/traits/const-traits/trait-where-clause-const.rs b/tests/ui/traits/const-traits/trait-where-clause-const.rs index 6f281ca57180..ccb514086cc8 100644 --- a/tests/ui/traits/const-traits/trait-where-clause-const.rs +++ b/tests/ui/traits/const-traits/trait-where-clause-const.rs @@ -12,11 +12,11 @@ trait Bar {} #[const_trait] trait Foo { fn a(); - fn b() where Self: ~const Bar; - fn c(); + fn b() where Self: [const] Bar; + fn c(); } -const fn test1() { +const fn test1() { T::a(); T::b(); //~^ ERROR the trait bound @@ -24,7 +24,7 @@ const fn test1() { //~^ ERROR the trait bound } -const fn test2() { +const fn test2() { T::a(); T::b(); T::c::(); diff --git a/tests/ui/traits/const-traits/trait-where-clause-const.stderr b/tests/ui/traits/const-traits/trait-where-clause-const.stderr index 4ebd7b9757fe..71f9bdff8786 100644 --- a/tests/ui/traits/const-traits/trait-where-clause-const.stderr +++ b/tests/ui/traits/const-traits/trait-where-clause-const.stderr 
@@ -1,4 +1,4 @@ -error[E0277]: the trait bound `T: ~const Bar` is not satisfied +error[E0277]: the trait bound `T: [const] Bar` is not satisfied --> $DIR/trait-where-clause-const.rs:21:5 | LL | T::b(); @@ -7,10 +7,10 @@ LL | T::b(); note: required by a bound in `Foo::b` --> $DIR/trait-where-clause-const.rs:15:24 | -LL | fn b() where Self: ~const Bar; - | ^^^^^^^^^^ required by this bound in `Foo::b` +LL | fn b() where Self: [const] Bar; + | ^^^^^^^^^^^ required by this bound in `Foo::b` -error[E0277]: the trait bound `T: ~const Bar` is not satisfied +error[E0277]: the trait bound `T: [const] Bar` is not satisfied --> $DIR/trait-where-clause-const.rs:23:12 | LL | T::c::(); @@ -19,8 +19,8 @@ LL | T::c::(); note: required by a bound in `Foo::c` --> $DIR/trait-where-clause-const.rs:16:13 | -LL | fn c(); - | ^^^^^^^^^^ required by this bound in `Foo::c` +LL | fn c(); + | ^^^^^^^^^^^ required by this bound in `Foo::c` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/const-traits/trait-where-clause-run.rs b/tests/ui/traits/const-traits/trait-where-clause-run.rs index 2582a69acab2..c40f071f4572 100644 --- a/tests/ui/traits/const-traits/trait-where-clause-run.rs +++ b/tests/ui/traits/const-traits/trait-where-clause-run.rs @@ -10,7 +10,7 @@ trait Bar { #[const_trait] trait Foo { - fn foo() -> u8 where Self: ~const Bar { + fn foo() -> u8 where Self: [const] Bar { ::bar() * 6 } } diff --git a/tests/ui/traits/const-traits/trait-where-clause-self-referential.rs b/tests/ui/traits/const-traits/trait-where-clause-self-referential.rs index b6ac574a4fcd..3a5350cd4ea3 100644 --- a/tests/ui/traits/const-traits/trait-where-clause-self-referential.rs +++ b/tests/ui/traits/const-traits/trait-where-clause-self-referential.rs @@ -4,7 +4,7 @@ #[const_trait] trait Foo { - fn bar() where Self: ~const Foo; + fn bar() where Self: [const] Foo; } struct S; @@ -17,7 +17,7 @@ fn baz() { T::bar(); } -const fn qux() { +const fn qux() { T::bar(); } diff --git 
a/tests/ui/traits/const-traits/trait-where-clause.rs b/tests/ui/traits/const-traits/trait-where-clause.rs index 11f353f3f8ad..6aebab79090a 100644 --- a/tests/ui/traits/const-traits/trait-where-clause.rs +++ b/tests/ui/traits/const-traits/trait-where-clause.rs @@ -5,10 +5,10 @@ trait Bar {} trait Foo { fn a(); - fn b() where Self: ~const Bar; - //~^ ERROR `~const` is not allowed here - fn c(); - //~^ ERROR `~const` is not allowed here + fn b() where Self: [const] Bar; + //~^ ERROR `[const]` is not allowed here + fn c(); + //~^ ERROR `[const]` is not allowed here } fn test1() { diff --git a/tests/ui/traits/const-traits/trait-where-clause.stderr b/tests/ui/traits/const-traits/trait-where-clause.stderr index 3a15cc63f322..04c67903ef50 100644 --- a/tests/ui/traits/const-traits/trait-where-clause.stderr +++ b/tests/ui/traits/const-traits/trait-where-clause.stderr @@ -1,25 +1,25 @@ -error: `~const` is not allowed here - --> $DIR/trait-where-clause.rs:8:24 +error: `[const]` is not allowed here + --> $DIR/trait-where-clause.rs:8:22 | -LL | fn b() where Self: ~const Bar; - | ^^^^^^ +LL | fn b() where Self: [const] Bar; + | ^^^^^^^^^ | -note: this function is not `const`, so it cannot have `~const` trait bounds +note: this function is not `const`, so it cannot have `[const]` trait bounds --> $DIR/trait-where-clause.rs:8:8 | -LL | fn b() where Self: ~const Bar; +LL | fn b() where Self: [const] Bar; | ^ -error: `~const` is not allowed here - --> $DIR/trait-where-clause.rs:10:13 +error: `[const]` is not allowed here + --> $DIR/trait-where-clause.rs:10:11 | -LL | fn c(); - | ^^^^^^ +LL | fn c(); + | ^^^^^^^^^ | -note: this function is not `const`, so it cannot have `~const` trait bounds +note: this function is not `const`, so it cannot have `[const]` trait bounds --> $DIR/trait-where-clause.rs:10:8 | -LL | fn c(); +LL | fn c(); | ^ error[E0277]: the trait bound `T: Bar` is not satisfied @@ -31,8 +31,8 @@ LL | T::b(); note: required by a bound in `Foo::b` --> 
$DIR/trait-where-clause.rs:8:24 | -LL | fn b() where Self: ~const Bar; - | ^^^^^^^^^^ required by this bound in `Foo::b` +LL | fn b() where Self: [const] Bar; + | ^^^^^^^^^^^ required by this bound in `Foo::b` help: consider further restricting type parameter `T` with trait `Bar` | LL | fn test1() { @@ -47,8 +47,8 @@ LL | T::c::(); note: required by a bound in `Foo::c` --> $DIR/trait-where-clause.rs:10:13 | -LL | fn c(); - | ^^^^^^^^^^ required by this bound in `Foo::c` +LL | fn c(); + | ^^^^^^^^^^^ required by this bound in `Foo::c` help: consider further restricting type parameter `T` with trait `Bar` | LL | fn test1() { diff --git a/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.rs b/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.rs index 6d19ef771af1..c82b44275009 100644 --- a/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.rs +++ b/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.rs @@ -29,5 +29,5 @@ struct Container; fn accept0(_: Container<{ T::make() }>) {} // FIXME(const_trait_impl): Instead of suggesting `+ const Trait`, suggest -// changing `~const Trait` to `const Trait`. -const fn accept1(_: Container<{ T::make() }>) {} +// changing `[const] Trait` to `const Trait`. 
+const fn accept1(_: Container<{ T::make() }>) {} diff --git a/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.stderr b/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.stderr index 03e26615d7ed..3ed6dc69d0b7 100644 --- a/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.stderr +++ b/tests/ui/traits/const-traits/unsatisfied-const-trait-bound.stderr @@ -64,74 +64,74 @@ LL | fn accept0(_: Container<{ T::make() }>) {} | ^^^^^^^^^^^^^ = note: ...which again requires evaluating type-level constant, completing the cycle note: cycle used when checking that `accept0` is well-formed - --> $DIR/unsatisfied-const-trait-bound.rs:29:1 + --> $DIR/unsatisfied-const-trait-bound.rs:29:35 | LL | fn accept0(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^ = note: see https://rustc-dev-guide.rust-lang.org/overview.html#queries and https://rustc-dev-guide.rust-lang.org/query.html for more information error[E0391]: cycle detected when caching mir of `accept1::{constant#0}` for CTFE - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ | note: ...which requires elaborating drops for `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires borrow-checking `accept1::{constant#0}`... 
- --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires promoting constants in MIR for `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires const checking `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires building MIR for `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires building an abstract representation for `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires building THIR for `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires type-checking `accept1::{constant#0}`... 
- --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires evaluating type-level constant... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ note: ...which requires const-evaluating + checking `accept1::{constant#0}`... - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ = note: ...which again requires caching mir of `accept1::{constant#0}` for CTFE, completing the cycle note: cycle used when const-evaluating + checking `accept1::{constant#0}` - --> $DIR/unsatisfied-const-trait-bound.rs:33:48 + --> $DIR/unsatisfied-const-trait-bound.rs:33:49 | -LL | const fn accept1(_: Container<{ T::make() }>) {} - | ^^^^^^^^^^^^^ +LL | const fn accept1(_: Container<{ T::make() }>) {} + | ^^^^^^^^^^^^^ = note: see https://rustc-dev-guide.rust-lang.org/overview.html#queries and https://rustc-dev-guide.rust-lang.org/query.html for more information error: aborting due to 3 previous errors diff --git a/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.rs b/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.rs index ff577da32c23..3fd22c7dbf0c 100644 --- a/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.rs +++ b/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.rs @@ -1,15 +1,17 @@ //@ compile-flags: -Znext-solver=coherence +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] #![recursion_limit = "10"] trait Trait {} -struct 
W(*const T); +struct W(*const T); trait TwoW {} -impl TwoW for W> {} +impl TwoW for W> {} -impl Trait for W {} -impl Trait for T {} +impl Trait for W {} +impl Trait for T {} //~^ ERROR conflicting implementations of trait `Trait` for type `W fn main() {} diff --git a/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.stderr b/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.stderr index 7d39c82d22f7..1827533a84d9 100644 --- a/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.stderr +++ b/tests/ui/traits/next-solver/coherence/coherence-fulfill-overflow.stderr @@ -1,10 +1,10 @@ error[E0119]: conflicting implementations of trait `Trait` for type `W>>>>>>>>>>>>>>>>>>>>>>` - --> $DIR/coherence-fulfill-overflow.rs:12:1 + --> $DIR/coherence-fulfill-overflow.rs:14:1 | -LL | impl Trait for W {} - | ------------------------------------- first implementation here -LL | impl Trait for T {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `W>>>>>>>>>>>>>>>>>>>>>>` +LL | impl Trait for W {} + | ---------------------------- first implementation here +LL | impl Trait for T {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `W>>>>>>>>>>>>>>>>>>>>>>` error: aborting due to 1 previous error diff --git a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs index 920f8add5079..9da79f7ac837 100644 --- a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs +++ b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.rs @@ -1,6 +1,7 @@ //@ revisions: with without //@ compile-flags: -Znext-solver #![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] // This test is incredibly subtle. At its core the goal is to get a coinductive cycle, // which, depending on its root goal, either holds or errors. 
We achieve this by getting @@ -17,20 +18,20 @@ // test for that. #[rustc_coinductive] -trait Trait {} -struct A(*const T); -struct B(*const T); +trait Trait {} +struct A(*const T); +struct B(*const T); -trait IncompleteGuidance {} -impl IncompleteGuidance for T {} -impl IncompleteGuidance for T {} -impl IncompleteGuidance for T {} +trait IncompleteGuidance {} +impl IncompleteGuidance for T {} +impl IncompleteGuidance for T {} +impl IncompleteGuidance for T {} -trait ImplGuidance {} -impl ImplGuidance for T {} -impl ImplGuidance for T {} +trait ImplGuidance {} +impl ImplGuidance for T {} +impl ImplGuidance for T {} -impl Trait for A +impl Trait for A where T: IncompleteGuidance, A: Trait, @@ -39,17 +40,17 @@ where { } -trait ToU8 {} +trait ToU8 {} impl ToU8 for () {} -impl Trait for B +impl Trait for B where T: ImplGuidance, A: Trait, { } -fn impls_trait, U: ?Sized, V: ?Sized, D: ?Sized>() {} +fn impls_trait, U, V, D>() {} fn with_bound() where diff --git a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.with.stderr b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.with.stderr index 9114bcadac0c..d27104de541b 100644 --- a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.with.stderr +++ b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.with.stderr @@ -1,25 +1,25 @@ error[E0277]: the trait bound `A: Trait<_, _, _>` is not satisfied - --> $DIR/incompleteness-unstable-result.rs:65:19 + --> $DIR/incompleteness-unstable-result.rs:66:19 | LL | impls_trait::, _, _, _>(); | ^^^^ the trait `Trait<_, _, _>` is not implemented for `A` | = help: the trait `Trait` is implemented for `A` note: required for `A` to implement `Trait<_, _, _>` - --> $DIR/incompleteness-unstable-result.rs:33:50 + --> $DIR/incompleteness-unstable-result.rs:34:18 | -LL | impl Trait for A - | ^^^^^^^^^^^^^^ ^^^^ +LL | impl Trait for A + | ^^^^^^^^^^^^^^ ^^^^ ... 
LL | A: Trait, | -------------- unsatisfied trait bound introduced here = note: 8 redundant requirements hidden = note: required for `A` to implement `Trait<_, _, _>` note: required by a bound in `impls_trait` - --> $DIR/incompleteness-unstable-result.rs:52:28 + --> $DIR/incompleteness-unstable-result.rs:53:19 | -LL | fn impls_trait, U: ?Sized, V: ?Sized, D: ?Sized>() {} - | ^^^^^^^^^^^^^^ required by this bound in `impls_trait` +LL | fn impls_trait, U, V, D>() {} + | ^^^^^^^^^^^^^^ required by this bound in `impls_trait` error: aborting due to 1 previous error diff --git a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.without.stderr b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.without.stderr index 9114bcadac0c..d27104de541b 100644 --- a/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.without.stderr +++ b/tests/ui/traits/next-solver/cycles/coinduction/incompleteness-unstable-result.without.stderr @@ -1,25 +1,25 @@ error[E0277]: the trait bound `A: Trait<_, _, _>` is not satisfied - --> $DIR/incompleteness-unstable-result.rs:65:19 + --> $DIR/incompleteness-unstable-result.rs:66:19 | LL | impls_trait::, _, _, _>(); | ^^^^ the trait `Trait<_, _, _>` is not implemented for `A` | = help: the trait `Trait` is implemented for `A` note: required for `A` to implement `Trait<_, _, _>` - --> $DIR/incompleteness-unstable-result.rs:33:50 + --> $DIR/incompleteness-unstable-result.rs:34:18 | -LL | impl Trait for A - | ^^^^^^^^^^^^^^ ^^^^ +LL | impl Trait for A + | ^^^^^^^^^^^^^^ ^^^^ ... 
LL | A: Trait, | -------------- unsatisfied trait bound introduced here = note: 8 redundant requirements hidden = note: required for `A` to implement `Trait<_, _, _>` note: required by a bound in `impls_trait` - --> $DIR/incompleteness-unstable-result.rs:52:28 + --> $DIR/incompleteness-unstable-result.rs:53:19 | -LL | fn impls_trait, U: ?Sized, V: ?Sized, D: ?Sized>() {} - | ^^^^^^^^^^^^^^ required by this bound in `impls_trait` +LL | fn impls_trait, U, V, D>() {} + | ^^^^^^^^^^^^^^ required by this bound in `impls_trait` error: aborting due to 1 previous error diff --git a/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs b/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs index f7ed0e100c46..326d888a55f3 100644 --- a/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs +++ b/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs @@ -1,23 +1,24 @@ //@ compile-flags: -Znext-solver #![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] // Check that we correctly rerun the trait solver for heads of cycles, // even if they are not the root. -struct A(*const T); -struct B(*const T); -struct C(*const T); +struct A(*const T); +struct B(*const T); +struct C(*const T); #[rustc_coinductive] trait Trait<'a, 'b> {} trait NotImplemented {} -impl<'a, 'b, T: ?Sized> Trait<'a, 'b> for A where B: Trait<'a, 'b> {} +impl<'a, 'b, T> Trait<'a, 'b> for A where B: Trait<'a, 'b> {} // With this the root of `B` is `A`, even if the other impl does // not have a cycle with `A`. This candidate never applies because of // the `A: NotImplemented` bound. -impl<'a, 'b, T: ?Sized> Trait<'a, 'b> for B +impl<'a, 'b, T> Trait<'a, 'b> for B where A: Trait<'a, 'b>, A: NotImplemented, @@ -31,7 +32,7 @@ where // use the impl itself to prove that adds region constraints as we uniquified the // regions in the `A: Trait<'a, 'b>` where-bound. 
As both the impl above // and the impl below now apply with some constraints, we failed with ambiguity. -impl<'a, 'b, T: ?Sized> Trait<'a, 'b> for B +impl<'a, 'b, T> Trait<'a, 'b> for B where A: NotImplemented, {} @@ -40,7 +41,7 @@ where // // Because of the coinductive cycle through `C` it also requires // 'a to be 'static. -impl<'a, T: ?Sized> Trait<'a, 'static> for B +impl<'a, T> Trait<'a, 'static> for B where C: Trait<'a, 'a>, {} @@ -48,14 +49,14 @@ where // In the first iteration of `B: Trait<'a, 'b>` we don't add any // constraints here, only after setting the provisional result to require // `'b == 'static` do we also add that constraint for `'a`. -impl<'a, 'b, T: ?Sized> Trait<'a, 'b> for C +impl<'a, 'b, T> Trait<'a, 'b> for C where B: Trait<'a, 'b>, {} -fn impls_trait<'a, 'b, T: Trait<'a, 'b> + ?Sized>() {} +fn impls_trait<'a, 'b, T: Trait<'a, 'b>>() {} -fn check<'a, T: ?Sized>() { +fn check<'a, T>() { impls_trait::<'a, 'static, A>(); //~^ ERROR lifetime may not live long enough } diff --git a/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.stderr b/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.stderr index 0cbd96540448..c88081736f3c 100644 --- a/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.stderr +++ b/tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.stderr @@ -1,7 +1,7 @@ error: lifetime may not live long enough - --> $DIR/fixpoint-rerun-all-cycle-heads.rs:59:5 + --> $DIR/fixpoint-rerun-all-cycle-heads.rs:60:5 | -LL | fn check<'a, T: ?Sized>() { +LL | fn check<'a, T>() { | -- lifetime `'a` defined here LL | impls_trait::<'a, 'static, A>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ requires that `'a` must outlive `'static` diff --git a/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.rs b/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.rs index 9cbcc5a3cdf2..12feb1e2771c 100644 --- a/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.rs +++ 
b/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.rs @@ -1,4 +1,6 @@ //@ compile-flags: -Znext-solver +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] // This currently hangs if we do not erase constraints from // overflow. @@ -17,9 +19,9 @@ // the solver to hang without hitting the recursion limit. trait Trait {} -struct W(*const T); +struct W(*const T); -impl Trait for W> +impl Trait for W> where W: Trait, W: Trait, diff --git a/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.stderr b/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.stderr index a2a5c028cf8d..5ba3c511c171 100644 --- a/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.stderr +++ b/tests/ui/traits/next-solver/cycles/inductive-fixpoint-hang.stderr @@ -1,11 +1,11 @@ error[E0275]: overflow evaluating the requirement `W<_>: Trait` - --> $DIR/inductive-fixpoint-hang.rs:31:19 + --> $DIR/inductive-fixpoint-hang.rs:33:19 | LL | impls_trait::>(); | ^^^^ | note: required by a bound in `impls_trait` - --> $DIR/inductive-fixpoint-hang.rs:28:19 + --> $DIR/inductive-fixpoint-hang.rs:30:19 | LL | fn impls_trait() {} | ^^^^^ required by this bound in `impls_trait` diff --git a/tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs b/tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs index b005b909aedb..88a1196b7e5b 100644 --- a/tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs +++ b/tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs @@ -1,6 +1,7 @@ //@ compile-flags: -Znext-solver //@ check-pass #![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] // A test showcasing that using a provisional cache can differ // from only tracking stack entries. 
@@ -59,9 +60,9 @@ trait B {} #[rustc_coinductive] trait C {} -impl A for T {} -impl B for T {} -impl C for T {} +impl A for T {} +impl B for T {} +impl C for T {} fn impls_a() {} diff --git a/tests/ui/traits/next-solver/dont-canonicalize-re-error.rs b/tests/ui/traits/next-solver/dont-canonicalize-re-error.rs index 57f814bc81ec..a2ed73b2c869 100644 --- a/tests/ui/traits/next-solver/dont-canonicalize-re-error.rs +++ b/tests/ui/traits/next-solver/dont-canonicalize-re-error.rs @@ -1,4 +1,6 @@ //@ compile-flags: -Znext-solver +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] trait Tr<'a> {} @@ -16,9 +18,9 @@ trait Tr<'a> {} // Then, when we recompute the goal `W: Constrain<'error>`, when // collecting ambiguities and overflows, we end up assembling a default // error candidate w/o ambiguity, which causes the goal to pass, and ICE. -impl<'a, A: ?Sized> Tr<'a> for W {} -struct W(A); -impl<'a, A: ?Sized> Tr<'a> for A where A: Constrain<'a> {} +impl<'a, A> Tr<'a> for W {} +struct W(A); +impl<'a, A> Tr<'a> for A where A: Constrain<'a> {} //~^ ERROR conflicting implementations of trait `Tr<'_>` for type `W<_>` trait Constrain<'a> {} diff --git a/tests/ui/traits/next-solver/dont-canonicalize-re-error.stderr b/tests/ui/traits/next-solver/dont-canonicalize-re-error.stderr index cf85c52fb42e..867efd4a0e7b 100644 --- a/tests/ui/traits/next-solver/dont-canonicalize-re-error.stderr +++ b/tests/ui/traits/next-solver/dont-canonicalize-re-error.stderr @@ -1,19 +1,22 @@ error[E0261]: use of undeclared lifetime name `'missing` - --> $DIR/dont-canonicalize-re-error.rs:25:26 + --> $DIR/dont-canonicalize-re-error.rs:27:26 | LL | impl Constrain<'missing> for W {} - | - ^^^^^^^^ undeclared lifetime - | | - | help: consider introducing lifetime `'missing` here: `'missing,` + | ^^^^^^^^ undeclared lifetime + | +help: consider introducing lifetime `'missing` here + | +LL | impl<'missing, A: Sized> Constrain<'missing> for W {} + | +++++++++ error[E0119]: conflicting implementations of trait 
`Tr<'_>` for type `W<_>` - --> $DIR/dont-canonicalize-re-error.rs:21:1 + --> $DIR/dont-canonicalize-re-error.rs:23:1 | -LL | impl<'a, A: ?Sized> Tr<'a> for W {} - | ----------------------------------- first implementation here -LL | struct W(A); -LL | impl<'a, A: ?Sized> Tr<'a> for A where A: Constrain<'a> {} - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `W<_>` +LL | impl<'a, A> Tr<'a> for W {} + | --------------------------- first implementation here +LL | struct W(A); +LL | impl<'a, A> Tr<'a> for A where A: Constrain<'a> {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation for `W<_>` error: aborting due to 2 previous errors diff --git a/tests/ui/traits/next-solver/normalize/normalize-region-obligations.rs b/tests/ui/traits/next-solver/normalize/normalize-region-obligations.rs index c4c2e695a1df..e1ffa4e29d69 100644 --- a/tests/ui/traits/next-solver/normalize/normalize-region-obligations.rs +++ b/tests/ui/traits/next-solver/normalize/normalize-region-obligations.rs @@ -1,6 +1,8 @@ //@ revisions: normalize_param_env normalize_obligation hrtb //@ check-pass //@ compile-flags: -Znext-solver +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] trait Foo { #[cfg(normalize_param_env)] @@ -11,11 +13,11 @@ trait Foo { type Gat<'b> where for<'a> >::Assoc: 'b; } -trait Mirror { type Assoc: ?Sized; } -impl Mirror for T { type Assoc = T; } +trait Mirror { type Assoc; } +impl Mirror for T { type Assoc = T; } -trait MirrorRegion<'a> { type Assoc: ?Sized; } -impl<'a, T: ?Sized> MirrorRegion<'a> for T { type Assoc = T; } +trait MirrorRegion<'a> { type Assoc; } +impl<'a, T> MirrorRegion<'a> for T { type Assoc = T; } impl Foo for T { #[cfg(normalize_param_env)] diff --git a/tests/ui/traits/next-solver/overflow/coherence-alias-hang.rs b/tests/ui/traits/next-solver/overflow/coherence-alias-hang.rs index f88f74680b9d..4874e2e1f99c 100644 --- a/tests/ui/traits/next-solver/overflow/coherence-alias-hang.rs +++ 
b/tests/ui/traits/next-solver/overflow/coherence-alias-hang.rs @@ -4,16 +4,18 @@ // Regression test for nalgebra hang . #![feature(lazy_type_alias)] +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] #![allow(incomplete_features)] -type Id = T; +type Id = T; trait NotImplemented {} -struct W(*const T, *const U); +struct W(*const T, *const U); trait Trait { - type Assoc: ?Sized; + type Assoc; } -impl Trait for W { +impl Trait for W { #[cfg(ai)] type Assoc = W>; #[cfg(ia)] @@ -22,8 +24,8 @@ impl Trait for W { type Assoc = W, Id>; } -trait Overlap {} -impl Overlap for W {} -impl Overlap for T {} +trait Overlap {} +impl Overlap for W {} +impl Overlap for T {} fn main() {} diff --git a/tests/ui/traits/next-solver/overflow/recursion-limit-normalizes-to-constraints.rs b/tests/ui/traits/next-solver/overflow/recursion-limit-normalizes-to-constraints.rs index dee5500aaddb..e5a57a44d498 100644 --- a/tests/ui/traits/next-solver/overflow/recursion-limit-normalizes-to-constraints.rs +++ b/tests/ui/traits/next-solver/overflow/recursion-limit-normalizes-to-constraints.rs @@ -1,5 +1,7 @@ //@ compile-flags: -Znext-solver=coherence //@ check-pass +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] // A regression test for trait-system-refactor-initiative#70. @@ -7,8 +9,8 @@ trait Trait { type Assoc; } -struct W(*mut T); -impl Trait for W> +struct W(*mut T); +impl Trait for W> where W: Trait, { @@ -20,6 +22,6 @@ impl> NoOverlap for T {} // `Projection( as Trait>::Assoc, u32)` should result in error even // though applying the impl results in overflow. This is necessary to match // the behavior of the old solver. 
-impl NoOverlap for W {} +impl NoOverlap for W {} fn main() {} diff --git a/tests/ui/traits/next-solver/supertrait-alias-1.rs b/tests/ui/traits/next-solver/supertrait-alias-1.rs index 579a44677c2e..2671eed7fcea 100644 --- a/tests/ui/traits/next-solver/supertrait-alias-1.rs +++ b/tests/ui/traits/next-solver/supertrait-alias-1.rs @@ -1,5 +1,7 @@ //@ compile-flags: -Znext-solver //@ check-pass +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] // Regression test for . // Tests that we don't try to replace `::Output` when replacing projections in the @@ -13,9 +15,9 @@ pub trait Super { type Output; } -fn bound() {} +fn bound() {} -fn visit_simd_operator() { +fn visit_simd_operator() { bound::::Output>>(); } diff --git a/tests/ui/traits/on_unimplemented_long_types.stderr b/tests/ui/traits/on_unimplemented_long_types.stderr index 1628466e0818..f32d99a42b12 100644 --- a/tests/ui/traits/on_unimplemented_long_types.stderr +++ b/tests/ui/traits/on_unimplemented_long_types.stderr @@ -2,7 +2,7 @@ error[E0277]: `Option>>` doesn't implement `std::fmt::Display --> $DIR/on_unimplemented_long_types.rs:3:17 | LL | pub fn foo() -> impl std::fmt::Display { - | ^^^^^^^^^^^^^^^^^^^^^^ `Option>>` cannot be formatted with the default formatter + | ^^^^^^^^^^^^^^^^^^^^^^ unsatisfied trait bound LL | LL | / Some(Some(Some(Some(Some(Some(Some(Some(Some(S... LL | | Some(Some(Some(Some(Some(Some(Some(Some(So... 
@@ -14,7 +14,6 @@ LL | | ))))))))))) | |_______________- return type was inferred to be `Option>>` here | = help: the trait `std::fmt::Display` is not implemented for `Option>>` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead = note: the full name for the type has been written to '$TEST_BUILD_DIR/on_unimplemented_long_types.long-type-$LONG_TYPE_HASH.txt' = note: consider using `--verbose` to print the full type name to the console diff --git a/tests/ui/traits/overflow-computing-ambiguity.rs b/tests/ui/traits/overflow-computing-ambiguity.rs index b8f11efeda28..88eeca56cdd1 100644 --- a/tests/ui/traits/overflow-computing-ambiguity.rs +++ b/tests/ui/traits/overflow-computing-ambiguity.rs @@ -1,12 +1,15 @@ +#![feature(rustc_attrs)] +#![rustc_no_implicit_bounds] + trait Hello {} -struct Foo<'a, T: ?Sized>(&'a T); +struct Foo<'a, T>(&'a T); -impl<'a, T: ?Sized> Hello for Foo<'a, &'a T> where Foo<'a, T>: Hello {} +impl<'a, T> Hello for Foo<'a, &'a T> where Foo<'a, T>: Hello {} impl Hello for Foo<'static, i32> {} -fn hello() {} +fn hello() {} fn main() { hello(); diff --git a/tests/ui/traits/overflow-computing-ambiguity.stderr b/tests/ui/traits/overflow-computing-ambiguity.stderr index a2e255865bf4..f3e91a29a9ca 100644 --- a/tests/ui/traits/overflow-computing-ambiguity.stderr +++ b/tests/ui/traits/overflow-computing-ambiguity.stderr @@ -1,5 +1,5 @@ error[E0283]: type annotations needed - --> $DIR/overflow-computing-ambiguity.rs:12:5 + --> $DIR/overflow-computing-ambiguity.rs:15:5 | LL | hello(); | ^^^^^ cannot infer type of the type parameter `T` declared on the function `hello` @@ -9,10 +9,10 @@ LL | hello(); Foo<'a, &'a T> Foo<'static, i32> note: required by a bound in `hello` - --> $DIR/overflow-computing-ambiguity.rs:9:22 + --> $DIR/overflow-computing-ambiguity.rs:12:13 | -LL | fn hello() {} - | ^^^^^ required by this bound in `hello` +LL | fn hello() {} + | ^^^^^ required by this bound in `hello` help: consider specifying 
the generic argument | LL | hello::(); diff --git a/tests/ui/traits/span-bug-issue-121414.stderr b/tests/ui/traits/span-bug-issue-121414.stderr index 744806a34150..2eeda00d9cb7 100644 --- a/tests/ui/traits/span-bug-issue-121414.stderr +++ b/tests/ui/traits/span-bug-issue-121414.stderr @@ -2,9 +2,12 @@ error[E0261]: use of undeclared lifetime name `'f` --> $DIR/span-bug-issue-121414.rs:5:22 | LL | impl<'a> Bar for Foo<'f> { - | - ^^ undeclared lifetime - | | - | help: consider introducing lifetime `'f` here: `'f,` + | ^^ undeclared lifetime + | +help: consider introducing lifetime `'f` here + | +LL | impl<'f, 'a> Bar for Foo<'f> { + | +++ error: aborting due to 1 previous error diff --git a/tests/ui/traits/suggest-remove-deref-issue-140166.stderr b/tests/ui/traits/suggest-remove-deref-issue-140166.stderr index 90f24d86d53e..7c61f957fdcc 100644 --- a/tests/ui/traits/suggest-remove-deref-issue-140166.stderr +++ b/tests/ui/traits/suggest-remove-deref-issue-140166.stderr @@ -4,7 +4,7 @@ error[E0277]: the trait bound `&Chars: Trait` is not satisfied LL | format_args!("{:?}", FlatMap(&Chars)); | ---- ^^^^^^^^^^^^^^^ the trait `Trait` is not implemented for `&Chars` | | - | required by a bound introduced by this call + | required by this formatting parameter | = help: the trait `Trait` is implemented for `Chars` note: required for `FlatMap<&Chars>` to implement `Debug` @@ -14,8 +14,6 @@ LL | impl std::fmt::Debug for FlatMap { | ----- ^^^^^^^^^^^^^^^ ^^^^^^^^^^ | | | unsatisfied trait bound introduced here -note: required by a bound in `core::fmt::rt::Argument::<'_>::new_debug` - --> $SRC_DIR/core/src/fmt/rt.rs:LL:COL error: aborting due to 1 previous error diff --git a/tests/ui/traits/trait-object-destructure.rs b/tests/ui/traits/trait-object-destructure.rs new file mode 100644 index 000000000000..6c091677c8ce --- /dev/null +++ b/tests/ui/traits/trait-object-destructure.rs @@ -0,0 +1,29 @@ +//! Regression test for destructuring trait references (`&dyn T`/`Box`). +//! 
Checks cases where number of `&`/`Box` patterns (n) matches/doesn't match references (m). +//! +//! Issue: https://github.com/rust-lang/rust/issues/15031 + +#![feature(box_patterns)] + +trait T { + fn foo(&self) {} +} + +impl T for isize {} + +fn main() { + // Valid cases: n < m (can dereference) + let &x = &(&1isize as &dyn T); + let &x = &&(&1isize as &dyn T); + let &&x = &&(&1isize as &dyn T); + + // Error cases: n == m (cannot dereference trait object) + let &x = &1isize as &dyn T; //~ ERROR type `&dyn T` cannot be dereferenced + let &&x = &(&1isize as &dyn T); //~ ERROR type `&dyn T` cannot be dereferenced + let box x = Box::new(1isize) as Box; //~ ERROR type `Box` cannot be dereferenced + + // Error cases: n > m (type mismatch) + let &&x = &1isize as &dyn T; //~ ERROR mismatched types + let &&&x = &(&1isize as &dyn T); //~ ERROR mismatched types + let box box x = Box::new(1isize) as Box; //~ ERROR mismatched types +} diff --git a/tests/ui/destructure-trait-ref.stderr b/tests/ui/traits/trait-object-destructure.stderr similarity index 87% rename from tests/ui/destructure-trait-ref.stderr rename to tests/ui/traits/trait-object-destructure.stderr index 0b5ea551a578..c7c832dc40af 100644 --- a/tests/ui/destructure-trait-ref.stderr +++ b/tests/ui/traits/trait-object-destructure.stderr @@ -1,23 +1,23 @@ error[E0033]: type `&dyn T` cannot be dereferenced - --> $DIR/destructure-trait-ref.rs:28:9 + --> $DIR/trait-object-destructure.rs:21:9 | LL | let &x = &1isize as &dyn T; | ^^ type `&dyn T` cannot be dereferenced error[E0033]: type `&dyn T` cannot be dereferenced - --> $DIR/destructure-trait-ref.rs:29:10 + --> $DIR/trait-object-destructure.rs:22:10 | LL | let &&x = &(&1isize as &dyn T); | ^^ type `&dyn T` cannot be dereferenced error[E0033]: type `Box` cannot be dereferenced - --> $DIR/destructure-trait-ref.rs:30:9 + --> $DIR/trait-object-destructure.rs:23:9 | LL | let box x = Box::new(1isize) as Box; | ^^^^^ type `Box` cannot be dereferenced error[E0308]: mismatched 
types - --> $DIR/destructure-trait-ref.rs:34:10 + --> $DIR/trait-object-destructure.rs:26:10 | LL | let &&x = &1isize as &dyn T; | ^^ ----------------- this expression has type `&dyn T` @@ -33,7 +33,7 @@ LL + let &x = &1isize as &dyn T; | error[E0308]: mismatched types - --> $DIR/destructure-trait-ref.rs:38:11 + --> $DIR/trait-object-destructure.rs:27:11 | LL | let &&&x = &(&1isize as &dyn T); | ^^ -------------------- this expression has type `&&dyn T` @@ -49,7 +49,7 @@ LL + let &&x = &(&1isize as &dyn T); | error[E0308]: mismatched types - --> $DIR/destructure-trait-ref.rs:42:13 + --> $DIR/trait-object-destructure.rs:28:13 | LL | let box box x = Box::new(1isize) as Box; | ^^^^^ ------------------------------ this expression has type `Box` diff --git a/tests/ui/type-alias-impl-trait/bounds-are-checked3.stderr b/tests/ui/type-alias-impl-trait/bounds-are-checked3.stderr index c0f6d6780976..01d24cabf48d 100644 --- a/tests/ui/type-alias-impl-trait/bounds-are-checked3.stderr +++ b/tests/ui/type-alias-impl-trait/bounds-are-checked3.stderr @@ -2,9 +2,8 @@ error[E0277]: `T` doesn't implement `std::fmt::Display` --> $DIR/bounds-are-checked3.rs:9:35 | LL | type Foo = (impl Debug, Struct); - | ^^^^^^^^^ `T` cannot be formatted with the default formatter + | ^^^^^^^^^ the trait `std::fmt::Display` is not implemented for `T` | - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `Struct` --> $DIR/bounds-are-checked3.rs:5:18 | diff --git a/tests/ui/type-alias-impl-trait/generic_duplicate_param_use2.stderr b/tests/ui/type-alias-impl-trait/generic_duplicate_param_use2.stderr index ef0e73f1481f..193f0c92c9de 100644 --- a/tests/ui/type-alias-impl-trait/generic_duplicate_param_use2.stderr +++ b/tests/ui/type-alias-impl-trait/generic_duplicate_param_use2.stderr @@ -2,7 +2,7 @@ error[E0277]: `T` doesn't implement `Debug` --> $DIR/generic_duplicate_param_use2.rs:12:5 | LL | t - | ^ `T` cannot be formatted using 
`{:?}` because it doesn't implement `Debug` + | ^ the trait `Debug` is not implemented for `T` | note: required by a bound in an opaque type --> $DIR/generic_duplicate_param_use2.rs:8:23 diff --git a/tests/ui/type-alias-impl-trait/generic_duplicate_param_use4.stderr b/tests/ui/type-alias-impl-trait/generic_duplicate_param_use4.stderr index 0932c72ff934..f0d1e93b0b76 100644 --- a/tests/ui/type-alias-impl-trait/generic_duplicate_param_use4.stderr +++ b/tests/ui/type-alias-impl-trait/generic_duplicate_param_use4.stderr @@ -2,7 +2,7 @@ error[E0277]: `U` doesn't implement `Debug` --> $DIR/generic_duplicate_param_use4.rs:12:5 | LL | u - | ^ `U` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^ the trait `Debug` is not implemented for `U` | note: required by a bound in an opaque type --> $DIR/generic_duplicate_param_use4.rs:8:23 diff --git a/tests/ui/type-alias-impl-trait/generic_underconstrained2.stderr b/tests/ui/type-alias-impl-trait/generic_underconstrained2.stderr index 429c3b9175a5..1e3c454a5bc2 100644 --- a/tests/ui/type-alias-impl-trait/generic_underconstrained2.stderr +++ b/tests/ui/type-alias-impl-trait/generic_underconstrained2.stderr @@ -2,7 +2,7 @@ error[E0277]: `U` doesn't implement `Debug` --> $DIR/generic_underconstrained2.rs:9:33 | LL | fn underconstrained(_: U) -> Underconstrained { - | ^^^^^^^^^^^^^^^^^^^ `U` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `U` | note: required by a bound on the type alias `Underconstrained` --> $DIR/generic_underconstrained2.rs:5:26 @@ -18,7 +18,7 @@ error[E0277]: `V` doesn't implement `Debug` --> $DIR/generic_underconstrained2.rs:19:43 | LL | fn underconstrained2(_: U, _: V) -> Underconstrained2 { - | ^^^^^^^^^^^^^^^^^^^^ `V` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `V` | note: required by a bound on the type alias 
`Underconstrained2` --> $DIR/generic_underconstrained2.rs:15:27 @@ -34,7 +34,7 @@ error[E0277]: `U` doesn't implement `Debug` --> $DIR/generic_underconstrained2.rs:9:33 | LL | fn underconstrained(_: U) -> Underconstrained { - | ^^^^^^^^^^^^^^^^^^^ `U` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `U` | note: required by a bound on the type alias `Underconstrained` --> $DIR/generic_underconstrained2.rs:5:26 @@ -51,7 +51,7 @@ error[E0277]: `V` doesn't implement `Debug` --> $DIR/generic_underconstrained2.rs:19:43 | LL | fn underconstrained2(_: U, _: V) -> Underconstrained2 { - | ^^^^^^^^^^^^^^^^^^^^ `V` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ^^^^^^^^^^^^^^^^^^^^ the trait `Debug` is not implemented for `V` | note: required by a bound on the type alias `Underconstrained2` --> $DIR/generic_underconstrained2.rs:15:27 diff --git a/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.stderr b/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.edition2015.stderr similarity index 93% rename from tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.stderr rename to tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.edition2015.stderr index 0bf9dccfad85..4f1e769bc6c4 100644 --- a/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.stderr +++ b/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.edition2015.stderr @@ -1,5 +1,5 @@ error[E0700]: hidden type for `impl PlusOne` captures lifetime that does not appear in bounds - --> $DIR/imply_bounds_from_bounds_param.rs:26:5 + --> $DIR/imply_bounds_from_bounds_param.rs:29:5 | LL | fn test<'a>(y: &'a mut i32) -> impl PlusOne { | -- ------------ opaque type defined here diff --git a/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.edition2024.stderr b/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.edition2024.stderr new file 
mode 100644 index 000000000000..a7135e8f05f4 --- /dev/null +++ b/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.edition2024.stderr @@ -0,0 +1,91 @@ +error[E0499]: cannot borrow `z` as mutable more than once at a time + --> $DIR/imply_bounds_from_bounds_param.rs:36:27 + | +LL | let mut thing = test(&mut z); + | ------ first mutable borrow occurs here +LL | let mut thing2 = test(&mut z); + | ^^^^^^ second mutable borrow occurs here +LL | thing.plus_one(); + | ----- first borrow later used here + | +note: this call may capture more lifetimes than intended, because Rust 2024 has adjusted the `impl Trait` lifetime capture rules + --> $DIR/imply_bounds_from_bounds_param.rs:35:21 + | +LL | let mut thing = test(&mut z); + | ^^^^^^^^^^^^ +help: use the precise capturing `use<...>` syntax to make the captures explicit + | +LL | fn test<'a>(y: &'a mut i32) -> impl PlusOne + use<> { + | +++++++ + +error[E0502]: cannot borrow `z` as immutable because it is also borrowed as mutable + --> $DIR/imply_bounds_from_bounds_param.rs:38:5 + | +LL | let mut thing = test(&mut z); + | ------ mutable borrow occurs here +... +LL | assert_eq!(z, 43); + | ^^^^^^^^^^^^^^^^^ immutable borrow occurs here +... 
+LL | thing.plus_one(); + | ----- mutable borrow later used here + | +note: this call may capture more lifetimes than intended, because Rust 2024 has adjusted the `impl Trait` lifetime capture rules + --> $DIR/imply_bounds_from_bounds_param.rs:35:21 + | +LL | let mut thing = test(&mut z); + | ^^^^^^^^^^^^ + = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) +help: use the precise capturing `use<...>` syntax to make the captures explicit + | +LL | fn test<'a>(y: &'a mut i32) -> impl PlusOne + use<> { + | +++++++ + +error[E0502]: cannot borrow `z` as immutable because it is also borrowed as mutable + --> $DIR/imply_bounds_from_bounds_param.rs:40:5 + | +LL | let mut thing = test(&mut z); + | ------ mutable borrow occurs here +... +LL | assert_eq!(z, 44); + | ^^^^^^^^^^^^^^^^^ immutable borrow occurs here +LL | thing.plus_one(); + | ----- mutable borrow later used here + | +note: this call may capture more lifetimes than intended, because Rust 2024 has adjusted the `impl Trait` lifetime capture rules + --> $DIR/imply_bounds_from_bounds_param.rs:35:21 + | +LL | let mut thing = test(&mut z); + | ^^^^^^^^^^^^ + = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) +help: use the precise capturing `use<...>` syntax to make the captures explicit + | +LL | fn test<'a>(y: &'a mut i32) -> impl PlusOne + use<> { + | +++++++ + +error[E0502]: cannot borrow `z` as immutable because it is also borrowed as mutable + --> $DIR/imply_bounds_from_bounds_param.rs:42:5 + | +LL | let mut thing = test(&mut z); + | ------ mutable borrow occurs here +... 
+LL | assert_eq!(z, 45); + | ^^^^^^^^^^^^^^^^^ immutable borrow occurs here +LL | } + | - mutable borrow might be used here, when `thing` is dropped and runs the destructor for type `impl PlusOne` + | +note: this call may capture more lifetimes than intended, because Rust 2024 has adjusted the `impl Trait` lifetime capture rules + --> $DIR/imply_bounds_from_bounds_param.rs:35:21 + | +LL | let mut thing = test(&mut z); + | ^^^^^^^^^^^^ + = note: this error originates in the macro `assert_eq` (in Nightly builds, run with -Z macro-backtrace for more info) +help: use the precise capturing `use<...>` syntax to make the captures explicit + | +LL | fn test<'a>(y: &'a mut i32) -> impl PlusOne + use<> { + | +++++++ + +error: aborting due to 4 previous errors + +Some errors have detailed explanations: E0499, E0502. +For more information about an error, try `rustc --explain E0499`. diff --git a/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.rs b/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.rs index 5d5645077c2a..672019ba73ac 100644 --- a/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.rs +++ b/tests/ui/type-alias-impl-trait/imply_bounds_from_bounds_param.rs @@ -1,3 +1,6 @@ +//@revisions: edition2015 edition2024 +//@[edition2015] edition:2015 +//@[edition2024] edition:2024 #![feature(impl_trait_in_assoc_type)] trait Callable { @@ -24,17 +27,17 @@ impl Callable for T { fn test<'a>(y: &'a mut i32) -> impl PlusOne { <&'a mut i32 as Callable>::call(y) - //~^ ERROR hidden type for `impl PlusOne` captures lifetime that does not appear in bounds + //[edition2015]~^ ERROR hidden type for `impl PlusOne` captures lifetime that does not appear in bounds } fn main() { let mut z = 42; let mut thing = test(&mut z); - let mut thing2 = test(&mut z); + let mut thing2 = test(&mut z); //[edition2024]~ ERROR cannot borrow `z` as mutable more than once thing.plus_one(); - assert_eq!(z, 43); + assert_eq!(z, 43); //[edition2024]~ ERROR cannot borrow `z` 
as immutable thing2.plus_one(); - assert_eq!(z, 44); + assert_eq!(z, 44); //[edition2024]~ ERROR cannot borrow `z` as immutable thing.plus_one(); - assert_eq!(z, 45); + assert_eq!(z, 45); //[edition2024]~ ERROR cannot borrow `z` as immutable } diff --git a/tests/ui/type-alias-impl-trait/nested.stderr b/tests/ui/type-alias-impl-trait/nested.stderr index 59911f65a234..f72830b864d1 100644 --- a/tests/ui/type-alias-impl-trait/nested.stderr +++ b/tests/ui/type-alias-impl-trait/nested.stderr @@ -15,7 +15,9 @@ error[E0277]: `Bar` doesn't implement `Debug` --> $DIR/nested.rs:17:22 | LL | println!("{:?}", bar()); - | ^^^^^ `Bar` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | ---- ^^^^^ `Bar` cannot be formatted using `{:?}` because it doesn't implement `Debug` + | | + | required by this formatting parameter | = help: the trait `Debug` is not implemented for `Bar` = note: this error originates in the macro `$crate::format_args_nl` which comes from the expansion of the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/tests/ui/type/binding-assigned-block-without-tail-expression.stderr b/tests/ui/type/binding-assigned-block-without-tail-expression.stderr index 3e96d7f317b4..ff34facf3892 100644 --- a/tests/ui/type/binding-assigned-block-without-tail-expression.stderr +++ b/tests/ui/type/binding-assigned-block-without-tail-expression.stderr @@ -5,7 +5,9 @@ LL | 42; | - help: remove this semicolon ... LL | println!("{}", x); - | ^ `()` cannot be formatted with the default formatter + | -- ^ `()` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `()` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead @@ -18,7 +20,9 @@ LL | let y = {}; | -- this empty block is missing a tail expression ... 
LL | println!("{}", y); - | ^ `()` cannot be formatted with the default formatter + | -- ^ `()` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `()` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead @@ -31,7 +35,9 @@ LL | "hi"; | - help: remove this semicolon ... LL | println!("{}", z); - | ^ `()` cannot be formatted with the default formatter + | -- ^ `()` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `()` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead @@ -47,7 +53,9 @@ LL | | }; | |_____- this block is missing a tail expression ... LL | println!("{}", s); - | ^ `()` cannot be formatted with the default formatter + | -- ^ `()` cannot be formatted with the default formatter + | | + | required by this formatting parameter | = help: the trait `std::fmt::Display` is not implemented for `()` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead diff --git a/tests/ui/type/option-ref-advice.rs b/tests/ui/type/option-ref-advice.rs index 2dcee5a2eb91..435b15d01e3c 100644 --- a/tests/ui/type/option-ref-advice.rs +++ b/tests/ui/type/option-ref-advice.rs @@ -3,9 +3,9 @@ fn takes_option(_arg: Option<&String>) {} fn main() { - takes_option(&None); //~ ERROR 6:18: 6:23: mismatched types [E0308] + takes_option(&None); //~ ERROR mismatched types [E0308] let x = String::from("x"); let res = Some(x); - takes_option(&res); //~ ERROR 10:18: 10:22: mismatched types [E0308] + takes_option(&res); //~ ERROR mismatched types [E0308] } diff --git a/tests/ui/type/pattern_types/derives_fail.stderr b/tests/ui/type/pattern_types/derives_fail.stderr index 78bef726341d..6b2e27494f0e 100644 --- a/tests/ui/type/pattern_types/derives_fail.stderr +++ 
b/tests/ui/type/pattern_types/derives_fail.stderr @@ -26,9 +26,7 @@ LL | #[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash, Default) | ----- in this derive macro expansion LL | #[repr(transparent)] LL | struct Nanoseconds(NanoI32); - | ^^^^^^^ `(i32) is 0..=999999999` cannot be formatted using `{:?}` because it doesn't implement `Debug` - | - = help: the trait `Debug` is not implemented for `(i32) is 0..=999999999` + | ^^^^^^^ the trait `Debug` is not implemented for `(i32) is 0..=999999999` error[E0277]: the trait bound `(i32) is 0..=999999999: Ord` is not satisfied --> $DIR/derives_fail.rs:11:20 diff --git a/tests/ui/typeck/consider-borrowing-141810-1.stderr b/tests/ui/typeck/consider-borrowing-141810-1.stderr index 9291721ac712..35ca6793eee0 100644 --- a/tests/ui/typeck/consider-borrowing-141810-1.stderr +++ b/tests/ui/typeck/consider-borrowing-141810-1.stderr @@ -1,20 +1,17 @@ error[E0308]: `if` and `else` have incompatible types --> $DIR/consider-borrowing-141810-1.rs:4:12 | -LL | let x = if true { - | ______________- -LL | | &true - | | ----- expected because of this -LL | | } else if false { - | | ____________^ -LL | || true -LL | || } else { -LL | || true -LL | || }; - | || ^ - | ||_____| - | |_____`if` and `else` have incompatible types - | expected `&bool`, found `bool` +LL | let x = if true { + | ------- `if` and `else` have incompatible types +LL | &true + | ----- expected because of this +LL | } else if false { + | ____________^ +LL | | true +LL | | } else { +LL | | true +LL | | }; + | |_____^ expected `&bool`, found `bool` | help: consider borrowing here | diff --git a/tests/ui/typeck/consider-borrowing-141810-2.stderr b/tests/ui/typeck/consider-borrowing-141810-2.stderr index dd229897283b..44ecb5a4a945 100644 --- a/tests/ui/typeck/consider-borrowing-141810-2.stderr +++ b/tests/ui/typeck/consider-borrowing-141810-2.stderr @@ -1,18 +1,15 @@ error[E0308]: `if` and `else` have incompatible types --> 
$DIR/consider-borrowing-141810-2.rs:4:12 | -LL | let x = if true { - | ______________- -LL | | &() - | | --- expected because of this -LL | | } else if false { - | | ____________^ -LL | || } else { -LL | || }; - | || ^ - | ||_____| - | |_____`if` and `else` have incompatible types - | expected `&()`, found `()` +LL | let x = if true { + | ------- `if` and `else` have incompatible types +LL | &() + | --- expected because of this +LL | } else if false { + | ____________^ +LL | | } else { +LL | | }; + | |_____^ expected `&()`, found `()` error: aborting due to 1 previous error diff --git a/tests/ui/typeck/consider-borrowing-141810-3.stderr b/tests/ui/typeck/consider-borrowing-141810-3.stderr index 0b0c5f191a0d..3adf8ba1a892 100644 --- a/tests/ui/typeck/consider-borrowing-141810-3.stderr +++ b/tests/ui/typeck/consider-borrowing-141810-3.stderr @@ -1,18 +1,15 @@ error[E0308]: `if` and `else` have incompatible types --> $DIR/consider-borrowing-141810-3.rs:4:12 | -LL | let x = if true { - | ______________- -LL | | &() - | | --- expected because of this -LL | | } else if false { - | | ____________^ -LL | || -LL | || }; - | || ^ - | ||_____| - | |_____`if` and `else` have incompatible types - | expected `&()`, found `()` +LL | let x = if true { + | ------- `if` and `else` have incompatible types +LL | &() + | --- expected because of this +LL | } else if false { + | ____________^ +LL | | +LL | | }; + | |_____^ expected `&()`, found `()` | = note: `if` expressions without `else` evaluate to `()` = note: consider adding an `else` block that evaluates to the expected type diff --git a/tests/ui/typeck/inference-method-chain-diverging-fallback.rs b/tests/ui/typeck/inference-method-chain-diverging-fallback.rs new file mode 100644 index 000000000000..8f549b7d9d68 --- /dev/null +++ b/tests/ui/typeck/inference-method-chain-diverging-fallback.rs @@ -0,0 +1,19 @@ +//! Test type inference in method chains with diverging fallback. +//! 
Verifies that closure type in `unwrap_or_else` is properly inferred +//! when chained with other combinators and contains a diverging path. + +//@ run-pass + +fn produce() -> Result<&'static str, T> { + Ok("22") +} + +fn main() { + // The closure's error type `T` must unify with `ParseIntError`, + // while the success type must be `usize` (from parse()) + let x: usize = produce() + .and_then(|x| x.parse::()) // Explicit turbofish for clarity + .unwrap_or_else(|_| panic!()); // Diverging fallback + + assert_eq!(x, 22); +} diff --git a/tests/ui/typeck/issue-100246.rs b/tests/ui/typeck/issue-100246.rs index 8f0b34bab0c8..e05bb2a1362a 100644 --- a/tests/ui/typeck/issue-100246.rs +++ b/tests/ui/typeck/issue-100246.rs @@ -25,6 +25,6 @@ fn downcast<'a, W: ?Sized>() -> std::io::Result<&'a W> { struct Other; fn main() -> std::io::Result<()> { - let other: Other = downcast()?;//~ERROR 28:24: 28:35: `?` operator has incompatible types + let other: Other = downcast()?; //~ ERROR `?` operator has incompatible types Ok(()) } diff --git a/tests/ui/typeck/issue-74086.rs b/tests/ui/typeck/issue-74086.rs index 9b7c0d7cc6e2..1993cc7db350 100644 --- a/tests/ui/typeck/issue-74086.rs +++ b/tests/ui/typeck/issue-74086.rs @@ -1,5 +1,4 @@ fn main() { static BUG: fn(_) -> u8 = |_| 8; - //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions [E0121] - //~| ERROR the placeholder `_` is not allowed within types on item signatures for static items + //~^ ERROR the placeholder `_` is not allowed within types on item signatures for static items } diff --git a/tests/ui/typeck/issue-74086.stderr b/tests/ui/typeck/issue-74086.stderr index 95ebf9a906c1..25f454ac0c32 100644 --- a/tests/ui/typeck/issue-74086.stderr +++ b/tests/ui/typeck/issue-74086.stderr @@ -1,15 +1,9 @@ -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/issue-74086.rs:2:20 - | -LL | static BUG: fn(_) -> u8 = |_| 8; - | ^ not allowed in type 
signatures - error[E0121]: the placeholder `_` is not allowed within types on item signatures for static items --> $DIR/issue-74086.rs:2:20 | LL | static BUG: fn(_) -> u8 = |_| 8; | ^ not allowed in type signatures -error: aborting due to 2 previous errors +error: aborting due to 1 previous error For more information about this error, try `rustc --explain E0121`. diff --git a/tests/ui/typeck/issue-81885.rs b/tests/ui/typeck/issue-81885.rs index fb3949478a4d..d73c77b8f3a2 100644 --- a/tests/ui/typeck/issue-81885.rs +++ b/tests/ui/typeck/issue-81885.rs @@ -1,9 +1,7 @@ const TEST4: fn() -> _ = 42; - //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions - //~| ERROR the placeholder `_` is not allowed within types on item signatures for constant items +//~^ ERROR the placeholder `_` is not allowed within types on item signatures for constant items fn main() { const TEST5: fn() -> _ = 42; - //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions - //~| ERROR the placeholder `_` is not allowed within types on item signatures for constant items + //~^ ERROR the placeholder `_` is not allowed within types on item signatures for constant items } diff --git a/tests/ui/typeck/issue-81885.stderr b/tests/ui/typeck/issue-81885.stderr index 91c08bd82350..25a6bb632ef1 100644 --- a/tests/ui/typeck/issue-81885.stderr +++ b/tests/ui/typeck/issue-81885.stderr @@ -1,27 +1,15 @@ -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/issue-81885.rs:1:22 - | -LL | const TEST4: fn() -> _ = 42; - | ^ not allowed in type signatures - error[E0121]: the placeholder `_` is not allowed within types on item signatures for constant items --> $DIR/issue-81885.rs:1:22 | LL | const TEST4: fn() -> _ = 42; | ^ not allowed in type signatures -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/issue-81885.rs:6:26 - | -LL | 
const TEST5: fn() -> _ = 42; - | ^ not allowed in type signatures - error[E0121]: the placeholder `_` is not allowed within types on item signatures for constant items - --> $DIR/issue-81885.rs:6:26 + --> $DIR/issue-81885.rs:5:26 | LL | const TEST5: fn() -> _ = 42; | ^ not allowed in type signatures -error: aborting due to 4 previous errors +error: aborting due to 2 previous errors For more information about this error, try `rustc --explain E0121`. diff --git a/tests/ui/typeck/issue-89275.rs b/tests/ui/typeck/issue-89275.rs index b91c00175487..6e4211de1857 100644 --- a/tests/ui/typeck/issue-89275.rs +++ b/tests/ui/typeck/issue-89275.rs @@ -25,5 +25,5 @@ fn downcast<'a, W: ?Sized>() -> &'a W { struct Other; fn main() { - let other: &mut Other = downcast();//~ERROR 28:29: 28:39: mismatched types [E0308] + let other: &mut Other = downcast();//~ ERROR mismatched types [E0308] } diff --git a/tests/ui/typeck/point-at-type-param-in-path-expr.stderr b/tests/ui/typeck/point-at-type-param-in-path-expr.stderr index 14642b25c994..3701b3e37988 100644 --- a/tests/ui/typeck/point-at-type-param-in-path-expr.stderr +++ b/tests/ui/typeck/point-at-type-param-in-path-expr.stderr @@ -2,10 +2,8 @@ error[E0277]: `()` doesn't implement `std::fmt::Display` --> $DIR/point-at-type-param-in-path-expr.rs:4:19 | LL | let x = foo::<()>; - | ^^ `()` cannot be formatted with the default formatter + | ^^ the trait `std::fmt::Display` is not implemented for `()` | - = help: the trait `std::fmt::Display` is not implemented for `()` - = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead note: required by a bound in `foo` --> $DIR/point-at-type-param-in-path-expr.rs:1:11 | diff --git a/tests/ui/typeck/type-placeholder-fn-in-const.rs b/tests/ui/typeck/type-placeholder-fn-in-const.rs index bbb95a5798af..1600534dd4f8 100644 --- a/tests/ui/typeck/type-placeholder-fn-in-const.rs +++ b/tests/ui/typeck/type-placeholder-fn-in-const.rs @@ -2,14 +2,12 @@ struct MyStruct; 
trait Test { const TEST: fn() -> _; - //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for functions [E0121] - //~| ERROR: the placeholder `_` is not allowed within types on item signatures for associated constants [E0121] + //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for associated constants [E0121] } impl Test for MyStruct { const TEST: fn() -> _ = 42; - //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for functions [E0121] - //~| ERROR: the placeholder `_` is not allowed within types on item signatures for associated constants [E0121] + //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for associated constants [E0121] } fn main() {} diff --git a/tests/ui/typeck/type-placeholder-fn-in-const.stderr b/tests/ui/typeck/type-placeholder-fn-in-const.stderr index 92b47bd4781c..a29752948fe3 100644 --- a/tests/ui/typeck/type-placeholder-fn-in-const.stderr +++ b/tests/ui/typeck/type-placeholder-fn-in-const.stderr @@ -1,17 +1,5 @@ -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/type-placeholder-fn-in-const.rs:10:25 - | -LL | const TEST: fn() -> _ = 42; - | ^ not allowed in type signatures - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/type-placeholder-fn-in-const.rs:4:25 - | -LL | const TEST: fn() -> _; - | ^ not allowed in type signatures - error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/type-placeholder-fn-in-const.rs:10:25 + --> $DIR/type-placeholder-fn-in-const.rs:9:25 | LL | const TEST: fn() -> _ = 42; | ^ not allowed in type signatures @@ -22,6 +10,6 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures LL | const TEST: fn() -> _; | ^ not allowed in type signatures -error: aborting due to 4 previous errors +error: aborting due to 
2 previous errors For more information about this error, try `rustc --explain E0121`. diff --git a/tests/ui/typeck/typeck_type_placeholder_item.rs b/tests/ui/typeck/typeck_type_placeholder_item.rs index d7351f2e51a8..dc7903619193 100644 --- a/tests/ui/typeck/typeck_type_placeholder_item.rs +++ b/tests/ui/typeck/typeck_type_placeholder_item.rs @@ -33,7 +33,6 @@ fn test7(x: _) { let _x: usize = x; } fn test8(_f: fn() -> _) { } //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions -//~^^ ERROR the placeholder `_` is not allowed within types on item signatures for functions struct Test9; @@ -67,6 +66,8 @@ struct Test10 { a: _, //~^ ERROR the placeholder `_` is not allowed within types on item signatures for structs b: (_, _), + //~^ ERROR the placeholder `_` is not allowed within types on item signatures for structs + //~| ERROR the placeholder `_` is not allowed within types on item signatures for structs } pub fn main() { @@ -99,7 +100,6 @@ pub fn main() { fn fn_test8(_f: fn() -> _) { } //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions - //~^^ ERROR the placeholder `_` is not allowed within types on item signatures for functions struct FnTest9; @@ -123,6 +123,8 @@ pub fn main() { a: _, //~^ ERROR the placeholder `_` is not allowed within types on item signatures for structs b: (_, _), + //~^ ERROR the placeholder `_` is not allowed within types on item signatures for structs + //~| ERROR the placeholder `_` is not allowed within types on item signatures for structs } fn fn_test11(_: _) -> (_, _) { panic!() } @@ -141,12 +143,14 @@ trait T { //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions fn method_test2(&self, x: _) -> _; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions + //~| ERROR the placeholder `_` is not allowed within types on item signatures for functions fn method_test3(&self) -> _; //~^ ERROR 
the placeholder `_` is not allowed within types on item signatures for functions fn assoc_fn_test1(x: _); //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions fn assoc_fn_test2(x: _) -> _; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions + //~| ERROR the placeholder `_` is not allowed within types on item signatures for functions fn assoc_fn_test3() -> _; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions } @@ -158,9 +162,11 @@ trait BadTrait<_> {} //~^ ERROR expected identifier, found reserved identifier `_` impl BadTrait<_> for BadStruct<_> {} //~^ ERROR the placeholder `_` is not allowed within types on item signatures for implementations +//~| ERROR the placeholder `_` is not allowed within types on item signatures for implementations fn impl_trait() -> impl BadTrait<_> { -//~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions +//~^ ERROR the placeholder `_` is not allowed within types on item signatures for opaque types +//~| ERROR the placeholder `_` is not allowed within types on item signatures for opaque types unimplemented!() } @@ -180,7 +186,8 @@ struct Struct; trait Trait {} impl Trait for Struct {} type Y = impl Trait<_>; -//~^ ERROR the placeholder `_` is not allowed within types on item signatures for type aliases +//~^ ERROR the placeholder `_` is not allowed within types on item signatures for opaque types +//~| ERROR the placeholder `_` is not allowed within types on item signatures for opaque types #[define_opaque(Y)] fn foo() -> Y { Struct @@ -197,6 +204,7 @@ trait Qux { // type E: _; // FIXME: make the parser propagate the existence of `B` type F: std::ops::Fn(_); //~^ ERROR the placeholder `_` is not allowed within types on item signatures for associated types + //~| ERROR the placeholder `_` is not allowed within types on item signatures for associated types } impl Qux for Struct { 
//~^ ERROR not all trait items implemented, missing: `F` diff --git a/tests/ui/typeck/typeck_type_placeholder_item.stderr b/tests/ui/typeck/typeck_type_placeholder_item.stderr index 7184244f5dc9..53476f6c8074 100644 --- a/tests/ui/typeck/typeck_type_placeholder_item.stderr +++ b/tests/ui/typeck/typeck_type_placeholder_item.stderr @@ -1,35 +1,35 @@ error: expected identifier, found reserved identifier `_` - --> $DIR/typeck_type_placeholder_item.rs:154:18 + --> $DIR/typeck_type_placeholder_item.rs:158:18 | LL | struct BadStruct<_>(_); | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/typeck_type_placeholder_item.rs:157:16 + --> $DIR/typeck_type_placeholder_item.rs:161:16 | LL | trait BadTrait<_> {} | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/typeck_type_placeholder_item.rs:167:19 + --> $DIR/typeck_type_placeholder_item.rs:173:19 | LL | struct BadStruct1<_, _>(_); | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/typeck_type_placeholder_item.rs:167:22 + --> $DIR/typeck_type_placeholder_item.rs:173:22 | LL | struct BadStruct1<_, _>(_); | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/typeck_type_placeholder_item.rs:172:19 + --> $DIR/typeck_type_placeholder_item.rs:178:19 | LL | struct BadStruct2<_, T>(_, T); | ^ expected identifier, found reserved identifier error: associated constant in `impl` without body - --> $DIR/typeck_type_placeholder_item.rs:207:5 + --> $DIR/typeck_type_placeholder_item.rs:215:5 | LL | const C: _; | ^^^^^^^^^^- @@ -37,7 +37,7 @@ LL | const C: _; | help: provide a definition for the constant: `= ;` error[E0403]: the name `_` is already used for a generic parameter in this item's generic parameters - --> $DIR/typeck_type_placeholder_item.rs:167:22 + --> 
$DIR/typeck_type_placeholder_item.rs:173:22 | LL | struct BadStruct1<_, _>(_); | - ^ already used @@ -106,72 +106,87 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures | LL | fn test6(_: _) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn test6(_: _) { } -LL + fn test6(_: T) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:25:18 | LL | fn test6_b(_: _, _: T) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn test6_b(_: _, _: T) { } -LL + fn test6_b(_: U, _: T) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:28:30 | LL | fn test6_c(_: _, _: (T, K, L, A, B)) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn test6_c(_: _, _: (T, K, L, A, B)) { } -LL + fn test6_c(_: U, _: (T, K, L, A, B)) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:31:13 | LL | fn test7(x: _) { let _x: usize = x; } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn test7(x: _) { let _x: usize = x; } -LL + fn test7(x: T) { let _x: usize = x; } - | - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:34:22 - | -LL | fn test8(_f: fn() -> _) { } - | ^ - | | - | not allowed in type signatures - | help: use type parameters instead: `T` error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:34:22 | LL | fn test8(_f: fn() -> _) { } | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> 
$DIR/typeck_type_placeholder_item.rs:66:8 | -help: use type parameters instead +LL | a: _, + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:68:9 | -LL - fn test8(_f: fn() -> _) { } -LL + fn test8(_f: fn() -> T) { } +LL | b: (_, _), + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:68:12 | +LL | b: (_, _), + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:123:12 + | +LL | a: _, + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:125:13 + | +LL | b: (_, _), + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:125:16 + | +LL | b: (_, _), + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:158:21 + | +LL | struct BadStruct<_>(_); + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:173:25 + | +LL | struct BadStruct1<_, _>(_); + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs + --> $DIR/typeck_type_placeholder_item.rs:178:25 + | +LL | struct BadStruct2<_, T>(_, T); + | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> 
$DIR/typeck_type_placeholder_item.rs:48:26 + --> $DIR/typeck_type_placeholder_item.rs:47:26 | LL | fn test11(x: &usize) -> &_ { | -^ @@ -180,7 +195,7 @@ LL | fn test11(x: &usize) -> &_ { | help: replace with the correct return type: `&&usize` error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:53:52 + --> $DIR/typeck_type_placeholder_item.rs:52:52 | LL | unsafe fn test12(x: *const usize) -> *const *const _ { | --------------^ @@ -189,7 +204,7 @@ LL | unsafe fn test12(x: *const usize) -> *const *const _ { | help: replace with the correct return type: `*const *const usize` error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:59:24 + --> $DIR/typeck_type_placeholder_item.rs:58:24 | LL | fn clone(&self) -> _ { Test9 } | ^ not allowed in type signatures @@ -201,7 +216,7 @@ LL + fn clone(&self) -> Test9 { Test9 } | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:62:37 + --> $DIR/typeck_type_placeholder_item.rs:61:37 | LL | fn clone_from(&mut self, other: _) { *self = Test9; } | ^ not allowed in type signatures @@ -212,33 +227,14 @@ LL - fn clone_from(&mut self, other: _) { *self = Test9; } LL + fn clone_from(&mut self, other: &Test9) { *self = Test9; } | -error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs - --> $DIR/typeck_type_placeholder_item.rs:67:8 - | -LL | a: _, - | ^ not allowed in type signatures -LL | -LL | b: (_, _), - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures - | -help: use type parameters instead - | -LL ~ struct Test10 { -LL ~ a: T, -LL | -LL ~ b: (T, T), - | - error: missing type for `static` item - --> $DIR/typeck_type_placeholder_item.rs:73:13 + --> $DIR/typeck_type_placeholder_item.rs:74:13 | LL | static A = 42; | ^ 
help: provide a type for the static variable: `: i32` error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/typeck_type_placeholder_item.rs:75:15 + --> $DIR/typeck_type_placeholder_item.rs:76:15 | LL | static B: _ = 42; | ^ not allowed in type signatures @@ -250,7 +246,7 @@ LL + static B: i32 = 42; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/typeck_type_placeholder_item.rs:77:22 + --> $DIR/typeck_type_placeholder_item.rs:78:22 | LL | static C: Option<_> = Some(42); | ^ not allowed in type signatures @@ -262,7 +258,7 @@ LL + static C: Option = Some(42); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:79:21 + --> $DIR/typeck_type_placeholder_item.rs:80:21 | LL | fn fn_test() -> _ { 5 } | ^ @@ -271,7 +267,7 @@ LL | fn fn_test() -> _ { 5 } | help: replace with the correct return type: `i32` error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:82:23 + --> $DIR/typeck_type_placeholder_item.rs:83:23 | LL | fn fn_test2() -> (_, _) { (5, 5) } | -^--^- @@ -281,7 +277,7 @@ LL | fn fn_test2() -> (_, _) { (5, 5) } | help: replace with the correct return type: `(i32, i32)` error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/typeck_type_placeholder_item.rs:85:22 + --> $DIR/typeck_type_placeholder_item.rs:86:22 | LL | static FN_TEST3: _ = "test"; | ^ not allowed in type signatures @@ -293,7 +289,7 @@ LL + static FN_TEST3: &str = "test"; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/typeck_type_placeholder_item.rs:88:22 + --> $DIR/typeck_type_placeholder_item.rs:89:22 | LL | static FN_TEST4: _ = 145; | ^ not allowed in type signatures @@ -305,7 
+301,7 @@ LL + static FN_TEST4: i32 = 145; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/typeck_type_placeholder_item.rs:91:23 + --> $DIR/typeck_type_placeholder_item.rs:92:23 | LL | static FN_TEST5: (_, _) = (1, 2); | ^ ^ not allowed in type signatures @@ -319,49 +315,22 @@ LL + static FN_TEST5: (i32, i32) = (1, 2); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:94:20 + --> $DIR/typeck_type_placeholder_item.rs:95:20 | LL | fn fn_test6(_: _) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn fn_test6(_: _) { } -LL + fn fn_test6(_: T) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:97:20 + --> $DIR/typeck_type_placeholder_item.rs:98:20 | LL | fn fn_test7(x: _) { let _x: usize = x; } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn fn_test7(x: _) { let _x: usize = x; } -LL + fn fn_test7(x: T) { let _x: usize = x; } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:100:29 - | -LL | fn fn_test8(_f: fn() -> _) { } - | ^ - | | - | not allowed in type signatures - | help: use type parameters instead: `T` - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:100:29 + --> $DIR/typeck_type_placeholder_item.rs:101:29 | LL | fn fn_test8(_f: fn() -> _) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn fn_test8(_f: fn() -> _) { } -LL + fn fn_test8(_f: fn() -> T) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:115:28 @@ -387,33 +356,14 @@ 
LL - fn clone_from(&mut self, other: _) { *self = FnTest9; } LL + fn clone_from(&mut self, other: &FnTest9) { *self = FnTest9; } | -error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs - --> $DIR/typeck_type_placeholder_item.rs:123:12 - | -LL | a: _, - | ^ not allowed in type signatures -LL | -LL | b: (_, _), - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures - | -help: use type parameters instead - | -LL ~ struct FnTest10 { -LL ~ a: T, -LL | -LL ~ b: (T, T), - | - error[E0282]: type annotations needed - --> $DIR/typeck_type_placeholder_item.rs:128:21 + --> $DIR/typeck_type_placeholder_item.rs:130:21 | LL | fn fn_test11(_: _) -> (_, _) { panic!() } | ^ cannot infer type error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:128:28 + --> $DIR/typeck_type_placeholder_item.rs:130:28 | LL | fn fn_test11(_: _) -> (_, _) { panic!() } | ^ ^ not allowed in type signatures @@ -421,7 +371,7 @@ LL | fn fn_test11(_: _) -> (_, _) { panic!() } | not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:132:30 + --> $DIR/typeck_type_placeholder_item.rs:134:30 | LL | fn fn_test12(x: i32) -> (_, _) { (x, x) } | -^--^- @@ -431,7 +381,7 @@ LL | fn fn_test12(x: i32) -> (_, _) { (x, x) } | help: replace with the correct return type: `(i32, i32)` error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:135:33 + --> $DIR/typeck_type_placeholder_item.rs:137:33 | LL | fn fn_test13(x: _) -> (i32, _) { (x, x) } | ------^- @@ -439,152 +389,116 @@ LL | fn fn_test13(x: _) -> (i32, _) { (x, x) } | | not allowed in type signatures | help: replace with the correct return type: `(i32, i32)` -error[E0121]: the placeholder `_` is not allowed within types 
on item signatures for structs - --> $DIR/typeck_type_placeholder_item.rs:154:21 - | -LL | struct BadStruct<_>(_); - | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - struct BadStruct<_>(_); -LL + struct BadStruct(T); - | - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:140:31 - | -LL | fn method_test1(&self, x: _); - | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn method_test1(&self, x: _); -LL + fn method_test1(&self, x: T); - | - error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:142:31 | -LL | fn method_test2(&self, x: _) -> _; - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures - | -help: use type parameters instead - | -LL - fn method_test2(&self, x: _) -> _; -LL + fn method_test2(&self, x: T) -> T; - | +LL | fn method_test1(&self, x: _); + | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions --> $DIR/typeck_type_placeholder_item.rs:144:31 | -LL | fn method_test3(&self) -> _; +LL | fn method_test2(&self, x: _) -> _; | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn method_test3(&self) -> _; -LL + fn method_test3(&self) -> T; - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:146:26 + --> $DIR/typeck_type_placeholder_item.rs:144:37 + | +LL | fn method_test2(&self, x: _) -> _; + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/typeck_type_placeholder_item.rs:147:31 + | +LL | fn method_test3(&self) -> _; + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed 
within types on item signatures for functions + --> $DIR/typeck_type_placeholder_item.rs:149:26 | LL | fn assoc_fn_test1(x: _); | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn assoc_fn_test1(x: _); -LL + fn assoc_fn_test1(x: T); - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:148:26 + --> $DIR/typeck_type_placeholder_item.rs:151:26 | LL | fn assoc_fn_test2(x: _) -> _; - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures - | -help: use type parameters instead - | -LL - fn assoc_fn_test2(x: _) -> _; -LL + fn assoc_fn_test2(x: T) -> T; - | + | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:150:28 + --> $DIR/typeck_type_placeholder_item.rs:151:32 + | +LL | fn assoc_fn_test2(x: _) -> _; + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions + --> $DIR/typeck_type_placeholder_item.rs:154:28 | LL | fn assoc_fn_test3() -> _; | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn assoc_fn_test3() -> _; -LL + fn assoc_fn_test3() -> T; - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for implementations - --> $DIR/typeck_type_placeholder_item.rs:159:15 + --> $DIR/typeck_type_placeholder_item.rs:163:32 | LL | impl BadTrait<_> for BadStruct<_> {} - | ^ ^ not allowed in type signatures - | | - | not allowed in type signatures + | ^ not allowed in type signatures -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item.rs:162:34 +error[E0121]: the placeholder `_` is not allowed within types on item signatures for implementations + --> $DIR/typeck_type_placeholder_item.rs:163:15 
+ | +LL | impl BadTrait<_> for BadStruct<_> {} + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/typeck_type_placeholder_item.rs:167:34 | LL | fn impl_trait() -> impl BadTrait<_> { | ^ not allowed in type signatures -error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs - --> $DIR/typeck_type_placeholder_item.rs:167:25 - | -LL | struct BadStruct1<_, _>(_); - | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - struct BadStruct1<_, _>(_); -LL + struct BadStruct1(T); - | - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for structs - --> $DIR/typeck_type_placeholder_item.rs:172:25 - | -LL | struct BadStruct2<_, T>(_, T); - | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - struct BadStruct2<_, T>(_, T); -LL + struct BadStruct2(U, T); - | - error[E0121]: the placeholder `_` is not allowed within types on item signatures for type aliases - --> $DIR/typeck_type_placeholder_item.rs:176:14 + --> $DIR/typeck_type_placeholder_item.rs:182:14 | LL | type X = Box<_>; | ^ not allowed in type signatures -error[E0121]: the placeholder `_` is not allowed within types on item signatures for type aliases - --> $DIR/typeck_type_placeholder_item.rs:182:21 +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/typeck_type_placeholder_item.rs:188:21 | LL | type Y = impl Trait<_>; | ^ not allowed in type signatures +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types + --> $DIR/typeck_type_placeholder_item.rs:198:14 + | +LL | type B = _; + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types + --> $DIR/typeck_type_placeholder_item.rs:211:14 + | +LL | type A = _; + | ^ 
not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types + --> $DIR/typeck_type_placeholder_item.rs:213:14 + | +LL | type B = _; + | ^ not allowed in type signatures + error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/typeck_type_placeholder_item.rs:207:14 + --> $DIR/typeck_type_placeholder_item.rs:200:14 | LL | const C: _; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/typeck_type_placeholder_item.rs:195:14 + --> $DIR/typeck_type_placeholder_item.rs:215:14 + | +LL | const C: _; + | ^ not allowed in type signatures + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants + --> $DIR/typeck_type_placeholder_item.rs:202:14 | LL | const D: _ = 42; | ^ not allowed in type signatures @@ -596,13 +510,13 @@ LL + const D: i32 = 42; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/typeck_type_placeholder_item.rs:210:14 + --> $DIR/typeck_type_placeholder_item.rs:218:14 | LL | const D: _ = 42; | ^ not allowed in type signatures error[E0046]: not all trait items implemented, missing: `F` - --> $DIR/typeck_type_placeholder_item.rs:201:1 + --> $DIR/typeck_type_placeholder_item.rs:209:1 | LL | type F: std::ops::Fn(_); | ----------------------- `F` from trait @@ -611,7 +525,7 @@ LL | impl Qux for Struct { | ^^^^^^^^^^^^^^^^^^^ missing `F` in implementation error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:218:31 + --> $DIR/typeck_type_placeholder_item.rs:226:31 | LL | fn value() -> Option<&'static _> { | ----------------^- @@ -620,7 +534,7 @@ LL | fn value() -> Option<&'static _> { | help: replace with the correct return type: 
`Option<&'static u8>` error[E0121]: the placeholder `_` is not allowed within types on item signatures for constants - --> $DIR/typeck_type_placeholder_item.rs:223:17 + --> $DIR/typeck_type_placeholder_item.rs:231:17 | LL | const _: Option<_> = map(value); | ^ not allowed in type signatures @@ -632,7 +546,7 @@ LL + const _: Option = map(value); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:227:31 + --> $DIR/typeck_type_placeholder_item.rs:235:31 | LL | fn evens_squared(n: usize) -> _ { | ^ @@ -641,19 +555,19 @@ LL | fn evens_squared(n: usize) -> _ { | help: replace with an appropriate return type: `impl Iterator` error[E0121]: the placeholder `_` is not allowed within types on item signatures for constants - --> $DIR/typeck_type_placeholder_item.rs:232:10 + --> $DIR/typeck_type_placeholder_item.rs:240:10 | LL | const _: _ = (1..10).filter(|x| x % 2 == 0).map(|x| x * x); | ^ not allowed in type signatures | -note: however, the inferred type `Map, {closure@typeck_type_placeholder_item.rs:232:29}>, {closure@typeck_type_placeholder_item.rs:232:49}>` cannot be named - --> $DIR/typeck_type_placeholder_item.rs:232:14 +note: however, the inferred type `Map, {closure@typeck_type_placeholder_item.rs:240:29}>, {closure@typeck_type_placeholder_item.rs:240:49}>` cannot be named + --> $DIR/typeck_type_placeholder_item.rs:240:14 | LL | const _: _ = (1..10).filter(|x| x % 2 == 0).map(|x| x * x); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/typeck_type_placeholder_item.rs:41:24 + --> $DIR/typeck_type_placeholder_item.rs:40:24 | LL | fn test9(&self) -> _ { () } | ^ @@ -662,16 +576,10 @@ LL | fn test9(&self) -> _ { () } | help: replace with the correct return type: `()` error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> 
$DIR/typeck_type_placeholder_item.rs:44:27 + --> $DIR/typeck_type_placeholder_item.rs:43:27 | LL | fn test10(&self, _x : _) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn test10(&self, _x : _) { } -LL + fn test10(&self, _x : T) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types --> $DIR/typeck_type_placeholder_item.rs:107:31 @@ -687,68 +595,62 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures | LL | fn fn_test10(&self, _x : _) { } | ^ not allowed in type signatures - | -help: use type parameters instead - | -LL - fn fn_test10(&self, _x : _) { } -LL + fn fn_test10(&self, _x : T) { } - | error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/typeck_type_placeholder_item.rs:203:14 - | -LL | type A = _; - | ^ not allowed in type signatures - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/typeck_type_placeholder_item.rs:205:14 - | -LL | type B = _; - | ^ not allowed in type signatures - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/typeck_type_placeholder_item.rs:191:14 - | -LL | type B = _; - | ^ not allowed in type signatures - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/typeck_type_placeholder_item.rs:193:14 - | -LL | const C: _; - | ^ not allowed in type signatures - -error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/typeck_type_placeholder_item.rs:198:26 + --> $DIR/typeck_type_placeholder_item.rs:205:26 | LL | type F: std::ops::Fn(_); | ^ not allowed in type signatures +error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types + --> 
$DIR/typeck_type_placeholder_item.rs:205:26 + | +LL | type F: std::ops::Fn(_); + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/typeck_type_placeholder_item.rs:167:34 + | +LL | fn impl_trait() -> impl BadTrait<_> { + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + +error[E0121]: the placeholder `_` is not allowed within types on item signatures for opaque types + --> $DIR/typeck_type_placeholder_item.rs:188:21 + | +LL | type Y = impl Trait<_>; + | ^ not allowed in type signatures + | + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + error[E0015]: cannot call non-const function `map::` in constants - --> $DIR/typeck_type_placeholder_item.rs:223:22 + --> $DIR/typeck_type_placeholder_item.rs:231:22 | LL | const _: Option<_> = map(value); | ^^^^^^^^^^ | = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const method ` as Iterator>::filter::<{closure@$DIR/typeck_type_placeholder_item.rs:232:29: 232:32}>` in constants - --> $DIR/typeck_type_placeholder_item.rs:232:22 +error[E0015]: cannot call non-const method ` as Iterator>::filter::<{closure@$DIR/typeck_type_placeholder_item.rs:240:29: 240:32}>` in constants + --> $DIR/typeck_type_placeholder_item.rs:240:22 | LL | const _: _ = (1..10).filter(|x| x % 2 == 0).map(|x| x * x); | ^^^^^^^^^^^^^^^^^^^^^^ | = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error[E0015]: cannot call non-const method `, {closure@$DIR/typeck_type_placeholder_item.rs:232:29: 232:32}> as Iterator>::map::` in constants - --> $DIR/typeck_type_placeholder_item.rs:232:45 +error[E0015]: cannot call non-const method `, 
{closure@$DIR/typeck_type_placeholder_item.rs:240:29: 240:32}> as Iterator>::map::` in constants + --> $DIR/typeck_type_placeholder_item.rs:240:45 | LL | const _: _ = (1..10).filter(|x| x % 2 == 0).map(|x| x * x); | ^^^^^^^^^^^^^^ | = note: calls in constants are limited to constant functions, tuple structs and tuple variants -error: aborting due to 75 previous errors +error: aborting due to 83 previous errors Some errors have detailed explanations: E0015, E0046, E0121, E0282, E0403. For more information about an error, try `rustc --explain E0015`. diff --git a/tests/ui/typeck/typeck_type_placeholder_item_help.rs b/tests/ui/typeck/typeck_type_placeholder_item_help.rs index ff6182588c72..ab433aaaf162 100644 --- a/tests/ui/typeck/typeck_type_placeholder_item_help.rs +++ b/tests/ui/typeck/typeck_type_placeholder_item_help.rs @@ -11,8 +11,7 @@ const TEST3: _ = Some(42); //~^ ERROR the placeholder `_` is not allowed within types on item signatures for constants const TEST4: fn() -> _ = 42; -//~^ ERROR the placeholder `_` is not allowed within types on item signatures for functions -//~| ERROR the placeholder `_` is not allowed within types on item signatures for constant items +//~^ ERROR the placeholder `_` is not allowed within types on item signatures for constant items trait Test5 { const TEST5: _ = 42; diff --git a/tests/ui/typeck/typeck_type_placeholder_item_help.stderr b/tests/ui/typeck/typeck_type_placeholder_item_help.stderr index afdd58e0a038..5066e2eaa523 100644 --- a/tests/ui/typeck/typeck_type_placeholder_item_help.stderr +++ b/tests/ui/typeck/typeck_type_placeholder_item_help.stderr @@ -31,12 +31,6 @@ LL - const TEST3: _ = Some(42); LL + const TEST3: Option = Some(42); | -error[E0121]: the placeholder `_` is not allowed within types on item signatures for functions - --> $DIR/typeck_type_placeholder_item_help.rs:13:22 - | -LL | const TEST4: fn() -> _ = 42; - | ^ not allowed in type signatures - error[E0121]: the placeholder `_` is not allowed within types 
on item signatures for constant items --> $DIR/typeck_type_placeholder_item_help.rs:13:22 | @@ -44,7 +38,7 @@ LL | const TEST4: fn() -> _ = 42; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/typeck_type_placeholder_item_help.rs:25:18 + --> $DIR/typeck_type_placeholder_item_help.rs:24:18 | LL | const TEST6: _ = 13; | ^ not allowed in type signatures @@ -56,7 +50,7 @@ LL + const TEST6: i32 = 13; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/typeck_type_placeholder_item_help.rs:18:18 + --> $DIR/typeck_type_placeholder_item_help.rs:17:18 | LL | const TEST5: _ = 42; | ^ not allowed in type signatures @@ -68,7 +62,7 @@ LL + const TEST5: i32 = 42; | error[E0308]: mismatched types - --> $DIR/typeck_type_placeholder_item_help.rs:30:28 + --> $DIR/typeck_type_placeholder_item_help.rs:29:28 | LL | let _: Option = test1(); | ------------- ^^^^^^^ expected `Option`, found `Option` @@ -79,7 +73,7 @@ LL | let _: Option = test1(); found enum `Option` error[E0308]: mismatched types - --> $DIR/typeck_type_placeholder_item_help.rs:31:18 + --> $DIR/typeck_type_placeholder_item_help.rs:30:18 | LL | let _: f64 = test1(); | --- ^^^^^^^ expected `f64`, found `Option` @@ -89,7 +83,7 @@ LL | let _: f64 = test1(); = note: expected type `f64` found enum `Option` -error: aborting due to 9 previous errors +error: aborting due to 8 previous errors Some errors have detailed explanations: E0121, E0308. For more information about an error, try `rustc --explain E0121`. 
diff --git a/tests/ui/underscore-imports/issue-110164.stderr b/tests/ui/underscore-imports/issue-110164.ed2015.stderr similarity index 56% rename from tests/ui/underscore-imports/issue-110164.stderr rename to tests/ui/underscore-imports/issue-110164.ed2015.stderr index d8a4b6bbb754..f34b5ab5dde7 100644 --- a/tests/ui/underscore-imports/issue-110164.stderr +++ b/tests/ui/underscore-imports/issue-110164.ed2015.stderr @@ -1,17 +1,17 @@ error: expected identifier, found reserved identifier `_` - --> $DIR/issue-110164.rs:5:5 + --> $DIR/issue-110164.rs:8:5 | LL | use _::a; | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/issue-110164.rs:8:5 + --> $DIR/issue-110164.rs:10:5 | LL | use _::*; | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/issue-110164.rs:13:9 + --> $DIR/issue-110164.rs:14:9 | LL | use _::a; | ^ expected identifier, found reserved identifier @@ -23,41 +23,17 @@ LL | use _::*; | ^ expected identifier, found reserved identifier error[E0432]: unresolved import `self::*` - --> $DIR/issue-110164.rs:1:5 + --> $DIR/issue-110164.rs:4:5 | LL | use self::*; | ^^^^^^^ cannot glob-import a module into itself error[E0432]: unresolved import `crate::*` - --> $DIR/issue-110164.rs:3:5 + --> $DIR/issue-110164.rs:6:5 | LL | use crate::*; | ^^^^^^^^ cannot glob-import a module into itself -error[E0432]: unresolved import `_` - --> $DIR/issue-110164.rs:8:5 - | -LL | use _::*; - | ^ `_` is not a valid crate or module name - -error[E0432]: unresolved import `_` - --> $DIR/issue-110164.rs:5:5 - | -LL | use _::a; - | ^ `_` is not a valid crate or module name - -error[E0432]: unresolved import `_` - --> $DIR/issue-110164.rs:13:9 - | -LL | use _::a; - | ^ `_` is not a valid crate or module name - -error[E0432]: unresolved import `_` - --> $DIR/issue-110164.rs:16:9 - | -LL | use _::*; - | ^ `_` is not a valid crate or module name - -error: 
aborting due to 10 previous errors +error: aborting due to 6 previous errors For more information about this error, try `rustc --explain E0432`. diff --git a/tests/ui/underscore-imports/issue-110164.ed2021.stderr b/tests/ui/underscore-imports/issue-110164.ed2021.stderr new file mode 100644 index 000000000000..f34b5ab5dde7 --- /dev/null +++ b/tests/ui/underscore-imports/issue-110164.ed2021.stderr @@ -0,0 +1,39 @@ +error: expected identifier, found reserved identifier `_` + --> $DIR/issue-110164.rs:8:5 + | +LL | use _::a; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/issue-110164.rs:10:5 + | +LL | use _::*; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/issue-110164.rs:14:9 + | +LL | use _::a; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/issue-110164.rs:16:9 + | +LL | use _::*; + | ^ expected identifier, found reserved identifier + +error[E0432]: unresolved import `self::*` + --> $DIR/issue-110164.rs:4:5 + | +LL | use self::*; + | ^^^^^^^ cannot glob-import a module into itself + +error[E0432]: unresolved import `crate::*` + --> $DIR/issue-110164.rs:6:5 + | +LL | use crate::*; + | ^^^^^^^^ cannot glob-import a module into itself + +error: aborting due to 6 previous errors + +For more information about this error, try `rustc --explain E0432`. 
diff --git a/tests/ui/underscore-imports/issue-110164.rs b/tests/ui/underscore-imports/issue-110164.rs index 6fd13414500d..bb080c5e471d 100644 --- a/tests/ui/underscore-imports/issue-110164.rs +++ b/tests/ui/underscore-imports/issue-110164.rs @@ -1,19 +1,18 @@ +//@ revisions: ed2015 ed2021 +//@[ed2015] edition: 2015 +//@[ed2021] edition: 2021 use self::*; //~^ ERROR unresolved import `self::*` use crate::*; //~^ ERROR unresolved import `crate::*` use _::a; //~^ ERROR expected identifier, found reserved identifier `_` -//~| ERROR unresolved import `_` use _::*; //~^ ERROR expected identifier, found reserved identifier `_` -//~| ERROR unresolved import `_` fn main() { use _::a; //~^ ERROR expected identifier, found reserved identifier `_` - //~| ERROR unresolved import `_` use _::*; //~^ ERROR expected identifier, found reserved identifier `_` - //~| ERROR unresolved import `_` } diff --git a/tests/ui/underscore-imports/multiple-uses.ed2015.stderr b/tests/ui/underscore-imports/multiple-uses.ed2015.stderr new file mode 100644 index 000000000000..a295586fa16f --- /dev/null +++ b/tests/ui/underscore-imports/multiple-uses.ed2015.stderr @@ -0,0 +1,49 @@ +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:4:9 + | +LL | pub use _::{a, b}; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:6:18 + | +LL | pub use std::{a, _}; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:9:18 + | +LL | pub use std::{b, _, c}; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:12:15 + | +LL | pub use std::{_, d}; + | ^ expected identifier, found reserved identifier + +error[E0432]: unresolved import `std::a` + --> $DIR/multiple-uses.rs:6:15 + | +LL | pub use std::{a, _}; + | ^ no `a` in 
the root + +error[E0432]: unresolved imports `std::b`, `std::c` + --> $DIR/multiple-uses.rs:9:15 + | +LL | pub use std::{b, _, c}; + | ^ ^ + | | | + | | no `c` in the root + | | help: a similar name exists in the module: `rc` + | no `b` in the root + +error[E0432]: unresolved import `std::d` + --> $DIR/multiple-uses.rs:12:18 + | +LL | pub use std::{_, d}; + | ^ no `d` in the root + +error: aborting due to 7 previous errors + +For more information about this error, try `rustc --explain E0432`. diff --git a/tests/ui/underscore-imports/multiple-uses.ed2021.stderr b/tests/ui/underscore-imports/multiple-uses.ed2021.stderr new file mode 100644 index 000000000000..a295586fa16f --- /dev/null +++ b/tests/ui/underscore-imports/multiple-uses.ed2021.stderr @@ -0,0 +1,49 @@ +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:4:9 + | +LL | pub use _::{a, b}; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:6:18 + | +LL | pub use std::{a, _}; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:9:18 + | +LL | pub use std::{b, _, c}; + | ^ expected identifier, found reserved identifier + +error: expected identifier, found reserved identifier `_` + --> $DIR/multiple-uses.rs:12:15 + | +LL | pub use std::{_, d}; + | ^ expected identifier, found reserved identifier + +error[E0432]: unresolved import `std::a` + --> $DIR/multiple-uses.rs:6:15 + | +LL | pub use std::{a, _}; + | ^ no `a` in the root + +error[E0432]: unresolved imports `std::b`, `std::c` + --> $DIR/multiple-uses.rs:9:15 + | +LL | pub use std::{b, _, c}; + | ^ ^ + | | | + | | no `c` in the root + | | help: a similar name exists in the module: `rc` + | no `b` in the root + +error[E0432]: unresolved import `std::d` + --> $DIR/multiple-uses.rs:12:18 + | +LL | pub use std::{_, d}; + | ^ no `d` in the root + +error: 
aborting due to 7 previous errors + +For more information about this error, try `rustc --explain E0432`. diff --git a/tests/ui/underscore-imports/multiple-uses.rs b/tests/ui/underscore-imports/multiple-uses.rs new file mode 100644 index 000000000000..31dd1862429e --- /dev/null +++ b/tests/ui/underscore-imports/multiple-uses.rs @@ -0,0 +1,16 @@ +//@ revisions: ed2015 ed2021 +//@[ed2015] edition: 2015 +//@[ed2021] edition: 2021 +pub use _::{a, b}; +//~^ ERROR expected identifier, found reserved identifier `_` +pub use std::{a, _}; +//~^ ERROR expected identifier, found reserved identifier `_` +//~| ERROR unresolved import `std::a` +pub use std::{b, _, c}; +//~^ ERROR expected identifier, found reserved identifier `_` +//~| ERROR unresolved imports `std::b`, `std::c` +pub use std::{_, d}; +//~^ ERROR expected identifier, found reserved identifier `_` +//~| ERROR unresolved import `std::d` + +fn main() {} diff --git a/tests/ui/unpretty/ast-const-trait-bound.rs b/tests/ui/unpretty/ast-const-trait-bound.rs index f4de86bb0d03..761bff87a628 100644 --- a/tests/ui/unpretty/ast-const-trait-bound.rs +++ b/tests/ui/unpretty/ast-const-trait-bound.rs @@ -1,4 +1,4 @@ //@ compile-flags: -Zunpretty=normal //@ check-pass -fn foo() where T: ~const Bar {} +fn foo() where T: [const] Bar {} diff --git a/tests/ui/unpretty/ast-const-trait-bound.stdout b/tests/ui/unpretty/ast-const-trait-bound.stdout index f4de86bb0d03..761bff87a628 100644 --- a/tests/ui/unpretty/ast-const-trait-bound.stdout +++ b/tests/ui/unpretty/ast-const-trait-bound.stdout @@ -1,4 +1,4 @@ //@ compile-flags: -Zunpretty=normal //@ check-pass -fn foo() where T: ~const Bar {} +fn foo() where T: [const] Bar {} diff --git a/tests/ui/unpretty/deprecated-attr.rs b/tests/ui/unpretty/deprecated-attr.rs index 0c80203e9652..e2ab5efb5d87 100644 --- a/tests/ui/unpretty/deprecated-attr.rs +++ b/tests/ui/unpretty/deprecated-attr.rs @@ -16,3 +16,8 @@ pub struct SinceAndNote; #[deprecated(note = "here's why this is deprecated", since = 
"1.2.3")] pub struct FlippedOrder; + +pub fn f() { + // Attribute is ignored here (with a warning), but still preserved in HIR + #[deprecated] 0 +} diff --git a/tests/ui/unpretty/deprecated-attr.stdout b/tests/ui/unpretty/deprecated-attr.stdout index 97d863b2e943..a2b645d00d06 100644 --- a/tests/ui/unpretty/deprecated-attr.stdout +++ b/tests/ui/unpretty/deprecated-attr.stdout @@ -24,3 +24,12 @@ struct SinceAndNote; #[attr = Deprecation {deprecation: Deprecation {since: NonStandard("1.2.3"), note: "here's why this is deprecated"}}] struct FlippedOrder; + +fn f() { + + // Attribute is ignored here (with a warning), but still preserved in HIR + #[attr = Deprecation {deprecation: + Deprecation {since: + Unspecified}}] + 0 +} diff --git a/tests/ui/unpretty/diagnostic-attr.stdout b/tests/ui/unpretty/diagnostic-attr.stdout index 81d71b91d815..3b15a845d68f 100644 --- a/tests/ui/unpretty/diagnostic-attr.stdout +++ b/tests/ui/unpretty/diagnostic-attr.stdout @@ -12,6 +12,4 @@ extern crate std; trait ImportantTrait { } #[diagnostic::do_not_recommend] -impl ImportantTrait for T where T: Clone - {#![diagnostic::do_not_recommend] -} +impl ImportantTrait for T where T: Clone { } diff --git a/tests/ui/unpretty/exhaustive-asm.hir.stdout b/tests/ui/unpretty/exhaustive-asm.hir.stdout index 810db69bff16..ec9bda573312 100644 --- a/tests/ui/unpretty/exhaustive-asm.hir.stdout +++ b/tests/ui/unpretty/exhaustive-asm.hir.stdout @@ -26,7 +26,7 @@ mod expressions { mod items { /// ItemKind::GlobalAsm - mod item_global_asm {/// ItemKind::GlobalAsm + mod item_global_asm { global_asm! 
(".globl my_asm_func"); } } diff --git a/tests/ui/unpretty/exhaustive.expanded.stdout b/tests/ui/unpretty/exhaustive.expanded.stdout index cd1a5d0af08a..9df027b69b2f 100644 --- a/tests/ui/unpretty/exhaustive.expanded.stdout +++ b/tests/ui/unpretty/exhaustive.expanded.stdout @@ -12,7 +12,6 @@ #![feature(auto_traits)] #![feature(box_patterns)] #![feature(builtin_syntax)] -#![feature(concat_idents)] #![feature(const_trait_impl)] #![feature(decl_macro)] #![feature(deref_patterns)] @@ -309,7 +308,6 @@ mod expressions { - // concat_idents is deprecated @@ -606,7 +604,7 @@ mod types { let _: impl Send + 'static; let _: impl 'static + Send; let _: impl ?Sized; - let _: impl ~const Clone; + let _: impl [const] Clone; let _: impl for<'a> Send; } /// TyKind::Paren @@ -622,8 +620,12 @@ mod types { /*! there is no syntax for this */ } /// TyKind::MacCall - #[expect(deprecated)] - fn ty_mac_call() { let _: T; let _: T; let _: T; } + fn ty_mac_call() { + macro_rules! ty { ($ty:ty) => { $ty } } + let _: T; + let _: T; + let _: T; + } /// TyKind::CVarArgs fn ty_c_var_args() { /*! 
FIXME: todo */ diff --git a/tests/ui/unpretty/exhaustive.hir.stderr b/tests/ui/unpretty/exhaustive.hir.stderr index 58f7ff0f5981..ac8079ae089e 100644 --- a/tests/ui/unpretty/exhaustive.hir.stderr +++ b/tests/ui/unpretty/exhaustive.hir.stderr @@ -1,17 +1,17 @@ error[E0697]: closures cannot be static - --> $DIR/exhaustive.rs:211:9 + --> $DIR/exhaustive.rs:210:9 | LL | static || value; | ^^^^^^^^^ error[E0697]: closures cannot be static - --> $DIR/exhaustive.rs:212:9 + --> $DIR/exhaustive.rs:211:9 | LL | static move || value; | ^^^^^^^^^^^^^^ error[E0728]: `await` is only allowed inside `async` functions and blocks - --> $DIR/exhaustive.rs:241:13 + --> $DIR/exhaustive.rs:240:13 | LL | fn expr_await() { | --------------- this is not `async` @@ -20,19 +20,19 @@ LL | fut.await; | ^^^^^ only allowed inside `async` functions and blocks error: in expressions, `_` can only be used on the left-hand side of an assignment - --> $DIR/exhaustive.rs:290:9 + --> $DIR/exhaustive.rs:289:9 | LL | _; | ^ `_` not allowed here error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:300:9 + --> $DIR/exhaustive.rs:299:9 | LL | x::(); | ^^^^^ only `Fn` traits may use parentheses error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:301:9 + --> $DIR/exhaustive.rs:300:9 | LL | x::(T, T) -> T; | ^^^^^^^^^^^^^^ only `Fn` traits may use parentheses @@ -44,31 +44,31 @@ LL + x:: -> T; | error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:302:9 + --> $DIR/exhaustive.rs:301:9 | LL | crate::() -> ()::expressions::() -> ()::expr_path; | ^^^^^^^^^^^^^^^ only `Fn` traits may use parentheses error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:302:26 + --> $DIR/exhaustive.rs:301:26 | LL | crate::() -> ()::expressions::() -> ()::expr_path; | ^^^^^^^^^^^^^^^^^^^^^ only `Fn` traits may use parentheses 
error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:305:9 + --> $DIR/exhaustive.rs:304:9 | LL | core::()::marker::()::PhantomData; | ^^^^^^^^ only `Fn` traits may use parentheses error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:305:19 + --> $DIR/exhaustive.rs:304:19 | LL | core::()::marker::()::PhantomData; | ^^^^^^^^^^ only `Fn` traits may use parentheses error: `yield` can only be used in `#[coroutine]` closures, or `gen` blocks - --> $DIR/exhaustive.rs:392:9 + --> $DIR/exhaustive.rs:391:9 | LL | yield; | ^^^^^ @@ -79,7 +79,7 @@ LL | #[coroutine] fn expr_yield() { | ++++++++++++ error[E0703]: invalid ABI: found `C++` - --> $DIR/exhaustive.rs:472:23 + --> $DIR/exhaustive.rs:471:23 | LL | unsafe extern "C++" {} | ^^^^^ invalid ABI @@ -87,7 +87,7 @@ LL | unsafe extern "C++" {} = note: invoke `rustc --print=calling-conventions` for a full list of supported calling conventions error: `..` patterns are not allowed here - --> $DIR/exhaustive.rs:679:13 + --> $DIR/exhaustive.rs:678:13 | LL | let ..; | ^^ @@ -95,13 +95,13 @@ LL | let ..; = note: only allowed in tuple, tuple struct, and slice patterns error[E0214]: parenthesized type parameters may only be used with a `Fn` trait - --> $DIR/exhaustive.rs:794:16 + --> $DIR/exhaustive.rs:793:16 | LL | let _: T() -> !; | ^^^^^^^^ only `Fn` traits may use parentheses error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/exhaustive.rs:809:16 + --> $DIR/exhaustive.rs:808:16 | LL | let _: impl Send; | ^^^^^^^^^ @@ -112,7 +112,7 @@ LL | let _: impl Send; = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/exhaustive.rs:810:16 + --> $DIR/exhaustive.rs:809:16 | LL | let _: impl Send + 'static; | ^^^^^^^^^^^^^^^^^^^ @@ -123,7 +123,7 @@ LL | let _: impl Send + 'static; = note: 
this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/exhaustive.rs:811:16 + --> $DIR/exhaustive.rs:810:16 | LL | let _: impl 'static + Send; | ^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ LL | let _: impl 'static + Send; = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/exhaustive.rs:812:16 + --> $DIR/exhaustive.rs:811:16 | LL | let _: impl ?Sized; | ^^^^^^^^^^^ @@ -145,10 +145,10 @@ LL | let _: impl ?Sized; = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/exhaustive.rs:813:16 + --> $DIR/exhaustive.rs:812:16 | -LL | let _: impl ~const Clone; - | ^^^^^^^^^^^^^^^^^ +LL | let _: impl [const] Clone; + | ^^^^^^^^^^^^^^^^^^ | = note: `impl Trait` is only allowed in arguments and return types of functions and methods = note: see issue #63065 for more information @@ -156,7 +156,7 @@ LL | let _: impl ~const Clone; = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date error[E0562]: `impl Trait` is not allowed in the type of variable bindings - --> $DIR/exhaustive.rs:814:16 + --> $DIR/exhaustive.rs:813:16 | LL | let _: impl for<'a> Send; | ^^^^^^^^^^^^^^^^^ diff --git a/tests/ui/unpretty/exhaustive.hir.stdout b/tests/ui/unpretty/exhaustive.hir.stdout index 5d6e3907d757..2d347ec6e88d 100644 --- a/tests/ui/unpretty/exhaustive.hir.stdout +++ b/tests/ui/unpretty/exhaustive.hir.stdout @@ -11,7 +11,6 @@ #![feature(auto_traits)] #![feature(box_patterns)] #![feature(builtin_syntax)] -#![feature(concat_idents)] #![feature(const_trait_impl)] #![feature(decl_macro)] #![feature(deref_patterns)] @@ -50,20 +49,14 @@ mod prelude { } } -//! inner single-line doc comment -/*! 
+/// inner single-line doc comment +/** * inner multi-line doc comment */ #[doc = "inner doc attribute"] #[allow(dead_code, unused_variables)] #[no_std] -mod attributes {//! inner single-line doc comment - /*! - * inner multi-line doc comment - */ - #![doc = "inner doc attribute"] - #![allow(dead_code, unused_variables)] - #![no_std] +mod attributes { /// outer single-line doc comment /** @@ -349,7 +342,6 @@ mod expressions { - // concat_idents is deprecated @@ -413,25 +405,25 @@ mod expressions { } mod items { /// ItemKind::ExternCrate - mod item_extern_crate {/// ItemKind::ExternCrate + mod item_extern_crate { extern crate core; extern crate self as unpretty; extern crate core as _; } /// ItemKind::Use - mod item_use {/// ItemKind::Use + mod item_use { use ::{}; use crate::expressions; use crate::items::item_use; use core::*; } /// ItemKind::Static - mod item_static {/// ItemKind::Static + mod item_static { static A: () = { }; static mut B: () = { }; } /// ItemKind::Const - mod item_const {/// ItemKind::Const + mod item_const { const A: () = { }; trait TraitItems { const @@ -445,7 +437,7 @@ mod items { } } /// ItemKind::Fn - mod item_fn {/// ItemKind::Fn + mod item_fn { const unsafe extern "C" fn f() { } async unsafe extern "C" fn g() -> @@ -460,21 +452,19 @@ mod items { } } /// ItemKind::Mod - mod item_mod {/// ItemKind::Mod - } + mod item_mod { } /// ItemKind::ForeignMod - mod item_foreign_mod {/// ItemKind::ForeignMod + mod item_foreign_mod { extern "Rust" { } extern "C" { } } /// ItemKind::GlobalAsm: see exhaustive-asm.rs /// ItemKind::TyAlias - mod item_ty_alias {/// ItemKind::GlobalAsm: see exhaustive-asm.rs - /// ItemKind::TyAlias + mod item_ty_alias { type Type<'a> where T: 'a = T; } /// ItemKind::Enum - mod item_enum {/// ItemKind::Enum + mod item_enum { enum Void { } enum Empty { Unit, @@ -490,7 +480,7 @@ mod items { } } /// ItemKind::Struct - mod item_struct {/// ItemKind::Struct + mod item_struct { struct Unit; struct Tuple(); struct Newtype(Unit); @@ 
-501,45 +491,40 @@ mod items { } } /// ItemKind::Union - mod item_union {/// ItemKind::Union + mod item_union { union Generic<'a, T> where T: 'a { t: T, } } /// ItemKind::Trait - mod item_trait {/// ItemKind::Trait + mod item_trait { auto unsafe trait Send { } trait Trait<'a>: Sized where Self: 'a { } } /// ItemKind::TraitAlias - mod item_trait_alias {/// ItemKind::TraitAlias + mod item_trait_alias { trait Trait = Sized where for<'a> T: 'a; } /// ItemKind::Impl - mod item_impl {/// ItemKind::Impl + mod item_impl { impl () { } impl () { } impl Default for () { } impl const Default for () { } } /// ItemKind::MacCall - mod item_mac_call {/// ItemKind::MacCall - } + mod item_mac_call { } /// ItemKind::MacroDef - mod item_macro_def {/// ItemKind::MacroDef + mod item_macro_def { macro_rules! mac { () => {...}; } macro stringify { () => {} } } /// ItemKind::Delegation - /*! FIXME: todo */ - mod item_delegation {/// ItemKind::Delegation - /*! FIXME: todo */ - } + /** FIXME: todo */ + mod item_delegation { } /// ItemKind::DelegationMac - /*! FIXME: todo */ - mod item_delegation_mac {/// ItemKind::DelegationMac - /*! FIXME: todo */ - } + /** FIXME: todo */ + mod item_delegation_mac { } } mod patterns { /// PatKind::Missing @@ -690,29 +675,33 @@ mod types { /// TyKind::Paren fn ty_paren() { let _: T; } /// TyKind::Typeof - /*! unused for now */ + /** unused for now */ fn ty_typeof() { } /// TyKind::Infer fn ty_infer() { let _: _; } /// TyKind::ImplicitSelf - /*! there is no syntax for this */ + /** there is no syntax for this */ fn ty_implicit_self() { } /// TyKind::MacCall - #[expect(deprecated)] - fn ty_mac_call() { let _: T; let _: T; let _: T; } + fn ty_mac_call() { + macro_rules! ty { ($ty:ty) => { $ty } } + let _: T; + let _: T; + let _: T; + } /// TyKind::CVarArgs - /*! 
FIXME: todo */ + /** FIXME: todo */ fn ty_c_var_args() { } /// TyKind::Pat fn ty_pat() { let _: u32 is 1..=RangeMax; } } mod visibilities { /// VisibilityKind::Public - mod visibility_public {/// VisibilityKind::Public + mod visibility_public { struct Pub; } /// VisibilityKind::Restricted - mod visibility_restricted {/// VisibilityKind::Restricted + mod visibility_restricted { struct PubCrate; struct PubSelf; struct PubSuper; diff --git a/tests/ui/unpretty/exhaustive.rs b/tests/ui/unpretty/exhaustive.rs index 60ad3564689d..5bf1118058cb 100644 --- a/tests/ui/unpretty/exhaustive.rs +++ b/tests/ui/unpretty/exhaustive.rs @@ -11,7 +11,6 @@ #![feature(auto_traits)] #![feature(box_patterns)] #![feature(builtin_syntax)] -#![feature(concat_idents)] #![feature(const_trait_impl)] #![feature(decl_macro)] #![feature(deref_patterns)] @@ -810,7 +809,7 @@ mod types { let _: impl Send + 'static; //[hir]~ ERROR `impl Trait` is not allowed let _: impl 'static + Send; //[hir]~ ERROR `impl Trait` is not allowed let _: impl ?Sized; //[hir]~ ERROR `impl Trait` is not allowed - let _: impl ~const Clone; //[hir]~ ERROR `impl Trait` is not allowed + let _: impl [const] Clone; //[hir]~ ERROR `impl Trait` is not allowed let _: impl for<'a> Send; //[hir]~ ERROR `impl Trait` is not allowed } @@ -835,11 +834,13 @@ mod types { } /// TyKind::MacCall - #[expect(deprecated)] // concat_idents is deprecated fn ty_mac_call() { - let _: concat_idents!(T); - let _: concat_idents![T]; - let _: concat_idents! { T }; + macro_rules! ty { + ($ty:ty) => { $ty } + } + let _: ty!(T); + let _: ty![T]; + let _: ty! { T }; } /// TyKind::CVarArgs diff --git a/tests/ui/unsized-locals/yote.rs b/tests/ui/unsized-locals/yote.rs index aa5b68a30782..1de75a6ce617 100644 --- a/tests/ui/unsized-locals/yote.rs +++ b/tests/ui/unsized-locals/yote.rs @@ -1,4 +1,2 @@ -//@ normalize-stderr: "you are using [0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9]+)?( \([^)]*\))?" 
-> "you are using $$RUSTC_VERSION" - #![feature(unsized_locals)] //~ERROR feature has been removed #![crate_type = "lib"] diff --git a/tests/ui/unsized-locals/yote.stderr b/tests/ui/unsized-locals/yote.stderr index 655aad5360cc..8e7da64038a4 100644 --- a/tests/ui/unsized-locals/yote.stderr +++ b/tests/ui/unsized-locals/yote.stderr @@ -1,10 +1,10 @@ error[E0557]: feature has been removed - --> $DIR/yote.rs:3:12 + --> $DIR/yote.rs:1:12 | LL | #![feature(unsized_locals)] | ^^^^^^^^^^^^^^ feature has been removed | - = note: removed in CURRENT_RUSTC_VERSION (you are using $RUSTC_VERSION) + = note: removed in CURRENT_RUSTC_VERSION = note: removed due to implementation concerns; see https://github.com/rust-lang/rust/issues/111942 error: aborting due to 1 previous error diff --git a/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.rs b/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.rs index 53f07a94fd15..ad7d972879ff 100644 --- a/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.rs +++ b/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.rs @@ -5,7 +5,6 @@ trait Trait { //~^ ERROR the name `N` is already used for a generic parameter in this item's generic parameters //~| ERROR expected value, found builtin type `u32` //~| ERROR defaults for const parameters are only allowed in `struct`, `enum`, `type`, or `trait` definitions - //~| ERROR associated item referring to unboxed trait object for its own trait bar //~^ ERROR cannot find value `bar` in this scope } diff --git a/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.stderr b/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.stderr index a085dd6ac576..e10bb98c1349 100644 --- a/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.stderr +++ b/tests/ui/wf/ice-hir-wf-check-anon-const-issue-122199.stderr @@ -20,7 +20,7 @@ LL | fn fnc(&self) -> dyn Trait { | ^^^ not a value error[E0425]: cannot find value `bar` in this scope - --> $DIR/ice-hir-wf-check-anon-const-issue-122199.rs:9:9 + --> 
$DIR/ice-hir-wf-check-anon-const-issue-122199.rs:8:9 | LL | bar | ^^^ not found in this scope @@ -45,22 +45,7 @@ error: defaults for const parameters are only allowed in `struct`, `enum`, `type LL | fn fnc(&self) -> dyn Trait { | ^^^^^^^^^^^^^^^^^^^^^^^^ -error: associated item referring to unboxed trait object for its own trait - --> $DIR/ice-hir-wf-check-anon-const-issue-122199.rs:4:48 - | -LL | trait Trait { - | ----- in this trait -... -LL | fn fnc(&self) -> dyn Trait { - | ^^^^^^^^^ - | -help: you might have meant to use `Self` to refer to the implementing type - | -LL - fn fnc(&self) -> dyn Trait { -LL + fn fnc(&self) -> Self { - | - -error: aborting due to 7 previous errors +error: aborting due to 6 previous errors Some errors have detailed explanations: E0391, E0403, E0423, E0425. For more information about an error, try `rustc --explain E0391`. diff --git a/tests/ui/wf/issue-87495.stderr b/tests/ui/wf/issue-87495.stderr index 0c293e3576d6..bf79535df116 100644 --- a/tests/ui/wf/issue-87495.stderr +++ b/tests/ui/wf/issue-87495.stderr @@ -13,6 +13,11 @@ LL | trait T { LL | const CONST: (bool, dyn T); | ^^^^^ ...because it contains this associated `const` = help: consider moving `CONST` to another trait +help: you might have meant to use `Self` to refer to the implementing type + | +LL - const CONST: (bool, dyn T); +LL + const CONST: (bool, Self); + | error: aborting due to 1 previous error diff --git a/triagebot.toml b/triagebot.toml index 98eb99d9c607..6385528e7b6e 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -1023,6 +1023,10 @@ Otherwise, you can ignore this comment. [mentions."src/tools/x"] message = "`src/tools/x` was changed. Bump version of Cargo.toml in `src/tools/x` so tidy will suggest installing the new version." +[mentions."src/tools/tidy"] +message = "There are changes to the `tidy` tool." +cc = ["@jieyouxu"] + [mentions."src/tools/tidy/src/deps.rs"] message = "The list of allowed third-party dependencies may have been modified! 
You must ensure that any new dependencies have compatible licenses before merging." cc = ["@davidtwco", "@wesleywiser"] @@ -1164,7 +1168,7 @@ cc = ["@ehuss"] [mentions."src/doc/rustc-dev-guide"] message = "The rustc-dev-guide subtree was changed. If this PR *only* touches the dev guide consider submitting a PR directly to [rust-lang/rustc-dev-guide](https://github.com/rust-lang/rustc-dev-guide/pulls) otherwise thank you for updating the dev guide with your changes." -cc = ["@BoxyUwU", "@jieyouxu", "@kobzol"] +cc = ["@BoxyUwU", "@jieyouxu", "@kobzol", "@tshepang"] [mentions."compiler/rustc_codegen_ssa/src/codegen_attrs.rs"] cc = ["@jdonszelmann"]